Handle fork in target application (#191)

* Always print PID in log messages

* omnitrace-dl updates

- omnitrace_preload does not call omnitrace_init or omnitrace_init_tooling
- omnitrace_preload will call omnitrace_set_mpi if OMNITRACE_USE_MPI
  or OMNITRACE_USE_MPIP is true in the env, but will not call it
  otherwise, because doing so either overrides OMNITRACE_USE_PID (when
  true) or disables mpip from initialization (when false), and the MPI
  init can be caught later and override OMNITRACE_USE_PID

* config updates

- set_setting_value sets user update type
- remove volatile from get_settings_configured
- don't override settings::default_process_suffix
- don't kill process in omnitrace_exit_action
- set_state ignores updating state if >= State::Finalized

* Handle state > State::Finalized

* fork gotcha updates

- unsets LD_PRELOAD
- sets OMNITRACE_ROOT_PROCESS
- sets OMNITRACE_CHILD_PROCESS

* libomnitrace library.cpp updates

- basic_bundle for fini metrics
- handle finalization from child process

* sampling updates

- sampling::shutdown handles when child process

* Add example and test using fork

* Update run-ci script to support not submitting

* Tweak test envs

* Update build flags when codecov enabled

* remove unnecessary includes of sampling header

* Replace mpi copy/fini static lambda with free-funcs

* Update codecov job

* Fix OMPT segfaults after finalization

* Miscellaneous updates after rebase

* fixes for causal profiling

* revert some run-ci.sh changes

* Disable storing env in sampling::shutdown

* formatting fix

* Update timemory submodule

- fixed occasional synchronization issues with allocator offloading
- exclude protozero:: from internal samples

* improve root/child process detection

- avoid omnitrace_finalize in MPI when child process
- revert some testing tweaks
This commit is contained in:
Jonathan R. Madsen
2023-02-08 01:31:38 -06:00
کامیت شده توسط GitHub
والد 0da62c980e
کامیت 32b15fe7b7
39فایلهای تغییر یافته به همراه440 افزوده شده و 133 حذف شده
@@ -589,6 +589,8 @@ jobs:
-DOMNITRACE_BUILD_CI=OFF
-DOMNITRACE_BUILD_TESTING=ON
-DOMNITRACE_BUILD_DYNINST=OFF
-DOMNITRACE_BUILD_DEBUG=ON
-DOMNITRACE_BUILD_HIDDEN_VISIBILITY=OFF
-DOMNITRACE_USE_MPI=ON
-DOMNITRACE_USE_PYTHON=ON
-DOMNITRACE_USE_OMPT=ON
+28 -24
مشاهده پرونده
@@ -19,7 +19,6 @@ omnitrace_add_option(OMNITRACE_BUILD_EXTRA_OPTIMIZATIONS "Extra optimization fla
omnitrace_add_option(OMNITRACE_BUILD_LTO "Build with link-time optimization" OFF)
omnitrace_add_option(OMNITRACE_USE_COMPILE_TIMING
"Build with timing metrics for compilation" OFF)
omnitrace_add_option(OMNITRACE_USE_COVERAGE "Build with code-coverage flags" OFF)
# Toggle for sanitizer builds (off by default). The variable reference in the
# description is escaped so the help text shows it literally.
omnitrace_add_option(OMNITRACE_USE_SANITIZER
                     "Build with -fsanitize=\${OMNITRACE_SANITIZER_TYPE}" OFF)
omnitrace_add_option(OMNITRACE_BUILD_STATIC_LIBGCC
@@ -145,13 +144,15 @@ endif()
# non-debug optimizations
#
omnitrace_add_interface_library(omnitrace-compile-extra "Extra optimization flags")
if(NOT OMNITRACE_USE_COVERAGE AND OMNITRACE_BUILD_EXTRA_OPTIMIZATIONS)
if(NOT OMNITRACE_BUILD_CODECOV AND OMNITRACE_BUILD_EXTRA_OPTIMIZATIONS)
add_target_flag_if_avail(
omnitrace-compile-extra "-finline-functions" "-funroll-loops" "-ftree-vectorize"
"-ftree-loop-optimize" "-ftree-loop-vectorize")
endif()
if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "Debug" AND OMNITRACE_BUILD_EXTRA_OPTIMIZATIONS)
if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "Debug"
AND OMNITRACE_BUILD_EXTRA_OPTIMIZATIONS
AND NOT OMNITRACE_BUILD_CODECOV)
target_link_libraries(omnitrace-compile-options
INTERFACE $<BUILD_INTERFACE:omnitrace-compile-extra>)
add_flag_if_avail(
@@ -166,29 +167,32 @@ endif()
#
add_cxx_flag_if_avail("-faligned-new")
omnitrace_save_variables(FLTO VARIABLES CMAKE_CXX_FLAGS)
set(_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "-flto=thin ${_CXX_FLAGS}")
omnitrace_add_interface_library(omnitrace-lto "Adds link-time-optimization flags")
add_target_flag_if_avail(omnitrace-lto "-flto=thin")
if(NOT cxx_omnitrace_lto_flto_thin)
set(CMAKE_CXX_FLAGS "-flto ${_CXX_FLAGS}")
add_target_flag_if_avail(omnitrace-lto "-flto")
if(NOT cxx_omnitrace_lto_flto)
set(OMNITRACE_BUILD_LTO OFF)
else()
target_link_options(omnitrace-lto INTERFACE -flto)
endif()
add_target_flag_if_avail(omnitrace-lto "-fno-fat-lto-objects")
if(cxx_omnitrace_lto_fno_fat_lto_objects)
target_link_options(omnitrace-lto INTERFACE -fno-fat-lto-objects)
endif()
else()
target_link_options(omnitrace-lto INTERFACE -flto=thin)
endif()
omnitrace_restore_variables(FLTO VARIABLES CMAKE_CXX_FLAGS)
# Populate the omnitrace-lto interface target with link-time-optimization flags.
# The whole probe is skipped when OMNITRACE_BUILD_CODECOV is set.
if(NOT OMNITRACE_BUILD_CODECOV)
# CMAKE_CXX_FLAGS is altered below; it is saved here and restored at the end
# (presumably by the matching omnitrace_save/restore_variables pair -- confirm).
omnitrace_save_variables(FLTO VARIABLES CMAKE_CXX_FLAGS)
set(_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
# first choice: -flto=thin
set(CMAKE_CXX_FLAGS "-flto=thin ${_CXX_FLAGS}")
add_target_flag_if_avail(omnitrace-lto "-flto=thin")
# NOTE(review): cxx_omnitrace_lto_* look like result variables set by
# add_target_flag_if_avail -- confirm against that macro's definition.
if(NOT cxx_omnitrace_lto_flto_thin)
# fallback: plain -flto
set(CMAKE_CXX_FLAGS "-flto ${_CXX_FLAGS}")
add_target_flag_if_avail(omnitrace-lto "-flto")
if(NOT cxx_omnitrace_lto_flto)
# neither LTO flavor was accepted: turn the LTO build option off
set(OMNITRACE_BUILD_LTO OFF)
else()
target_link_options(omnitrace-lto INTERFACE -flto)
endif()
# -fno-fat-lto-objects is only probed on the plain -flto path
add_target_flag_if_avail(omnitrace-lto "-fno-fat-lto-objects")
if(cxx_omnitrace_lto_fno_fat_lto_objects)
target_link_options(omnitrace-lto INTERFACE -fno-fat-lto-objects)
endif()
else()
target_link_options(omnitrace-lto INTERFACE -flto=thin)
endif()
omnitrace_restore_variables(FLTO VARIABLES CMAKE_CXX_FLAGS)
endif()
# ----------------------------------------------------------------------------------------#
# print compilation timing reports (Clang compiler)
+1
مشاهده پرونده
@@ -54,3 +54,4 @@ add_subdirectory(rccl)
add_subdirectory(rewrite-caller)
add_subdirectory(causal)
add_subdirectory(trace-time-window)
add_subdirectory(fork)
@@ -0,0 +1,18 @@
# Build description for the fork() example program.
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)

project(omnitrace-fork LANGUAGES CXX)

# The example is always configured as RelWithDebInfo.
set(CMAKE_BUILD_TYPE "RelWithDebInfo")

find_package(Threads REQUIRED)

# Split the space-separated debug flag string into a list so that each flag
# is passed to target_compile_options() as its own argument.
string(REPLACE " " ";" _fork_debug_flags "${CMAKE_CXX_FLAGS_DEBUG}")

add_executable(fork-example fork.cpp)
target_compile_options(fork-example PRIVATE ${_fork_debug_flags})
target_link_libraries(fork-example PRIVATE Threads::Threads)

# Installation is opt-in via OMNITRACE_INSTALL_EXAMPLES.
if(OMNITRACE_INSTALL_EXAMPLES)
    install(
        TARGETS fork-example
        DESTINATION bin
        COMPONENT omnitrace-examples)
endif()
+78
مشاهده پرونده
@@ -0,0 +1,78 @@
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <sys/wait.h>
#include <thread>
#include <unistd.h>
// Print the calling process' PID and its parent's PID, prefixed with the
// label `_name`.
void
print_info(const char* _name)
{
    const pid_t _self   = getpid();
    const pid_t _parent = getppid();
    printf("[%s] pid = %i, ppid = %i\n", _name, _self, _parent);
}
// Fork `nchildren` child processes, then wait for every child of this process
// and print how each one terminated.
//
//  _name     : label printed as the prefix of every output line
//  nchildren : number of children to fork; child `i` sleeps for `i + 1` seconds
//              (in a thread of its own) and then calls exit(EXIT_SUCCESS)
//
// Returns EXIT_SUCCESS if every fork() succeeded and every reaped child exited
// normally with status 0, otherwise EXIT_FAILURE. The raw wait() status is not
// returned because, used as a process exit code, only its low 8 bits survive
// and a child that exited with a non-zero code would be reported as success.
int
run(const char* _name, int nchildren)
{
    int _nfailed = 0;

    for(int i = 0; i < nchildren; ++i)
    {
        auto _run = [i, _name, &_nfailed]() {
            pid_t _pid = fork();
            if(_pid == 0)
            {
                // child code
                print_info(_name);
                auto _sleep = [=]() {
                    std::this_thread::sleep_for(std::chrono::seconds{ i + 1 });
                };
                std::thread{ _sleep }.join();
                exit(EXIT_SUCCESS);
            }
            else if(_pid < 0)
            {
                // fork failed: no child was created, report it and remember it
                perror("fork");
                ++_nfailed;
            }
        };
        // fork() is issued from a secondary thread which is joined right away,
        // so _nfailed is never accessed concurrently
        std::thread{ _run }.join();
    }

    int   _status   = 0;
    pid_t _wait_pid = 0;
    // parent waits for all the child processes
    while((_wait_pid = wait(&_status)) > 0)
    {
        printf("[%s][%i] returned from wait with pid = %i :: ", _name, getpid(),
               _wait_pid);
        if(WIFEXITED(_status))
        {
            printf("exited, status=%d\n", WEXITSTATUS(_status));
            if(WEXITSTATUS(_status) != EXIT_SUCCESS) ++_nfailed;
        }
        else if(WIFSIGNALED(_status))
        {
            printf("killed by signal %d\n", WTERMSIG(_status));
            ++_nfailed;
        }
        else if(WIFSTOPPED(_status))
        {
            printf("stopped by signal %d\n", WSTOPSIG(_status));
        }
        else if(WIFCONTINUED(_status))
        {
            printf("continued\n");
        }
        else
        {
            printf("unknown\n");
        }
    }

    return (_nfailed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}
// Entry point: the optional first argument is the number of children to fork
// (4 when omitted). Prints this process' info and returns the result of run().
int
main(int argc, char** argv)
{
    const int _nchildren = (argc > 1) ? std::stoi(argv[1]) : 4;

    print_info(argv[0]);

    return run(argv[0], _nchildren);
}
فروخته شده
+1 -1
+21 -10
مشاهده پرونده
@@ -200,51 +200,62 @@ set(CTEST_BINARY_DIRECTORY ${BINARY_DIR})
set(CTEST_UPDATE_COMMAND ${GIT_CMD})
set(CTEST_CONFIGURE_COMMAND "${CMAKE_CMD} -B ${BINARY_DIR} ${SOURCE_DIR} -DOMNITRACE_BUILD_CI=ON ${CMAKE_ARGS}")
set(CTEST_BUILD_COMMAND "${CMAKE_CMD} --build ${BINARY_DIR} --target all")
set(CTEST_BUILD_COMMAND "${CMAKE_CMD} --build ${BINARY_DIR} --target all --parallel ${CMAKE_BUILD_PARALLEL_LEVEL}")
set(CTEST_COVERAGE_COMMAND ${GCOV_CMD})
EOF
verbose-run cd ${BINARY_DIR}
cat << EOF > dashboard.cmake
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
include("\${CMAKE_CURRENT_LIST_DIR}/CTestCustom.cmake")
set(_STAGES ${DASHBOARD_STAGES})
# Forward the given arguments to ctest_submit, but only when the Submit stage
# is listed in _STAGES. A failed submission is reported as a warning.
# The backslash before ARGN keeps the shell from expanding it while this
# file is written, so that CMake receives the macro arguments.
macro(handle_submit)
    if("Submit" IN_LIST _STAGES)
        ctest_submit(
            \${ARGN}
            CAPTURE_CMAKE_ERROR _submit_err)
        if(NOT \${_submit_err} EQUAL 0)
            message(WARNING "Submission failed: ctest_submit(\${ARGN})")
        endif()
    endif()
endmacro()
macro(handle_error _message _ret)
if(NOT \${\${_ret}} EQUAL 0)
ctest_submit(PARTS Done RETURN_VALUE _submit_ret)
handle_submit(PARTS Done RETURN_VALUE _submit_ret)
message(FATAL_ERROR "\${_message} failed: \${\${_ret}}")
endif()
endmacro()
ctest_start(${DASHBOARD_MODE})
ctest_update(SOURCE "${SOURCE_DIR}")
ctest_submit(PARTS Start Update RETURN_VALUE _submit_ret)
ctest_configure(BUILD "${BINARY_DIR}" RETURN_VALUE _configure_ret)
ctest_submit(PARTS Configure RETURN_VALUE _submit_ret)
handle_submit(PARTS Start Update Configure RETURN_VALUE _submit_ret)
handle_error("Configure" _configure_ret)
ctest_build(BUILD "${BINARY_DIR}" RETURN_VALUE _build_ret)
ctest_submit(PARTS Build RETURN_VALUE _submit_ret)
handle_submit(PARTS Build RETURN_VALUE _submit_ret)
handle_error("Build" _build_ret)
ctest_test(BUILD "${BINARY_DIR}" RETURN_VALUE _test_ret)
ctest_submit(PARTS Test RETURN_VALUE _submit_ret)
handle_submit(PARTS Test RETURN_VALUE _submit_ret)
if("${CODECOV}" GREATER 0)
ctest_coverage(
BUILD "${BINARY_DIR}"
RETURN_VALUE _coverage_ret
CAPTURE_CMAKE_ERROR _coverage_err)
ctest_submit(PARTS Coverage RETURN_VALUE _submit_ret)
handle_submit(PARTS Coverage RETURN_VALUE _submit_ret)
endif()
handle_error("Testing" _test_ret)
ctest_submit(PARTS Done RETURN_VALUE _submit_ret)
handle_submit(PARTS Done RETURN_VALUE _submit_ret)
EOF
verbose-run cat CTestCustom.cmake
@@ -29,6 +29,7 @@
#include "get_availability.hpp"
#include "info_type.hpp"
#include "api.hpp"
#include "core/config.hpp"
#include "core/gpu.hpp"
#include "library/rocprofiler.hpp"
+11 -9
مشاهده پرونده
@@ -29,6 +29,7 @@
#include <cstring>
#include <functional>
#include <string>
#include <unistd.h>
#if !defined(OMNITRACE_COMMON_LIBRARY_NAME)
# define OMNITRACE_COMMON_LIBRARY_NAME "common"
@@ -77,9 +78,9 @@ ignore(const char* _name, int _verbose, int _value, const char* _reason, Args...
fflush(stderr);
fprintf(stderr,
"[omnitrace][" OMNITRACE_COMMON_LIBRARY_NAME
"][%li] %s(%s) was ignored :: %s\n",
get_thread_index(), _name, join(QuoteStrings{}, ", ", _args...).c_str(),
_reason);
"][%i][%li] %s(%s) was ignored :: %s\n",
getpid(), get_thread_index(), _name,
join(QuoteStrings{}, ", ", _args...).c_str(), _reason);
fflush(stderr);
}
}
@@ -109,8 +110,8 @@ invoke(const char* _name, int _verbose, bool& _toggle, FuncT&& _func, Args... _a
OMNITRACE_COMMON_LIBRARY_LOG_START
fprintf(stderr,
"[omnitrace][" OMNITRACE_COMMON_LIBRARY_NAME
"][%li][%i] %s(%s)\n",
get_thread_index(), _lk, _name,
"][%i][%li][%i] %s(%s)\n",
getpid(), get_thread_index(), _lk, _name,
join(QuoteStrings{}, ", ", _args...).c_str());
OMNITRACE_COMMON_LIBRARY_LOG_END
fflush(stderr);
@@ -123,8 +124,8 @@ invoke(const char* _name, int _verbose, bool& _toggle, FuncT&& _func, Args... _a
OMNITRACE_COMMON_LIBRARY_LOG_START
fprintf(stderr,
"[omnitrace][" OMNITRACE_COMMON_LIBRARY_NAME
"][%li] %s(%s) was guarded :: value = %i\n",
get_thread_index(), _name,
"][%i][%li] %s(%s) was guarded :: value = %i\n",
getpid(), get_thread_index(), _name,
join(QuoteStrings{}, ", ", _args...).c_str(), _lk);
OMNITRACE_COMMON_LIBRARY_LOG_END
fflush(stderr);
@@ -135,8 +136,9 @@ invoke(const char* _name, int _verbose, bool& _toggle, FuncT&& _func, Args... _a
OMNITRACE_COMMON_LIBRARY_LOG_START
fprintf(stderr,
"[omnitrace][" OMNITRACE_COMMON_LIBRARY_NAME
"][%li] %s(%s) ignored :: null function pointer\n",
get_thread_index(), _name, join(QuoteStrings{}, ", ", _args...).c_str());
"][%i][%li] %s(%s) ignored :: null function pointer\n",
getpid(), get_thread_index(), _name,
join(QuoteStrings{}, ", ", _args...).c_str());
OMNITRACE_COMMON_LIBRARY_LOG_END
}
+8 -8
مشاهده پرونده
@@ -227,20 +227,18 @@ finalize()
bool
settings_are_configured()
{
volatile bool _v = _settings_are_configured();
return _v;
return _settings_are_configured();
}
void
configure_settings(bool _init)
{
volatile bool _v = _settings_are_configured();
if(_v) return;
static bool _once = false;
if(_once) return;
_once = true;
if(settings_are_configured()) return;
if(get_is_continuous_integration() && get_state() < State::Init)
{
timemory_print_demangled_backtrace<64>();
@@ -2192,9 +2190,11 @@ get_perfetto_output_filename()
_ext = _val.substr(_pos_ext + 1);
_val = _val.substr(0, _pos_ext);
}
_val = settings::compose_output_filename(_val, _ext, settings::use_output_suffix(),
settings::default_process_suffix(), false,
_dir);
auto _cfg = settings::compose_filename_config{ settings::use_output_suffix(),
settings::default_process_suffix(),
false, _dir };
_val = settings::compose_output_filename(_val, _ext, _cfg);
if(!_val.empty() && _val.at(0) != '/')
return settings::format(JOIN('/', "%env{PWD}%", _val), get_config()->get_tag());
return _val;
+6 -1
مشاهده پرونده
@@ -100,11 +100,16 @@ template <typename Tp>
bool
set_setting_value(const std::string& _name, Tp&& _v)
{
auto _user_upd = tim::settings::update_type::user;
auto _instance = tim::settings::shared_instance();
auto _setting = _instance->find(_name);
if(_setting == _instance->end()) return false;
if(!_setting->second) return false;
return _setting->second->set(std::forward<Tp>(_v));
auto& itr = _setting->second;
auto _upd = itr->set_user_updated();
auto _success = itr->set(std::forward<Tp>(_v), _user_upd);
if(!_success) itr->set_updated(_upd);
return _success;
}
template <typename Tp>
+46 -5
مشاهده پرونده
@@ -25,6 +25,7 @@
#include "state.hpp"
#include <timemory/log/color.hpp>
#include <timemory/process/threading.hpp>
#include <timemory/utility/filepath.hpp>
#include <iomanip>
@@ -43,6 +44,29 @@ struct source_location_history
size_t size = 0;
};
// Returns the value of the OMNITRACE_LOG_FILE environment variable (empty when
// unset). The environment is queried once, on the first call.
const std::string&
get_file_name()
{
    static const auto _log_file =
        tim::get_env<std::string>("OMNITRACE_LOG_FILE", "");
    return _log_file;
}
// Returns the atomic holding the stream used for log output. It is initialized
// on first use: stderr when no log file name is configured, otherwise a file
// opened for writing whose name is produced by settings::format() from the
// configured name and the basename of the resolved /proc/self/exe.
std::atomic<FILE*>&
get_file_pointer()
{
    static auto _v = std::atomic<FILE*>{ []() -> FILE* {
        const auto& _log_name = get_file_name();
        if(_log_name.empty()) return stderr;

        // logging to a file: enable timemory's monochrome log setting
        tim::log::monochrome() = true;
        return filepath::fopen(
            settings::format(_log_name, filepath::basename(filepath::realpath(
                                            "/proc/self/exe", nullptr, false))),
            "w");
    }() };
    return _v;
}
auto&
get_source_location_history()
{
@@ -88,11 +112,28 @@ lock::~lock()
FILE*
get_file()
{
static FILE* _v = []() {
auto&& _fname = tim::get_env<std::string>("OMNITRACE_LOG_FILE", "");
if(!_fname.empty()) tim::log::monochrome() = true;
return (_fname.empty()) ? stderr : tim::filepath::fopen(_fname, "w");
}();
return get_file_pointer();
}
// Close the log file (if one is in use) and redirect subsequent log output to
// stderr. Does nothing when logging already goes to stderr.
void
close_file()
{
    // swap in stderr and fetch the previous stream in a single atomic step so
    // that two concurrent callers cannot both obtain (and close) the same handle
    auto* _file = get_file_pointer().exchange(stderr);

    // nothing to close: either no log file was configured or, presumably, the
    // open failed and left a null handle (fclose on a null pointer is undefined)
    if(_file == nullptr || _file == stderr) return;

    fclose(_file);

    // report the log file via timemory's file output message, labeled "debug"
    if(get_verbose() >= 0)
        operation::file_output_message<tim::project::omnitrace>{}(
            get_file_name(), std::string{ "debug" });
}
// Returns threading::get_id() for the calling thread; the value is looked up
// once per thread and cached in a thread-local.
int64_t
get_tid()
{
    static thread_local const auto _tid = threading::get_id();
    return _tid;
}
} // namespace debug
+7 -5
مشاهده پرونده
@@ -86,6 +86,12 @@ set_source_location(source_location&&);
FILE*
get_file();
//
void
close_file();
//
int64_t
get_tid();
//
inline void
flush()
{
@@ -168,7 +174,7 @@ as_hex<void*>(void*, size_t);
#endif
#if !defined(OMNITRACE_DEBUG_THREAD_IDENTIFIER)
# define OMNITRACE_DEBUG_THREAD_IDENTIFIER ::tim::threading::get_id()
# define OMNITRACE_DEBUG_THREAD_IDENTIFIER ::omnitrace::debug::get_tid()
#endif
#if !defined(OMNITRACE_SOURCE_LOCATION)
@@ -394,7 +400,6 @@ as_hex<void*>(void*, size_t);
fprintf(::omnitrace::debug::get_file(), __VA_ARGS__); \
::omnitrace::debug::flush(); \
::omnitrace::set_state(::omnitrace::State::Finalized); \
::tim::signals::disable_signal_detection(); \
timemory_print_demangled_backtrace<64>(); \
METHOD; \
}
@@ -410,7 +415,6 @@ as_hex<void*>(void*, size_t);
fprintf(::omnitrace::debug::get_file(), __VA_ARGS__); \
::omnitrace::debug::flush(); \
::omnitrace::set_state(::omnitrace::State::Finalized); \
::tim::signals::disable_signal_detection(); \
timemory_print_demangled_backtrace<64>(); \
METHOD; \
}
@@ -427,7 +431,6 @@ as_hex<void*>(void*, size_t);
fprintf(::omnitrace::debug::get_file(), __VA_ARGS__); \
::omnitrace::debug::flush(); \
::omnitrace::set_state(::omnitrace::State::Finalized); \
::tim::signals::disable_signal_detection(); \
timemory_print_demangled_backtrace<64>(); \
METHOD; \
}
@@ -443,7 +446,6 @@ as_hex<void*>(void*, size_t);
fprintf(::omnitrace::debug::get_file(), __VA_ARGS__); \
::omnitrace::debug::flush(); \
::omnitrace::set_state(::omnitrace::State::Finalized); \
::tim::signals::disable_signal_detection(); \
timemory_print_demangled_backtrace<64>(); \
METHOD; \
}
+25 -3
مشاهده پرونده
@@ -128,6 +128,16 @@ reset_omnitrace_preload()
}
}
// Publishes this process' PID as OMNITRACE_ROOT_PROCESS unless the variable is
// already present in the environment, then returns the variable's value
// (falling back to this process' PID).
inline pid_t
get_omnitrace_root_pid()
{
    auto       _self     = getpid();
    const auto _self_str = std::to_string(_self);

    // overwrite argument is 0: an existing value is left untouched
    setenv("OMNITRACE_ROOT_PROCESS", _self_str.c_str(), 0);

    return get_env("OMNITRACE_ROOT_PROCESS", _self);
}
pid_t _omnitrace_root_pid = get_omnitrace_root_pid();
// environment priority:
// - OMNITRACE_DL_DEBUG
// - OMNITRACE_DL_VERBOSE
@@ -538,7 +548,9 @@ bool _omnitrace_dl_fini = (std::atexit([]() {
{ \
fflush(stderr); \
OMNITRACE_COMMON_LIBRARY_LOG_START \
fprintf(stderr, "[omnitrace][" OMNITRACE_COMMON_LIBRARY_NAME "] " __VA_ARGS__); \
fprintf(stderr, "[omnitrace][" OMNITRACE_COMMON_LIBRARY_NAME "][%i] ", \
getpid()); \
fprintf(stderr, __VA_ARGS__); \
OMNITRACE_COMMON_LIBRARY_LOG_END \
fflush(stderr); \
}
@@ -1040,7 +1052,6 @@ extern "C"
ompt_start_tool_result_t* ompt_start_tool(unsigned int omp_version,
const char* runtime_version)
{
if(!omnitrace::common::get_env("OMNITRACE_USE_OMPT", true)) return nullptr;
return OMNITRACE_DL_INVOKE(get_indirect().ompt_start_tool_f, omp_version,
runtime_version);
}
@@ -1060,6 +1071,7 @@ bool
omnitrace_preload()
{
auto _preload = get_omnitrace_preload() && get_env("OMNITRACE_ENABLED", true);
auto _use_mpi = get_env("OMNITRACE_USE_MPI", get_env("OMNITRACE_USE_MPIP", false));
static bool _once = false;
if(_once) return _preload;
@@ -1067,7 +1079,7 @@ omnitrace_preload()
if(_preload)
{
// reset_omnitrace_preload();
reset_omnitrace_preload();
omnitrace_preinit_library();
auto _causal = get_env("OMNITRACE_USE_CAUSAL", false);
auto _mode = get_env("OMNITRACE_MODE", (_causal) ? "causal" : "sampling");
@@ -1075,6 +1087,16 @@ omnitrace_preload()
::omnitrace::join(::omnitrace::QuoteStrings{}, ", ", _mode,
false, "omnitrace")
.c_str());
if(_use_mpi && !(_causal && _mode == "causal"))
{
// only make this call if true bc otherwise, if
// false, it will disable the MPIP component and
// we may intercept the MPI init call later.
// If _use_mpi defaults to true above, calling this
// will override any current env or config value for
// OMNITRACE_USE_PID.
omnitrace_set_mpi(_use_mpi, false);
}
omnitrace_init(_mode.c_str(), false, nullptr);
omnitrace_init_tooling();
}
@@ -68,6 +68,7 @@
#include <timemory/signals/signal_mask.hpp>
#include <timemory/signals/types.hpp>
#include <timemory/utility/backtrace.hpp>
#include <timemory/utility/join.hpp>
#include <timemory/utility/procfs/maps.hpp>
#include <atomic>
@@ -679,6 +680,8 @@ omnitrace_finalize_hidden(void)
// disable initialization callback
threading::remove_callback(&ensure_initialization);
bool _is_child = is_child_process();
set_thread_state(ThreadState::Completed);
// return if not active
@@ -688,6 +691,12 @@ omnitrace_finalize_hidden(void)
std::to_string(get_state()).c_str());
return;
}
else if(_is_child)
{
set_state(State::Finalized);
std::quick_exit(EXIT_SUCCESS);
return;
}
if(get_verbose() >= 0 || get_debug()) fprintf(stderr, "\n");
OMNITRACE_VERBOSE_F(0, "finalizing...\n");
@@ -1104,9 +1113,14 @@ omnitrace_finalize_hidden(void)
_push_count, "vs. popped:", _pop_count)
.c_str());
debug::close_file();
config::finalize();
OMNITRACE_VERBOSE_F(0, "Finalized: %s\n", _finalization.as_string().c_str());
tim::signals::enable_signal_detection(
{ tim::signals::sys_signal::SegFault, tim::signals::sys_signal::Stop },
[](int) {});
}
//======================================================================================//
@@ -127,6 +127,7 @@ backtrace::filter_and_patch(const std::vector<entry_type>& _data)
if(_lbl.find("rocprofiler_") != _npos) return -1;
if(_lbl.find("roctracer_") != _npos) return -1;
if(_lbl.find("perfetto::") != _npos) return -1;
if(_lbl.find("protozero::") == 0) return -1;
return 1;
};
@@ -28,11 +28,11 @@
#include "core/timemory.hpp"
#include "library/thread_data.hpp"
#include <timemory/components/base.hpp>
#include <timemory/macros/language.hpp>
#include <timemory/components/base/declaration.hpp>
#include <timemory/mpl/concepts.hpp>
#include <timemory/utility/unwind.hpp>
#include <timemory/variadic/types.hpp>
#include <timemory/unwind/cache.hpp>
#include <timemory/unwind/processed_entry.hpp>
#include <timemory/unwind/stack.hpp>
#include <array>
#include <chrono>
@@ -28,7 +28,6 @@
#include "library/components/ensure_storage.hpp"
#include "library/ptl.hpp"
#include "library/runtime.hpp"
#include "library/sampling.hpp"
#include "library/thread_info.hpp"
#include "library/tracing.hpp"
@@ -137,6 +137,8 @@ category_region<CategoryT>::start(std::string_view name, Args&&... args)
if(get_thread_state() == ThreadState::Disabled) return;
if(get_state() >= State::Finalized) return;
if(name.empty()) return;
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
// the expectation here is that if the state is not active then the call
@@ -186,7 +188,7 @@ category_region<CategoryT>::start(std::string_view name, Args&&... args)
{
if(get_use_timemory())
{
tracing::push_timemory(CategoryT{}, name.data(), std::forward<Args>(args)...);
tracing::push_timemory(CategoryT{}, name, std::forward<Args>(args)...);
}
}
@@ -248,8 +250,7 @@ category_region<CategoryT>::stop(std::string_view name, Args&&... args)
{
if(get_use_timemory())
{
tracing::pop_timemory(CategoryT{}, name.data(),
std::forward<Args>(args)...);
tracing::pop_timemory(CategoryT{}, name, std::forward<Args>(args)...);
}
}
@@ -60,28 +60,33 @@ invoke_exit_gotcha(const exit_gotcha::gotcha_data& _data, FuncT _func, Args... _
{
threading::clear_callbacks();
if(config::settings_are_configured())
if(get_state() < State::Finalized)
{
OMNITRACE_VERBOSE(0, "%s called %s(%s)...\n", get_exe_name().c_str(),
_data.tool_id.c_str(), JOIN(", ", _args...).c_str());
}
else
{
OMNITRACE_BASIC_VERBOSE(0, "%s called %s(%s)...\n", get_exe_name().c_str(),
_data.tool_id.c_str(), JOIN(", ", _args...).c_str());
}
if(config::settings_are_configured())
{
OMNITRACE_VERBOSE(0, "finalizing %s before calling %s(%s)...\n",
get_exe_name().c_str(), _data.tool_id.c_str(),
JOIN(", ", _args...).c_str());
}
else
{
OMNITRACE_BASIC_VERBOSE(0, "finalizing %s before calling %s(%s)...\n",
get_exe_name().c_str(), _data.tool_id.c_str(),
JOIN(", ", _args...).c_str());
}
if(get_state() != State::Finalized) omnitrace_finalize_hidden();
omnitrace_finalize();
}
if(config::settings_are_configured())
{
OMNITRACE_VERBOSE(0, "%s called %s(%s)...\n", get_exe_name().c_str(),
_data.tool_id.c_str(), JOIN(", ", _args...).c_str());
OMNITRACE_VERBOSE(0, "calling %s(%s) in %s...\n", _data.tool_id.c_str(),
JOIN(", ", _args...).c_str(), get_exe_name().c_str());
}
else
{
OMNITRACE_BASIC_VERBOSE(0, "%s called %s(%s)...\n", get_exe_name().c_str(),
_data.tool_id.c_str(), JOIN(", ", _args...).c_str());
OMNITRACE_BASIC_VERBOSE(0, "calling %s(%s) in %s...\n", _data.tool_id.c_str(),
JOIN(", ", _args...).c_str(), get_exe_name().c_str());
}
(*_func)(_args...);
@@ -20,10 +20,15 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "library/components/fork_gotcha.hpp"
#include "api.hpp"
#include "core/config.hpp"
#include "core/debug.hpp"
#include "core/perfetto.hpp"
#include "core/state.hpp"
#include "library/components/fork_gotcha.hpp"
#include "library/runtime.hpp"
#include "library/sampling.hpp"
#include <timemory/backends/process.hpp>
#include <timemory/backends/threading.hpp>
@@ -46,8 +51,10 @@ void
fork_gotcha::audit(const gotcha_data_t&, audit::incoming)
{
tim::set_env("OMNITRACE_PRELOAD", "0", 1);
OMNITRACE_VERBOSE(1, "fork() called on PID %i (rank: %i), TID %li\n",
process::get_id(), dmp::rank(), threading::get_id());
tim::set_env("OMNITRACE_ROOT_PROCESS", process::get_id(), 0);
omnitrace_reset_preload_hidden();
OMNITRACE_BASIC_VERBOSE(0, "fork() called on PID %i (rank: %i), TID %li\n",
process::get_id(), dmp::rank(), threading::get_id());
OMNITRACE_BASIC_DEBUG(
"Warning! Calling fork() within an OpenMPI application using libfabric "
"may result is segmentation fault\n");
@@ -59,9 +66,31 @@ fork_gotcha::audit(const gotcha_data_t&, audit::outgoing, pid_t _pid)
{
if(_pid != 0)
{
OMNITRACE_VERBOSE(1, "fork() called on PID %i created PID %i\n", getppid(), _pid);
OMNITRACE_BASIC_VERBOSE(0, "fork() called on PID %i created PID %i\n", getppid(),
_pid);
}
else
{
OMNITRACE_REQUIRE(is_child_process())
<< "Error! child process " << process::get_id()
<< " believes it is the root process " << get_root_process_id() << "\n";
settings::enabled() = false;
settings::verbose() = -127;
settings::debug() = false;
omnitrace::sampling::shutdown();
omnitrace::categories::shutdown();
omnitrace::get_perfetto_session().release();
set_thread_state(::omnitrace::ThreadState::Disabled);
}
if(!settings::use_output_suffix())
{
settings::use_output_suffix() = true;
settings::default_process_suffix() = process::get_id();
OMNITRACE_BASIC_VERBOSE(
0, "call to fork() enables using an output suffix. PID %i will use %i\n",
process::get_id(), process::get_id());
}
}
} // namespace component
@@ -102,29 +102,34 @@ auto mpi_comm_records = std::map<uintptr_t, comm_rank_data>{};
using tim::auto_lock_t;
using tim::type_mutex;
#if defined(TIMEMORY_USE_MPI)
// Attribute copy callback registered through PMPI_Comm_create_keyval.
// The last argument is the callback's output flag: it is set to 0 so that the
// attribute is not propagated to duplicated communicators, instead of leaving
// the flag with whatever value the MPI implementation passed in.
int
omnitrace_mpi_copy(MPI_Comm, int, void*, void*, void*, int* _flag)
{
    if(_flag) *_flag = 0;
    return MPI_SUCCESS;
}
// Attribute delete callback registered through PMPI_Comm_create_keyval.
// Blocks the sampling signals for the process, deactivates the mpip bundle if
// one was activated, and finalizes omnitrace when running in the root process.
int
omnitrace_mpi_fini(MPI_Comm, int, void*, void*)
{
    OMNITRACE_DEBUG("MPI Comm attribute finalize\n");

    auto _signals = get_sampling_signals();
    if(!_signals.empty())
    {
        tim::signals::block_signals(_signals, tim::signals::sigmask_scope::process);
    }

    // the max value of uint64_t marks "no mpip bundle active"
    const bool _mpip_active = (mpip_index != std::numeric_limits<uint64_t>::max());
    if(_mpip_active)
    {
        comp::deactivate_mpip<mpip_bundle_t, project::omnitrace>(mpip_index);
    }

    // forked children must not run the finalization
    if(is_root_process())
    {
        omnitrace_finalize_hidden();
    }

    return MPI_SUCCESS;
}
#endif
// this ensures omnitrace_finalize is called before MPI_Finalize
void
omnitrace_mpi_set_attr()
{
#if defined(TIMEMORY_USE_MPI)
static auto _mpi_copy = [](MPI_Comm, int, void*, void*, void*, int*) {
return MPI_SUCCESS;
};
static auto _mpi_fini = [](MPI_Comm, int, void*, void*) {
OMNITRACE_DEBUG("MPI Comm attribute finalize\n");
auto _blocked = get_sampling_signals();
if(!_blocked.empty())
tim::signals::block_signals(_blocked, tim::signals::sigmask_scope::process);
if(mpip_index != std::numeric_limits<uint64_t>::max())
comp::deactivate_mpip<mpip_bundle_t, project::omnitrace>(mpip_index);
omnitrace_finalize_hidden();
return MPI_SUCCESS;
};
using copy_func_t = int (*)(MPI_Comm, int, void*, void*, void*, int*);
using fini_func_t = int (*)(MPI_Comm, int, void*, void*);
int _comm_key = -1;
if(PMPI_Comm_create_keyval(static_cast<copy_func_t>(_mpi_copy),
static_cast<fini_func_t>(_mpi_fini), &_comm_key,
int _comm_key = -1;
if(PMPI_Comm_create_keyval(&omnitrace_mpi_copy, &omnitrace_mpi_fini, &_comm_key,
nullptr) == MPI_SUCCESS)
PMPI_Comm_set_attr(MPI_COMM_SELF, _comm_key, nullptr);
#endif
@@ -258,7 +263,7 @@ mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming)
tim::mpi::is_initialized_callback() = []() { return false; };
tim::mpi::is_finalized() = true;
#else
omnitrace_finalize_hidden();
if(is_root_process()) omnitrace_finalize_hidden();
#endif
}
@@ -27,7 +27,6 @@
#include "library/components/pthread_create_gotcha.hpp"
#include "library/components/pthread_mutex_gotcha.hpp"
#include "library/runtime.hpp"
#include "library/sampling.hpp"
#include "library/thread_data.hpp"
#include <timemory/backends/threading.hpp>
@@ -27,7 +27,6 @@
#include "library/components/category_region.hpp"
#include "library/critical_trace.hpp"
#include "library/runtime.hpp"
#include "library/sampling.hpp"
#include "library/thread_info.hpp"
#include <timemory/backends/threading.hpp>
@@ -29,7 +29,6 @@
#include "core/redirect.hpp"
#include "library/roctracer.hpp"
#include "library/runtime.hpp"
#include "library/sampling.hpp"
#include "library/thread_data.hpp"
namespace omnitrace
@@ -337,7 +337,7 @@ omnitrace_register_coverage_hidden(const char* file, const char* func, size_t ad
if(omnitrace::get_state() < omnitrace::State::Active &&
!omnitrace_init_tooling_hidden())
return;
else if(omnitrace::get_state() == omnitrace::State::Finalized)
else if(omnitrace::get_state() >= omnitrace::State::Finalized)
return;
OMNITRACE_BASIC_VERBOSE_F(3, "[0x%x] %-20s :: %20s\n", (unsigned int) address, func,
@@ -35,6 +35,7 @@
# include <timemory/components/ompt.hpp>
# include <timemory/components/ompt/extern.hpp>
# include <timemory/mpl/type_traits.hpp>
# include <timemory/timemory.hpp>
# include <memory>
@@ -85,6 +86,7 @@ shutdown()
_protect = true;
if(f_bundle)
{
if(tim::manager::instance()) tim::manager::instance()->cleanup("omnitrace-ompt");
f_bundle->stop();
ompt_context_t::cleanup();
trait::runtime_enabled<ompt_toolset_t>::set(false);
@@ -26,7 +26,6 @@
#include "library/cpu_freq.hpp"
#include "library/rocm_smi.hpp"
#include "library/runtime.hpp"
#include "library/sampling.hpp"
#include <memory>
#include <vector>
@@ -95,11 +94,11 @@ sampler::poll(std::atomic<State>* _state, nsec_t _interval, promise_t* _ready)
auto _now = std::chrono::steady_clock::now();
auto _end =
_now + std::chrono::nanoseconds{ static_cast<uint64_t>(_duration * units::sec) };
while(_state && _state->load() != State::Finalized && get_state() != State::Finalized)
while(_state && _state->load() < State::Finalized && get_state() < State::Finalized)
{
std::this_thread::sleep_until(_now);
if(_state->load() != State::Active) continue;
if(get_state() == State::Finalized) break;
if(get_state() >= State::Finalized) break;
if(get_state() != State::Active) continue;
get_sampler_is_sampling().store(true);
for(auto& itr : instances)
@@ -112,7 +111,7 @@ sampler::poll(std::atomic<State>* _state, nsec_t _interval, promise_t* _ready)
// ensure this is always false
get_sampler_is_sampling().store(false);
if(_has_duration && _now >= _end && get_state() != State::Finalized)
if(_has_duration && _now >= _end && get_state() < State::Finalized)
{
OMNITRACE_VERBOSE(
1,
@@ -96,9 +96,9 @@ get_thread_pool_state()
PTL::ThreadPool&
get_thread_pool()
{
static auto _v =
(get_thread_pool_state() = State::Active, PTL::ThreadPool{ _thread_pool_cfg() });
return _v;
static auto* _v = (get_thread_pool_state() = State::Active,
new PTL::ThreadPool{ _thread_pool_cfg() });
return *_v;
}
} // namespace
@@ -32,7 +32,6 @@
#include "library/rocprofiler.hpp"
#include "library/roctracer.hpp"
#include "library/runtime.hpp"
#include "library/sampling.hpp"
#include "library/thread_data.hpp"
#include "library/tracing.hpp"
@@ -232,8 +232,6 @@ data::shutdown()
void
data::post_process(uint32_t _dev_id)
{
OMNITRACE_VERBOSE(1, "Post-processing rocm-smi data for device %u\n", _dev_id);
using component::sampling_gpu_busy;
using component::sampling_gpu_memory;
using component::sampling_gpu_power;
@@ -245,6 +243,9 @@ data::post_process(uint32_t _dev_id)
auto _rocm_smi = (_rocm_smi_v) ? *_rocm_smi_v : std::deque<rocm_smi::data>{};
const auto& _thread_info = thread_info::get(0, InternalTID);
OMNITRACE_VERBOSE(1, "Post-processing %zu rocm-smi samples from device %u\n",
_rocm_smi.size(), _dev_id);
OMNITRACE_CI_THROW(!_thread_info, "Missing thread info for thread 0");
if(!_thread_info) return;
@@ -35,6 +35,7 @@
#include <timemory/backends/threading.hpp>
#include <timemory/components/rusage/backends.hpp>
#include <timemory/environment.hpp>
#include <timemory/process/process.hpp>
#include <timemory/sampling/allocator.hpp>
#include <timemory/settings.hpp>
#include <timemory/settings/types.hpp>
@@ -55,6 +56,8 @@ namespace omnitrace
{
namespace
{
// Process id regarded as the root process. It is looked up through get_env
// under the name "OMNITRACE_ROOT_PROCESS", with the current process id
// (process::get_id()) passed as the fallback value.
// NOTE(review): the trailing `false` presumably tells get_env not to store the
// queried value -- confirm against the get_env signature.
auto root_process_id = get_env<pid_t>("OMNITRACE_ROOT_PROCESS", process::get_id(), false);
auto&
get_sampling_on_child_threads_history(int64_t _idx = utility::get_thread_index())
{
@@ -259,4 +262,22 @@ set_sampling_on_all_future_threads(bool _v)
for(size_t i = 0; i < max_supported_threads; ++i)
get_sampling_on_child_threads_history(i).emplace_back(_v);
}
// Accessor for the file-local root_process_id value.
pid_t
get_root_process_id()
{
    const pid_t _root_pid = root_process_id;
    return _root_pid;
}
// True when the calling process's id equals root_process_id.
bool
is_root_process()
{
    const auto _current_pid = process::get_id();
    return (_current_pid == root_process_id);
}
// True when the calling process's id differs from root_process_id.
bool
is_child_process()
{
    const auto _current_pid = process::get_id();
    const bool _matches_root = (_current_pid == root_process_id);
    return !_matches_root;
}
} // namespace omnitrace
@@ -121,6 +121,15 @@ struct scoped_child_sampling
scoped_child_sampling(bool _v) { push_enable_sampling_on_child_threads(_v); }
~scoped_child_sampling() { pop_enable_sampling_on_child_threads(); }
};
// Returns the process id recorded as the root process.
pid_t
get_root_process_id();
// Returns true when the calling process is the recorded root process.
bool
is_root_process();
// Returns true when the calling process is not the recorded root process.
bool
is_child_process();
} // namespace omnitrace
#define OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(VALUE) \
@@ -333,7 +333,7 @@ start_duration_thread()
if(!_lk.owns_lock()) _lk.lock();
get_duration_cv().wait_until(_lk, _end);
auto _premature = (std::chrono::steady_clock::now() < _end);
auto _finalized = (get_state() == State::Finalized);
auto _finalized = (get_state() >= State::Finalized);
if(_premature && !_finalized)
{
// protect against spurious wakeups
@@ -480,6 +480,7 @@ configure(bool _setup, int64_t _tid)
"causal profiling is enabled");
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
auto&& _cpu_tids = get_sampling_cpu_tids();
auto&& _real_tids = get_sampling_real_tids();
@@ -673,6 +674,13 @@ setup()
std::set<int>
shutdown()
{
if(is_child_process())
{
for(auto& itr : sampler_instances::instances())
itr.release();
return std::set<int>{};
}
auto _v = configure(false);
if(utility::get_thread_index() == 0) stop_duration_thread();
return _v;
@@ -22,7 +22,6 @@
#pragma once
#include "api.hpp"
#include "core/common.hpp"
#include "core/concepts.hpp"
#include "core/config.hpp"
@@ -21,6 +21,7 @@
// SOFTWARE.
#include "library/thread_deleter.hpp"
#include "api.hpp"
#include "core/utility.hpp"
#include "library/components/pthread_create_gotcha.hpp"
#include "library/thread_info.hpp"
@@ -35,10 +36,22 @@ template struct component_bundle_cache<instrumentation_bundle_t>;
// Teardown hook for a thread.
//
// Does nothing when the thread-info storage no longer exists. Otherwise the
// thread is marked ThreadState::Completed; when the thread's info carries
// index data, the pthread_create gotcha is shut down for that thread's
// sequential id first, and thread 0 additionally triggers
// omnitrace_finalize_hidden() if the global state has not yet reached
// State::Finalized.
void
thread_deleter<void>::operator()() const
{
    // may be called after thread info is deleted -- nothing left to tear down
    if(!thread_info::exists()) return;

    const auto& _info = thread_info::get();
    if(!_info || !_info->index_data)
    {
        // no index data for this thread: only record completion
        set_thread_state(ThreadState::Completed);
        return;
    }

    auto _tid = _info->index_data->sequent_value;
    component::pthread_create_gotcha::shutdown(_tid);
    set_thread_state(ThreadState::Completed);
    // `<` rather than `!=` so states ordered after Finalized also skip finalization
    if(get_state() < State::Finalized && _tid == 0) omnitrace_finalize_hidden();
}
template struct thread_deleter<void>;
@@ -78,7 +78,7 @@ init_index_data(int64_t _tid, bool _offset = false)
itr = thread_index_data{};
int _verb = 2;
// if thread created using finalization, bump up the minimum verbosity level
if(get_state() == State::Finalized && _offset) _verb += 2;
if(get_state() >= State::Finalized && _offset) _verb += 2;
if(!config::settings_are_configured())
{
OMNITRACE_BASIC_VERBOSE_F(
@@ -145,6 +145,12 @@ grow_data(int64_t _tid)
return _max_threads;
}
// Reports whether the thread-info storage returned by get_info_data() is
// non-null.
bool
thread_info::exists()
{
    const bool _has_storage = (get_info_data() != nullptr);
    return _has_storage;
}
const std::optional<thread_info>&
thread_info::init(bool _offset)
{
@@ -177,6 +183,11 @@ thread_info::init(bool _offset)
// Returns the thread-info entry for the calling thread's index. When the
// thread-info storage does not exist, a reference to an empty thread-local
// optional is returned instead.
const std::optional<thread_info>&
thread_info::get()
{
    if(exists()) return get_info_data(utility::get_thread_index());

    // storage is gone: hand back a per-thread empty placeholder
    static thread_local auto _empty = std::optional<thread_info>{};
    return _empty;
}
@@ -95,6 +95,7 @@ struct thread_info
std::string as_string() const;
// Returns true while the thread-info storage is available.
static bool exists();
static const std::optional<thread_info>& init(bool _offset = false);
static const std::optional<thread_info>& get();
static const std::optional<thread_info>& get(int64_t _tid, ThreadIdType _type);
+7 -1
مشاهده پرونده
@@ -810,6 +810,13 @@ omnitrace_add_validation_test(
0
-p)
# Registers the "fork" test for the fork-example target. The base test
# environment is extended with OMNITRACE_CRITICAL_TRACE=ON.
# NOTE(review): REWRITE_ARGS and RUNTIME_ARGS are presumably the instrumenter
# arguments for the binary-rewrite and runtime-instrumentation variants of the
# test -- confirm against the omnitrace_add_test definition.
omnitrace_add_test(
NAME fork
TARGET fork-example
REWRITE_ARGS -e -v 2 --print-instrumented modules -i 16
RUNTIME_ARGS -e -v 1 --label file -i 16
ENVIRONMENT "${_base_environment};OMNITRACE_CRITICAL_TRACE=ON")
# -------------------------------------------------------------------------------------- #
#
# critical-trace tests
@@ -846,7 +853,6 @@ set(_parallel_overhead_critical_trace_environ
"OMNITRACE_OUTPUT_PREFIX=parallel-overhead-critical-trace/"
"OMNITRACE_CRITICAL_TRACE_DEBUG=ON"
"OMNITRACE_VERBOSE=4"
"OMNITRACE_DEBUG=ON"
"OMNITRACE_USE_PID=OFF"
"OMNITRACE_TIME_OUTPUT=OFF")