Arquivos
rocm-systems/projects/rocprofiler-systems/source/lib/omnitrace/library/runtime.hpp
T
Jonathan R. Madsen 1c6aaafe96 Handle fork in target application (#191)
* Always print PID in log messages

* omnitrace-dl updates

- omnitrace_preload does not call omnitrace_init or omnitrace_init_tooling
- omnitrace_preload will call omnitrace_set_mpi if OMNITRACE_USE_MPI
  or OMNITRACE_USE_MPIP in the env is true but will not call it otherwise
  because doing so either overrides OMNITRACE_USE_PID (when true) or
  disables mpip from initialization (when false) and the MPI
  init can be caught later and override OMNITRACE_USE_PID

* config updates

- set_setting_value sets user update type
- remove volatile from get_settings_configured
- don't override settings::default_process_suffix
- don't kill process in omnitrace_exit_action
- set_state ignores updating state if >= State::Finalized

* Handle state > State::Finalized

* fork gotcha updates

- unsets LD_PRELOAD
- sets OMNITRACE_ROOT_PROCESS
- sets OMNITRACE_CHILD_PROCESS

* libomnitrace library.cpp updates

- basic_bundle for fini metrics
- handle finalization from child process

* sampling updates

- sampling::shutdown handles when child process

* Add example and test using fork

* Update run-ci script to support not submitting

* Tweak test envs

* Update build flags when codecov enabled

* remove unnecessary includes of sampling header

* Replace mpi copy/fini static lambda with free-funcs

* Update codecov job

* Fix OMPT segfaults after finalization

* Miscellaneous updates after rebase

* fixes for causal profiling

* revert some run-ci.sh changes

* Disable storing env in sampling::shutdown

* formatting fix

* Update timemory submodule

- fixed occasional synchronization issues with allocator offloading
- exclude protozero:: from internal samples

* improve root/child process detection

- avoid omnitrace_finalize in MPI when child process
- revert some testing tweaks

[ROCm/rocprofiler-systems commit: 32b15fe7b7]
2023-02-08 01:31:38 -06:00

141 linhas
4.9 KiB
C++

// MIT License
//
// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "api.hpp"
#include "core/common.hpp"
#include "core/defines.hpp"
#include "core/state.hpp"
#include "core/timemory.hpp"
#include "library/causal/components/causal_gotcha.hpp"
#include "library/components/exit_gotcha.hpp"
#include "library/components/fork_gotcha.hpp"
#include "library/components/mpi_gotcha.hpp"
#include "library/components/numa_gotcha.hpp"
#include "library/components/pthread_gotcha.hpp"
#include "library/components/roctracer.hpp"
#include "library/thread_data.hpp"
#include <timemory/backends/threading.hpp>
#include <timemory/macros/language.hpp>
#include <memory>
#include <set>
#include <string>
#include <string_view>
#include <unordered_set>
namespace omnitrace
{
// Component bundles used by the runtime. Every alias below is a
// tim::lightweight_tuple over the listed component types.
//
// exit, fork, and MPI gotcha components; started during preinit phase
using preinit_bundle_t =
tim::lightweight_tuple<exit_gotcha_t, fork_gotcha_t, mpi_gotcha_t>;
// causal, pthread, and NUMA gotcha components; started during init phase
using init_bundle_t = tim::lightweight_tuple<causal::component::causal_gotcha,
pthread_gotcha, component::numa_gotcha>;
// bundle of components around omnitrace_init and omnitrace_finalize:
// wall clock, peak RSS, page RSS, CPU clock, and CPU utilization
using main_bundle_t =
tim::lightweight_tuple<comp::wall_clock, comp::peak_rss, comp::page_rss,
comp::cpu_clock, comp::cpu_util>;
// bundle of components around each thread. comp::peak_rss is only part of the
// bundle when TIMEMORY_RUSAGE_THREAD is defined to a positive value.
// NOTE(review): presumably that macro indicates per-thread rusage support, which
// a thread-scoped peak RSS would need -- confirm against timemory
#if defined(TIMEMORY_RUSAGE_THREAD) && TIMEMORY_RUSAGE_THREAD > 0
using thread_bundle_t = tim::lightweight_tuple<comp::wall_clock, comp::thread_cpu_clock,
comp::thread_cpu_util, comp::peak_rss>;
#else
using thread_bundle_t = tim::lightweight_tuple<comp::wall_clock, comp::thread_cpu_clock,
comp::thread_cpu_util>;
#endif
// Accessors for the bundle instances. Each one returns a mutable reference to
// the std::unique_ptr that holds the corresponding bundle, so the caller can
// inspect, reset, or replace the pointer itself.
// NOTE(review): where these pointers live and when they are populated is decided
// by the definitions, which are not part of this header -- check for null before
// dereferencing unless the call site is known to run after creation
//
// bundle around omnitrace_init and omnitrace_finalize
std::unique_ptr<main_bundle_t>&
get_main_bundle();
// bundle started during the init phase
std::unique_ptr<init_bundle_t>&
get_init_bundle();
// bundle started during the preinit phase
std::unique_ptr<preinit_bundle_t>&
get_preinit_bundle();
// CPU "cid" bookkeeping.
// NOTE(review): "cid" presumably stands for correlation id -- confirm.
// NOTE(review): TIMEMORY_HOT is defined outside this header; it looks like a
// hot-path function attribute -- confirm its expansion before moving it.
//
// returns a reference to an atomic 64-bit counter for CPU cids
std::atomic<uint64_t>&
get_cpu_cid() TIMEMORY_HOT;
// returns a reference to the pointer holding a vector of 64-bit values for the
// given thread id; _tid defaults to the calling thread (threading::get_id())
// and _parent defaults to 0
// NOTE(review): the exact role of _parent is not visible here -- see definition
unique_ptr_t<std::vector<uint64_t>>&
get_cpu_cid_stack(int64_t _tid = threading::get_id(), int64_t _parent = 0) TIMEMORY_HOT;
// value types used by the functions below
// NOTE(review): the meaning of the individual tuple fields is not documented in
// this header; presumably (cid, parent cid, depth) and (parent cid, depth) --
// confirm against the definitions
using cpu_cid_data_t = std::tuple<uint64_t, uint64_t, uint32_t>;
using cpu_cid_pair_t = std::tuple<uint64_t, uint32_t>;
// map from a 64-bit key to a cpu_cid_pair_t
using cpu_cid_parent_map_t = std::unordered_map<uint64_t, cpu_cid_pair_t>;
// returns a reference to the pointer holding the parent map for the given
// thread id (defaults to the calling thread)
unique_ptr_t<cpu_cid_parent_map_t>&
get_cpu_cid_parents(int64_t _tid = threading::get_id()) TIMEMORY_HOT;
// creates an entry for the given thread id (defaults to the calling thread)
// and returns it as a cpu_cid_data_t
cpu_cid_data_t
create_cpu_cid_entry(int64_t _tid = threading::get_id()) TIMEMORY_HOT;
// returns the cpu_cid_pair_t associated with _cid for the given thread id
// (defaults to the calling thread)
// NOTE(review): behavior when _cid is unknown is not visible here
cpu_cid_pair_t
get_cpu_cid_entry(uint64_t _cid, int64_t _tid = threading::get_id()) TIMEMORY_HOT;
// returns a reference to the mutex associated with the cid stack of the given
// thread id (defaults to the calling thread)
tim::mutex_t&
get_cpu_cid_stack_lock(int64_t _tid = threading::get_id()) TIMEMORY_HOT;
// Controls for whether sampling is enabled on child threads.
// NOTE(review): the meaning of the bool returned by the push/pop functions is
// not documented in this header -- confirm against the definitions
//
// query current value
bool
sampling_enabled_on_child_threads();
// use this to disable sampling in a region (e.g. right before thread creation);
// every push is expected to be matched by a pop
bool
push_enable_sampling_on_child_threads(bool _v);
// use this to restore previous setting
bool
pop_enable_sampling_on_child_threads();
// make sure every newly created thread starts with this value
void
set_sampling_on_all_future_threads(bool _v);
// Scope guard for the child-thread sampling setting: the constructor calls
// push_enable_sampling_on_child_threads(_v) and the destructor calls
// pop_enable_sampling_on_child_threads(), so the previous setting is restored
// when the guard goes out of scope (including on early return or exception).
//
// Typically created through OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(VALUE).
struct scoped_child_sampling
{
    // pushes _v as the setting for the lifetime of this object
    scoped_child_sampling(bool _v) { push_enable_sampling_on_child_threads(_v); }
    // pops the setting pushed by the constructor
    ~scoped_child_sampling() { pop_enable_sampling_on_child_threads(); }

    // Each instance must pair exactly one push with exactly one pop. A copied or
    // moved-from guard would run the destructor a second time and pop a setting
    // it never pushed, so copying and moving are disabled.
    scoped_child_sampling(const scoped_child_sampling&) = delete;
    scoped_child_sampling(scoped_child_sampling&&)      = delete;
    scoped_child_sampling& operator=(const scoped_child_sampling&) = delete;
    scoped_child_sampling& operator=(scoped_child_sampling&&) = delete;
};
// Process identity queries.
// NOTE(review): "root" and "child" presumably distinguish the originally
// launched process from processes created by fork() in the target application;
// how this is determined is not visible in this header -- see the definitions
//
// returns the process id of the root process
pid_t
get_root_process_id();
// true when the calling process is the root process
bool
is_root_process();
// true when the calling process is a child process
// NOTE(review): presumably the negation of is_root_process() -- confirm
bool
is_child_process();
} // namespace omnitrace
// Declares a local ::omnitrace::scoped_child_sampling object, brace-initialized
// with VALUE, in the current scope. The object pushes VALUE as the child-thread
// sampling setting on construction and pops it when the scope ends.
// The variable name is produced by OMNITRACE_VARIABLE from the
// _scoped_child_sampling_ prefix and __LINE__.
// NOTE(review): OMNITRACE_VARIABLE is defined elsewhere; presumably it pastes its
// arguments into a unique identifier, in which case two uses of this macro on
// the same source line would collide -- confirm.
// The expansion has no trailing semicolon; the caller supplies it.
#define OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(VALUE) \
::omnitrace::scoped_child_sampling OMNITRACE_VARIABLE(_scoped_child_sampling_, \
__LINE__) \
{ \
VALUE \
}