Files
rocm-systems/projects/rocprofiler-systems/include/library/critical_trace.hpp
T
Jonathan R. Madsen ebb29ac54a Miscellaneous updates (#21)
* Miscellaneous updates

- Updated README
- Updated VERSION
- Header include tweaks
- get_verbose() + get_verbose_env()
- fixes to omnitrace-avail
    - exclude all cuda/cupti settings
    - apply available_only to hw counters
- config file warnings
- config displayed at verbose > 0
- fix to MPI_Finalize when only using MPI headers

* Updated LICENSE

* CPack tweak

[ROCm/rocprofiler-systems commit: 8648410309]
2022-01-26 23:25:00 -06:00

209 lines
6.4 KiB
C++

// MIT License
//
// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "library/config.hpp"
#include "library/defines.hpp"
#include "library/thread_data.hpp"
#include <timemory/tpls/cereal/cereal/cereal.hpp>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <ostream>
#include <set>
#include <string>
#include <string_view>
#include <unordered_set>
#include <vector>
namespace omnitrace
{
namespace critical_trace
{
/// Device category attached to a critical-trace entry (stored as a `short`).
/// Enumerator values are spelled out explicitly; they match the implicit
/// numbering (0, 1, 2).
enum class Device : short
{
    NONE = 0,  ///< no device specified
    CPU  = 1,
    GPU  = 2,
};
/// Phase category attached to a critical-trace entry (stored as a `short`).
/// Enumerator values are spelled out explicitly; they match the implicit
/// numbering (0, 1, 2, 3).
enum class Phase : short
{
    NONE  = 0,  ///< unspecified
    BEGIN = 1,
    END   = 2,
    DELTA = 3,  // NOTE(review): presumably a begin/end pair merged into one record -- confirm
};
/// One record of a critical trace.
///
/// Holds identifying fields (device, phase, thread id, correlation ids, name hash)
/// and begin/end timestamps. Most member functions are only declared here; their
/// definitions are not part of this header section.
struct entry
{
    // ---- special members (all compiler-generated) ----
    entry()                 = default;
    ~entry()                = default;
    entry(const entry&)     = default;
    entry(entry&&) noexcept = default;
    entry& operator=(const entry&) = default;
    entry& operator=(entry&&) noexcept = default;

    // ---- data members (declaration order is significant; do not reorder) ----
    /// priority value (for sorting)
    uint16_t priority = 0;
    /// which device it executed on
    Device device = Device::CPU;
    /// start / stop / unspecified
    Phase phase = Phase::NONE;
    /// call-stack depth
    uint16_t depth = 0;
    /// thread id it was registered on
    int64_t tid = 0;
    /// CPU correlation id
    uint64_t cpu_cid = 0;
    /// GPU correlation id
    uint64_t gpu_cid = 0;
    /// parent CPU correlation id
    uint64_t parent_cid = 0;
    /// timestamp of start
    int64_t begin_ns = 0;
    /// timestamp of end
    int64_t end_ns = 0;
    /// hash for name
    size_t hash = 0;

    // ---- comparison ----
    // operator==, operator< and operator> are defined out-of-line; the remaining
    // three are derived from them by negation.
    bool operator==(const entry& rhs) const;
    bool operator<(const entry& rhs) const;
    bool operator>(const entry& rhs) const;

    bool operator!=(const entry& rhs) const
    {
        const bool _equal = (*this == rhs);
        return !_equal;
    }

    bool operator<=(const entry& rhs) const
    {
        const bool _greater = (*this > rhs);
        return !_greater;
    }

    bool operator>=(const entry& rhs) const
    {
        const bool _less = (*this < rhs);
        return !_less;
    }

    // ---- accumulation (defined out-of-line) ----
    entry& operator+=(const entry& rhs);

    // ---- queries on this entry (defined out-of-line) ----
    size_t  get_hash() const;
    int64_t get_timestamp() const;
    int64_t get_cost() const;

    // ---- queries relative to another entry (defined out-of-line) ----
    // Each query has a second overload that additionally takes a thread id.
    bool    is_bounded(const entry& rhs) const;
    bool    is_bounded(const entry& rhs, int64_t _tid) const;
    int64_t get_overlap(const entry& rhs) const;
    int64_t get_overlap(const entry& rhs, int64_t _tid) const;
    int64_t get_independent(const entry& rhs) const;
    int64_t get_independent(const entry& rhs, int64_t _tid) const;

    // ---- output ----
    /// Writes this entry to the given stream (defined out-of-line).
    void write(std::ostream& _os) const;

    // NOTE(review): presumably tests whether the entry should be treated as a
    // Phase::DELTA record for the given name -- confirm against the definition.
    static bool is_delta(const entry&, const std::string_view&);

    /// Stream insertion; forwards to write() and returns the stream.
    friend std::ostream& operator<<(std::ostream& _os, const entry& _v)
    {
        _v.write(_os);
        return _os;
    }

    /// Serialization hook; the definition follows the struct.
    template <typename Archive>
    void serialize(Archive& ar, unsigned int);
};
/// Archives every data member of the entry as a name-value pair, in declaration
/// order. When get_critical_trace_serialize_names() returns true, a "name" field
/// is archived as well: the demangled hash identifier for `hash`, or an empty
/// string when `hash` is zero.
///
/// @param ar  archive receiving the name-value pairs
/// (the second, unnamed parameter is not used)
template <typename Archive>
void
entry::serialize(Archive& ar, unsigned int)
{
    ar(tim::cereal::make_nvp("priority", priority),
       tim::cereal::make_nvp("device", device),
       tim::cereal::make_nvp("phase", phase),
       tim::cereal::make_nvp("depth", depth),
       tim::cereal::make_nvp("tid", tid),
       tim::cereal::make_nvp("cpu_cid", cpu_cid),
       tim::cereal::make_nvp("gpu_cid", gpu_cid),
       tim::cereal::make_nvp("parent_cid", parent_cid),
       tim::cereal::make_nvp("begin_ns", begin_ns),
       tim::cereal::make_nvp("end_ns", end_ns),
       tim::cereal::make_nvp("hash", hash));

    // the readable name is only archived when the setting asks for it
    if(!get_critical_trace_serialize_names()) return;

    std::string _label{};
    if(hash != 0)
    {
        _label = tim::demangle(tim::get_hash_identifier(hash));
    }
    ar(tim::cereal::make_nvp("name", _label));
}
struct call_chain : private std::vector<entry>
{
using base_type = std::vector<entry>;
using base_type::at;
using base_type::back;
using base_type::begin;
using base_type::cbegin;
using base_type::cend;
using base_type::clear;
using base_type::emplace_back;
using base_type::empty;
using base_type::end;
using base_type::erase;
using base_type::front;
using base_type::pop_back;
using base_type::push_back;
using base_type::rbegin;
using base_type::rend;
using base_type::reserve;
using base_type::size;
size_t get_hash() const;
int64_t get_cost(int64_t _tid = -1) const;
int64_t get_overlap(int64_t _tid = -1) const;
int64_t get_independent(int64_t _tid = -1) const;
static std::vector<call_chain>& get_top_chains();
bool operator==(const call_chain& rhs) const;
bool operator!=(const call_chain& rhs) const { return !(*this == rhs); }
friend std::ostream& operator<<(std::ostream& _os, const call_chain& _v)
{
size_t _n = 0;
for(const auto& itr : _v)
_os << " [" << _n++ << "] " << itr << "\n";
return _os;
}
template <typename Archive>
void serialize(Archive& ar, unsigned int)
{
namespace cereal = tim::cereal;
ar(cereal::make_nvp("call_chain", static_cast<base_type&>(*this)));
}
template <Device DevT>
void generate_perfetto(std::set<entry>& _used) const;
template <bool BoolV = true, typename FuncT>
bool query(FuncT&&) const;
};
// Unordered set of strings; accepted by the add_hash_id(const hash_ids&) overload.
using hash_ids = std::unordered_set<std::string>;
// Declared here, defined elsewhere.
// NOTE(review): presumably how often update() should be triggered -- confirm units.
uint64_t
get_update_frequency();
// Returns a reference to the owning pointer of a call_chain for the given thread
// id; `_tid` defaults to threading::get_id().
// NOTE(review): the returned pointer may be null -- confirm against the definition.
std::unique_ptr<call_chain>&
get(int64_t _tid = threading::get_id());
// Registers a single label and returns a size_t.
// NOTE(review): presumably the hash of the label -- confirm.
size_t
add_hash_id(const std::string& _label);
// Overload taking a whole set of labels; returns nothing.
void
add_hash_id(const hash_ids&);
// `_tid` defaults to threading::get_id(). Definition not visible here.
void
update(int64_t _tid = threading::get_id());
// `_tid` defaults to threading::get_id(). Definition not visible here.
void
compute(int64_t _tid = threading::get_id());
// Empty type with no members.
// NOTE(review): presumably used as a tag/identifier type -- confirm at use sites.
struct id
{
};
} // namespace critical_trace
} // namespace omnitrace