ROCpd support [Part 1] (#279)
- Add rocpd support for
- cpu_frequency
- amd_smi
- sampling
[ROCm/rocprofiler-systems commit: 26ae543012]
Este commit está contenido en:
cometido por
GitHub
padre
e2fc692ee0
commit
166babf234
@@ -24,3 +24,6 @@
|
||||
[submodule "external/pybind11"]
|
||||
path = external/pybind11
|
||||
url = https://github.com/jrmadsen/pybind11.git
|
||||
[submodule "external/sqlite"]
|
||||
path = external/sqlite
|
||||
url = https://github.com/sqlite/sqlite.git
|
||||
|
||||
@@ -10,6 +10,7 @@ Full documentation for ROCm Systems Profiler is available at [https://rocm.docs.
|
||||
- How-to document for VCN and JPEG activity sampling and tracing.
|
||||
- Support for tracing Fortran applications.
|
||||
- Support for tracing MPI API in Fortran.
|
||||
- Initial support for rocPD database output with the `ROCPROFSYS_USE_ROCPD` configuration setting.
|
||||
- By default, group "kernel dispatch" and "memory copy" events by HIP stream ID in Perfetto traces.
|
||||
- Add the "ROCPROFSYS_ROCM_GROUP_BY_QUEUE" configuration setting to group events by queue, instead.
|
||||
|
||||
|
||||
@@ -221,6 +221,12 @@ rocprofiler_systems_add_option(ROCPROFSYS_BUILD_CODECOV "Build for code coverage
|
||||
rocprofiler_systems_add_option(ROCPROFSYS_INSTALL_PERFETTO_TOOLS
|
||||
"Install perfetto tools (i.e. traced, perfetto, etc.)" OFF
|
||||
)
|
||||
rocprofiler_systems_add_option(ROCPROFILER_BUILD_SQLITE3
|
||||
"Enable building sqlite3 library internally" OFF
|
||||
)
|
||||
rocprofiler_systems_add_option(ROCPROFSYS_ENABLE_BENCHMARK
|
||||
"Enable performance benchmarking capabilities for the project" OFF
|
||||
)
|
||||
|
||||
if(ROCPROFSYS_USE_PAPI)
|
||||
rocprofiler_systems_add_option(ROCPROFSYS_BUILD_PAPI "Build PAPI from submodule" ON)
|
||||
@@ -328,6 +334,10 @@ if(ROCPROFSYS_BUILD_TESTING OR "$ENV{ROCPROFSYS_CI}" MATCHES "[1-9]+|ON|on|y|yes
|
||||
include(CTest)
|
||||
endif()
|
||||
|
||||
if(ROCPROFSYS_ENABLE_BENCHMARK)
|
||||
add_compile_definitions(-DROCPROFSYS_USE_BENCHMARK=1)
|
||||
endif()
|
||||
|
||||
# ------------------------------------------------------------------------------#
|
||||
#
|
||||
# library and executables
|
||||
|
||||
@@ -53,6 +53,9 @@ rocprofiler_systems_add_interface_library(rocprofiler-systems-python
|
||||
rocprofiler_systems_add_interface_library(rocprofiler-systems-perfetto
|
||||
"Enables Perfetto support"
|
||||
)
|
||||
rocprofiler_systems_add_interface_library(rocprofiler-systems-sqlite3
|
||||
"Use SQLite3 for rocpd data storage"
|
||||
)
|
||||
rocprofiler_systems_add_interface_library(rocprofiler-systems-timemory
|
||||
"Provides timemory libraries"
|
||||
)
|
||||
@@ -532,6 +535,14 @@ rocprofiler_systems_checkout_git_submodule(
|
||||
|
||||
include(Perfetto)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
#
|
||||
# SQLite3
|
||||
#
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
|
||||
include(SQLite3)
|
||||
|
||||
# ----------------------------------------------------------------------------------------#
|
||||
#
|
||||
# ELFIO
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
include_guard(GLOBAL)
|
||||
|
||||
if(ROCPROFILER_BUILD_SQLITE3)
|
||||
message(STATUS "Building SQLite3 from source!")
|
||||
execute_process(
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${PROJECT_BINARY_DIR}/external/sqlite
|
||||
)
|
||||
# checkout submodule if not already checked out or clone repo if no .gitmodules file
|
||||
rocprofiler_systems_checkout_git_submodule(
|
||||
RELATIVE_PATH external/sqlite
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
TEST_FILE configure
|
||||
REPO_URL https://github.com/sqlite/sqlite.git
|
||||
REPO_BRANCH "version-3.45.3"
|
||||
)
|
||||
|
||||
find_program(MAKE_COMMAND NAMES make gmake PATH_SUFFIXES bin REQUIRED)
|
||||
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(
|
||||
rocprofiler-systems-sqlite-build
|
||||
PREFIX ${PROJECT_BINARY_DIR}/external/sqlite/build
|
||||
SOURCE_DIR ${PROJECT_SOURCE_DIR}/external/sqlite
|
||||
BUILD_IN_SOURCE 0
|
||||
CONFIGURE_COMMAND
|
||||
<SOURCE_DIR>/configure --prefix=${PROJECT_BINARY_DIR}/external/sqlite/install
|
||||
--libdir=${PROJECT_BINARY_DIR}/external/sqlite/install/lib --disable-shared
|
||||
--with-tempstore=yes --enable-all --disable-tcl CFLAGS=-O3\ -g1
|
||||
BUILD_COMMAND ${MAKE_COMMAND} install -s
|
||||
INSTALL_COMMAND ""
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
rocprofiler-systems-sqlite3
|
||||
INTERFACE
|
||||
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/external/sqlite/install/lib/libsqlite3.a>
|
||||
)
|
||||
target_include_directories(
|
||||
rocprofiler-systems-sqlite3
|
||||
SYSTEM
|
||||
INTERFACE $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/external/sqlite/install/include>
|
||||
)
|
||||
add_dependencies(rocprofiler-systems-sqlite3 rocprofiler-systems-sqlite-build)
|
||||
else()
|
||||
message(STATUS "Using system SQLite3 library")
|
||||
find_package(SQLite3 REQUIRED)
|
||||
target_link_libraries(rocprofiler-systems-sqlite3 INTERFACE SQLite::SQLite3)
|
||||
endif()
|
||||
@@ -118,7 +118,7 @@ write_hw_counter_info(std::ostream&, const array_t<bool, N>& = {},
|
||||
namespace
|
||||
{
|
||||
// initialize HIP before main so that librocprof-sys is not HSA_TOOLS_LIB
|
||||
int gpu_count = rocprofsys::gpu::device_count();
|
||||
int gpu_count = 0;
|
||||
|
||||
// statically allocated shared_ptrs to prevent use after free errors
|
||||
auto timemory_manager = tim::manager::master_instance();
|
||||
@@ -138,6 +138,7 @@ main(int argc, char** argv)
|
||||
tim::unwind::set_bfd_verbose(3);
|
||||
tim::set_env("ROCPROFSYS_INIT_TOOLING", "OFF", 1);
|
||||
rocprofsys_init_library();
|
||||
gpu_count = rocprofsys::gpu::device_count();
|
||||
|
||||
std::set<std::string> _category_options = component_categories{}();
|
||||
{
|
||||
|
||||
@@ -40,6 +40,7 @@ target_link_libraries(
|
||||
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-compile-options>
|
||||
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-compile-definitions>
|
||||
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-perfetto>
|
||||
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-sqlite3>
|
||||
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-timemory>
|
||||
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-elfutils>
|
||||
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-bfd>
|
||||
|
||||
@@ -26,6 +26,8 @@ target_sources(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/invoke.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/join.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/setup.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/traits.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/md5sum.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/static_object.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/synchronized.hpp
|
||||
)
|
||||
|
||||
@@ -0,0 +1,469 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <iomanip>
|
||||
#include <string_view>
|
||||
#include <type_traits>
|
||||
|
||||
#include "traits.hpp"
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
inline namespace common
|
||||
{
|
||||
|
||||
class md5sum
|
||||
{
|
||||
public:
|
||||
using size_type = uint32_t; // must be 32bit
|
||||
using raw_digest_t = std::array<uint8_t, 16>;
|
||||
static constexpr int blocksize = 64;
|
||||
|
||||
template <typename Tp, typename... Args>
|
||||
explicit md5sum(Tp&& arg, Args&&... args);
|
||||
|
||||
md5sum() = default;
|
||||
~md5sum() = default;
|
||||
md5sum(const md5sum&) = default;
|
||||
md5sum(md5sum&&) = default;
|
||||
|
||||
md5sum& operator=(const md5sum&) = default;
|
||||
md5sum& operator=(md5sum&&) = default;
|
||||
|
||||
md5sum& update(std::string_view inp);
|
||||
md5sum& update(const unsigned char* buf, size_type length);
|
||||
md5sum& update(const char* buf, size_type length);
|
||||
md5sum& finalize();
|
||||
std::string hexdigest() const;
|
||||
std::string hexliteral() const;
|
||||
raw_digest_t rawdigest() const { return digest; }
|
||||
|
||||
template <typename Tp,
|
||||
typename Up = std::enable_if_t<std::is_arithmetic<Tp>::value, int>>
|
||||
md5sum& update(Tp inp);
|
||||
|
||||
friend std::ostream& operator<<(std::ostream&, md5sum md5);
|
||||
|
||||
private:
|
||||
void transform(const uint8_t block[blocksize]);
|
||||
|
||||
bool finalized = false;
|
||||
// 64bit counter for number of bits (lo, hi)
|
||||
std::array<uint32_t, 2> count = { 0, 0 };
|
||||
std::array<uint8_t, blocksize> buffer{}; // overflow bytes from last 64 byte chunk
|
||||
// digest so far, initialized to magic initialization constants.
|
||||
std::array<uint32_t, 4> state = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 };
|
||||
std::array<uint8_t, 16> digest{}; // result
|
||||
};
|
||||
|
||||
template <typename Tp, typename... Args>
|
||||
md5sum::md5sum(Tp&& arg, Args&&... args)
|
||||
{
|
||||
auto _update = [&](auto&& _val) {
|
||||
using value_type =
|
||||
std::remove_reference_t<std::remove_cv_t<std::decay_t<decltype(_val)>>>;
|
||||
static_assert(!std::is_pointer<value_type>::value,
|
||||
"constructor cannot be called with pointer argument");
|
||||
update(std::forward<decltype(_val)>(_val));
|
||||
};
|
||||
|
||||
_update(std::forward<Tp>(arg));
|
||||
(_update(std::forward<Args>(args)), ...);
|
||||
finalize();
|
||||
}
|
||||
|
||||
template <typename Tp, typename Up>
|
||||
md5sum&
|
||||
md5sum::update(Tp inp)
|
||||
{
|
||||
static_assert(std::is_arithmetic<Tp>::value, "expected arithmetic type");
|
||||
return update(reinterpret_cast<const char*>(&inp), sizeof(Tp));
|
||||
}
|
||||
|
||||
template <template <typename, typename...> class ContainerT, typename Tp,
|
||||
typename... TailT>
|
||||
std::string
|
||||
compute_md5sum(const ContainerT<Tp, TailT...>& inp,
|
||||
std::enable_if_t<traits::is_string_literal<Tp>(), int>)
|
||||
{
|
||||
auto _val = md5sum{};
|
||||
for(const auto& itr : inp)
|
||||
_val.update(std::string_view{ inp });
|
||||
_val.finalize();
|
||||
return _val.hexdigest();
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
using size_type = typename md5sum::size_type;
|
||||
|
||||
// Constants for md5sumTransform routine.
|
||||
constexpr uint32_t S11 = 7;
|
||||
constexpr uint32_t S12 = 12;
|
||||
constexpr uint32_t S13 = 17;
|
||||
constexpr uint32_t S14 = 22;
|
||||
constexpr uint32_t S21 = 5;
|
||||
constexpr uint32_t S22 = 9;
|
||||
constexpr uint32_t S23 = 14;
|
||||
constexpr uint32_t S24 = 20;
|
||||
constexpr uint32_t S31 = 4;
|
||||
constexpr uint32_t S32 = 11;
|
||||
constexpr uint32_t S33 = 16;
|
||||
constexpr uint32_t S34 = 23;
|
||||
constexpr uint32_t S41 = 6;
|
||||
constexpr uint32_t S42 = 10;
|
||||
constexpr uint32_t S43 = 15;
|
||||
constexpr uint32_t S44 = 21;
|
||||
|
||||
// low level logic operations
|
||||
static inline uint32_t
|
||||
F(uint32_t x, uint32_t y, uint32_t z);
|
||||
|
||||
static inline uint32_t
|
||||
G(uint32_t x, uint32_t y, uint32_t z);
|
||||
|
||||
static inline uint32_t
|
||||
H(uint32_t x, uint32_t y, uint32_t z);
|
||||
|
||||
static inline uint32_t
|
||||
I(uint32_t x, uint32_t y, uint32_t z);
|
||||
|
||||
static inline uint32_t
|
||||
rotate_left(uint32_t x, int n);
|
||||
|
||||
static inline void
|
||||
FF(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac);
|
||||
|
||||
static inline void
|
||||
GG(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac);
|
||||
|
||||
static inline void
|
||||
HH(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac);
|
||||
|
||||
static inline void
|
||||
II(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac);
|
||||
|
||||
// F, G, H and I are basic md5sum functions.
|
||||
inline uint32_t
|
||||
F(uint32_t x, uint32_t y, uint32_t z)
|
||||
{
|
||||
return (x & y) | (~x & z);
|
||||
}
|
||||
|
||||
inline uint32_t
|
||||
G(uint32_t x, uint32_t y, uint32_t z)
|
||||
{
|
||||
return (x & z) | (y & ~z);
|
||||
}
|
||||
|
||||
inline uint32_t
|
||||
H(uint32_t x, uint32_t y, uint32_t z)
|
||||
{
|
||||
return x ^ y ^ z;
|
||||
}
|
||||
|
||||
inline uint32_t
|
||||
I(uint32_t x, uint32_t y, uint32_t z)
|
||||
{
|
||||
return y ^ (x | ~z);
|
||||
}
|
||||
|
||||
// rotate_left rotates x left n bits.
|
||||
inline uint32_t
|
||||
rotate_left(uint32_t x, int n)
|
||||
{
|
||||
return (x << n) | (x >> (32 - n));
|
||||
}
|
||||
|
||||
// FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
|
||||
// Rotation is separate from addition to prevent recomputation.
|
||||
inline void
|
||||
FF(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac)
|
||||
{
|
||||
a = rotate_left(a + F(b, c, d) + x + ac, s) + b;
|
||||
}
|
||||
|
||||
inline void
|
||||
GG(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac)
|
||||
{
|
||||
a = rotate_left(a + G(b, c, d) + x + ac, s) + b;
|
||||
}
|
||||
|
||||
inline void
|
||||
HH(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac)
|
||||
{
|
||||
a = rotate_left(a + H(b, c, d) + x + ac, s) + b;
|
||||
}
|
||||
|
||||
inline void
|
||||
II(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac)
|
||||
{
|
||||
a = rotate_left(a + I(b, c, d) + x + ac, s) + b;
|
||||
}
|
||||
|
||||
// decodes input (unsigned char) into output (uint32_t). Assumes len is a multiple of 4.
|
||||
void
|
||||
decode(uint32_t output[], const uint8_t input[], size_type len)
|
||||
{
|
||||
for(unsigned int i = 0, j = 0; j < len; i++, j += 4)
|
||||
output[i] = ((uint32_t) input[j]) | (((uint32_t) input[j + 1]) << 8) |
|
||||
(((uint32_t) input[j + 2]) << 16) | (((uint32_t) input[j + 3]) << 24);
|
||||
}
|
||||
|
||||
// encodes input (uint32_t) into output (unsigned char). Assumes len is
|
||||
// a multiple of 4.
|
||||
void
|
||||
encode(uint8_t output[], const uint32_t input[], size_type len)
|
||||
{
|
||||
for(size_type i = 0, j = 0; j < len; i++, j += 4)
|
||||
{
|
||||
output[j] = input[i] & 0xff;
|
||||
output[j + 1] = (input[i] >> 8) & 0xff;
|
||||
output[j + 2] = (input[i] >> 16) & 0xff;
|
||||
output[j + 3] = (input[i] >> 24) & 0xff;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// apply md5sum algo on a block
|
||||
void
|
||||
md5sum::transform(const uint8_t block[blocksize])
|
||||
{
|
||||
uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16];
|
||||
decode(x, block, blocksize);
|
||||
|
||||
/* Round 1 */
|
||||
FF(a, b, c, d, x[0], S11, 0xd76aa478); /* 1 */
|
||||
FF(d, a, b, c, x[1], S12, 0xe8c7b756); /* 2 */
|
||||
FF(c, d, a, b, x[2], S13, 0x242070db); /* 3 */
|
||||
FF(b, c, d, a, x[3], S14, 0xc1bdceee); /* 4 */
|
||||
FF(a, b, c, d, x[4], S11, 0xf57c0faf); /* 5 */
|
||||
FF(d, a, b, c, x[5], S12, 0x4787c62a); /* 6 */
|
||||
FF(c, d, a, b, x[6], S13, 0xa8304613); /* 7 */
|
||||
FF(b, c, d, a, x[7], S14, 0xfd469501); /* 8 */
|
||||
FF(a, b, c, d, x[8], S11, 0x698098d8); /* 9 */
|
||||
FF(d, a, b, c, x[9], S12, 0x8b44f7af); /* 10 */
|
||||
FF(c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
|
||||
FF(b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
|
||||
FF(a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
|
||||
FF(d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
|
||||
FF(c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
|
||||
FF(b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
|
||||
|
||||
/* Round 2 */
|
||||
GG(a, b, c, d, x[1], S21, 0xf61e2562); /* 17 */
|
||||
GG(d, a, b, c, x[6], S22, 0xc040b340); /* 18 */
|
||||
GG(c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
|
||||
GG(b, c, d, a, x[0], S24, 0xe9b6c7aa); /* 20 */
|
||||
GG(a, b, c, d, x[5], S21, 0xd62f105d); /* 21 */
|
||||
GG(d, a, b, c, x[10], S22, 0x2441453); /* 22 */
|
||||
GG(c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
|
||||
GG(b, c, d, a, x[4], S24, 0xe7d3fbc8); /* 24 */
|
||||
GG(a, b, c, d, x[9], S21, 0x21e1cde6); /* 25 */
|
||||
GG(d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
|
||||
GG(c, d, a, b, x[3], S23, 0xf4d50d87); /* 27 */
|
||||
GG(b, c, d, a, x[8], S24, 0x455a14ed); /* 28 */
|
||||
GG(a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
|
||||
GG(d, a, b, c, x[2], S22, 0xfcefa3f8); /* 30 */
|
||||
GG(c, d, a, b, x[7], S23, 0x676f02d9); /* 31 */
|
||||
GG(b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
|
||||
|
||||
/* Round 3 */
|
||||
HH(a, b, c, d, x[5], S31, 0xfffa3942); /* 33 */
|
||||
HH(d, a, b, c, x[8], S32, 0x8771f681); /* 34 */
|
||||
HH(c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
|
||||
HH(b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
|
||||
HH(a, b, c, d, x[1], S31, 0xa4beea44); /* 37 */
|
||||
HH(d, a, b, c, x[4], S32, 0x4bdecfa9); /* 38 */
|
||||
HH(c, d, a, b, x[7], S33, 0xf6bb4b60); /* 39 */
|
||||
HH(b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
|
||||
HH(a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
|
||||
HH(d, a, b, c, x[0], S32, 0xeaa127fa); /* 42 */
|
||||
HH(c, d, a, b, x[3], S33, 0xd4ef3085); /* 43 */
|
||||
HH(b, c, d, a, x[6], S34, 0x4881d05); /* 44 */
|
||||
HH(a, b, c, d, x[9], S31, 0xd9d4d039); /* 45 */
|
||||
HH(d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
|
||||
HH(c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
|
||||
HH(b, c, d, a, x[2], S34, 0xc4ac5665); /* 48 */
|
||||
|
||||
/* Round 4 */
|
||||
II(a, b, c, d, x[0], S41, 0xf4292244); /* 49 */
|
||||
II(d, a, b, c, x[7], S42, 0x432aff97); /* 50 */
|
||||
II(c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
|
||||
II(b, c, d, a, x[5], S44, 0xfc93a039); /* 52 */
|
||||
II(a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
|
||||
II(d, a, b, c, x[3], S42, 0x8f0ccc92); /* 54 */
|
||||
II(c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
|
||||
II(b, c, d, a, x[1], S44, 0x85845dd1); /* 56 */
|
||||
II(a, b, c, d, x[8], S41, 0x6fa87e4f); /* 57 */
|
||||
II(d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
|
||||
II(c, d, a, b, x[6], S43, 0xa3014314); /* 59 */
|
||||
II(b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
|
||||
II(a, b, c, d, x[4], S41, 0xf7537e82); /* 61 */
|
||||
II(d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
|
||||
II(c, d, a, b, x[2], S43, 0x2ad7d2bb); /* 63 */
|
||||
II(b, c, d, a, x[9], S44, 0xeb86d391); /* 64 */
|
||||
|
||||
state[0] += a;
|
||||
state[1] += b;
|
||||
state[2] += c;
|
||||
state[3] += d;
|
||||
|
||||
// Zeroize sensitive information.
|
||||
memset(x, 0, sizeof x);
|
||||
}
|
||||
|
||||
md5sum&
|
||||
md5sum::update(std::string_view inp)
|
||||
{
|
||||
return update(inp.data(), inp.length());
|
||||
}
|
||||
|
||||
// md5sum block update operation. Continues an md5sum message-digest
|
||||
// operation, processing another message block
|
||||
md5sum&
|
||||
md5sum::update(const unsigned char input[], size_type length)
|
||||
{
|
||||
// compute number of bytes mod 64
|
||||
size_type index = count[0] / 8 % blocksize;
|
||||
|
||||
// Update number of bits
|
||||
if((count[0] += (length << 3)) < (length << 3)) count[1]++;
|
||||
count[1] += (length >> 29);
|
||||
|
||||
// number of bytes we need to fill in buffer
|
||||
size_type firstpart = 64 - index;
|
||||
size_type i = 0;
|
||||
|
||||
// transform as many times as possible.
|
||||
if(length >= firstpart)
|
||||
{
|
||||
// fill buffer first, transform
|
||||
memcpy(&buffer[index], input, firstpart);
|
||||
transform(buffer.data());
|
||||
|
||||
// transform chunks of blocksize (64 bytes)
|
||||
for(i = firstpart; i + blocksize <= length; i += blocksize)
|
||||
transform(&input[i]);
|
||||
|
||||
index = 0;
|
||||
}
|
||||
|
||||
// buffer remaining input
|
||||
memcpy(&buffer[index], &input[i], length - i);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
// for convenience provide a verson with signed char
|
||||
md5sum&
|
||||
md5sum::update(const char input[], size_type length)
|
||||
{
|
||||
return update((const unsigned char*) input, length);
|
||||
}
|
||||
|
||||
// md5sum finalization. Ends an md5sum message-digest operation, writing the
|
||||
// the message digest and zeroizing the context.
|
||||
md5sum&
|
||||
md5sum::finalize()
|
||||
{
|
||||
static unsigned char padding[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
|
||||
if(!finalized)
|
||||
{
|
||||
// Save number of bits
|
||||
unsigned char bits[8];
|
||||
encode(bits, count.data(), 8);
|
||||
|
||||
// pad out to 56 mod 64.
|
||||
size_type index = count[0] / 8 % 64;
|
||||
size_type padLen = (index < 56) ? (56 - index) : (120 - index);
|
||||
update(padding, padLen);
|
||||
|
||||
// Append length (before padding)
|
||||
update(bits, 8);
|
||||
|
||||
// Store state in digest
|
||||
encode(digest.data(), state.data(), 16);
|
||||
|
||||
// Zeroize sensitive information.
|
||||
memset(buffer.data(), 0, sizeof buffer);
|
||||
memset(count.data(), 0, sizeof count);
|
||||
|
||||
finalized = true;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
// return hex representation of digest as string
|
||||
std::string
|
||||
md5sum::hexdigest() const
|
||||
{
|
||||
if(!finalized) return std::string{};
|
||||
|
||||
char buf[33];
|
||||
for(int i = 0; i < 16; i++)
|
||||
snprintf(buf + i * 2, 3, "%02x", digest[i]);
|
||||
buf[32] = '\0';
|
||||
|
||||
return std::string(buf);
|
||||
}
|
||||
|
||||
std::string
|
||||
md5sum::hexliteral() const
|
||||
{
|
||||
if(!finalized) return std::string{};
|
||||
|
||||
auto _oss = std::ostringstream{};
|
||||
_oss << "X'";
|
||||
for(auto itr : rawdigest())
|
||||
_oss << std::hex << std::setw(2) << std::setfill('0') << static_cast<int>(itr);
|
||||
_oss << "'";
|
||||
return _oss.str();
|
||||
}
|
||||
|
||||
std::ostream&
|
||||
operator<<(std::ostream& out, md5sum md5)
|
||||
{
|
||||
return out << md5.hexdigest();
|
||||
}
|
||||
|
||||
std::string
|
||||
compute_md5sum(std::string_view inp)
|
||||
{
|
||||
return md5sum{ inp }.finalize().hexdigest();
|
||||
}
|
||||
|
||||
} // namespace common
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,83 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
inline namespace common
|
||||
{
|
||||
namespace traits
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T>
|
||||
struct is_string_literal_impl : std::false_type
|
||||
{};
|
||||
|
||||
template <>
|
||||
struct is_string_literal_impl<std::string_view> : std::true_type
|
||||
{};
|
||||
|
||||
template <>
|
||||
struct is_string_literal_impl<const char*> : std::true_type
|
||||
{};
|
||||
|
||||
template <>
|
||||
struct is_string_literal_impl<char*> : std::true_type
|
||||
{};
|
||||
|
||||
template <>
|
||||
struct is_string_literal_impl<std::string> : std::true_type
|
||||
{};
|
||||
|
||||
template <typename T>
|
||||
inline constexpr bool is_string_literal_impl_v = is_string_literal_impl<T>::value;
|
||||
|
||||
} // namespace
|
||||
|
||||
template <typename T>
|
||||
constexpr bool
|
||||
is_string_literal()
|
||||
{
|
||||
using Tp = std::decay_t<T>;
|
||||
return is_string_literal_impl_v<Tp>;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct is_optional : std::false_type
|
||||
{};
|
||||
|
||||
template <typename T>
|
||||
struct is_optional<std::optional<T>> : std::true_type
|
||||
{};
|
||||
|
||||
template <typename T>
|
||||
inline constexpr bool is_optional_v = is_optional<T>::value;
|
||||
|
||||
} // namespace traits
|
||||
} // namespace common
|
||||
} // namespace rocprofsys
|
||||
@@ -6,31 +6,38 @@ configure_file(
|
||||
)
|
||||
|
||||
set(core_sources
|
||||
${CMAKE_CURRENT_LIST_DIR}/agent_manager.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/amd_smi.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/argparse.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/categories.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/config.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/constraint.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/cpu.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/debug.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/dynamic_library.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/exception.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/gpu.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/mproc.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/node_info.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/perf.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/perfetto.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/amd_smi.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/state.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/timemory.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/utility.cpp
|
||||
)
|
||||
|
||||
set(core_headers
|
||||
${CMAKE_CURRENT_LIST_DIR}/agent.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/agent_manager.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/amd_smi.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/argparse.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/categories.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/common.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/concepts.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/config.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/constraint.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/cpu.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/debug.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/dynamic_library.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/exception.hpp
|
||||
@@ -38,11 +45,11 @@ set(core_headers
|
||||
${CMAKE_CURRENT_LIST_DIR}/locking.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/mpi.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/mproc.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/node_info.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/perf.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/perfetto.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/redirect.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/amd_smi.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/state.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/timemory.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/utility.hpp
|
||||
@@ -61,6 +68,7 @@ target_sources(
|
||||
add_subdirectory(binary)
|
||||
add_subdirectory(components)
|
||||
add_subdirectory(containers)
|
||||
add_subdirectory(rocpd)
|
||||
|
||||
target_include_directories(
|
||||
rocprofiler-systems-core-library
|
||||
@@ -95,6 +103,22 @@ target_link_libraries(
|
||||
$<BUILD_INTERFACE:$<IF:$<BOOL:${ROCPROFSYS_BUILD_LTO}>,rocprofiler-systems::rocprofiler-systems-lto,>>
|
||||
)
|
||||
|
||||
file(GLOB ROCPD_SCHEMA_FILES "${CMAKE_CURRENT_LIST_DIR}/rocpd/data_storage/schema/*.sql")
|
||||
|
||||
foreach(_SRC ${ROCPD_SCHEMA_FILES})
|
||||
cmake_path(GET _SRC FILENAME _BASE)
|
||||
configure_file(
|
||||
${_SRC}
|
||||
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/${_BASE}
|
||||
COPYONLY
|
||||
)
|
||||
install(
|
||||
FILES ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/${_BASE}
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}
|
||||
COMPONENT core
|
||||
)
|
||||
endforeach()
|
||||
|
||||
set_target_properties(
|
||||
rocprofiler-systems-core-library
|
||||
PROPERTIES OUTPUT_NAME ${BINARY_NAME_PREFIX}-core
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
# include <amd_smi/amdsmi.h>
|
||||
# include <rocprofiler-sdk/agent.h>
|
||||
#endif
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
|
||||
enum class agent_type : uint8_t
|
||||
{
|
||||
CPU, ///< Agent type is a CPU
|
||||
GPU ///< Agent type is a GPU
|
||||
};
|
||||
|
||||
struct agent
|
||||
{
|
||||
agent_type type;
|
||||
uint64_t id;
|
||||
uint32_t node_id;
|
||||
int32_t logical_node_id;
|
||||
int32_t logical_node_type_id;
|
||||
std::string name;
|
||||
std::string model_name;
|
||||
std::string vendor_name;
|
||||
std::string product_name;
|
||||
|
||||
size_t device_id{ 0 };
|
||||
size_t base_id{ 0 };
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
amdsmi_processor_handle smi_handle = nullptr;
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,137 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "agent_manager.hpp"
|
||||
#include "debug.hpp"
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
|
||||
agent_manager&
|
||||
agent_manager::get_instance()
|
||||
{
|
||||
static agent_manager instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
void
|
||||
agent_manager::insert_agent(agent& _agent)
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(
|
||||
3, "Inserting agent with device handle: %lu, and agent id: %ld, device type: %s",
|
||||
_agent.device_id,
|
||||
(_agent.type == agent_type::GPU ? _gpu_agents_cnt : _cpu_agents_cnt),
|
||||
(_agent.type == agent_type::GPU ? "GPU" : "CPU"));
|
||||
|
||||
_agent.device_id =
|
||||
(_agent.type == agent_type::GPU ? _gpu_agents_cnt++ : _cpu_agents_cnt++);
|
||||
_agents.emplace_back(std::make_shared<agent>(_agent));
|
||||
}
|
||||
|
||||
const agent&
|
||||
agent_manager::get_agent_by_id(size_t device_id, agent_type type) const
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(3, "Getting agent for device id: %ld, type %s\n", device_id,
|
||||
(type == agent_type::GPU) ? "GPU" : "CPU");
|
||||
auto _agent =
|
||||
std::find_if(_agents.begin(), _agents.end(), [&](const auto& agent_ptr) {
|
||||
return agent_ptr->type == type && agent_ptr->device_id == device_id;
|
||||
});
|
||||
if(_agent == _agents.end())
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Agent not found for device id: " << device_id
|
||||
<< ", type: " << (type == agent_type::GPU ? "GPU" : "CPU");
|
||||
throw std::out_of_range(oss.str());
|
||||
}
|
||||
return **_agent;
|
||||
}
|
||||
|
||||
const agent&
|
||||
agent_manager::get_agent_by_handle(uint64_t device_handle, agent_type type) const
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(3, "Getting agent for device handle: %ld, type %s\n",
|
||||
device_handle, (type == agent_type::GPU ? "GPU" : "CPU"));
|
||||
auto _agent =
|
||||
std::find_if(_agents.begin(), _agents.end(), [&](const auto& agent_ptr) {
|
||||
return agent_ptr->type == type && agent_ptr->id == device_handle;
|
||||
});
|
||||
if(_agent == _agents.end())
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Agent not found for device handle: " << device_handle
|
||||
<< ", type: " << (type == agent_type::GPU ? "GPU" : "CPU");
|
||||
throw std::out_of_range(oss.str());
|
||||
}
|
||||
return **_agent;
|
||||
}
|
||||
|
||||
const agent&
|
||||
agent_manager::get_agent_by_handle(size_t device_handle) const
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(3, "Getting agent for device handle: %ld\n", device_handle);
|
||||
auto _agent =
|
||||
std::find_if(_agents.begin(), _agents.end(), [&](const auto& agent_ptr) {
|
||||
return agent_ptr->id == device_handle;
|
||||
});
|
||||
if(_agent == _agents.end())
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Agent not found for device handle: " << device_handle;
|
||||
throw std::out_of_range(oss.str());
|
||||
}
|
||||
return **_agent;
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<agent>>
|
||||
agent_manager::get_agents_by_type(agent_type type) const
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(3, "Getting agent for device type: %s\n",
|
||||
type == agent_type::GPU ? "GPU" : "CPU");
|
||||
|
||||
std::vector<std::shared_ptr<agent>> agents;
|
||||
std::copy_if(std::begin(_agents), std::end(_agents), std::back_inserter(agents),
|
||||
[&type](const auto& agent_ptr) { return agent_ptr->type == type; });
|
||||
return agents;
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<agent>>
|
||||
agent_manager::get_agents() const
|
||||
{
|
||||
return _agents;
|
||||
}
|
||||
|
||||
size_t
|
||||
agent_manager::get_gpu_agents_count() const
|
||||
{
|
||||
return _gpu_agents_cnt;
|
||||
}
|
||||
|
||||
size_t
|
||||
agent_manager::get_cpu_agents_count() const
|
||||
{
|
||||
return _cpu_agents_cnt;
|
||||
}
|
||||
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,62 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "agent.hpp"
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
|
||||
struct agent_manager
|
||||
{
|
||||
static agent_manager& get_instance();
|
||||
|
||||
agent_manager(const agent_manager&) = delete;
|
||||
agent_manager& operator=(const agent_manager&) = delete;
|
||||
agent_manager(agent_manager&&) = delete;
|
||||
agent_manager& operator=(agent_manager&&) = delete;
|
||||
~agent_manager() = default;
|
||||
|
||||
void insert_agent(agent& agent);
|
||||
const agent& get_agent_by_id(size_t device_id, agent_type type) const;
|
||||
const agent& get_agent_by_handle(size_t device_id, agent_type type) const;
|
||||
const agent& get_agent_by_handle(size_t device_handle) const;
|
||||
|
||||
std::vector<std::shared_ptr<agent>> get_agents_by_type(agent_type type) const;
|
||||
|
||||
std::vector<std::shared_ptr<agent>> get_agents() const;
|
||||
|
||||
size_t get_gpu_agents_count() const;
|
||||
size_t get_cpu_agents_count() const;
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<agent>> _agents;
|
||||
size_t _gpu_agents_cnt{ 0 };
|
||||
size_t _cpu_agents_cnt{ 0 };
|
||||
agent_manager() = default;
|
||||
};
|
||||
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,353 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <chrono>
|
||||
#include <cstdlib>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <unistd.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "core/benchmark/category.hpp"
|
||||
#include "core/debug.hpp"
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace benchmark
|
||||
{
|
||||
namespace
|
||||
{
|
||||
template <bool enabled, typename category_enum, category_enum... enabled_categories>
|
||||
struct benchmark_impl
|
||||
{
|
||||
template <category_enum... categories>
|
||||
struct scope
|
||||
{
|
||||
scope(const scope&) = delete;
|
||||
scope& operator=(const scope&) = delete;
|
||||
~scope() = default;
|
||||
|
||||
protected:
|
||||
scope() = default;
|
||||
scope(scope&&) = default;
|
||||
scope& operator=(scope&&) = default;
|
||||
};
|
||||
|
||||
template <category_enum... categories>
|
||||
static void start()
|
||||
{}
|
||||
|
||||
template <category_enum... categories>
|
||||
static void end()
|
||||
{}
|
||||
|
||||
template <category_enum... categories>
|
||||
[[nodiscard]] static scope<categories...> scoped_trace()
|
||||
{
|
||||
return scope<categories...>{};
|
||||
}
|
||||
|
||||
static void init_from_env(const char* = nullptr) {}
|
||||
static void show_results() {}
|
||||
};
|
||||
|
||||
using tid_t = __pid_t;
|
||||
struct indexed_category
|
||||
{
|
||||
size_t category;
|
||||
tid_t thread_id;
|
||||
|
||||
friend bool operator==(const indexed_category& lhs, const indexed_category& rhs)
|
||||
{
|
||||
return lhs.category == rhs.category && lhs.thread_id == rhs.thread_id;
|
||||
}
|
||||
};
|
||||
|
||||
struct indexed_category_hash
|
||||
{
|
||||
size_t operator()(const indexed_category& p) const noexcept
|
||||
{
|
||||
std::size_t hash1 = std::hash<size_t>{}(p.category);
|
||||
std::size_t hash2 = std::hash<size_t>{}(p.thread_id);
|
||||
return hash1 ^ (hash2 << 1);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename category_enum, category_enum... enabled_categories>
|
||||
struct benchmark_impl<true, category_enum, enabled_categories...>
|
||||
{
|
||||
static_assert(std::is_enum_v<category_enum>, "category_enum must be an enum");
|
||||
|
||||
public:
|
||||
using clock = std::chrono::high_resolution_clock;
|
||||
using time_point = clock::time_point;
|
||||
static constexpr size_t _max_categories = static_cast<size_t>(category_enum::count);
|
||||
|
||||
template <category_enum... categories>
|
||||
struct scope
|
||||
{
|
||||
friend benchmark_impl;
|
||||
|
||||
public:
|
||||
scope(const scope&) = delete;
|
||||
scope& operator=(const scope&) = delete;
|
||||
~scope() { end<categories...>(); }
|
||||
|
||||
protected:
|
||||
scope() { start<categories...>(); }
|
||||
|
||||
scope(scope&&) = default;
|
||||
scope& operator=(scope&&) = default;
|
||||
};
|
||||
|
||||
template <category_enum... categories>
|
||||
static void start()
|
||||
{
|
||||
static const thread_local auto _thread_id = gettid();
|
||||
const auto now = clock::now();
|
||||
std::lock_guard lock(m_mutex);
|
||||
(..., (is_category_defined<categories>([&] {
|
||||
if(m_enabled.test(to_index(categories)))
|
||||
m_started[{ to_index(categories), _thread_id }] = now;
|
||||
})));
|
||||
}
|
||||
|
||||
template <category_enum... categories>
|
||||
static void end()
|
||||
{
|
||||
static const thread_local auto _thread_id = getpid();
|
||||
const auto _end_time = clock::now();
|
||||
std::lock_guard lock(m_mutex);
|
||||
(..., (is_category_defined<categories>([&] {
|
||||
if(m_enabled.test(to_index(categories)))
|
||||
end_category(_end_time, categories, _thread_id);
|
||||
})));
|
||||
}
|
||||
|
||||
template <category_enum... categories>
|
||||
[[nodiscard]] static scope<categories...> scoped_trace()
|
||||
{
|
||||
return scope<categories...>{};
|
||||
}
|
||||
|
||||
static void init_from_env(const char* envVar = "ROCPROFSYS_BENCHMARK_CATEGORIES")
|
||||
{
|
||||
std::lock_guard lock(m_mutex);
|
||||
const auto* env = std::getenv(envVar);
|
||||
if(env == nullptr || std::string(env).empty())
|
||||
{
|
||||
ROCPROFSYS_WARNING(1, "No BENCHMARK categories specified in environment "
|
||||
"variable ROCPROFSYS_BENCHMARK_CATEGORIES.\n");
|
||||
return;
|
||||
}
|
||||
std::string _str(env);
|
||||
std::istringstream ss(_str);
|
||||
std::string token;
|
||||
|
||||
while(std::getline(ss, token, ','))
|
||||
{
|
||||
token.erase(0, token.find_first_not_of(" \t"));
|
||||
token.erase(token.find_last_not_of(" \t") + 1);
|
||||
for(category_enum cat : compiledCategories)
|
||||
{
|
||||
if(to_string(cat) == token)
|
||||
{
|
||||
m_enabled.set(to_index(cat));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void show_results()
|
||||
{
|
||||
std::lock_guard lock(m_mutex);
|
||||
std::vector<std::pair<category_enum, result_data>> sorted;
|
||||
|
||||
for(category_enum cat : compiledCategories)
|
||||
{
|
||||
const auto& data = m_results[to_index(cat)];
|
||||
if(data.count > 0)
|
||||
{
|
||||
sorted.emplace_back(cat, data);
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(sorted.begin(), sorted.end(), [](const auto& a, const auto& b) {
|
||||
return a.second.total_time > b.second.total_time;
|
||||
});
|
||||
|
||||
constexpr uint32_t _category = 30;
|
||||
constexpr uint32_t _calls = 8;
|
||||
constexpr uint32_t _total = 12;
|
||||
constexpr uint32_t _avg = 10;
|
||||
constexpr uint32_t _min = 10;
|
||||
constexpr uint32_t _max = 10;
|
||||
|
||||
std::cout << "\033[32m"
|
||||
<< std::string(_category + _calls + _total + _avg + _min + _max, '=')
|
||||
<< "\n";
|
||||
std::cout << "Benchmark Results (Sorted by Total Time):\n";
|
||||
std::cout << std::string(_category + _calls + _total + _avg + _min + _max, '-')
|
||||
<< "\n";
|
||||
std::cout << std::left << std::setw(_category) << "Category" << std::right
|
||||
<< std::setw(_calls) << "Calls" << std::setw(_total) << "Total(ms)"
|
||||
<< std::setw(_avg) << "Avg(us)" << std::setw(_min) << "Min(us)"
|
||||
<< std::setw(_max) << "Max(us)" << "\n";
|
||||
|
||||
std::cout << std::string(_category + _calls + _total + _avg + _min + _max, '-')
|
||||
<< "\n";
|
||||
|
||||
for(const auto& [cat, data] : sorted)
|
||||
{
|
||||
double totalMs = static_cast<double>(data.total_time) / 1000.0;
|
||||
double avgUs = static_cast<double>(data.total_time) / data.count;
|
||||
|
||||
std::cout << std::left << std::setw(_category) << to_string(cat) << std::right
|
||||
<< std::setw(_calls) << data.count << std::setw(_total)
|
||||
<< std::fixed << std::setprecision(3) << totalMs << std::setw(_avg)
|
||||
<< std::fixed << std::setprecision(1) << avgUs << std::setw(_min)
|
||||
<< data.min_time << std::setw(_max) << data.max_time << "\n";
|
||||
}
|
||||
|
||||
std::cout << std::string(_category + _calls + _total + _avg + _min + _max, '=')
|
||||
<< "\033[0m" << "\n\n";
|
||||
}
|
||||
|
||||
private:
|
||||
struct result_data
|
||||
{
|
||||
uint64_t total_time = 0;
|
||||
size_t count = 0;
|
||||
uint64_t min_time = std::numeric_limits<uint64_t>::max();
|
||||
uint64_t max_time = std::numeric_limits<uint64_t>::min();
|
||||
|
||||
void update(uint64_t duration)
|
||||
{
|
||||
total_time += duration;
|
||||
count += 1;
|
||||
if(duration < min_time) min_time = duration;
|
||||
if(duration > max_time) max_time = duration;
|
||||
}
|
||||
};
|
||||
|
||||
static constexpr size_t to_index(category_enum cat)
|
||||
{
|
||||
return static_cast<size_t>(cat);
|
||||
}
|
||||
|
||||
static void end_category(const time_point& end_time, category_enum cat,
|
||||
const tid_t thread_id)
|
||||
{
|
||||
const size_t _idx = to_index(cat);
|
||||
auto _it = m_started.find({ _idx, thread_id });
|
||||
if(_it == m_started.end())
|
||||
{
|
||||
ROCPROFSYS_WARNING(1, "Benchmark error: missing start time for category!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
auto duration =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(end_time - _it->second)
|
||||
.count();
|
||||
m_started.erase(_it);
|
||||
m_results[_idx].update(duration);
|
||||
}
|
||||
|
||||
template <category_enum Cat, typename Func>
|
||||
static constexpr void is_category_defined(Func&& f)
|
||||
{
|
||||
if constexpr(((Cat == enabled_categories) || ...))
|
||||
{
|
||||
f();
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr std::array<category_enum, sizeof...(enabled_categories)>
|
||||
compiledCategories = { enabled_categories... };
|
||||
|
||||
static inline std::unordered_map<indexed_category, time_point, indexed_category_hash>
|
||||
m_started;
|
||||
static inline std::array<result_data, _max_categories> m_results{};
|
||||
static inline std::bitset<_max_categories> m_enabled;
|
||||
static inline std::mutex m_mutex;
|
||||
};
|
||||
|
||||
#ifdef ROCPROFSYS_ENABLE_BENCHMARK
|
||||
using _benchmark_impl = benchmark::benchmark_impl<
|
||||
static_cast<bool>(ROCPROFSYS_ENABLE_BENCHMARK), benchmark::category,
|
||||
benchmark::category::kernel_dispatch, benchmark::category::memory_copy,
|
||||
benchmark::category::memory_allocate, benchmark::category::db_entry_kernel_dispatch,
|
||||
benchmark::category::db_entry_memory_copy,
|
||||
benchmark::category::db_entry_memory_allocate,
|
||||
benchmark::category::perfetto_kernel_dispatch,
|
||||
benchmark::category::sdk_tool_buffered_tracing>;
|
||||
#else
|
||||
using _benchmark_impl = benchmark::benchmark_impl<false, benchmark::category>;
|
||||
#endif
|
||||
} // namespace
|
||||
|
||||
template <category... categories>
|
||||
void
|
||||
start()
|
||||
{
|
||||
_benchmark_impl::template start<categories...>();
|
||||
}
|
||||
|
||||
template <category... categories>
|
||||
void
|
||||
end()
|
||||
{
|
||||
_benchmark_impl::template end<categories...>();
|
||||
}
|
||||
|
||||
template <category... categories>
|
||||
[[nodiscard]] auto
|
||||
scoped_trace()
|
||||
{
|
||||
return _benchmark_impl::template scoped_trace<categories...>();
|
||||
}
|
||||
|
||||
inline void
|
||||
init_from_env(const char* envVar = "BENCHMARK_CATEGORIES")
|
||||
{
|
||||
_benchmark_impl::init_from_env(envVar);
|
||||
}
|
||||
|
||||
inline void
|
||||
show_results()
|
||||
{
|
||||
_benchmark_impl::show_results();
|
||||
}
|
||||
|
||||
} // namespace benchmark
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,68 @@
|
||||
// Copyright (c) 2018-2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string_view>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace benchmark
|
||||
{
|
||||
enum class category
|
||||
{
|
||||
kernel_dispatch,
|
||||
db_entry_kernel_dispatch,
|
||||
memory_copy,
|
||||
db_entry_memory_copy,
|
||||
memory_allocate,
|
||||
db_entry_memory_allocate,
|
||||
perfetto_kernel_dispatch,
|
||||
sdk_tool_buffered_tracing,
|
||||
count
|
||||
};
|
||||
|
||||
constexpr std::string_view
|
||||
to_string(category cat)
|
||||
{
|
||||
switch(cat)
|
||||
{
|
||||
case category::kernel_dispatch: return "kernel_dispatch";
|
||||
case category::db_entry_kernel_dispatch: return "db_entry_kernel_dispatch";
|
||||
case category::memory_copy: return "memory_copy";
|
||||
case category::memory_allocate: return "memory_allocate";
|
||||
case category::db_entry_memory_copy: return "db_entry_memory_copy";
|
||||
case category::db_entry_memory_allocate: return "db_entry_memory_allocate";
|
||||
case category::perfetto_kernel_dispatch: return "perfetto_kernel_dispatch";
|
||||
case category::sdk_tool_buffered_tracing: return "sdk_tool_buffered_tracing";
|
||||
default: return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace benchmark
|
||||
} // namespace rocprofsys
|
||||
@@ -312,6 +312,9 @@ configure_settings(bool _init)
|
||||
"Enable causal profiling analysis", false, "backend",
|
||||
"causal", "analysis");
|
||||
|
||||
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_ROCPD", "Enable rocpd backend", false,
|
||||
"backend", "rocpd");
|
||||
|
||||
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_ROCM",
|
||||
"Enable ROCm API and kernel tracing", true, "backend",
|
||||
"rocm");
|
||||
@@ -1246,10 +1249,17 @@ configure_signal_handler(const std::shared_ptr<settings>& _config)
|
||||
static auto _dyninst_trampoline_signal =
|
||||
getenv("DYNINST_SIGNAL_TRAMPOLINE_SIGILL") ? SIGILL : SIGTRAP;
|
||||
|
||||
static auto root_pid =
|
||||
get_env<pid_t>("ROCPROFSYS_ROOT_PROCESS", process::get_id(), false);
|
||||
if(_config->get_enable_signal_handler())
|
||||
{
|
||||
tim::signals::disable_signal_detection();
|
||||
signal_settings::enable(sys_signal::Interrupt);
|
||||
auto is_child_process = root_pid != getpid();
|
||||
if(is_child_process)
|
||||
{
|
||||
signal_settings::enable(sys_signal::Terminate);
|
||||
}
|
||||
signal_settings::set_exit_action(rocprofsys_exit_action);
|
||||
signal_settings::check_environment();
|
||||
auto default_signals = signal_settings::get_default();
|
||||
@@ -2347,6 +2357,40 @@ get_tmpdir()
|
||||
return static_cast<tim::tsettings<std::string>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
std::string
|
||||
get_database_absolute_path(std::string_view database_name)
|
||||
{
|
||||
const auto* _existing_path = std::getenv("ROCPROFSYS_DATABASE_DIR");
|
||||
auto _dir = _existing_path ? std::string{ _existing_path } : std::string{};
|
||||
auto _ext = std::string{ "db" };
|
||||
|
||||
auto _cfg = settings::compose_filename_config{ settings::use_output_suffix(),
|
||||
settings::default_process_suffix(),
|
||||
false, _dir };
|
||||
|
||||
const auto get_path = [](const std::string& path) {
|
||||
size_t last_slash = path.find_last_of("/\\");
|
||||
return (last_slash != std::string::npos) ? path.substr(0, last_slash + 1)
|
||||
: std::string{};
|
||||
};
|
||||
|
||||
auto _val = settings::compose_output_filename(std::string(database_name), _ext, _cfg);
|
||||
_dir = get_path(_val);
|
||||
|
||||
setenv("ROCPROFSYS_DATABASE_DIR", _dir.c_str(), 1);
|
||||
|
||||
if(!_val.empty() && _val.at(0) != '/')
|
||||
return settings::format(JOIN('/', "%env{PWD}%", _val), get_config()->get_tag());
|
||||
return _val;
|
||||
}
|
||||
|
||||
bool&
|
||||
get_use_rocpd()
|
||||
{
|
||||
static auto _v = get_config()->at("ROCPROFSYS_USE_ROCPD");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v).get();
|
||||
}
|
||||
|
||||
tmp_file::tmp_file(std::string _v)
|
||||
: filename{ std::move(_v) }
|
||||
{}
|
||||
|
||||
@@ -358,6 +358,12 @@ get_use_tmp_files();
|
||||
std::string
|
||||
get_tmpdir();
|
||||
|
||||
std::string
|
||||
get_database_absolute_path(std::string_view database_name);
|
||||
|
||||
bool&
|
||||
get_use_rocpd() ROCPROFSYS_HOT;
|
||||
|
||||
struct tmp_file
|
||||
{
|
||||
tmp_file(std::string);
|
||||
|
||||
@@ -0,0 +1,168 @@
|
||||
#include "cpu.hpp"
|
||||
#include "agent_manager.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace cpu
|
||||
{
|
||||
std::vector<cpu_info>
|
||||
process_cpu_info_data()
|
||||
{
|
||||
std::vector<cpu_info> cpu_data;
|
||||
std::ifstream cpuinfo_file("/proc/cpuinfo");
|
||||
|
||||
if(!cpuinfo_file.is_open())
|
||||
{
|
||||
return cpu_data;
|
||||
}
|
||||
|
||||
std::string line;
|
||||
cpu_info current_cpu;
|
||||
bool has_processor_entry = false;
|
||||
|
||||
auto parse_long = [](const std::string& value) -> long {
|
||||
try
|
||||
{
|
||||
return std::stol(value);
|
||||
} catch(const std::exception&)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
auto trim_whitespace = [](const std::string& str) -> std::string {
|
||||
size_t start = str.find_first_not_of(" \t");
|
||||
if(start == std::string::npos) return "";
|
||||
size_t end = str.find_last_not_of(" \t");
|
||||
return str.substr(start, end - start + 1);
|
||||
};
|
||||
|
||||
static const std::unordered_map<std::string,
|
||||
std::function<void(cpu_info&, const std::string&)>>
|
||||
field_parsers = {
|
||||
{ "processor",
|
||||
[&parse_long](cpu_info& cpu, const std::string& val) {
|
||||
cpu.processor = parse_long(val);
|
||||
} },
|
||||
{ "cpu family",
|
||||
[&parse_long](cpu_info& cpu, const std::string& val) {
|
||||
cpu.family = parse_long(val);
|
||||
} },
|
||||
{ "model",
|
||||
[&parse_long](cpu_info& cpu, const std::string& val) {
|
||||
cpu.model = parse_long(val);
|
||||
} },
|
||||
{ "physical id",
|
||||
[&parse_long](cpu_info& cpu, const std::string& val) {
|
||||
cpu.physical_id = parse_long(val);
|
||||
} },
|
||||
{ "core id",
|
||||
[&parse_long](cpu_info& cpu, const std::string& val) {
|
||||
cpu.core_id = parse_long(val);
|
||||
} },
|
||||
{ "apicid",
|
||||
[&parse_long](cpu_info& cpu, const std::string& val) {
|
||||
cpu.apicid = parse_long(val);
|
||||
} },
|
||||
{ "vendor_id",
|
||||
[](cpu_info& cpu, const std::string& val) { cpu.vendor_id = val; } },
|
||||
{ "model name",
|
||||
[](cpu_info& cpu, const std::string& val) { cpu.model_name = val; } }
|
||||
};
|
||||
|
||||
while(std::getline(cpuinfo_file, line))
|
||||
{
|
||||
if(line.empty())
|
||||
{
|
||||
if(has_processor_entry)
|
||||
{
|
||||
cpu_data.push_back(current_cpu);
|
||||
return cpu_data; // Return immediately after first core
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t colon_pos = line.find(':');
|
||||
if(colon_pos == std::string::npos)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string key = trim_whitespace(line.substr(0, colon_pos));
|
||||
std::string value = trim_whitespace(line.substr(colon_pos + 1));
|
||||
|
||||
std::transform(key.begin(), key.end(), key.begin(), ::tolower);
|
||||
|
||||
auto it = field_parsers.find(key);
|
||||
if(it != field_parsers.end())
|
||||
{
|
||||
it->second(current_cpu, value);
|
||||
if(key == "processor")
|
||||
{
|
||||
has_processor_entry = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(has_processor_entry)
|
||||
{
|
||||
cpu_data.push_back(current_cpu);
|
||||
}
|
||||
|
||||
return cpu_data;
|
||||
}
|
||||
|
||||
std::vector<cpu_info>
|
||||
get_cpu_info()
|
||||
{
|
||||
static auto _v = process_cpu_info_data();
|
||||
return _v;
|
||||
}
|
||||
|
||||
size_t
|
||||
device_count()
|
||||
{
|
||||
auto cpu_data = get_cpu_info();
|
||||
return cpu_data.size();
|
||||
}
|
||||
|
||||
void
|
||||
query_cpu_agents()
|
||||
{
|
||||
int32_t id_count = 0;
|
||||
uint32_t node_count = 0;
|
||||
uint32_t cpu_count = 0;
|
||||
|
||||
if(device_count() == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
auto& _agent_manager = agent_manager::get_instance();
|
||||
auto cpu_data = get_cpu_info();
|
||||
|
||||
for(auto& cpu : cpu_data)
|
||||
{
|
||||
auto node_id = node_count++;
|
||||
auto logical_id = id_count++;
|
||||
auto id = cpu_count++;
|
||||
auto cur_agent = agent{ agent_type::CPU,
|
||||
id,
|
||||
node_id,
|
||||
logical_id,
|
||||
static_cast<int32_t>(id),
|
||||
cpu.model_name,
|
||||
cpu.model_name,
|
||||
cpu.vendor_id,
|
||||
"" };
|
||||
_agent_manager.insert_agent(cur_agent);
|
||||
}
|
||||
}
|
||||
} // namespace cpu
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,57 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace cpu
|
||||
{
|
||||
|
||||
struct cpu_info
|
||||
{
|
||||
long processor = -1;
|
||||
long family = -1;
|
||||
long model = -1;
|
||||
long physical_id = -1;
|
||||
long core_id = -1;
|
||||
long apicid = -1;
|
||||
std::string vendor_id = {};
|
||||
std::string model_name = {};
|
||||
};
|
||||
|
||||
std::vector<cpu_info>
|
||||
process_cpu_info_data();
|
||||
|
||||
std::vector<cpu_info>
|
||||
get_cpu_info();
|
||||
|
||||
size_t
|
||||
device_count();
|
||||
|
||||
void
|
||||
query_cpu_agents();
|
||||
|
||||
} // namespace cpu
|
||||
} // namespace rocprofsys
|
||||
@@ -20,6 +20,7 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "agent.hpp"
|
||||
#define ROCPROFILER_SDK_CEREAL_NAMESPACE_BEGIN \
|
||||
namespace tim \
|
||||
{ \
|
||||
@@ -41,6 +42,10 @@
|
||||
|
||||
#include <timemory/manager.hpp>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "core/agent_manager.hpp"
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
# include <amd_smi/amdsmi.h>
|
||||
# include <rocprofiler-sdk/agent.h>
|
||||
@@ -108,18 +113,31 @@ amdsmi_init()
|
||||
}
|
||||
#endif // ROCPROFSYS_USE_ROCM > 0
|
||||
|
||||
int32_t
|
||||
query_rocm_gpu_agents()
|
||||
size_t
|
||||
query_rocm_agents()
|
||||
{
|
||||
int32_t _dev_cnt = 0;
|
||||
size_t _dev_cnt = 0;
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto iterator = [](rocprofiler_agent_version_t /*version*/, const void** agents,
|
||||
size_t num_agents, void* user_data) -> rocprofiler_status_t {
|
||||
auto* _cnt = static_cast<int32_t*>(user_data);
|
||||
auto iterator = []([[maybe_unused]] rocprofiler_agent_version_t version,
|
||||
const void** agents, size_t num_agents,
|
||||
[[maybe_unused]] void* user_data) -> rocprofiler_status_t {
|
||||
auto& _agent_manager = agent_manager::get_instance();
|
||||
for(size_t i = 0; i < num_agents; ++i)
|
||||
{
|
||||
const auto* _agent = static_cast<const rocprofiler_agent_v0_t*>(agents[i]);
|
||||
if(_agent && _agent->type == ROCPROFILER_AGENT_TYPE_GPU) *_cnt += 1;
|
||||
const auto* _agent = static_cast<const rocprofiler_agent_v0_t*>(agents[i]);
|
||||
auto cur_agent = agent{
|
||||
(_agent->type == ROCPROFILER_AGENT_TYPE_GPU ? agent_type::GPU
|
||||
: agent_type::CPU),
|
||||
_agent->device_id,
|
||||
_agent->node_id,
|
||||
_agent->logical_node_id,
|
||||
_agent->logical_node_type_id,
|
||||
std::string(_agent->name),
|
||||
std::string(_agent->vendor_name),
|
||||
std::string(_agent->product_name),
|
||||
std::string(_agent->model_name),
|
||||
};
|
||||
_agent_manager.insert_agent(cur_agent);
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
};
|
||||
@@ -127,15 +145,14 @@ query_rocm_gpu_agents()
|
||||
try
|
||||
{
|
||||
rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, iterator,
|
||||
sizeof(rocprofiler_agent_v0_t), &_dev_cnt);
|
||||
sizeof(rocprofiler_agent_v0_t), nullptr);
|
||||
} catch(std::exception& _e)
|
||||
{
|
||||
ROCPROFSYS_BASIC_VERBOSE(
|
||||
1, "Exception thrown getting the rocm agents: %s. _dev_cnt=%d\n", _e.what(),
|
||||
1, "Exception thrown getting the rocm agents: %s. _dev_cnt=%ld\n", _e.what(),
|
||||
_dev_cnt);
|
||||
}
|
||||
// rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, iterator,
|
||||
// sizeof(rocprofiler_agent_v0_t), &_dev_cnt);
|
||||
_dev_cnt = agent_manager::get_instance().get_gpu_agents_count();
|
||||
#endif
|
||||
return _dev_cnt;
|
||||
}
|
||||
@@ -145,7 +162,7 @@ int
|
||||
device_count()
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
static int _num_devices = query_rocm_gpu_agents();
|
||||
static int _num_devices = query_rocm_agents();
|
||||
return _num_devices;
|
||||
#else
|
||||
return 0;
|
||||
@@ -174,20 +191,31 @@ add_device_metadata(ArchiveT& ar)
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
using agent_vec_t = std::vector<rocprofiler_agent_v0_t>;
|
||||
|
||||
auto _agents_vec = agent_vec_t{};
|
||||
auto iterator = [](rocprofiler_agent_version_t /*version*/, const void** agents,
|
||||
size_t num_agents, void* user_data) -> rocprofiler_status_t {
|
||||
auto* _agents_vec_v = static_cast<agent_vec_t*>(user_data);
|
||||
_agents_vec_v->reserve(num_agents);
|
||||
auto iterator_cb = []([[maybe_unused]] rocprofiler_agent_version_t version,
|
||||
const void** agents, size_t num_agents,
|
||||
[[maybe_unused]] void* user_data) -> rocprofiler_status_t {
|
||||
auto* agents_vec = static_cast<agent_vec_t*>(user_data);
|
||||
for(size_t i = 0; i < num_agents; ++i)
|
||||
{
|
||||
const auto* _agent = static_cast<const rocprofiler_agent_v0_t*>(agents[i]);
|
||||
if(_agent) _agents_vec_v->emplace_back(*_agent);
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
{
|
||||
agents_vec->push_back(*_agent);
|
||||
}
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
};
|
||||
rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, iterator,
|
||||
sizeof(rocprofiler_agent_v0_t), &_agents_vec);
|
||||
|
||||
auto _agents_vec = agent_vec_t{};
|
||||
try
|
||||
{
|
||||
rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, iterator_cb,
|
||||
sizeof(rocprofiler_agent_v0_t), &_agents_vec);
|
||||
} catch(std::exception& _e)
|
||||
{
|
||||
ROCPROFSYS_BASIC_VERBOSE(1, "Exception thrown getting the rocm agents: %s.\n",
|
||||
_e.what());
|
||||
}
|
||||
|
||||
ar(make_nvp("rocm_agents", _agents_vec));
|
||||
#else
|
||||
@@ -228,6 +256,7 @@ get_processor_handles()
|
||||
{
|
||||
uint32_t socket_count;
|
||||
uint32_t processor_count;
|
||||
processors::processors_list.clear();
|
||||
|
||||
// Passing nullptr will return us the number of sockets available for read in this
|
||||
// system
|
||||
|
||||
@@ -0,0 +1,72 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "node_info.hpp"
|
||||
#include "debug.hpp"
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <sys/utsname.h>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
|
||||
node_info::node_info()
|
||||
{
|
||||
auto ifs = std::ifstream{ "/etc/machine-id" };
|
||||
if(!ifs.is_open())
|
||||
{
|
||||
ROCPROFSYS_WARNING(0, "Error: Unable to open /etc/machine-id!");
|
||||
return;
|
||||
}
|
||||
if(!(ifs >> machine_id) || machine_id.empty())
|
||||
{
|
||||
ROCPROFSYS_WARNING(0, "Error: Unable to read machine ID from /etc/machine-id!");
|
||||
}
|
||||
|
||||
hash = std::hash<std::string>{}(machine_id) % std::numeric_limits<int64_t>::max();
|
||||
id = hash % std::numeric_limits<size_t>::max();
|
||||
|
||||
struct utsname _sys_info;
|
||||
if(uname(&_sys_info))
|
||||
{
|
||||
ROCPROFSYS_WARNING(0, "Error: Unable to get system information!");
|
||||
return;
|
||||
}
|
||||
|
||||
system_name = _sys_info.sysname;
|
||||
node_name = _sys_info.nodename;
|
||||
release = _sys_info.release;
|
||||
version = _sys_info.version;
|
||||
machine = _sys_info.machine;
|
||||
domain_name = _sys_info.domainname;
|
||||
}
|
||||
|
||||
node_info&
|
||||
node_info::get_instance()
|
||||
{
|
||||
static node_info instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,58 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
|
||||
struct node_info
|
||||
{
|
||||
private:
|
||||
node_info();
|
||||
|
||||
public:
|
||||
~node_info() = default;
|
||||
node_info(const node_info&) = default;
|
||||
node_info(node_info&&) noexcept = default;
|
||||
node_info& operator=(const node_info&) = default;
|
||||
node_info& operator=(node_info&&) noexcept = default;
|
||||
|
||||
static node_info& get_instance();
|
||||
|
||||
uint64_t id = 0;
|
||||
uint64_t hash = 0;
|
||||
std::string machine_id = {};
|
||||
std::string system_name = {};
|
||||
std::string node_name = {};
|
||||
std::string release = {};
|
||||
std::string version = {};
|
||||
std::string machine = {};
|
||||
std::string domain_name = {};
|
||||
};
|
||||
|
||||
const node_info&
|
||||
get_node_info();
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,13 @@
|
||||
set(rocpd_sources
|
||||
${CMAKE_CURRENT_LIST_DIR}/data_processor.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/json.cpp
|
||||
)
|
||||
|
||||
set(rocpd_headers
|
||||
${CMAKE_CURRENT_LIST_DIR}/data_processor.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/json.hpp
|
||||
)
|
||||
|
||||
target_sources(rocprofiler-systems-core-library PRIVATE ${rocpd_sources} ${rocpd_headers})
|
||||
|
||||
add_subdirectory(data_storage)
|
||||
@@ -0,0 +1,674 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "data_processor.hpp"
|
||||
#include "core/rocpd/data_storage/database.hpp"
|
||||
#include "core/rocpd/data_storage/table_insert_query.hpp"
|
||||
#include "debug.hpp"
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace rocpd
|
||||
{
|
||||
data_processor::data_processor()
|
||||
{
|
||||
data_storage::database::get_instance().initialize_schema();
|
||||
_upid = data_storage::database::get_instance().get_upid();
|
||||
|
||||
// Initialize event statement
|
||||
initialize_event_stmt();
|
||||
initialize_pmc_event_stmt();
|
||||
initialize_sample_stmt();
|
||||
initialize_region_stmt();
|
||||
initialize_kernel_dispatch_stmt();
|
||||
initialize_memory_copy_stmt();
|
||||
initialize_code_object_stmt();
|
||||
initialize_kernel_symbol_stmt();
|
||||
initialize_metadata();
|
||||
initialize_args_stmt();
|
||||
initialize_memory_alloc_stmt();
|
||||
}
|
||||
|
||||
data_processor&
|
||||
data_processor::get_instance()
|
||||
{
|
||||
static data_processor _instance;
|
||||
return _instance;
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::initialize_metadata()
|
||||
{
|
||||
data_storage::queries::table_insert_query query;
|
||||
data_storage::database::get_instance().execute_query(
|
||||
query.set_table_name("rocpd_metadata_" + _upid)
|
||||
.set_columns("tag", "value")
|
||||
.set_values("upid", _upid)
|
||||
.get_query_string());
|
||||
}
|
||||
|
||||
size_t
|
||||
data_processor::insert_string(const char* str)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
auto it = _string_map.find(str);
|
||||
if(it != _string_map.end()) return _string_map.at(str);
|
||||
|
||||
data_storage::queries::table_insert_query query;
|
||||
data_storage::database::get_instance().execute_query(
|
||||
query.set_table_name("rocpd_string_" + _upid)
|
||||
.set_columns("guid", "string")
|
||||
.set_values(_upid, str)
|
||||
.get_query_string());
|
||||
|
||||
const auto string_id = data_storage::database::get_instance().get_last_insert_id();
|
||||
_string_map.emplace(str, string_id);
|
||||
return string_id;
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_node_info(size_t node_id, size_t hash, const char* machine_id,
|
||||
const char* system_name, const char* hostname,
|
||||
const char* release, const char* version,
|
||||
const char* hardware_name, const char* domain_name)
|
||||
{
|
||||
data_storage::queries::table_insert_query query;
|
||||
data_storage::database::get_instance().execute_query(
|
||||
query.set_table_name("rocpd_info_node_" + _upid)
|
||||
.set_columns("id", "guid", "hash", "machine_id", "system_name", "hostname",
|
||||
"release", "version", "hardware_name", "domain_name")
|
||||
.set_values(node_id, _upid, hash, machine_id, system_name, hostname, release,
|
||||
version, hardware_name, domain_name)
|
||||
.get_query_string());
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_process_info(size_t nid, size_t ppid, size_t pid, size_t init,
|
||||
size_t fini, size_t start, size_t end,
|
||||
const char* command, const char* environment,
|
||||
const char* extdata)
|
||||
{
|
||||
data_storage::queries::table_insert_query query;
|
||||
data_storage::database::get_instance().execute_query(
|
||||
query.set_table_name("rocpd_info_process_" + _upid)
|
||||
.set_columns("id", "guid", "nid", "ppid", "pid", "init", "fini", "start",
|
||||
"end", "command", "environment", "extdata")
|
||||
.set_values(pid, _upid, nid, ppid, pid, init, fini, start, end, command,
|
||||
environment, extdata)
|
||||
.get_query_string());
|
||||
}
|
||||
|
||||
size_t
|
||||
data_processor::insert_agent(size_t node_id, size_t pid, const char* agent_type,
|
||||
size_t absolute_index, size_t logical_index,
|
||||
size_t type_index, uint64_t uuid, const char* name,
|
||||
const char* model_name, const char* vendor_name,
|
||||
const char* product_name, const char* user_name,
|
||||
const char* extdata)
|
||||
{
|
||||
data_storage::queries::table_insert_query query;
|
||||
data_storage::database::get_instance().execute_query(
|
||||
query.set_table_name("rocpd_info_agent_" + _upid)
|
||||
.set_columns("guid", "nid", "pid", "type", "absolute_index", "logical_index",
|
||||
"type_index", "uuid", "name", "model_name", "vendor_name",
|
||||
"product_name", "user_name", "extdata")
|
||||
.set_values(_upid, node_id, pid, agent_type, absolute_index, logical_index,
|
||||
type_index, uuid, name, model_name, vendor_name, product_name,
|
||||
user_name, extdata)
|
||||
.get_query_string());
|
||||
|
||||
return data_storage::database::get_instance().get_last_insert_id();
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_track(const char* track_name, size_t node_id, size_t process_id,
|
||||
std::optional<size_t> thread_id, const char* extdata)
|
||||
{
|
||||
if(_tracks.find(track_name) != _tracks.end())
|
||||
{
|
||||
ROCPROFSYS_WARNING(2, "Fail to add track %s, already exist!\n", track_name);
|
||||
return;
|
||||
}
|
||||
|
||||
auto name_id = insert_string(track_name);
|
||||
|
||||
data_storage::queries::table_insert_query query;
|
||||
data_storage::database::get_instance().execute_query(
|
||||
query.set_table_name("rocpd_track_" + _upid)
|
||||
.set_columns("guid", "nid", "pid", "tid", "name_id", "extdata")
|
||||
.set_values(_upid, node_id, process_id, thread_id, name_id, extdata)
|
||||
.get_query_string());
|
||||
|
||||
auto track_id = data_storage::database::get_instance().get_last_insert_id();
|
||||
_tracks[track_name] = track_name_map{ track_id, name_id };
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_pmc_description(
|
||||
size_t node_id, size_t process_id, size_t agent_id, const char* target_arch,
|
||||
size_t event_code, size_t instance_id, const char* name, const char* symbol,
|
||||
const char* description, const char* long_description, const char* component,
|
||||
const char* units, const char* value_type, const char* block, const char* expression,
|
||||
uint32_t is_constant, uint32_t is_derived, const char* extdata)
|
||||
{
|
||||
auto it = _pmc_descriptor_map.find({ agent_id, name });
|
||||
if(it != _pmc_descriptor_map.end())
|
||||
{
|
||||
ROCPROFSYS_WARNING(0,
|
||||
"Insert PMC description failed! Error: PMC descriptor "
|
||||
"(name:%s) (ID:%lu) already exist!\n",
|
||||
name, agent_id);
|
||||
return;
|
||||
}
|
||||
data_storage::queries::table_insert_query query_builder;
|
||||
|
||||
auto query =
|
||||
query_builder.set_table_name("rocpd_info_pmc_" + _upid)
|
||||
.set_columns("guid", "nid", "pid", "agent_id", "target_arch", "event_code",
|
||||
"instance_id", "name", "symbol", "description",
|
||||
"long_description", "component", "units", "value_type", "block",
|
||||
"expression", "is_constant", "is_derived", "extdata")
|
||||
.set_values(_upid, node_id, process_id, agent_id, target_arch, event_code,
|
||||
instance_id, name, symbol, description, long_description,
|
||||
component, units, value_type, block, expression, is_constant,
|
||||
is_derived, extdata)
|
||||
.get_query_string();
|
||||
data_storage::database::get_instance().execute_query(query);
|
||||
|
||||
auto pmc_id = data_storage::database::get_instance().get_last_insert_id();
|
||||
_pmc_descriptor_map.emplace(
|
||||
std::pair<pmc_identifier, size_t>{ { agent_id, name }, pmc_id });
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_pmc_event(size_t event_id, size_t agent_id, const char* pmc_name,
|
||||
double value, const char* extdata)
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(2,
|
||||
"Insert PMC event: id %ld, agent id: %ld, pmc name: %s, value: "
|
||||
"%lf, extdata: %s\n",
|
||||
event_id, agent_id, pmc_name, value, extdata);
|
||||
auto it = _pmc_descriptor_map.find({ agent_id, pmc_name });
|
||||
if(it == _pmc_descriptor_map.end())
|
||||
{
|
||||
ROCPROFSYS_WARNING(0,
|
||||
"Insert PMC event failed! Error: non-existing PMC description "
|
||||
"agent id: %ld, pmc name: %s !\n",
|
||||
agent_id, pmc_name);
|
||||
return;
|
||||
}
|
||||
|
||||
const auto pmc_description_id = it->second;
|
||||
_insert_pmc_event_statement(_upid.c_str(), event_id, pmc_description_id, value,
|
||||
extdata);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_sample(const char* track, uint64_t timestamp, size_t event_id,
|
||||
const char* extdata)
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(
|
||||
3, "Insert sample: track: %s, timestamp: %lu, event id: %ld, extdata: %s\n",
|
||||
track, timestamp, event_id, extdata);
|
||||
auto it = _tracks.find(track);
|
||||
if(it == _tracks.end())
|
||||
{
|
||||
ROCPROFSYS_WARNING(0, "Insert sample failed! Error: Unexisting track %s!\n",
|
||||
track);
|
||||
return;
|
||||
}
|
||||
auto track_info = it->second;
|
||||
_insert_sample_statement(_upid.c_str(), track_info.track_id, timestamp, event_id,
|
||||
extdata);
|
||||
}
|
||||
|
||||
size_t
|
||||
data_processor::insert_event(size_t category_id, size_t stack_id, size_t parent_stack_id,
|
||||
size_t correlation_id, const char* call_stack,
|
||||
const char* line_info, const char* extdata)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
auto it = _category_map.find(category_id);
|
||||
if(it == _category_map.end())
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Insert event failed! Error: Unknown category id: " << category_id
|
||||
<< " for UPID: " << _upid;
|
||||
throw std::runtime_error(oss.str());
|
||||
}
|
||||
|
||||
ROCPROFSYS_VERBOSE(3, "Insert event category id: %ld, string id: %ld\n", category_id,
|
||||
it->second);
|
||||
|
||||
_insert_event_statement(_upid.c_str(), it->second, stack_id, parent_stack_id,
|
||||
correlation_id, call_stack, line_info, extdata);
|
||||
return data_storage::database::get_instance().get_last_insert_id();
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::initialize_event_stmt()
|
||||
{
|
||||
data_storage::queries::table_insert_query query_builder;
|
||||
auto query = query_builder.set_table_name("rocpd_event_" + _upid)
|
||||
.set_columns("guid", "category_id", "stack_id", "parent_stack_id",
|
||||
"correlation_id", "call_stack", "line_info", "extdata")
|
||||
.set_values('?', '?', '?', '?', '?', '?', '?', '?')
|
||||
.get_query_string();
|
||||
_insert_event_statement =
|
||||
data_storage::database::get_instance()
|
||||
.create_statement_executor<const char*, size_t, size_t, size_t, size_t,
|
||||
const char*, const char*, const char*>(query);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::initialize_pmc_event_stmt()
|
||||
{
|
||||
data_storage::queries::table_insert_query query_builder;
|
||||
auto query = query_builder.set_table_name("rocpd_pmc_event_" + _upid)
|
||||
.set_columns("guid", "event_id", "pmc_id", "value", "extdata")
|
||||
.set_values('?', '?', '?', '?', '?')
|
||||
.get_query_string();
|
||||
_insert_pmc_event_statement =
|
||||
data_storage::database::get_instance()
|
||||
.create_statement_executor<const char*, size_t, size_t, double, const char*>(
|
||||
query);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::initialize_sample_stmt()
|
||||
{
|
||||
data_storage::queries::table_insert_query query_builder;
|
||||
auto query = query_builder.set_table_name("rocpd_sample_" + _upid)
|
||||
.set_columns("guid", "track_id", "timestamp", "event_id", "extdata")
|
||||
.set_values('?', '?', '?', '?', '?')
|
||||
.get_query_string();
|
||||
_insert_sample_statement =
|
||||
data_storage::database::get_instance()
|
||||
.create_statement_executor<const char*, size_t, uint64_t, size_t,
|
||||
const char*>(query);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::initialize_region_stmt()
|
||||
{
|
||||
data_storage::queries::table_insert_query query_builder;
|
||||
auto query = query_builder.set_table_name("rocpd_region_" + _upid)
|
||||
.set_columns("guid", "nid", "pid", "tid", "start", "end", "name_id",
|
||||
"event_id", "extdata")
|
||||
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?')
|
||||
.get_query_string();
|
||||
_insert_region_statement =
|
||||
data_storage::database::get_instance()
|
||||
.create_statement_executor<const char*, size_t, size_t, size_t, uint64_t,
|
||||
uint64_t, size_t, size_t, const char*>(query);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::initialize_kernel_dispatch_stmt()
|
||||
{
|
||||
data_storage::queries::table_insert_query query_builder;
|
||||
auto query = query_builder.set_table_name("rocpd_kernel_dispatch_" + _upid)
|
||||
.set_columns("guid", "nid", "pid", "tid", "agent_id", "kernel_id",
|
||||
"dispatch_id", "queue_id", "stream_id", "start", "end",
|
||||
"private_segment_size", "group_segment_size",
|
||||
"workgroup_size_x", "workgroup_size_y",
|
||||
"workgroup_size_z", "grid_size_x", "grid_size_y",
|
||||
"grid_size_z", "region_name_id", "event_id", "extdata")
|
||||
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
|
||||
'?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?')
|
||||
.get_query_string();
|
||||
_insert_kernel_dispatch_statement =
|
||||
data_storage::database::get_instance()
|
||||
.create_statement_executor<const char*, size_t, size_t, size_t, size_t,
|
||||
size_t, size_t, size_t, size_t, uint64_t, uint64_t,
|
||||
size_t, size_t, size_t, size_t, size_t, size_t,
|
||||
size_t, size_t, size_t, size_t, const char*>(
|
||||
query);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::initialize_memory_copy_stmt()
|
||||
{
|
||||
data_storage::queries::table_insert_query query_builder;
|
||||
auto query = query_builder.set_table_name("rocpd_memory_copy_" + _upid)
|
||||
.set_columns("guid", "nid", "pid", "tid", "start", "end", "name_id",
|
||||
"dst_agent_id", "dst_address", "src_agent_id",
|
||||
"src_address", "size", "queue_id", "stream_id",
|
||||
"region_name_id", "event_id", "extdata")
|
||||
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
|
||||
'?', '?', '?', '?', '?', '?')
|
||||
.get_query_string();
|
||||
_insert_memory_copy_statement =
|
||||
data_storage::database::get_instance()
|
||||
.create_statement_executor<const char*, size_t, size_t, size_t, uint64_t,
|
||||
uint64_t, size_t, size_t, size_t, size_t, size_t,
|
||||
size_t, size_t, size_t, size_t, size_t,
|
||||
const char*>(query);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::initialize_kernel_symbol_stmt()
|
||||
{
|
||||
data_storage::queries::table_insert_query query_builder;
|
||||
auto query =
|
||||
query_builder.set_table_name("rocpd_info_kernel_symbol_" + _upid)
|
||||
.set_columns("id", "guid", "nid", "pid", "code_object_id", "kernel_name",
|
||||
"display_name", "kernel_object", "kernarg_segment_size",
|
||||
"kernarg_segment_alignment", "group_segment_size",
|
||||
"private_segment_size", "sgpr_count", "arch_vgpr_count",
|
||||
"accum_vgpr_count", "extdata")
|
||||
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
|
||||
'?', '?', '?')
|
||||
.get_query_string();
|
||||
_insert_kernel_symbol_statement =
|
||||
data_storage::database::get_instance()
|
||||
.create_statement_executor<size_t, const char*, size_t, size_t, uint64_t,
|
||||
const char*, const char*, uint64_t, uint32_t,
|
||||
uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
|
||||
uint32_t, const char*>(query);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::initialize_code_object_stmt()
|
||||
{
|
||||
data_storage::queries::table_insert_query query_builder;
|
||||
auto query =
|
||||
query_builder.set_table_name("rocpd_info_code_object_" + _upid)
|
||||
.set_columns("id", "guid", "nid", "pid", "agent_id", "uri", "load_base",
|
||||
"load_size", "load_delta", "storage_type", "extdata")
|
||||
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?')
|
||||
.get_query_string();
|
||||
_insert_code_object_statement =
|
||||
data_storage::database::get_instance()
|
||||
.create_statement_executor<size_t, const char*, size_t, size_t, size_t,
|
||||
const char*, uint64_t, uint64_t, uint64_t,
|
||||
const char*, const char*>(query);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::initialize_args_stmt()
|
||||
{
|
||||
data_storage::queries::table_insert_query query_builder;
|
||||
auto query = query_builder.set_table_name("rocpd_arg_" + _upid)
|
||||
.set_columns("guid", "event_id", "position", "type", "name", "value",
|
||||
"extdata")
|
||||
.set_values('?', '?', '?', '?', '?', '?', '?')
|
||||
.get_query_string();
|
||||
_insert_args_statement =
|
||||
data_storage::database::get_instance()
|
||||
.create_statement_executor<const char*, size_t, size_t, const char*,
|
||||
const char*, const char*, const char*>(query);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::initialize_memory_alloc_stmt()
|
||||
{
|
||||
data_storage::queries::table_insert_query query_builder;
|
||||
auto query = query_builder.set_table_name("rocpd_memory_allocate_" + _upid)
|
||||
.set_columns("guid", "nid", "pid", "tid", "agent_id", "type",
|
||||
"level", "start", "end", "address", "size", "queue_id",
|
||||
"stream_id", "event_id", "extdata")
|
||||
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
|
||||
'?', '?', '?', '?')
|
||||
.get_query_string();
|
||||
_insert_memory_alloc_statement =
|
||||
data_storage::database::get_instance()
|
||||
.create_statement_executor<
|
||||
const char*, size_t, size_t, size_t, size_t, const char*, const char*,
|
||||
uint64_t, uint64_t, size_t, size_t, size_t, size_t, size_t, const char*>(
|
||||
query);
|
||||
|
||||
// Statement without agent_id
|
||||
query = query_builder.set_table_name("rocpd_memory_allocate_" + _upid)
|
||||
.set_columns("guid", "nid", "pid", "tid", "type", "level", "start", "end",
|
||||
"address", "size", "queue_id", "stream_id", "event_id",
|
||||
"extdata")
|
||||
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
|
||||
'?', '?')
|
||||
.get_query_string();
|
||||
_insert_memory_alloc_no_agent_statement =
|
||||
data_storage::database::get_instance()
|
||||
.create_statement_executor<const char*, size_t, size_t, size_t, const char*,
|
||||
const char*, uint64_t, uint64_t, size_t, size_t,
|
||||
size_t, size_t, size_t, const char*>(query);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_args(size_t event_id, size_t position, const char* type,
|
||||
const char* name, const char* value, const char* extdata)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
_insert_args_statement(_upid.c_str(), event_id, position, type, name, value, extdata);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_stream_info(size_t stream_id, size_t node_id, size_t process_id,
|
||||
const char* name, const char* extdata)
|
||||
{
|
||||
if(_stream_ids.count(stream_id) > 0)
|
||||
{
|
||||
// ROCPROFSYS_WARNING(
|
||||
// 1, "Insert stream info failed! Error: Stream ID %ld already exists!\n",
|
||||
// stream_id);
|
||||
return;
|
||||
}
|
||||
data_storage::queries::table_insert_query query;
|
||||
data_storage::database::get_instance().execute_query(
|
||||
query.set_table_name("rocpd_info_stream_" + _upid)
|
||||
.set_columns("id", "guid", "nid", "pid", "name", "extdata")
|
||||
.set_values(stream_id, _upid, node_id, process_id, name, extdata)
|
||||
.get_query_string());
|
||||
_stream_ids.insert(stream_id);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_queue_info(size_t queue_id, size_t node_id, size_t process_id,
|
||||
const char* name, const char* extdata)
|
||||
{
|
||||
if(_queue_ids.count(queue_id) > 0)
|
||||
{
|
||||
// ROCPROFSYS_WARNING(
|
||||
// 1, "Insert queue info failed! Error: Queue ID %ld already exists!\n",
|
||||
// queue_id);
|
||||
return;
|
||||
}
|
||||
data_storage::queries::table_insert_query query;
|
||||
data_storage::database::get_instance().execute_query(
|
||||
query.set_table_name("rocpd_info_queue_" + _upid)
|
||||
.set_columns("id", "guid", "nid", "pid", "name", "extdata")
|
||||
.set_values(queue_id, _upid, node_id, process_id, name, extdata)
|
||||
.get_query_string());
|
||||
_queue_ids.insert(queue_id);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_code_object(size_t id, size_t node_id, size_t process_id,
|
||||
size_t agent_id, const char* uri, uint64_t ld_base,
|
||||
uint64_t ld_size, uint64_t ld_delta,
|
||||
const char* storage_type, const char* extdata)
|
||||
{
|
||||
if(_code_object_ids.count(id) > 0)
|
||||
{
|
||||
// ROCPROFSYS_WARNING(
|
||||
// 1,
|
||||
// "Insert code object info failed! Error: Code object ID %ld already
|
||||
// exists!\n", id);
|
||||
return;
|
||||
}
|
||||
ROCPROFSYS_VERBOSE(2, "Insert code object with ID: %ld\n", id);
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
_insert_code_object_statement(id, _upid.c_str(), node_id, process_id, agent_id, uri,
|
||||
ld_base, ld_size, ld_delta, storage_type, extdata);
|
||||
|
||||
_code_object_ids.insert(id);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_kernel_symbol(
|
||||
size_t id, size_t node_id, size_t process_id, uint64_t code_obj_id, const char* name,
|
||||
const char* display_name, uint32_t kernel_obj, uint32_t kernarg_segmnt_size,
|
||||
uint32_t kernarg_segment_alignment, uint32_t group_segment_size,
|
||||
uint32_t private_segment_size, uint32_t sgrp_count, uint32_t arch_vgrp_count,
|
||||
uint32_t accum_vgrp_count, const char* extdata)
|
||||
{
|
||||
if(_kernel_sym_ids.count(id) > 0)
|
||||
{
|
||||
// ROCPROFSYS_WARNING(
|
||||
// 1,
|
||||
// "Insert kernel symbol failed! Error: Kernel symbol ID %ld already
|
||||
// exists!\n", id);
|
||||
return;
|
||||
}
|
||||
|
||||
ROCPROFSYS_VERBOSE(2, "Insert kernel symbol: %s with ID: %ld\n", name, id);
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
_insert_kernel_symbol_statement(
|
||||
id, _upid.c_str(), node_id, process_id, code_obj_id, name, display_name,
|
||||
kernel_obj, kernarg_segmnt_size, kernarg_segment_alignment, group_segment_size,
|
||||
private_segment_size, sgrp_count, arch_vgrp_count, accum_vgrp_count, extdata);
|
||||
|
||||
_kernel_sym_ids.insert(id);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_category(size_t category_id, const char* name)
|
||||
{
|
||||
auto it = _category_map.find(category_id);
|
||||
if(it != _category_map.end())
|
||||
{
|
||||
// ROCPROFSYS_WARNING(
|
||||
// 1, "Insert category failed! Error: Category %s already exist!\n", name);
|
||||
return;
|
||||
}
|
||||
auto name_id = insert_string(name);
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
ROCPROFSYS_VERBOSE(2, "Insert category: name: %s, id: %ld, name id: %ld\n", name,
|
||||
category_id, name_id);
|
||||
_category_map.emplace(category_id, name_id);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_region(size_t node_id, size_t process_id, size_t thread_id,
|
||||
uint64_t start, uint64_t end, size_t name_id,
|
||||
size_t event_id, const char* extdata)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
ROCPROFSYS_VERBOSE(2, "Insert region for event id: %ld\n", event_id);
|
||||
|
||||
_insert_region_statement(_upid.c_str(), node_id, process_id, thread_id, start, end,
|
||||
name_id, event_id, extdata);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_kernel_dispatch(
|
||||
size_t node_id, size_t process_id, size_t thread_id, size_t agent_id,
|
||||
size_t kernel_id, size_t dispatch_id, size_t queue_id, size_t stream_id,
|
||||
uint64_t start, uint64_t end, size_t private_segment_size, size_t group_segment_size,
|
||||
size_t workgroup_size_x, size_t workgroup_size_y, size_t workgroup_size_z,
|
||||
size_t grid_size_x, size_t grid_size_y, size_t grid_size_z, size_t region_name_id,
|
||||
size_t event_id, const char* extdata)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
|
||||
ROCPROFSYS_VERBOSE(2, "Insert kernel dispatch for event id: %ld\n", event_id);
|
||||
|
||||
_insert_kernel_dispatch_statement(
|
||||
_upid.c_str(), node_id, process_id, thread_id, agent_id, kernel_id, dispatch_id,
|
||||
queue_id, stream_id, start, end, private_segment_size, group_segment_size,
|
||||
workgroup_size_x, workgroup_size_y, workgroup_size_z, grid_size_x, grid_size_y,
|
||||
grid_size_z, region_name_id, event_id, extdata);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_memory_copy(size_t node_id, size_t process_id, size_t thread_id,
|
||||
uint64_t start, uint64_t end, size_t name_id,
|
||||
size_t dst_agent_id, size_t dst_addr,
|
||||
size_t src_agent_id, size_t src_addr, size_t size,
|
||||
size_t queue_id, size_t stream_id,
|
||||
size_t region_name_id, size_t event_id,
|
||||
const char* extdata)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
|
||||
_insert_memory_copy_statement(_upid.c_str(), node_id, process_id, thread_id, start,
|
||||
end, name_id, dst_agent_id, dst_addr, src_agent_id,
|
||||
src_addr, size, queue_id, stream_id, region_name_id,
|
||||
event_id, extdata);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_memory_alloc(size_t node_id, size_t process_id, size_t thread_id,
|
||||
std::optional<size_t> agent_id, const char* type,
|
||||
const char* level, uint64_t start, uint64_t end,
|
||||
size_t address, size_t size, size_t queue_id,
|
||||
size_t stream_id, size_t event_id,
|
||||
const char* extdata)
|
||||
{
|
||||
if(agent_id.has_value())
|
||||
{
|
||||
_insert_memory_alloc_statement(_upid.c_str(), node_id, process_id, thread_id,
|
||||
agent_id.value(), type, level, start, end, address,
|
||||
size, queue_id, stream_id, event_id, extdata);
|
||||
}
|
||||
else
|
||||
{
|
||||
_insert_memory_alloc_no_agent_statement(
|
||||
_upid.c_str(), node_id, process_id, thread_id, type, level, start, end,
|
||||
address, size, queue_id, stream_id, event_id, extdata);
|
||||
}
|
||||
}
|
||||
|
||||
size_t
|
||||
data_processor::insert_thread_info(size_t node_id, size_t parent_process_id,
|
||||
size_t process_id, size_t thread_id, const char* name,
|
||||
uint64_t start, uint64_t end, const char* extdata)
|
||||
{
|
||||
auto it = _thread_id_map.find(thread_id);
|
||||
|
||||
if(it != _thread_id_map.end())
|
||||
{
|
||||
return _thread_id_map.at(thread_id);
|
||||
}
|
||||
|
||||
data_storage::queries::table_insert_query query;
|
||||
data_storage::database::get_instance().execute_query(
|
||||
query.set_table_name("rocpd_info_thread_" + _upid)
|
||||
.set_columns("guid", "nid", "ppid", "pid", "tid", "name", "start", "end",
|
||||
"extdata")
|
||||
.set_values(_upid.c_str(), node_id, parent_process_id, process_id, thread_id,
|
||||
name, start, end, extdata)
|
||||
.get_query_string());
|
||||
|
||||
auto thread_idx = data_storage::database::get_instance().get_last_insert_id();
|
||||
_thread_id_map.emplace(thread_id, thread_idx);
|
||||
return thread_idx;
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::flush()
|
||||
{
|
||||
// Flush all pending data to the database
|
||||
data_storage::database::get_instance().flush();
|
||||
}
|
||||
|
||||
} // namespace rocpd
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,252 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace rocpd
|
||||
{
|
||||
struct data_processor
|
||||
{
|
||||
using insert_event_stmt =
|
||||
std::function<void(const char*, size_t, size_t, size_t, size_t, const char*,
|
||||
const char*, const char*)>;
|
||||
using insert_pmc_event_stms =
|
||||
std::function<void(const char*, size_t, size_t, double, const char*)>;
|
||||
using insert_sample_stmt =
|
||||
std::function<void(const char*, size_t, uint64_t, size_t, const char*)>;
|
||||
using insert_region_stmt =
|
||||
std::function<void(const char*, size_t, size_t, size_t, uint64_t, uint64_t,
|
||||
size_t, size_t, const char*)>;
|
||||
using insert_kernel_dispatch_stmt = std::function<void(
|
||||
const char*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t,
|
||||
uint64_t, uint64_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t,
|
||||
size_t, size_t, size_t, const char*)>;
|
||||
using insert_memory_copy_stmt = std::function<void(
|
||||
const char*, size_t, size_t, size_t, uint64_t, uint64_t, size_t, size_t, size_t,
|
||||
size_t, size_t, size_t, size_t, size_t, size_t, size_t, const char*)>;
|
||||
using insert_memory_alloc_stmt = std::function<void(
|
||||
const char*, size_t, size_t, size_t, size_t, const char*, const char*, uint64_t,
|
||||
uint64_t, size_t, size_t, size_t, size_t, size_t, const char*)>;
|
||||
using insert_memory_alloc_no_agent_stmt = std::function<void(
|
||||
const char*, size_t, size_t, size_t, const char*, const char*, uint64_t, uint64_t,
|
||||
size_t, size_t, size_t, size_t, size_t, const char*)>;
|
||||
using insert_kernel_symbol_stmt =
|
||||
std::function<void(size_t, const char*, size_t, size_t, uint64_t, const char*,
|
||||
const char*, uint64_t, uint32_t, uint32_t, uint32_t, uint32_t,
|
||||
uint32_t, uint32_t, uint32_t, const char*)>;
|
||||
using insert_code_object_stmt =
|
||||
std::function<void(size_t, const char*, size_t, size_t, size_t, const char*,
|
||||
uint64_t, uint64_t, uint64_t, const char*, const char*)>;
|
||||
using insert_args_stmt = std::function<void(const char*, size_t, size_t, const char*,
|
||||
const char*, const char*, const char*)>;
|
||||
|
||||
private:
|
||||
struct track_name_map
|
||||
{
|
||||
size_t track_id;
|
||||
size_t name_id;
|
||||
};
|
||||
|
||||
struct pmc_identifier
|
||||
{
|
||||
size_t agent_id;
|
||||
std::string name;
|
||||
};
|
||||
|
||||
struct pmc_identifier_hash
|
||||
{
|
||||
std::size_t operator()(const pmc_identifier& pmc) const noexcept
|
||||
{
|
||||
std::size_t h1 = std::hash<size_t>{}(pmc.agent_id);
|
||||
std::size_t h2 = std::hash<std::string>{}(pmc.name);
|
||||
return h1 ^ (h2 << 1);
|
||||
}
|
||||
};
|
||||
|
||||
struct pmc_identifier_equal
|
||||
{
|
||||
bool operator()(const pmc_identifier& lhs,
|
||||
const pmc_identifier& rhs) const noexcept
|
||||
{
|
||||
return lhs.agent_id == rhs.agent_id && lhs.name == rhs.name;
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
static data_processor& get_instance();
|
||||
|
||||
size_t insert_string(const char* str);
|
||||
|
||||
void insert_node_info(size_t node_id, size_t hash, const char* machine_id,
|
||||
const char* system_name, const char* hostname,
|
||||
const char* release, const char* version,
|
||||
const char* hardware_name, const char* domain_name);
|
||||
|
||||
void insert_process_info(size_t node_id, size_t ppid, size_t pid, size_t init,
|
||||
size_t fini, size_t start, size_t end, const char* command,
|
||||
const char* environment = "{}", const char* extdata = "{}");
|
||||
|
||||
size_t insert_agent(size_t node_id, size_t pid, const char* agent_type,
|
||||
size_t absolute_index, size_t logical_index, size_t type_index,
|
||||
uint64_t uuid, const char* name, const char* model_name,
|
||||
const char* vendor_name, const char* product_name,
|
||||
const char* user_name, const char* extdata = "{}");
|
||||
|
||||
void insert_track(const char* track_name, size_t node_id, size_t process_id,
|
||||
std::optional<size_t> thread_id, const char* extdata = "{}");
|
||||
|
||||
size_t insert_event(size_t category_id, size_t stack_id, size_t parent_stack_id,
|
||||
size_t correlation_id, const char* call_stack = "{}",
|
||||
const char* line_info = "{}", const char* extdata = "{}");
|
||||
|
||||
void insert_pmc_event(size_t event_id, size_t agent_id, const char* pmc_descriptor,
|
||||
double value, const char* extdata = "{}");
|
||||
|
||||
void insert_pmc_description(size_t node_id, size_t process_id, size_t agent_id,
|
||||
const char* target_arch, size_t event_code,
|
||||
size_t instance_id, const char* name, const char* symbol,
|
||||
const char* description, const char* long_description,
|
||||
const char* component, const char* units,
|
||||
const char* value_type, const char* block,
|
||||
const char* expression, uint32_t is_constant,
|
||||
uint32_t is_derived, const char* extdata = "{}");
|
||||
|
||||
void insert_sample(const char* track, uint64_t timestamp, size_t event_id,
|
||||
const char* extdata = "{}");
|
||||
|
||||
void insert_category(size_t category_id, const char* name);
|
||||
|
||||
void insert_region(size_t node_id, size_t process_id, size_t thread_id,
|
||||
uint64_t start, uint64_t end, size_t name_id, size_t event_id,
|
||||
const char* extdata = "{}");
|
||||
|
||||
size_t insert_thread_info(size_t node_id, size_t parent_process_id, size_t process_id,
|
||||
size_t thread_id, const char* name, uint64_t start = 0,
|
||||
uint64_t end = 0, const char* extdata = "{}");
|
||||
|
||||
void insert_stream_info(size_t stream_id, size_t node_id, size_t process_id,
|
||||
const char* name, const char* extdata = "{}");
|
||||
void insert_queue_info(size_t queue_id, size_t node_id, size_t process_id,
|
||||
const char* name, const char* extdata = "{}");
|
||||
|
||||
void insert_kernel_dispatch(size_t node_id, size_t process_id, size_t thread_id,
|
||||
size_t agent_id, size_t kernel_id, size_t dispatch_id,
|
||||
size_t queue_id, size_t stream_id, uint64_t start,
|
||||
uint64_t end, size_t private_segment_size,
|
||||
size_t group_segment_size, size_t workgroup_size_x,
|
||||
size_t workgroup_size_y, size_t workgroup_size_z,
|
||||
size_t grid_size_x, size_t grid_size_y,
|
||||
size_t grid_size_z, size_t region_name_id,
|
||||
size_t event_id, const char* extdata = "{}");
|
||||
|
||||
void insert_memory_copy(size_t node_id, size_t process_id, size_t thread_id,
|
||||
uint64_t start, uint64_t end, size_t name_id,
|
||||
size_t dst_agent_id, size_t dst_addr, size_t src_agent_id,
|
||||
size_t src_addr, size_t size, size_t queue_id,
|
||||
size_t stream_id, size_t region_name_id, size_t event_id,
|
||||
const char* extdata = "{}");
|
||||
|
||||
void insert_kernel_symbol(size_t id, size_t node_id, size_t process_id,
|
||||
uint64_t code_obj_id, const char* name,
|
||||
const char* display_name, uint32_t kernel_obj,
|
||||
uint32_t kernarg_segmnt_size,
|
||||
uint32_t kernarg_segment_alignment,
|
||||
uint32_t group_segment_size, uint32_t private_segment_size,
|
||||
uint32_t sgrp_count, uint32_t arch_vgrp_count,
|
||||
uint32_t accum_vgrp_count, const char* extdata = "{}");
|
||||
|
||||
void insert_code_object(size_t id, size_t node_id, size_t process_id, size_t agent_id,
|
||||
const char* uri, uint64_t ld_base, uint64_t ld_size,
|
||||
uint64_t ld_delta, const char* storage_type,
|
||||
const char* extdata = "{}");
|
||||
|
||||
void insert_args(size_t event_id, size_t position, const char* type, const char* name,
|
||||
const char* value, const char* extdata = "{}");
|
||||
|
||||
void insert_memory_alloc(size_t node_id, size_t process_id, size_t thread_id,
|
||||
std::optional<size_t> agent_id, const char* type,
|
||||
const char* level, uint64_t start, uint64_t end,
|
||||
size_t address, size_t size, size_t queue_id,
|
||||
size_t stream_id, size_t event_id,
|
||||
const char* extdata = "{}");
|
||||
|
||||
void flush();
|
||||
|
||||
private:
|
||||
data_processor();
|
||||
data_processor(data_processor&) = delete;
|
||||
data_processor& operator=(const data_processor&) = delete;
|
||||
|
||||
void initialize_pmc_event_stmt();
|
||||
void initialize_event_stmt();
|
||||
void initialize_sample_stmt();
|
||||
void initialize_region_stmt();
|
||||
void initialize_kernel_dispatch_stmt();
|
||||
void initialize_memory_copy_stmt();
|
||||
void initialize_kernel_symbol_stmt();
|
||||
void initialize_code_object_stmt();
|
||||
void initialize_metadata();
|
||||
void initialize_args_stmt();
|
||||
void initialize_memory_alloc_stmt();
|
||||
|
||||
private:
|
||||
std::unordered_map<std::string, track_name_map> _tracks;
|
||||
std::unordered_map<pmc_identifier, size_t, pmc_identifier_hash, pmc_identifier_equal>
|
||||
_pmc_descriptor_map;
|
||||
std::unordered_map<size_t, size_t> _thread_id_map;
|
||||
std::unordered_map<size_t, size_t> _category_map;
|
||||
std::unordered_map<std::string, size_t> _string_map;
|
||||
|
||||
std::set<uint64_t> _code_object_ids;
|
||||
std::set<uint64_t> _kernel_sym_ids;
|
||||
std::set<uint64_t> _stream_ids;
|
||||
std::set<uint64_t> _queue_ids;
|
||||
|
||||
insert_event_stmt _insert_event_statement;
|
||||
insert_pmc_event_stms _insert_pmc_event_statement;
|
||||
insert_sample_stmt _insert_sample_statement;
|
||||
insert_region_stmt _insert_region_statement;
|
||||
insert_kernel_dispatch_stmt _insert_kernel_dispatch_statement;
|
||||
insert_memory_copy_stmt _insert_memory_copy_statement;
|
||||
insert_kernel_symbol_stmt _insert_kernel_symbol_statement;
|
||||
insert_code_object_stmt _insert_code_object_statement;
|
||||
insert_args_stmt _insert_args_statement;
|
||||
insert_memory_alloc_stmt _insert_memory_alloc_statement;
|
||||
insert_memory_alloc_no_agent_stmt _insert_memory_alloc_no_agent_statement;
|
||||
|
||||
std::string _upid{};
|
||||
|
||||
std::mutex _data_mutex;
|
||||
};
|
||||
|
||||
} // namespace rocpd
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,14 @@
|
||||
set(data_storage_sources ${CMAKE_CURRENT_LIST_DIR}/database.cpp)
|
||||
|
||||
set(data_storage_headers
|
||||
${CMAKE_CURRENT_LIST_DIR}/database.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/insert_query_builders.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/table_insert_query.hpp
|
||||
)
|
||||
|
||||
target_sources(rocprofiler-systems-core-library PRIVATE ${data_storage_sources})
|
||||
|
||||
target_link_libraries(
|
||||
rocprofiler-systems-core-library
|
||||
PRIVATE $<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-sqlite3>
|
||||
)
|
||||
@@ -0,0 +1,170 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "database.hpp"
|
||||
#include "common/md5sum.hpp"
|
||||
#include "debug.hpp"
|
||||
#include "node_info.hpp"
|
||||
|
||||
#include <config.hpp>
|
||||
#include <fstream>
|
||||
#include <regex>
|
||||
#include <timemory/environment/types.hpp>
|
||||
#include <timemory/utility/filepath.hpp>
|
||||
#include <unistd.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
void
|
||||
create_directory_for_database_file(const std::string& db_file)
|
||||
{
|
||||
auto _db_dirname = tim::filepath::dirname(db_file);
|
||||
if(!tim::filepath::direxists(_db_dirname))
|
||||
{
|
||||
tim::filepath::makedir(_db_dirname);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace rocpd
|
||||
{
|
||||
namespace data_storage
|
||||
{
|
||||
database&
|
||||
database::get_instance()
|
||||
{
|
||||
static database _instance;
|
||||
return _instance;
|
||||
}
|
||||
|
||||
database::database()
|
||||
{
|
||||
auto db_name = std::string_view{ "rocpd.db" };
|
||||
auto abs_db_path = rocprofsys::get_database_absolute_path(db_name);
|
||||
create_directory_for_database_file(abs_db_path);
|
||||
ROCPROFSYS_VERBOSE(0, "Database: %s\r\n", abs_db_path.c_str());
|
||||
|
||||
validate_sqlite3_result(sqlite3_open(":memory:", &_sqlite3_db_temp), "",
|
||||
"database open failed!");
|
||||
validate_sqlite3_result(sqlite3_open(abs_db_path.c_str(), &_sqlite3_db), "",
|
||||
"database open failed!");
|
||||
}
|
||||
|
||||
database::~database()
|
||||
{
|
||||
sqlite3_close(_sqlite3_db_temp);
|
||||
sqlite3_close(_sqlite3_db);
|
||||
}
|
||||
|
||||
void
|
||||
database::initialize_schema()
|
||||
{
|
||||
auto get_file_path = [](const std::string_view filename) {
|
||||
auto _rocprofsys_root = tim::get_env<std::string>(
|
||||
"rocprofiler_systems_ROOT", tim::get_env<std::string>("ROCPROFSYS_ROOT", ""));
|
||||
if(!_rocprofsys_root.empty() &&
|
||||
tim::filepath::direxists(std::string(_rocprofsys_root)))
|
||||
{
|
||||
auto new_file_path = std::string(_rocprofsys_root)
|
||||
.append("/share/rocprofiler-systems/")
|
||||
.append(filename);
|
||||
if(tim::filepath::exists(new_file_path))
|
||||
{
|
||||
return new_file_path;
|
||||
}
|
||||
}
|
||||
return std::string(
|
||||
"rocprofiler-systems/source/lib/core/rocpd/data_storage/schema/")
|
||||
.append(filename);
|
||||
};
|
||||
|
||||
std::vector<std::string_view> schema_files = { "rocpd_tables.sql", "rocpd_views.sql",
|
||||
"data_views.sql", "marker_views.sql",
|
||||
"summary_views.sql" };
|
||||
|
||||
// Process each schema file
|
||||
for(const auto& schema_file : schema_files)
|
||||
{
|
||||
auto file_path = get_file_path(schema_file);
|
||||
std::ifstream file(file_path);
|
||||
if(!file.is_open())
|
||||
{
|
||||
throw std::runtime_error(
|
||||
std::string("Failed to open schema file ").append(file_path));
|
||||
}
|
||||
|
||||
std::stringstream ss_query;
|
||||
ss_query << file.rdbuf();
|
||||
std::string query = ss_query.str();
|
||||
|
||||
std::regex upid_pattern("\\{\\{uuid\\}\\}");
|
||||
std::regex view_upid_pattern("\\{\\{view_upid\\}\\}");
|
||||
|
||||
query = std::regex_replace(query, upid_pattern, "_" + get_upid());
|
||||
query = std::regex_replace(query, view_upid_pattern, "");
|
||||
|
||||
validate_sqlite3_result(
|
||||
sqlite3_exec(_sqlite3_db_temp, query.c_str(), 0, 0, 0), query.c_str(),
|
||||
std::string("Invalid schema file, init database failed!").append(file_path));
|
||||
file.close();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
database::execute_query(const std::string& query)
|
||||
{
|
||||
validate_sqlite3_result(sqlite3_exec(_sqlite3_db_temp, query.c_str(), 0, 0, 0),
|
||||
"Failed to execute query - ", query);
|
||||
}
|
||||
|
||||
std::string
|
||||
database::get_upid()
|
||||
{
|
||||
static std::string _upid = []() {
|
||||
auto n_info = node_info::get_instance();
|
||||
auto guid = common::md5sum{ n_info.id, getpid(), getppid() };
|
||||
return guid.hexdigest();
|
||||
}();
|
||||
return _upid;
|
||||
}
|
||||
|
||||
size_t
|
||||
database::get_last_insert_id() const
|
||||
{
|
||||
return sqlite3_last_insert_rowid(_sqlite3_db_temp);
|
||||
}
|
||||
|
||||
void
|
||||
database::flush()
|
||||
{
|
||||
auto* backup = sqlite3_backup_init(_sqlite3_db, "main", _sqlite3_db_temp, "main");
|
||||
if(backup)
|
||||
{
|
||||
sqlite3_backup_step(backup, -1); // Copy all pages
|
||||
sqlite3_backup_finish(backup);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace data_storage
|
||||
} // namespace rocpd
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,204 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include "common/traits.hpp"
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <sqlite3.h>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace rocpd
|
||||
{
|
||||
namespace data_storage
|
||||
{
|
||||
static std::mutex _mutex;
|
||||
class database
|
||||
{
|
||||
public:
|
||||
static database& get_instance();
|
||||
|
||||
database(database&) = delete;
|
||||
database& operator=(database&) = delete;
|
||||
|
||||
void flush();
|
||||
|
||||
~database();
|
||||
|
||||
private:
|
||||
database();
|
||||
|
||||
template <typename... Args>
|
||||
inline void validate_sqlite3_result(int sqlite3_error_code, const char* query,
|
||||
Args&&... args)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << "\n===========================================================\n";
|
||||
ss << "Database Error\n";
|
||||
((ss << args << " "), ...);
|
||||
ss << "\nQuery: " << query << "\n";
|
||||
switch(sqlite3_error_code)
|
||||
{
|
||||
case SQLITE_OK:
|
||||
case SQLITE_DONE: return;
|
||||
case SQLITE_CONSTRAINT:
|
||||
{
|
||||
sqlite3_stmt* stmt;
|
||||
|
||||
ss << "Constraint violation(s): " << "\n";
|
||||
|
||||
sqlite3_exec(_sqlite3_db_temp, "PRAGMA foreign_keys = OFF;", nullptr,
|
||||
nullptr, nullptr);
|
||||
sqlite3_exec(_sqlite3_db_temp, query, nullptr, nullptr, nullptr);
|
||||
sqlite3_exec(_sqlite3_db_temp, "PRAGMA foreign_keys = ON;", nullptr,
|
||||
nullptr, nullptr);
|
||||
sqlite3_prepare_v2(_sqlite3_db_temp, "PRAGMA foreign_key_check", -1,
|
||||
&stmt, nullptr);
|
||||
int rc = 0;
|
||||
while((rc = sqlite3_step(stmt)) == SQLITE_ROW)
|
||||
{
|
||||
const char* table = (const char*) sqlite3_column_text(stmt, 0);
|
||||
int rowid = sqlite3_column_int(stmt, 1);
|
||||
const char* parent = (const char*) sqlite3_column_text(stmt, 2);
|
||||
int fkid = sqlite3_column_int(stmt, 3);
|
||||
|
||||
ss << " - " << "FK Violation - Table: " << (table ? table : "NULL")
|
||||
<< ", RowID: " << rowid
|
||||
<< ", Parent: " << (parent ? parent : "NULL") << ", FKID: " << fkid
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
sqlite3_finalize(stmt);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
{
|
||||
}
|
||||
break;
|
||||
}
|
||||
ss << " [Sqlite3 error: " << sqlite3_errstr(sqlite3_error_code);
|
||||
ss << " (Extended error message: " << sqlite3_errmsg(_sqlite3_db_temp) << ")]";
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
|
||||
template <typename T, std::enable_if_t<!(common::traits::is_string_literal<T>() ||
|
||||
std::is_floating_point_v<std::decay_t<T>> ||
|
||||
std::is_same_v<std::decay_t<T>, int64_t> ||
|
||||
std::is_same_v<std::decay_t<T>, uint64_t> ||
|
||||
std::is_same_v<std::decay_t<T>, int32_t> ||
|
||||
std::is_same_v<std::decay_t<T>, uint32_t>),
|
||||
int> = 0>
|
||||
inline void bind_value([[maybe_unused]] sqlite3_stmt* stmt,
|
||||
[[maybe_unused]] int position, [[maybe_unused]] T& _value,
|
||||
[[maybe_unused]] const std::string& query)
|
||||
{
|
||||
throw std::runtime_error("Unsupported type for binding!");
|
||||
}
|
||||
|
||||
template <typename T,
|
||||
std::enable_if_t<common::traits::is_string_literal<T>(), int> = 0>
|
||||
inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
|
||||
const std::string& query)
|
||||
{
|
||||
validate_sqlite3_result(
|
||||
sqlite3_bind_text(stmt, position, _value, -1, SQLITE_STATIC), query.c_str(),
|
||||
"Failed to bind text! Position: ", position, ", Value: ", _value);
|
||||
}
|
||||
|
||||
template <typename T,
|
||||
std::enable_if_t<std::is_floating_point_v<std::decay_t<T>>, int> = 0>
|
||||
inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
|
||||
const std::string& query)
|
||||
{
|
||||
validate_sqlite3_result(
|
||||
sqlite3_bind_double(stmt, position, _value), query.c_str(),
|
||||
"Failed to bind double! Position: ", position, ", Value: ", _value);
|
||||
}
|
||||
|
||||
template <typename T, std::enable_if_t<std::is_same_v<std::decay_t<T>, int64_t> ||
|
||||
std::is_same_v<std::decay_t<T>, uint64_t>,
|
||||
int> = 0>
|
||||
inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
|
||||
const std::string& query)
|
||||
{
|
||||
validate_sqlite3_result(sqlite3_bind_int64(stmt, position, _value), query.c_str(),
|
||||
"Failed to bind int64_t/uint64_t! Position: ", position,
|
||||
", Value: ", _value);
|
||||
}
|
||||
|
||||
template <typename T, std::enable_if_t<std::is_same_v<std::decay_t<T>, int32_t> ||
|
||||
std::is_same_v<std::decay_t<T>, uint32_t>,
|
||||
int> = 0>
|
||||
inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
|
||||
const std::string& query)
|
||||
{
|
||||
validate_sqlite3_result(sqlite3_bind_int(stmt, position, _value), query.c_str(),
|
||||
"Failed to bind int32_t/uint32_t! Position: ", position,
|
||||
", Value: ", _value);
|
||||
}
|
||||
|
||||
public:
|
||||
void initialize_schema();
|
||||
|
||||
void execute_query(const std::string& query);
|
||||
|
||||
size_t get_last_insert_id() const;
|
||||
|
||||
/**
|
||||
* This function prepares an SQLite statement based on the provided SQL query and
|
||||
* returns a lambda that can execute the prepared statement, binding the provided
|
||||
* values to the respective placeholders in the query.
|
||||
*/
|
||||
template <typename... Values>
|
||||
auto create_statement_executor(const std::string& query)
|
||||
{
|
||||
sqlite3_stmt* p_stmt;
|
||||
validate_sqlite3_result(
|
||||
sqlite3_prepare_v2(_sqlite3_db_temp, query.c_str(), -1, &p_stmt, nullptr),
|
||||
query.c_str(), "Failed to create statement!");
|
||||
std::shared_ptr<sqlite3_stmt> stmt{ p_stmt, sqlite3_finalize };
|
||||
|
||||
return [stmt, query, this](Values... value) {
|
||||
std::lock_guard lock{ _mutex };
|
||||
int position = 1;
|
||||
|
||||
((bind_value(stmt.get(), position++, value, query)), ...);
|
||||
|
||||
validate_sqlite3_result(sqlite3_step(stmt.get()), query.c_str(),
|
||||
"Failed to execute step!\n", "Values: ", value...);
|
||||
sqlite3_reset(stmt.get());
|
||||
};
|
||||
}
|
||||
|
||||
static std::string get_upid();
|
||||
|
||||
private:
|
||||
sqlite3* _sqlite3_db{ nullptr };
|
||||
sqlite3* _sqlite3_db_temp{ nullptr };
|
||||
};
|
||||
|
||||
} // namespace data_storage
|
||||
} // namespace rocpd
|
||||
} // namespace rocprofsys
|
||||
+126
@@ -0,0 +1,126 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/traits.hpp"
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace rocpd
|
||||
{
|
||||
namespace data_storage
|
||||
{
|
||||
namespace queries
|
||||
{
|
||||
|
||||
namespace query_builders
|
||||
{
|
||||
|
||||
struct query_value_builder
|
||||
{
|
||||
query_value_builder(std::stringstream& ss)
|
||||
: _ss{ ss }
|
||||
{}
|
||||
|
||||
template <typename... Values>
|
||||
query_value_builder& set_values(Values&&... values)
|
||||
{
|
||||
auto i = sizeof...(values);
|
||||
_ss << "( ";
|
||||
((process_value(values) << (i-- > 1 ? ", " : " ")), ...);
|
||||
_ss << ")";
|
||||
return *this;
|
||||
}
|
||||
|
||||
std::string get_query_string() { return _ss.str(); }
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
std::enable_if_t<common::traits::is_string_literal<T>(), std::stringstream&>
|
||||
process_value(T& value)
|
||||
{
|
||||
_ss << "\"" << value << "\"";
|
||||
return _ss;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::enable_if_t<common::traits::is_optional_v<std::decay_t<T>>, std::stringstream&>
|
||||
process_value(T& value)
|
||||
{
|
||||
if(value.has_value())
|
||||
{
|
||||
_ss << value.value();
|
||||
}
|
||||
else
|
||||
{
|
||||
_ss << "NULL";
|
||||
}
|
||||
return _ss;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::enable_if_t<!common::traits::is_string_literal<T>() &&
|
||||
!common::traits::is_optional_v<std::decay_t<T>>,
|
||||
std::stringstream&>
|
||||
process_value(T& value)
|
||||
{
|
||||
_ss << value;
|
||||
return _ss;
|
||||
}
|
||||
|
||||
private:
|
||||
std::stringstream& _ss;
|
||||
};
|
||||
|
||||
struct query_columns_builder
|
||||
{
|
||||
query_columns_builder(std::stringstream& ss)
|
||||
: _ss{ ss }
|
||||
, _query_value_builder{ _ss }
|
||||
{}
|
||||
|
||||
template <typename... Columns,
|
||||
typename =
|
||||
std::enable_if_t<(common::traits::is_string_literal<Columns>() && ...)>>
|
||||
query_value_builder& set_columns(Columns&... columns)
|
||||
{
|
||||
auto i = sizeof...(columns);
|
||||
_ss << "( ";
|
||||
((_ss << columns << (i-- > 1 ? ", " : " ")), ...) << ") VALUES ";
|
||||
return _query_value_builder;
|
||||
}
|
||||
|
||||
private:
|
||||
std::stringstream& _ss;
|
||||
query_value_builder _query_value_builder;
|
||||
};
|
||||
|
||||
} // namespace query_builders
|
||||
} // namespace queries
|
||||
} // namespace data_storage
|
||||
} // namespace rocpd
|
||||
} // namespace rocprofsys
|
||||
+722
@@ -0,0 +1,722 @@
|
||||
--
|
||||
-- Useful views
|
||||
--
|
||||
-- Code objects
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`code_objects` AS
|
||||
SELECT
|
||||
CO.id,
|
||||
CO.guid,
|
||||
CO.nid,
|
||||
P.pid,
|
||||
A.absolute_index AS agent_abs_index,
|
||||
CO.uri,
|
||||
CO.load_base,
|
||||
CO.load_size,
|
||||
CO.load_delta,
|
||||
CO.storage_type AS storage_type_str,
|
||||
JSON_EXTRACT(CO.extdata, '$.size') AS code_object_size,
|
||||
JSON_EXTRACT(CO.extdata, '$.storage_type') AS storage_type,
|
||||
JSON_EXTRACT(CO.extdata, '$.memory_base') AS memory_base,
|
||||
JSON_EXTRACT(CO.extdata, '$.memory_size') AS memory_size
|
||||
FROM
|
||||
`rocpd_info_code_object` CO
|
||||
INNER JOIN `rocpd_info_agent` A ON CO.agent_id = A.id
|
||||
AND CO.guid = A.guid
|
||||
INNER JOIN `rocpd_info_process` P ON CO.pid = P.id
|
||||
AND CO.guid = P.guid;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`kernel_symbols` AS
|
||||
SELECT
|
||||
KS.id,
|
||||
KS.guid,
|
||||
KS.nid,
|
||||
P.pid,
|
||||
KS.code_object_id,
|
||||
KS.kernel_name,
|
||||
KS.display_name,
|
||||
KS.kernel_object,
|
||||
KS.kernarg_segment_size,
|
||||
KS.kernarg_segment_alignment,
|
||||
KS.group_segment_size,
|
||||
KS.private_segment_size,
|
||||
KS.sgpr_count,
|
||||
KS.arch_vgpr_count,
|
||||
KS.accum_vgpr_count,
|
||||
JSON_EXTRACT(KS.extdata, '$.size') AS kernel_symbol_size,
|
||||
JSON_EXTRACT(KS.extdata, '$.kernel_id') AS kernel_id,
|
||||
JSON_EXTRACT(KS.extdata, '$.kernel_code_entry_byte_offset') AS kernel_code_entry_byte_offset,
|
||||
JSON_EXTRACT(KS.extdata, '$.formatted_kernel_name') AS formatted_kernel_name,
|
||||
JSON_EXTRACT(KS.extdata, '$.demangled_kernel_name') AS demangled_kernel_name,
|
||||
JSON_EXTRACT(KS.extdata, '$.truncated_kernel_name') AS truncated_kernel_name,
|
||||
JSON_EXTRACT(KS.extdata, '$.kernel_address.handle') AS kernel_address
|
||||
FROM
|
||||
`rocpd_info_kernel_symbol` KS
|
||||
INNER JOIN `rocpd_info_process` P ON KS.pid = P.id
|
||||
AND KS.guid = P.guid;
|
||||
|
||||
-- Processes
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`processes` AS
|
||||
SELECT
|
||||
N.id AS nid,
|
||||
N.machine_id,
|
||||
N.system_name,
|
||||
N.hostname,
|
||||
N.release AS system_release,
|
||||
N.version AS system_version,
|
||||
P.guid,
|
||||
P.ppid,
|
||||
P.pid,
|
||||
P.init,
|
||||
P.start,
|
||||
P.end,
|
||||
P.fini,
|
||||
P.command
|
||||
FROM
|
||||
`rocpd_info_process` P
|
||||
INNER JOIN `rocpd_info_node` N ON N.id = P.nid
|
||||
AND N.guid = P.guid;
|
||||
|
||||
-- Threads
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`threads` AS
|
||||
SELECT
|
||||
N.id AS nid,
|
||||
N.machine_id,
|
||||
N.system_name,
|
||||
N.hostname,
|
||||
N.release AS system_release,
|
||||
N.version AS system_version,
|
||||
P.guid,
|
||||
P.ppid,
|
||||
P.pid,
|
||||
T.tid,
|
||||
T.start,
|
||||
T.end,
|
||||
T.name
|
||||
FROM
|
||||
`rocpd_info_thread` T
|
||||
INNER JOIN `rocpd_info_process` P ON P.id = T.pid
|
||||
AND N.guid = T.guid
|
||||
INNER JOIN `rocpd_info_node` N ON N.id = T.nid
|
||||
AND N.guid = T.guid;
|
||||
|
||||
-- CPU regions
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`regions` AS
|
||||
SELECT
|
||||
R.id,
|
||||
R.guid,
|
||||
(
|
||||
SELECT
|
||||
string
|
||||
FROM
|
||||
`rocpd_string` RS
|
||||
WHERE
|
||||
RS.id = E.category_id
|
||||
AND RS.guid = E.guid
|
||||
) AS category,
|
||||
S.string AS name,
|
||||
R.nid,
|
||||
P.pid,
|
||||
T.tid,
|
||||
R.start,
|
||||
R.end,
|
||||
(R.end - R.start) AS duration,
|
||||
R.event_id,
|
||||
E.stack_id,
|
||||
E.parent_stack_id,
|
||||
E.correlation_id AS corr_id,
|
||||
E.extdata,
|
||||
E.call_stack,
|
||||
E.line_info
|
||||
FROM
|
||||
`rocpd_region` R
|
||||
INNER JOIN `rocpd_event` E ON E.id = R.event_id
|
||||
AND E.guid = R.guid
|
||||
INNER JOIN `rocpd_string` S ON S.id = R.name_id
|
||||
AND S.guid = R.guid
|
||||
INNER JOIN `rocpd_info_process` P ON P.id = R.pid
|
||||
AND P.guid = R.guid
|
||||
INNER JOIN `rocpd_info_thread` T ON T.id = R.tid
|
||||
AND T.guid = R.guid;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`region_args` AS
|
||||
SELECT
|
||||
R.id,
|
||||
R.guid,
|
||||
R.nid,
|
||||
P.pid,
|
||||
A.type,
|
||||
A.name,
|
||||
A.value
|
||||
FROM
|
||||
`rocpd_region` R
|
||||
INNER JOIN `rocpd_event` E ON E.id = R.event_id
|
||||
AND E.guid = R.guid
|
||||
INNER JOIN `rocpd_arg` A ON A.event_id = E.id
|
||||
AND A.guid = R.guid
|
||||
INNER JOIN `rocpd_info_process` P ON P.id = R.pid
|
||||
AND P.guid = R.guid;
|
||||
|
||||
--
|
||||
-- Samples
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`samples` AS
|
||||
SELECT
|
||||
R.id,
|
||||
R.guid,
|
||||
(
|
||||
SELECT
|
||||
string
|
||||
FROM
|
||||
`rocpd_string` RS
|
||||
WHERE
|
||||
RS.id = E.category_id
|
||||
AND RS.guid = E.guid
|
||||
) AS category,
|
||||
(
|
||||
SELECT
|
||||
string
|
||||
FROM
|
||||
`rocpd_string` RS
|
||||
WHERE
|
||||
RS.id = T.name_id
|
||||
AND RS.guid = T.guid
|
||||
) AS name,
|
||||
T.nid,
|
||||
P.pid,
|
||||
TH.tid,
|
||||
R.timestamp,
|
||||
R.event_id,
|
||||
E.stack_id AS stack_id,
|
||||
E.parent_stack_id AS parent_stack_id,
|
||||
E.correlation_id AS corr_id,
|
||||
E.extdata AS extdata,
|
||||
E.call_stack AS call_stack,
|
||||
E.line_info AS line_info
|
||||
FROM
|
||||
`rocpd_sample` R
|
||||
INNER JOIN `rocpd_track` T ON T.id = R.track_id
|
||||
AND T.guid = R.guid
|
||||
INNER JOIN `rocpd_event` E ON E.id = R.event_id
|
||||
AND E.guid = R.guid
|
||||
INNER JOIN `rocpd_info_process` P ON P.id = T.pid
|
||||
AND P.guid = T.guid
|
||||
INNER JOIN `rocpd_info_thread` TH ON TH.id = T.tid
|
||||
AND TH.guid = T.guid;
|
||||
|
||||
--
|
||||
-- Provides samples view with the same columns as regions view
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`sample_regions` AS
|
||||
SELECT
|
||||
R.id,
|
||||
R.guid,
|
||||
(
|
||||
SELECT
|
||||
string
|
||||
FROM
|
||||
`rocpd_string` RS
|
||||
WHERE
|
||||
RS.id = E.category_id
|
||||
AND RS.guid = E.guid
|
||||
) AS category,
|
||||
(
|
||||
SELECT
|
||||
string
|
||||
FROM
|
||||
`rocpd_string` RS
|
||||
WHERE
|
||||
RS.id = T.name_id
|
||||
AND RS.guid = T.guid
|
||||
) AS name,
|
||||
T.nid,
|
||||
P.pid,
|
||||
TH.tid,
|
||||
R.timestamp AS start,
|
||||
R.timestamp AS END,
|
||||
(R.timestamp - R.timestamp) AS duration,
|
||||
R.event_id,
|
||||
E.stack_id AS stack_id,
|
||||
E.parent_stack_id AS parent_stack_id,
|
||||
E.correlation_id AS corr_id,
|
||||
E.extdata AS extdata,
|
||||
E.call_stack AS call_stack,
|
||||
E.line_info AS line_info
|
||||
FROM
|
||||
`rocpd_sample` R
|
||||
INNER JOIN `rocpd_track` T ON T.id = R.track_id
|
||||
AND T.guid = R.guid
|
||||
INNER JOIN `rocpd_event` E ON E.id = R.event_id
|
||||
AND E.guid = R.guid
|
||||
INNER JOIN `rocpd_info_process` P ON P.id = T.pid
|
||||
AND P.guid = T.guid
|
||||
INNER JOIN `rocpd_info_thread` TH ON TH.id = T.tid
|
||||
AND TH.guid = T.guid;
|
||||
|
||||
--
|
||||
-- Provides a unified view of the regions and samples
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`regions_and_samples` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`regions`
|
||||
UNION ALL
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`sample_regions`;
|
||||
|
||||
--
|
||||
-- Kernel information
|
||||
CREATE VIEW
|
||||
`kernels` AS
|
||||
SELECT
|
||||
K.id,
|
||||
K.guid,
|
||||
T.tid,
|
||||
(
|
||||
SELECT
|
||||
string
|
||||
FROM
|
||||
`rocpd_string` RS
|
||||
WHERE
|
||||
RS.id = E.category_id
|
||||
AND RS.guid = E.guid
|
||||
) AS category,
|
||||
R.string AS region,
|
||||
S.display_name AS name,
|
||||
K.nid,
|
||||
P.pid,
|
||||
A.absolute_index AS agent_abs_index,
|
||||
A.logical_index AS agent_log_index,
|
||||
A.type_index AS agent_type_index,
|
||||
A.type AS agent_type,
|
||||
S.code_object_id AS code_object_id,
|
||||
K.kernel_id,
|
||||
K.dispatch_id,
|
||||
K.stream_id,
|
||||
K.queue_id,
|
||||
Q.name AS queue,
|
||||
ST.name AS stream,
|
||||
K.start,
|
||||
K.end,
|
||||
(K.end - K.start) AS duration,
|
||||
K.grid_size_x AS grid_x,
|
||||
K.grid_size_y AS grid_y,
|
||||
K.grid_size_z AS grid_z,
|
||||
K.workgroup_size_x AS workgroup_x,
|
||||
K.workgroup_size_y AS workgroup_y,
|
||||
K.workgroup_size_z AS workgroup_z,
|
||||
K.group_segment_size AS lds_size,
|
||||
K.private_segment_size AS scratch_size,
|
||||
S.group_segment_size AS static_lds_size,
|
||||
S.private_segment_size AS static_scratch_size,
|
||||
E.stack_id,
|
||||
E.parent_stack_id,
|
||||
E.correlation_id AS corr_id
|
||||
FROM
|
||||
`rocpd_kernel_dispatch` K
|
||||
INNER JOIN `rocpd_info_agent` A ON A.id = K.agent_id
|
||||
AND A.guid = K.guid
|
||||
INNER JOIN `rocpd_event` E ON E.id = K.event_id
|
||||
AND E.guid = K.guid
|
||||
INNER JOIN `rocpd_string` R ON R.id = K.region_name_id
|
||||
AND R.guid = K.guid
|
||||
INNER JOIN `rocpd_info_kernel_symbol` S ON S.id = K.kernel_id
|
||||
AND S.guid = K.guid
|
||||
LEFT JOIN `rocpd_info_stream` ST ON ST.id = K.stream_id
|
||||
AND ST.guid = K.guid
|
||||
LEFT JOIN `rocpd_info_queue` Q ON Q.id = K.queue_id
|
||||
AND Q.guid = K.guid
|
||||
INNER JOIN `rocpd_info_process` P ON P.id = Q.pid
|
||||
AND P.guid = Q.guid
|
||||
INNER JOIN `rocpd_info_thread` T ON T.id = K.tid
|
||||
AND T.guid = K.guid;
|
||||
|
||||
--
|
||||
-- Performance Monitoring Counters (PMC)
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`pmc_info` AS
|
||||
SELECT
|
||||
PMC_I.id,
|
||||
PMC_I.guid,
|
||||
PMC_I.nid,
|
||||
P.pid,
|
||||
A.absolute_index AS agent_abs_index,
|
||||
PMC_I.is_constant,
|
||||
PMC_I.is_derived,
|
||||
PMC_I.name,
|
||||
PMC_I.description,
|
||||
PMC_I.block,
|
||||
PMC_I.expression
|
||||
FROM
|
||||
`rocpd_info_pmc` PMC_I
|
||||
INNER JOIN `rocpd_info_agent` A ON PMC_I.agent_id = A.id
|
||||
AND PMC_I.guid = A.guid
|
||||
INNER JOIN `rocpd_info_process` P ON P.id = PMC_I.pid
|
||||
AND PMC_I.guid = P.guid;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`pmc_events` AS
|
||||
SELECT
|
||||
PMC_E.id,
|
||||
PMC_E.guid,
|
||||
PMC_E.pmc_id,
|
||||
E.id AS event_id,
|
||||
(
|
||||
SELECT
|
||||
string
|
||||
FROM
|
||||
`rocpd_string` RS
|
||||
WHERE
|
||||
RS.id = E.category_id
|
||||
AND RS.guid = E.guid
|
||||
) AS category,
|
||||
(
|
||||
SELECT
|
||||
display_name
|
||||
FROM
|
||||
`rocpd_info_kernel_symbol` KS
|
||||
WHERE
|
||||
KS.id = K.kernel_id
|
||||
AND KS.guid = K.guid
|
||||
) AS name,
|
||||
K.nid,
|
||||
P.pid,
|
||||
K.dispatch_id,
|
||||
K.start,
|
||||
K.end,
|
||||
(K.end - K.start) AS duration,
|
||||
PMC_I.name AS counter_name,
|
||||
PMC_E.value AS counter_value
|
||||
FROM
|
||||
`rocpd_pmc_event` PMC_E
|
||||
INNER JOIN `rocpd_info_pmc` PMC_I ON PMC_I.id = PMC_E.pmc_id
|
||||
AND PMC_I.guid = PMC_E.guid
|
||||
INNER JOIN `rocpd_event` E ON E.id = PMC_E.event_id
|
||||
AND E.guid = PMC_E.guid
|
||||
INNER JOIN `rocpd_kernel_dispatch` K ON K.event_id = PMC_E.event_id
|
||||
AND K.guid = PMC_E.guid
|
||||
INNER JOIN `rocpd_info_process` P ON P.id = K.pid
|
||||
AND P.guid = K.guid;
|
||||
|
||||
-- events with arguments ---
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`events_args` AS
|
||||
SELECT
|
||||
E.id AS event_id,
|
||||
(
|
||||
SELECT
|
||||
string
|
||||
FROM
|
||||
`rocpd_string` RS
|
||||
WHERE
|
||||
RS.id = E.category_id
|
||||
AND RS.guid = E.guid
|
||||
) AS category,
|
||||
E.stack_id,
|
||||
E.parent_stack_id,
|
||||
E.correlation_id,
|
||||
A.position AS arg_position,
|
||||
A.type AS arg_type,
|
||||
A.name AS arg_name,
|
||||
A.value AS arg_value,
|
||||
E.call_stack,
|
||||
E.line_info,
|
||||
A.extdata
|
||||
FROM
|
||||
`rocpd_event` E
|
||||
INNER JOIN `rocpd_arg` A ON A.event_id = E.id
|
||||
AND A.guid = E.guid;
|
||||
|
||||
-- list of astream arguments enriched by the corresponding stream descriptions
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`stream_args` AS
|
||||
SELECT
|
||||
A.id AS argument_id,
|
||||
A.event_id AS event_id,
|
||||
A.position AS arg_position,
|
||||
A.type AS arg_type,
|
||||
A.value AS arg_value,
|
||||
JSON_EXTRACT(A.extdata, '$.stream_id') AS stream_id,
|
||||
S.nid,
|
||||
P.pid,
|
||||
S.name AS stream_name,
|
||||
S.extdata AS extdata
|
||||
FROM
|
||||
`rocpd_arg` A
|
||||
INNER JOIN `rocpd_info_stream` S ON JSON_EXTRACT(A.extdata, '$.stream_id') = S.id
|
||||
AND A.guid = S.guid
|
||||
INNER JOIN `rocpd_info_process` P ON P.id = S.pid
|
||||
AND P.guid = S.guid
|
||||
WHERE
|
||||
A.name = 'stream';
|
||||
|
||||
--
|
||||
--
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`memory_copies` AS
|
||||
SELECT
|
||||
M.id,
|
||||
M.guid,
|
||||
(
|
||||
SELECT
|
||||
string
|
||||
FROM
|
||||
`rocpd_string` RS
|
||||
WHERE
|
||||
RS.id = E.category_id
|
||||
AND RS.guid = E.guid
|
||||
) AS category,
|
||||
M.nid,
|
||||
P.pid,
|
||||
T.tid,
|
||||
M.start,
|
||||
M.end,
|
||||
(M.end - M.start) AS duration,
|
||||
S.string AS name,
|
||||
R.string AS region_name,
|
||||
M.stream_id,
|
||||
M.queue_id,
|
||||
ST.name AS stream_name,
|
||||
Q.name AS queue_name,
|
||||
M.size,
|
||||
dst_agent.name AS dst_device,
|
||||
dst_agent.absolute_index AS dst_agent_abs_index,
|
||||
dst_agent.logical_index AS dst_agent_log_index,
|
||||
dst_agent.type_index AS dst_agent_type_index,
|
||||
dst_agent.type AS dst_agent_type,
|
||||
M.dst_address,
|
||||
src_agent.name AS src_device,
|
||||
src_agent.absolute_index AS src_agent_abs_index,
|
||||
src_agent.logical_index AS src_agent_log_index,
|
||||
src_agent.type_index AS src_agent_type_index,
|
||||
src_agent.type AS src_agent_type,
|
||||
M.src_address,
|
||||
E.stack_id,
|
||||
E.parent_stack_id,
|
||||
E.correlation_id AS corr_id
|
||||
FROM
|
||||
`rocpd_memory_copy` M
|
||||
INNER JOIN `rocpd_string` S ON S.id = M.name_id
|
||||
AND S.guid = M.guid
|
||||
LEFT JOIN `rocpd_string` R ON R.id = M.region_name_id
|
||||
AND R.guid = M.guid
|
||||
INNER JOIN `rocpd_info_agent` dst_agent ON dst_agent.id = M.dst_agent_id
|
||||
AND dst_agent.guid = M.guid
|
||||
INNER JOIN `rocpd_info_agent` src_agent ON src_agent.id = M.src_agent_id
|
||||
AND src_agent.guid = M.guid
|
||||
LEFT JOIN `rocpd_info_queue` Q ON Q.id = M.queue_id
|
||||
AND Q.guid = M.guid
|
||||
LEFT JOIN `rocpd_info_stream` ST ON ST.id = M.stream_id
|
||||
AND ST.guid = M.guid
|
||||
INNER JOIN `rocpd_event` E ON E.id = M.event_id
|
||||
AND E.guid = M.guid
|
||||
INNER JOIN `rocpd_info_process` P ON P.id = M.pid
|
||||
AND P.guid = M.guid
|
||||
INNER JOIN `rocpd_info_thread` T ON T.id = M.tid
|
||||
AND T.guid = M.guid;
|
||||
|
||||
--
|
||||
--
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`memory_allocations` AS
|
||||
SELECT
|
||||
M.id,
|
||||
M.guid,
|
||||
(
|
||||
SELECT
|
||||
string
|
||||
FROM
|
||||
`rocpd_string` RS
|
||||
WHERE
|
||||
RS.id = E.category_id
|
||||
AND RS.guid = E.guid
|
||||
) AS category,
|
||||
M.nid,
|
||||
P.pid,
|
||||
T.tid,
|
||||
M.start,
|
||||
M.end,
|
||||
(M.end - M.start) AS duration,
|
||||
M.type,
|
||||
M.level,
|
||||
A.name AS agent_name,
|
||||
A.absolute_index AS agent_abs_index,
|
||||
A.logical_index AS agent_log_index,
|
||||
A.type_index AS agent_type_index,
|
||||
A.type AS agent_type,
|
||||
M.address,
|
||||
M.size,
|
||||
M.queue_id,
|
||||
Q.name AS queue_name,
|
||||
M.stream_id,
|
||||
ST.name AS stream_name,
|
||||
E.stack_id,
|
||||
E.parent_stack_id,
|
||||
E.correlation_id AS corr_id
|
||||
FROM
|
||||
`rocpd_memory_allocate` M
|
||||
LEFT JOIN `rocpd_info_agent` A ON M.agent_id = A.id
|
||||
AND M.guid = A.guid
|
||||
LEFT JOIN `rocpd_info_queue` Q ON Q.id = M.queue_id
|
||||
AND Q.guid = M.guid
|
||||
LEFT JOIN `rocpd_info_stream` ST ON ST.id = M.stream_id
|
||||
AND ST.guid = M.guid
|
||||
INNER JOIN `rocpd_event` E ON E.id = M.event_id
|
||||
AND E.guid = M.guid
|
||||
INNER JOIN `rocpd_info_process` P ON P.id = M.pid
|
||||
AND P.guid = M.guid
|
||||
INNER JOIN `rocpd_info_thread` T ON T.id = M.tid
|
||||
AND P.guid = M.guid;
|
||||
|
||||
--
|
||||
--
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`scratch_memory` AS
|
||||
SELECT
|
||||
M.id,
|
||||
M.guid,
|
||||
M.nid,
|
||||
P.pid,
|
||||
M.type AS operation,
|
||||
A.name AS agent_name,
|
||||
A.absolute_index AS agent_abs_index,
|
||||
A.logical_index AS agent_log_index,
|
||||
A.type_index AS agent_type_index,
|
||||
A.type AS agent_type,
|
||||
M.queue_id,
|
||||
T.tid,
|
||||
JSON_EXTRACT(M.extdata, '$.flags') AS alloc_flags,
|
||||
M.start,
|
||||
M.end,
|
||||
M.size,
|
||||
M.address,
|
||||
E.correlation_id,
|
||||
E.stack_id,
|
||||
E.parent_stack_id,
|
||||
E.correlation_id AS corr_id,
|
||||
(
|
||||
SELECT
|
||||
string
|
||||
FROM
|
||||
`rocpd_string` RS
|
||||
WHERE
|
||||
RS.id = E.category_id
|
||||
AND RS.guid = E.guid
|
||||
) AS category,
|
||||
E.extdata AS event_extdata
|
||||
FROM
|
||||
`rocpd_memory_allocate` M
|
||||
LEFT JOIN `rocpd_info_agent` A ON M.agent_id = A.id
|
||||
AND M.guid = A.guid
|
||||
LEFT JOIN `rocpd_info_queue` Q ON Q.id = M.queue_id
|
||||
AND Q.guid = M.guid
|
||||
INNER JOIN `rocpd_event` E ON E.id = M.event_id
|
||||
AND E.guid = M.guid
|
||||
INNER JOIN `rocpd_info_process` P ON P.id = M.pid
|
||||
AND P.guid = M.guid
|
||||
INNER JOIN `rocpd_info_thread` T ON T.id = M.tid
|
||||
AND T.guid = M.guid
|
||||
WHERE
|
||||
M.level = 'SCRATCH'
|
||||
ORDER BY
|
||||
M.start ASC;
|
||||
|
||||
--
|
||||
--
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`counters_collection` AS
|
||||
SELECT
|
||||
MIN(PMC_E.id) AS id,
|
||||
PMC_E.guid,
|
||||
K.dispatch_id,
|
||||
K.kernel_id,
|
||||
E.id AS event_id,
|
||||
E.correlation_id,
|
||||
E.stack_id,
|
||||
E.parent_stack_id,
|
||||
P.pid,
|
||||
T.tid,
|
||||
K.agent_id,
|
||||
A.absolute_index AS agent_abs_index,
|
||||
A.logical_index AS agent_log_index,
|
||||
A.type_index AS agent_type_index,
|
||||
A.type AS agent_type,
|
||||
K.queue_id,
|
||||
k.grid_size_x AS grid_size_x,
|
||||
k.grid_size_y AS grid_size_y,
|
||||
k.grid_size_z AS grid_size_z,
|
||||
(K.grid_size_x * K.grid_size_y * K.grid_size_z) AS grid_size,
|
||||
S.display_name AS kernel_name,
|
||||
(
|
||||
SELECT
|
||||
string
|
||||
FROM
|
||||
`rocpd_string` RS
|
||||
WHERE
|
||||
RS.id = K.region_name_id
|
||||
AND RS.guid = K.guid
|
||||
) AS kernel_region,
|
||||
K.workgroup_size_x AS workgroup_size_x,
|
||||
K.workgroup_size_y AS workgroup_size_y,
|
||||
K.workgroup_size_z AS workgroup_size_z,
|
||||
(K.workgroup_size_x * K.workgroup_size_y * K.workgroup_size_z) AS workgroup_size,
|
||||
K.group_segment_size AS lds_block_size,
|
||||
K.private_segment_size AS scratch_size,
|
||||
S.arch_vgpr_count AS vgpr_count,
|
||||
S.accum_vgpr_count,
|
||||
S.sgpr_count,
|
||||
PMC_I.name AS counter_name,
|
||||
PMC_I.symbol AS counter_symbol,
|
||||
PMC_I.component,
|
||||
PMC_I.description,
|
||||
PMC_I.block,
|
||||
PMC_I.expression,
|
||||
PMC_I.value_type,
|
||||
PMC_I.id AS counter_id,
|
||||
SUM(PMC_E.value) AS value,
|
||||
K.start,
|
||||
K.end,
|
||||
PMC_I.is_constant,
|
||||
PMC_I.is_derived,
|
||||
(K.end - K.start) AS duration,
|
||||
(
|
||||
SELECT
|
||||
string
|
||||
FROM
|
||||
`rocpd_string` RS
|
||||
WHERE
|
||||
RS.id = E.category_id
|
||||
AND RS.guid = E.guid
|
||||
) AS category,
|
||||
K.nid,
|
||||
E.extdata,
|
||||
S.code_object_id
|
||||
FROM
|
||||
`rocpd_pmc_event` PMC_E
|
||||
INNER JOIN `rocpd_info_pmc` PMC_I ON PMC_I.id = PMC_E.pmc_id
|
||||
AND PMC_I.guid = PMC_E.guid
|
||||
INNER JOIN `rocpd_event` E ON E.id = PMC_E.event_id
|
||||
AND E.guid = PMC_E.guid
|
||||
INNER JOIN `rocpd_kernel_dispatch` K ON K.event_id = PMC_E.event_id
|
||||
AND K.guid = PMC_E.guid
|
||||
INNER JOIN `rocpd_info_agent` A ON A.id = K.agent_id
|
||||
AND A.guid = K.guid
|
||||
INNER JOIN `rocpd_info_kernel_symbol` S ON S.id = K.kernel_id
|
||||
AND S.guid = K.guid
|
||||
INNER JOIN `rocpd_info_process` P ON P.id = K.pid
|
||||
AND P.guid = K.guid
|
||||
INNER JOIN `rocpd_info_thread` T ON T.id = K.tid
|
||||
AND T.guid = K.guid
|
||||
GROUP BY
|
||||
PMC_E.guid,
|
||||
K.dispatch_id,
|
||||
PMC_I.name,
|
||||
K.agent_id;
|
||||
+3
@@ -0,0 +1,3 @@
|
||||
--
|
||||
-- Views related to markers
|
||||
--
|
||||
+45
@@ -0,0 +1,45 @@
|
||||
--
|
||||
-- Indexes for the various fields
|
||||
--
|
||||
|
||||
-- string field
|
||||
-- CREATE INDEX `rocpd_string{{uuid}}_string_idx` ON `rocpd_string{{uuid}}` ("string");
|
||||
|
||||
-- guid field
|
||||
-- CREATE INDEX `rocpd_string{{uuid}}_guid_idx` ON `rocpd_string{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_info_node{{uuid}}_guid_idx` ON `rocpd_info_node{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_info_process{{uuid}}_guid_idx` ON `rocpd_info_process{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_info_thread{{uuid}}_guid_idx` ON `rocpd_info_thread{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_info_agent{{uuid}}_guid_idx` ON `rocpd_info_agent{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_info_queue{{uuid}}_guid_idx` ON `rocpd_info_queue{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_info_stream{{uuid}}_guid_idx` ON `rocpd_info_stream{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_info_pmc{{uuid}}_guid_idx` ON `rocpd_info_pmc{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_info_code_object{{uuid}}_guid_idx` ON `rocpd_info_code_object{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_info_kernel_symbol{{uuid}}_guid_idx` ON `rocpd_info_kernel_symbol{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_track{{uuid}}_guid_idx` ON `rocpd_track{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_event{{uuid}}_guid_idx` ON `rocpd_event{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_arg{{uuid}}_guid_idx` ON `rocpd_arg{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_pmc_event{{uuid}}_guid_idx` ON `rocpd_pmc_event{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_region{{uuid}}_guid_idx` ON `rocpd_region{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_sample{{uuid}}_guid_idx` ON `rocpd_sample{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_kernel_dispatch{{uuid}}_guid_idx` ON `rocpd_kernel_dispatch{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_memory_copy{{uuid}}_guid_idx` ON `rocpd_memory_copy{{uuid}}` ("id", "guid");
|
||||
-- CREATE INDEX `rocpd_memory_allocate{{uuid}}_guid_idx` ON `rocpd_memory_allocate{{uuid}}` ("id", "guid");
|
||||
|
||||
-- CREATE INDEX `rocpd_event{{uuid}}_category_idx` ON `rocpd_event{{uuid}}` ("id", "guid", "category_id");
|
||||
-- CREATE INDEX `rocpd_region{{uuid}}_event_idx` ON `rocpd_region{{uuid}}` ("id", "guid", "event_id");
|
||||
-- CREATE INDEX `rocpd_region{{uuid}}_name_idx` ON `rocpd_region{{uuid}}` ("id", "guid", "name_id");
|
||||
-- CREATE INDEX `rocpd_sample{{uuid}}_event_idx` ON `rocpd_sample{{uuid}}` ("id", "guid", "event_id");
|
||||
-- CREATE INDEX `rocpd_sample{{uuid}}_track_idx` ON `rocpd_sample{{uuid}}` ("id", "guid", "track_id");
|
||||
-- CREATE INDEX `rocpd_track{{uuid}}_name_idx` ON `rocpd_track{{uuid}}` ("id", "guid", "name_id");
|
||||
|
||||
-- CREATE INDEX `rocpd_memory_copy{{uuid}}_guid_nid_pid_idx` ON `rocpd_memory_copy{{uuid}}` ("guid", "nid", "pid");
|
||||
-- CREATE INDEX `rocpd_kernel_dispatch{{uuid}}_guid_nid_pid_idx` ON `rocpd_kernel_dispatch{{uuid}}` ("guid", "nid", "pid");
|
||||
-- CREATE INDEX `rocpd_region{{uuid}}_guid_idx` ON `rocpd_region{{uuid}}` ("guid", "nid", "pid");
|
||||
-- CREATE INDEX `rocpd_sample{{uuid}}_guid_nid_pid_idx` ON `rocpd_sample{{uuid}}` ("guid", "nid", "pid");
|
||||
|
||||
-- CREATE INDEX `rocpd_region{{uuid}}_guid_idx` ON `rocpd_region{{uuid}}` ("guid");
|
||||
-- CREATE INDEX `rocpd_region{{uuid}}_nid_idx` ON `rocpd_region{{uuid}}` ("nid");
|
||||
-- CREATE INDEX `rocpd_region{{uuid}}_pid_idx` ON `rocpd_region{{uuid}}` ("pid");
|
||||
-- CREATE INDEX `rocpd_region{{uuid}}_start_idx` ON `rocpd_region{{uuid}}` ("start");
|
||||
-- CREATE INDEX `rocpd_region{{uuid}}_end_idx` ON `rocpd_region{{uuid}}` ("end");
|
||||
+373
@@ -0,0 +1,373 @@
|
||||
-- Enable foreign key support for cascading
|
||||
PRAGMA foreign_keys = ON;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
"rocpd_metadata{{uuid}}" (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"tag" TEXT NOT NULL,
|
||||
"value" TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_string{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"string" TEXT NOT NULL UNIQUE ON CONFLICT ABORT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_info_node{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"hash" BIGINT NOT NULL UNIQUE,
|
||||
"machine_id" TEXT NOT NULL UNIQUE,
|
||||
"system_name" TEXT,
|
||||
"hostname" TEXT,
|
||||
"release" TEXT,
|
||||
"version" TEXT,
|
||||
"hardware_name" TEXT,
|
||||
"domain_name" TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_info_process{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"nid" INTEGER NOT NULL,
|
||||
"ppid" INTEGER,
|
||||
"pid" INTEGER NOT NULL,
|
||||
"init" BIGINT,
|
||||
"fini" BIGINT,
|
||||
"start" BIGINT,
|
||||
"end" BIGINT,
|
||||
"command" TEXT,
|
||||
"environment" JSONB DEFAULT "{}" NOT NULL,
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_info_thread{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"nid" INTEGER NOT NULL,
|
||||
"ppid" INTEGER,
|
||||
"pid" INTEGER NOT NULL,
|
||||
"tid" INTEGER NOT NULL,
|
||||
"name" TEXT,
|
||||
"start" BIGINT,
|
||||
"end" BIGINT,
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_info_agent{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"nid" INTEGER NOT NULL,
|
||||
"pid" INTEGER NOT NULL,
|
||||
"type" TEXT CHECK ("type" IN ('CPU', 'GPU')),
|
||||
"absolute_index" INTEGER,
|
||||
"logical_index" INTEGER,
|
||||
"type_index" INTEGER,
|
||||
"uuid" INTEGER,
|
||||
"name" TEXT,
|
||||
"model_name" TEXT,
|
||||
"vendor_name" TEXT,
|
||||
"product_name" TEXT,
|
||||
"user_name" TEXT,
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_info_queue{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"nid" INTEGER NOT NULL,
|
||||
"pid" INTEGER NOT NULL,
|
||||
"name" TEXT,
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_info_stream{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"nid" INTEGER NOT NULL,
|
||||
"pid" INTEGER NOT NULL,
|
||||
"name" TEXT,
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
-- 2993533, 2269219937, 2993533
|
||||
-- 2993533, 2269219937, 2993533
|
||||
-- Performance monitoring counters (PMC) descriptions
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_info_pmc{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"nid" INTEGER NOT NULL,
|
||||
"pid" INTEGER NOT NULL,
|
||||
"agent_id" INTEGER,
|
||||
"target_arch" TEXT CHECK ("target_arch" IN ('CPU', 'GPU')),
|
||||
"event_code" INT,
|
||||
"instance_id" INTEGER,
|
||||
"name" TEXT NOT NULL,
|
||||
"symbol" TEXT NOT NULL,
|
||||
"description" TEXT,
|
||||
"long_description" TEXT DEFAULT "",
|
||||
"component" TEXT,
|
||||
"units" TEXT DEFAULT "",
|
||||
"value_type" TEXT CHECK ("value_type" IN ('ABS', 'ACCUM', 'RELATIVE')),
|
||||
"block" TEXT,
|
||||
"expression" TEXT,
|
||||
"is_constant" INTEGER,
|
||||
"is_derived" INTEGER,
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_info_code_object{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"nid" INTEGER NOT NULL,
|
||||
"pid" INTEGER NOT NULL,
|
||||
"agent_id" INTEGER,
|
||||
"uri" TEXT,
|
||||
"load_base" BIGINT,
|
||||
"load_size" BIGINT,
|
||||
"load_delta" BIGINT,
|
||||
"storage_type" TEXT CHECK ("storage_type" IN ('FILE', 'MEMORY')),
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_info_kernel_symbol{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"nid" INTEGER NOT NULL,
|
||||
"pid" INTEGER NOT NULL,
|
||||
"code_object_id" INTEGER NOT NULL,
|
||||
"kernel_name" TEXT,
|
||||
"display_name" TEXT,
|
||||
"kernel_object" INTEGER,
|
||||
"kernarg_segment_size" INTEGER,
|
||||
"kernarg_segment_alignment" INTEGER,
|
||||
"group_segment_size" INTEGER,
|
||||
"private_segment_size" INTEGER,
|
||||
"sgpr_count" INTEGER,
|
||||
"arch_vgpr_count" INTEGER,
|
||||
"accum_vgpr_count" INTEGER,
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (code_object_id) REFERENCES `rocpd_info_code_object{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
-- Stores repetitive info for samples
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_track{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"nid" INTEGER NOT NULL,
|
||||
"pid" INTEGER,
|
||||
"tid" INTEGER,
|
||||
"name_id" INTEGER,
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (tid) REFERENCES `rocpd_info_thread{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
-- Storage for a region, instant, and counter
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_event{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"category_id" INTEGER,
|
||||
"stack_id" INTEGER,
|
||||
"parent_stack_id" INTEGER,
|
||||
"correlation_id" INTEGER,
|
||||
"call_stack" JSONB DEFAULT "{}" NOT NULL,
|
||||
"line_info" JSONB DEFAULT "{}" NOT NULL,
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (category_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
-- stores arguments for events
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_arg{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"event_id" INTEGER NOT NULL,
|
||||
"position" INTEGER NOT NULL,
|
||||
"type" TEXT NOT NULL,
|
||||
"name" TEXT NOT NULL,
|
||||
"value" TEXT, -- TODO: discuss make it value_id and integer, refer to string table --
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
-- Region with a start/stop on the same thread (CPU)
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_pmc_event{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"event_id" INTEGER,
|
||||
"pmc_id" INTEGER NOT NULL,
|
||||
"value" REAL DEFAULT 0.0,
|
||||
"extdata" JSONB DEFAULT "{}",
|
||||
FOREIGN KEY (pmc_id) REFERENCES `rocpd_info_pmc{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
-- Region with a start/stop on the same thread (CPU)
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_region{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"nid" INTEGER NOT NULL,
|
||||
"pid" INTEGER NOT NULL,
|
||||
"tid" INTEGER NOT NULL,
|
||||
"start" BIGINT NOT NULL,
|
||||
"end" BIGINT NOT NULL,
|
||||
"name_id" INTEGER NOT NULL,
|
||||
"event_id" INTEGER,
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (tid) REFERENCES `rocpd_info_thread{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
-- Instantaneous sample
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_sample{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"track_id" INTEGER NOT NULL,
|
||||
"timestamp" BIGINT NOT NULL,
|
||||
"event_id" INTEGER,
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (track_id) REFERENCES `rocpd_track{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_kernel_dispatch{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"nid" INTEGER NOT NULL,
|
||||
"pid" INTEGER NOT NULL,
|
||||
"tid" INTEGER,
|
||||
"agent_id" INTEGER NOT NULL,
|
||||
"kernel_id" INTEGER NOT NULL,
|
||||
"dispatch_id" INTEGER NOT NULL,
|
||||
"queue_id" INTEGER NOT NULL,
|
||||
"stream_id" INTEGER NOT NULL,
|
||||
"start" BIGINT NOT NULL,
|
||||
"end" BIGINT NOT NULL,
|
||||
"private_segment_size" INTEGER,
|
||||
"group_segment_size" INTEGER,
|
||||
"workgroup_size_x" INTEGER NOT NULL,
|
||||
"workgroup_size_y" INTEGER NOT NULL,
|
||||
"workgroup_size_z" INTEGER NOT NULL,
|
||||
"grid_size_x" INTEGER NOT NULL,
|
||||
"grid_size_y" INTEGER NOT NULL,
|
||||
"grid_size_z" INTEGER NOT NULL,
|
||||
"region_name_id" INTEGER,
|
||||
"event_id" INTEGER,
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (tid) REFERENCES `rocpd_info_thread{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (kernel_id) REFERENCES `rocpd_info_kernel_symbol{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (queue_id) REFERENCES `rocpd_info_queue{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (stream_id) REFERENCES `rocpd_info_stream{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (region_name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_memory_copy{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"nid" INTEGER NOT NULL,
|
||||
"pid" INTEGER NOT NULL,
|
||||
"tid" INTEGER,
|
||||
"start" BIGINT NOT NULL,
|
||||
"end" BIGINT NOT NULL,
|
||||
"name_id" INTEGER NOT NULL,
|
||||
"dst_agent_id" INTEGER,
|
||||
"dst_address" INTEGER,
|
||||
"src_agent_id" INTEGER,
|
||||
"src_address" INTEGER,
|
||||
"size" INTEGER NOT NULL,
|
||||
"queue_id" INTEGER,
|
||||
"stream_id" INTEGER,
|
||||
"region_name_id" INTEGER,
|
||||
"event_id" INTEGER,
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (tid) REFERENCES `rocpd_info_thread{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (dst_agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (src_agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (stream_id) REFERENCES `rocpd_info_stream{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (queue_id) REFERENCES `rocpd_info_queue{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (region_name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
-- Memory allocations (real memory, virtual memory, and scratch memory)
|
||||
CREATE TABLE IF NOT EXISTS
|
||||
`rocpd_memory_allocate{{uuid}}` (
|
||||
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
|
||||
"nid" INTEGER NOT NULL,
|
||||
"pid" INTEGER NOT NULL,
|
||||
"tid" INTEGER,
|
||||
"agent_id" INTEGER,
|
||||
"type" TEXT CHECK ("type" IN ('ALLOC', 'FREE', 'REALLOC', 'RECLAIM')),
|
||||
"level" TEXT CHECK ("level" IN ('REAL', 'VIRTUAL', 'SCRATCH')),
|
||||
"start" BIGINT NOT NULL,
|
||||
"end" BIGINT NOT NULL,
|
||||
"address" INTEGER,
|
||||
"size" INTEGER NOT NULL,
|
||||
"queue_id" INTEGER,
|
||||
"stream_id" INTEGER,
|
||||
"event_id" INTEGER,
|
||||
"extdata" JSONB DEFAULT "{}" NOT NULL,
|
||||
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (tid) REFERENCES `rocpd_info_thread{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (stream_id) REFERENCES `rocpd_info_stream{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (queue_id) REFERENCES `rocpd_info_queue{{uuid}}` (id) ON UPDATE CASCADE,
|
||||
FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE
|
||||
);
|
||||
|
||||
INSERT INTO
|
||||
`rocpd_metadata{{uuid}}` ("tag", "value")
|
||||
VALUES
|
||||
("schema_version", "3"),
|
||||
("uuid", "{{uuid}}"),
|
||||
("guid", "{{guid}}");
|
||||
+139
@@ -0,0 +1,139 @@
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_metadata` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_metadata{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_string` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_string{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_info_node` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_info_node{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_info_process` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_info_process{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_info_thread` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_info_thread{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_info_agent` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_info_agent{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_info_queue` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_info_queue{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_info_stream` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_info_stream{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_info_pmc` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_info_pmc{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_info_code_object` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_info_code_object{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_info_kernel_symbol` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_info_kernel_symbol{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_track` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_track{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_event` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_event{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_arg` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_arg{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_pmc_event` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_pmc_event{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_region` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_region{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_sample` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_sample{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_kernel_dispatch` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_kernel_dispatch{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_memory_copy` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_memory_copy{{uuid}}`;
|
||||
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`rocpd_memory_allocate` AS
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`rocpd_memory_allocate{{uuid}}`;
|
||||
+376
@@ -0,0 +1,376 @@
|
||||
--
|
||||
-- Useful summary views
|
||||
--
|
||||
--
|
||||
-- Sorted list of kernels which consume the most overall time
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`top_kernels` AS
|
||||
SELECT
|
||||
S.display_name AS name,
|
||||
COUNT(K.kernel_id) AS total_calls,
|
||||
SUM(K.end - K.start) / 1000.0 AS total_duration,
|
||||
(SUM(K.end - K.start) / COUNT(K.kernel_id)) / 1000.0 AS average,
|
||||
SUM(K.end - K.start) * 100.0 / (
|
||||
SELECT
|
||||
SUM(A.end - A.start)
|
||||
FROM
|
||||
`rocpd_kernel_dispatch` A
|
||||
) AS percentage
|
||||
FROM
|
||||
`rocpd_kernel_dispatch` K
|
||||
INNER JOIN `rocpd_info_kernel_symbol` S ON S.id = K.kernel_id
|
||||
AND S.guid = K.guid
|
||||
GROUP BY
|
||||
name
|
||||
ORDER BY
|
||||
total_duration DESC;
|
||||
|
||||
--
|
||||
-- GPU utilization metrics including kernels and memory copy operations
|
||||
CREATE VIEW IF NOT EXISTS
|
||||
`busy` AS
|
||||
SELECT
|
||||
A.agent_id,
|
||||
AG.type,
|
||||
GpuTime,
|
||||
WallTime,
|
||||
GpuTime * 1.0 / WallTime AS Busy
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
agent_id,
|
||||
guid,
|
||||
SUM(END - start) AS GpuTime
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
agent_id,
|
||||
guid,
|
||||
END,
|
||||
start
|
||||
FROM
|
||||
`rocpd_kernel_dispatch`
|
||||
UNION ALL
|
||||
SELECT
|
||||
dst_agent_id AS agent_id,
|
||||
guid,
|
||||
END,
|
||||
start
|
||||
FROM
|
||||
`rocpd_memory_copy`
|
||||
)
|
||||
GROUP BY
|
||||
agent_id,
|
||||
guid
|
||||
) A
|
||||
INNER JOIN (
|
||||
SELECT
|
||||
MAX(END) - MIN(start) AS WallTime
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
END,
|
||||
start
|
||||
FROM
|
||||
`rocpd_kernel_dispatch`
|
||||
UNION ALL
|
||||
SELECT
|
||||
END,
|
||||
start
|
||||
FROM
|
||||
`rocpd_memory_copy`
|
||||
)
|
||||
) W ON 1 = 1
|
||||
INNER JOIN `rocpd_info_agent` AG ON AG.id = A.agent_id
|
||||
AND AG.guid = A.guid;
|
||||
|
||||
--
|
||||
-- Overall performance summary including kernels and memory copy operations
|
||||
CREATE VIEW
|
||||
`top` AS
|
||||
SELECT
|
||||
name,
|
||||
COUNT(*) AS total_calls,
|
||||
SUM(duration) / 1000.0 AS total_duration,
|
||||
(SUM(duration) / COUNT(*)) / 1000.0 AS average,
|
||||
SUM(duration) * 100.0 / total_time AS percentage
|
||||
FROM
|
||||
(
|
||||
-- Kernel operations
|
||||
SELECT
|
||||
ks.display_name AS name,
|
||||
(kd.end - kd.start) AS duration
|
||||
FROM
|
||||
`rocpd_kernel_dispatch` kd
|
||||
INNER JOIN `rocpd_info_kernel_symbol` ks ON kd.kernel_id = ks.id
|
||||
AND kd.guid = ks.guid
|
||||
UNION ALL
|
||||
-- Memory operations
|
||||
SELECT
|
||||
rs.string AS name,
|
||||
(END - start) AS duration
|
||||
FROM
|
||||
`rocpd_memory_copy` mc
|
||||
INNER JOIN `rocpd_string` rs ON rs.id = mc.name_id
|
||||
AND rs.guid = mc.guid
|
||||
UNION ALL
|
||||
-- Regions
|
||||
SELECT
|
||||
rs.string AS name,
|
||||
(END - start) AS duration
|
||||
FROM
|
||||
`rocpd_region` rr
|
||||
INNER JOIN `rocpd_string` rs ON rs.id = rr.name_id
|
||||
AND rs.guid = rr.guid
|
||||
) operations
|
||||
CROSS JOIN (
|
||||
SELECT
|
||||
SUM(END - start) AS total_time
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
END,
|
||||
start
|
||||
FROM
|
||||
`rocpd_kernel_dispatch`
|
||||
UNION ALL
|
||||
SELECT
|
||||
END,
|
||||
start
|
||||
FROM
|
||||
`rocpd_memory_copy`
|
||||
UNION ALL
|
||||
SELECT
|
||||
END,
|
||||
start
|
||||
FROM
|
||||
`rocpd_region`
|
||||
)
|
||||
) TOTAL
|
||||
GROUP BY
|
||||
name
|
||||
ORDER BY
|
||||
total_duration DESC;
|
||||
|
||||
-- Kernel summary by name
|
||||
CREATE VIEW
|
||||
`kernel_summary` AS
|
||||
WITH
|
||||
avg_data AS (
|
||||
SELECT
|
||||
name,
|
||||
AVG(duration) AS avg_duration
|
||||
FROM
|
||||
`kernels`
|
||||
GROUP BY
|
||||
name
|
||||
),
|
||||
aggregated_data AS (
|
||||
SELECT
|
||||
K.name,
|
||||
COUNT(*) AS calls,
|
||||
SUM(K.duration) AS total_duration,
|
||||
SUM(CAST(K.duration AS REAL) * CAST(K.duration AS REAL)) AS sqr_duration,
|
||||
A.avg_duration AS average_duration,
|
||||
MIN(K.duration) AS min_duration,
|
||||
MAX(K.duration) AS max_duration,
|
||||
SUM(CAST((K.duration - A.avg_duration) AS REAL) * CAST((K.duration - A.avg_duration) AS REAL)) / (COUNT(*) - 1) AS variance_duration,
|
||||
SQRT(
|
||||
SUM(CAST((K.duration - A.avg_duration) AS REAL) * CAST((K.duration - A.avg_duration) AS REAL)) / (COUNT(*) - 1)
|
||||
) AS std_dev_duration
|
||||
FROM
|
||||
`kernels` K
|
||||
JOIN avg_data A ON K.name = A.name
|
||||
GROUP BY
|
||||
K.name
|
||||
),
|
||||
total_duration AS (
|
||||
SELECT
|
||||
SUM(total_duration) AS grand_total_duration
|
||||
FROM
|
||||
aggregated_data
|
||||
)
|
||||
SELECT
|
||||
AD.name AS name,
|
||||
AD.calls,
|
||||
AD.total_duration AS "DURATION (nsec)",
|
||||
AD.sqr_duration AS "SQR (nsec)",
|
||||
AD.average_duration AS "AVERAGE (nsec)",
|
||||
(CAST(AD.total_duration AS REAL) / TD.grand_total_duration) * 100 AS "PERCENT (INC)",
|
||||
AD.min_duration AS "MIN (nsec)",
|
||||
AD.max_duration AS "MAX (nsec)",
|
||||
AD.variance_duration AS "VARIANCE",
|
||||
AD.std_dev_duration AS "STD_DEV"
|
||||
FROM
|
||||
aggregated_data AD
|
||||
CROSS JOIN total_duration TD;
|
||||
|
||||
--
|
||||
-- Kernel summary by region name
|
||||
CREATE VIEW
|
||||
`kernel_summary_region` AS
|
||||
WITH
|
||||
avg_data AS (
|
||||
SELECT
|
||||
region,
|
||||
AVG(duration) AS avg_duration
|
||||
FROM
|
||||
`kernels`
|
||||
GROUP BY
|
||||
region
|
||||
),
|
||||
aggregated_data AS (
|
||||
SELECT
|
||||
K.region AS name,
|
||||
COUNT(*) AS calls,
|
||||
SUM(K.duration) AS total_duration,
|
||||
SUM(CAST(K.duration AS REAL) * CAST(K.duration AS REAL)) AS sqr_duration,
|
||||
A.avg_duration AS average_duration,
|
||||
MIN(K.duration) AS min_duration,
|
||||
MAX(K.duration) AS max_duration,
|
||||
SUM(CAST((K.duration - A.avg_duration) AS REAL) * CAST((K.duration - A.avg_duration) AS REAL)) / (COUNT(*) - 1) AS variance_duration,
|
||||
SQRT(
|
||||
SUM(CAST((K.duration - A.avg_duration) AS REAL) * CAST((K.duration - A.avg_duration) AS REAL)) / (COUNT(*) - 1)
|
||||
) AS std_dev_duration
|
||||
FROM
|
||||
`kernels` K
|
||||
JOIN avg_data A ON K.region = A.region
|
||||
GROUP BY
|
||||
K.region
|
||||
),
|
||||
total_duration AS (
|
||||
SELECT
|
||||
SUM(total_duration) AS grand_total_duration
|
||||
FROM
|
||||
aggregated_data
|
||||
)
|
||||
SELECT
|
||||
AD.name AS name,
|
||||
AD.calls,
|
||||
AD.total_duration AS "DURATION (nsec)",
|
||||
AD.sqr_duration AS "SQR (nsec)",
|
||||
AD.average_duration AS "AVERAGE (nsec)",
|
||||
(CAST(AD.total_duration AS REAL) / TD.grand_total_duration) * 100 AS "PERCENT (INC)",
|
||||
AD.min_duration AS "MIN (nsec)",
|
||||
AD.max_duration AS "MAX (nsec)",
|
||||
AD.variance_duration AS "VARIANCE",
|
||||
AD.std_dev_duration AS "STD_DEV"
|
||||
FROM
|
||||
aggregated_data AD
|
||||
CROSS JOIN total_duration TD;
|
||||
|
||||
--
|
||||
-- Memory copy summary
|
||||
CREATE VIEW
|
||||
`memory_copy_summary` AS
|
||||
WITH
|
||||
avg_data AS (
|
||||
SELECT
|
||||
name,
|
||||
AVG(duration) AS avg_duration
|
||||
FROM
|
||||
`memory_copies`
|
||||
GROUP BY
|
||||
name
|
||||
),
|
||||
aggregated_data AS (
|
||||
SELECT
|
||||
MC.name,
|
||||
COUNT(*) AS calls,
|
||||
SUM(MC.duration) AS total_duration,
|
||||
SUM(CAST(MC.duration AS REAL) * CAST(MC.duration AS REAL)) AS sqr_duration,
|
||||
A.avg_duration AS average_duration,
|
||||
MIN(MC.duration) AS min_duration,
|
||||
MAX(MC.duration) AS max_duration,
|
||||
SUM(
|
||||
CAST((MC.duration - A.avg_duration) AS REAL) * CAST((MC.duration - A.avg_duration) AS REAL)
|
||||
) / (COUNT(*) - 1) AS variance_duration,
|
||||
SQRT(
|
||||
SUM(
|
||||
CAST((MC.duration - A.avg_duration) AS REAL) * CAST((MC.duration - A.avg_duration) AS REAL)
|
||||
) / (COUNT(*) - 1)
|
||||
) AS std_dev_duration
|
||||
FROM
|
||||
`memory_copies` MC
|
||||
JOIN avg_data A ON MC.name = A.name
|
||||
GROUP BY
|
||||
MC.name
|
||||
),
|
||||
total_duration AS (
|
||||
SELECT
|
||||
SUM(total_duration) AS grand_total_duration
|
||||
FROM
|
||||
aggregated_data
|
||||
)
|
||||
SELECT
|
||||
AD.name AS name,
|
||||
AD.calls,
|
||||
AD.total_duration AS "DURATION (nsec)",
|
||||
AD.sqr_duration AS "SQR (nsec)",
|
||||
AD.average_duration AS "AVERAGE (nsec)",
|
||||
(CAST(AD.total_duration AS REAL) / TD.grand_total_duration) * 100 AS "PERCENT (INC)",
|
||||
AD.min_duration AS "MIN (nsec)",
|
||||
AD.max_duration AS "MAX (nsec)",
|
||||
AD.variance_duration AS "VARIANCE",
|
||||
AD.std_dev_duration AS "STD_DEV"
|
||||
FROM
|
||||
aggregated_data AD
|
||||
CROSS JOIN total_duration TD;
|
||||
|
||||
--
|
||||
-- Memory allocation summary
|
||||
CREATE VIEW
|
||||
`memory_allocation_summary` AS
|
||||
WITH
|
||||
avg_data AS (
|
||||
SELECT
|
||||
type AS name,
|
||||
AVG(duration) AS avg_duration
|
||||
FROM
|
||||
`memory_allocations`
|
||||
GROUP BY
|
||||
type
|
||||
),
|
||||
aggregated_data AS (
|
||||
SELECT
|
||||
MA.type AS name,
|
||||
COUNT(*) AS calls,
|
||||
SUM(MA.duration) AS total_duration,
|
||||
SUM(CAST(MA.duration AS REAL) * CAST(MA.duration AS REAL)) AS sqr_duration,
|
||||
A.avg_duration AS average_duration,
|
||||
MIN(MA.duration) AS min_duration,
|
||||
MAX(MA.duration) AS max_duration,
|
||||
SUM(
|
||||
CAST((MA.duration - A.avg_duration) AS REAL) * CAST((MA.duration - A.avg_duration) AS REAL)
|
||||
) / (COUNT(*) - 1) AS variance_duration,
|
||||
SQRT(
|
||||
SUM(
|
||||
CAST((MA.duration - A.avg_duration) AS REAL) * CAST((MA.duration - A.avg_duration) AS REAL)
|
||||
) / (COUNT(*) - 1)
|
||||
) AS std_dev_duration
|
||||
FROM
|
||||
`memory_allocations` MA
|
||||
JOIN avg_data A ON MA.type = A.name
|
||||
GROUP BY
|
||||
MA.type
|
||||
),
|
||||
total_duration AS (
|
||||
SELECT
|
||||
SUM(total_duration) AS grand_total_duration
|
||||
FROM
|
||||
aggregated_data
|
||||
)
|
||||
SELECT
|
||||
'MEMORY_ALLOCATION_' || AD.name AS name,
|
||||
AD.calls,
|
||||
AD.total_duration AS "DURATION (nsec)",
|
||||
AD.sqr_duration AS "SQR (nsec)",
|
||||
AD.average_duration AS "AVERAGE (nsec)",
|
||||
(CAST(AD.total_duration AS REAL) / TD.grand_total_duration) * 100 AS "PERCENT (INC)",
|
||||
AD.min_duration AS "MIN (nsec)",
|
||||
AD.max_duration AS "MAX (nsec)",
|
||||
AD.variance_duration AS "VARIANCE",
|
||||
AD.std_dev_duration AS "STD_DEV"
|
||||
FROM
|
||||
aggregated_data AD
|
||||
CROSS JOIN total_duration TD;
|
||||
+57
@@ -0,0 +1,57 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "insert_query_builders.hpp"
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace rocpd
|
||||
{
|
||||
namespace data_storage
|
||||
{
|
||||
namespace queries
|
||||
{
|
||||
|
||||
struct table_insert_query
|
||||
{
|
||||
table_insert_query()
|
||||
: _query_columns_builder{ _ss }
|
||||
{}
|
||||
|
||||
query_builders::query_columns_builder& set_table_name(const std::string& tableName)
|
||||
{
|
||||
_ss.str("");
|
||||
_ss << "INSERT INTO " << tableName << " ";
|
||||
return _query_columns_builder;
|
||||
}
|
||||
|
||||
private:
|
||||
std::stringstream _ss;
|
||||
query_builders::query_columns_builder _query_columns_builder;
|
||||
};
|
||||
|
||||
} // namespace queries
|
||||
} // namespace data_storage
|
||||
} // namespace rocpd
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,99 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "json.hpp"
|
||||
#include <sstream>
|
||||
|
||||
namespace rocpd
|
||||
{
|
||||
|
||||
std::shared_ptr<json>
|
||||
json::create()
|
||||
{
|
||||
return std::shared_ptr<json>(new json());
|
||||
}
|
||||
|
||||
void
|
||||
json::set(const std::string& key, const json_value& value)
|
||||
{
|
||||
data[key] = std::make_shared<json_value>(value);
|
||||
}
|
||||
|
||||
std::string
|
||||
json::to_string() const
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "{";
|
||||
bool first = true;
|
||||
|
||||
for(const auto& [key, value] : data)
|
||||
{
|
||||
if(!first) oss << ", ";
|
||||
first = false;
|
||||
|
||||
oss << "\"" << key << "\": " << stringify(value);
|
||||
}
|
||||
|
||||
oss << "}";
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
std::string
|
||||
json::stringify(const std::shared_ptr<json_value>& value)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
std::visit(
|
||||
[&oss](auto&& arg) {
|
||||
using T = std::decay_t<decltype(arg)>;
|
||||
if constexpr(std::is_same_v<T, std::string>)
|
||||
oss << "\"" << arg << "\"";
|
||||
else if constexpr(std::is_same_v<T, bool>)
|
||||
oss << (arg ? "true" : "false");
|
||||
else if constexpr(std::is_same_v<T, std::nullptr_t>)
|
||||
oss << "null";
|
||||
else if constexpr(std::is_same_v<T, std::vector<json>>)
|
||||
{
|
||||
oss << "[";
|
||||
bool first = true;
|
||||
for(const auto& item : arg)
|
||||
{
|
||||
if(!first) oss << ", ";
|
||||
first = false;
|
||||
oss << item.to_string();
|
||||
}
|
||||
oss << "]";
|
||||
}
|
||||
else if constexpr(std::is_same_v<T, std::shared_ptr<json>>)
|
||||
{
|
||||
oss << arg->to_string();
|
||||
}
|
||||
else
|
||||
{
|
||||
// handle int + double
|
||||
oss << arg;
|
||||
}
|
||||
},
|
||||
*value);
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
} // namespace rocpd
|
||||
@@ -0,0 +1,57 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
namespace rocpd
|
||||
{
|
||||
|
||||
class json
|
||||
{
|
||||
public:
|
||||
static std::shared_ptr<json> create();
|
||||
|
||||
using json_value =
|
||||
std::variant<std::string, int, double, long long, bool, std::vector<json>,
|
||||
std::nullptr_t, std::shared_ptr<json>>;
|
||||
|
||||
void set(const std::string& key, const json_value& value);
|
||||
|
||||
std::string to_string() const;
|
||||
|
||||
private:
|
||||
json() = default;
|
||||
|
||||
private:
|
||||
static std::string stringify(const std::shared_ptr<json_value>& value);
|
||||
|
||||
private:
|
||||
std::unordered_map<std::string, std::shared_ptr<json_value>> data;
|
||||
};
|
||||
|
||||
} // namespace rocpd
|
||||
@@ -27,17 +27,22 @@
|
||||
#include "api.hpp"
|
||||
#include "common/setup.hpp"
|
||||
#include "common/static_object.hpp"
|
||||
#include "core/agent.hpp"
|
||||
#include "core/agent_manager.hpp"
|
||||
#include "core/categories.hpp"
|
||||
#include "core/components/fwd.hpp"
|
||||
#include "core/concepts.hpp"
|
||||
#include "core/config.hpp"
|
||||
#include "core/constraint.hpp"
|
||||
#include "core/cpu.hpp"
|
||||
#include "core/debug.hpp"
|
||||
#include "core/defines.hpp"
|
||||
#include "core/dynamic_library.hpp"
|
||||
#include "core/gpu.hpp"
|
||||
#include "core/locking.hpp"
|
||||
#include "core/node_info.hpp"
|
||||
#include "core/perfetto_fwd.hpp"
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "core/timemory.hpp"
|
||||
#include "core/utility.hpp"
|
||||
#include "library/causal/data.hpp"
|
||||
@@ -77,6 +82,10 @@
|
||||
#include <timemory/utility/join.hpp>
|
||||
#include <timemory/utility/procfs/maps.hpp>
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
# include <rocprofiler-sdk/agent.h>
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <csignal>
|
||||
@@ -86,6 +95,7 @@
|
||||
#include <pthread.h>
|
||||
#include <stdexcept>
|
||||
#include <string_view>
|
||||
#include <unistd.h>
|
||||
#include <utility>
|
||||
|
||||
using namespace rocprofsys;
|
||||
@@ -297,6 +307,66 @@ namespace
|
||||
bool _set_mpi_called = false;
|
||||
std::function<void()> _preinit_callback = []() { get_preinit_bundle()->start(); };
|
||||
|
||||
std::vector<std::string>
|
||||
read_command_line(pid_t _pid)
|
||||
{
|
||||
auto _cmdline = std::vector<std::string>{};
|
||||
auto fcmdline = std::stringstream{};
|
||||
fcmdline << "/proc/" << _pid << "/cmdline";
|
||||
auto ifs = std::ifstream{ fcmdline.str().c_str() };
|
||||
if(ifs)
|
||||
{
|
||||
std::string sarg;
|
||||
while(std::getline(ifs, sarg, '\0'))
|
||||
{
|
||||
_cmdline.push_back(sarg);
|
||||
}
|
||||
ifs.close();
|
||||
}
|
||||
|
||||
return _cmdline;
|
||||
}
|
||||
|
||||
void
|
||||
rocprofsys_preinit_rocpd()
|
||||
{
|
||||
auto& _data_processor = rocpd::data_processor::get_instance();
|
||||
const auto& _n_info = node_info::get_instance();
|
||||
auto _cmd_line = read_command_line(getpid());
|
||||
auto& _agent_manager = agent_manager::get_instance();
|
||||
|
||||
if(_cmd_line.empty())
|
||||
{
|
||||
_cmd_line.emplace_back("rocprofiler-systems");
|
||||
}
|
||||
|
||||
_data_processor.insert_node_info(
|
||||
_n_info.id, _n_info.hash, _n_info.machine_id.c_str(), _n_info.system_name.c_str(),
|
||||
_n_info.node_name.c_str(), _n_info.release.c_str(), _n_info.version.c_str(),
|
||||
_n_info.machine.c_str(), _n_info.domain_name.c_str());
|
||||
_data_processor.insert_process_info(_n_info.id, getppid(), getpid(), 0, 0, 0, 0,
|
||||
_cmd_line[0].c_str(), "{}");
|
||||
|
||||
const auto& agents = _agent_manager.get_agents();
|
||||
for(const auto& rocpd_agent : agents)
|
||||
{
|
||||
auto _base_id = rocpd::data_processor::get_instance().insert_agent(
|
||||
_n_info.id, getpid(),
|
||||
((rocpd_agent->type == agent_type::GPU) ? "GPU" : "CPU"),
|
||||
rocpd_agent->node_id, rocpd_agent->logical_node_id,
|
||||
rocpd_agent->logical_node_type_id, rocpd_agent->id, rocpd_agent->name.c_str(),
|
||||
rocpd_agent->model_name.c_str(), rocpd_agent->vendor_name.c_str(),
|
||||
rocpd_agent->product_name.c_str(), "");
|
||||
rocpd_agent->base_id = _base_id;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
rocprofsys_preinit_cpu_agents()
|
||||
{
|
||||
cpu::query_cpu_agents();
|
||||
}
|
||||
|
||||
void
|
||||
rocprofsys_preinit_hidden()
|
||||
{
|
||||
@@ -423,17 +493,17 @@ rocprofsys_init_tooling_hidden(void)
|
||||
{ ROCPROFSYS_DEFAULT_ROCM_PATH }) };
|
||||
#endif
|
||||
|
||||
static bool _once = false;
|
||||
static auto _debug_init = get_debug_init();
|
||||
static pid_t _once = 0;
|
||||
static auto _debug_init = get_debug_init();
|
||||
|
||||
ROCPROFSYS_CONDITIONAL_BASIC_PRINT_F(_debug_init, "State is %s...\n",
|
||||
std::to_string(get_state()).c_str());
|
||||
|
||||
if(get_state() != State::PreInit || get_state() == State::Init || _once)
|
||||
if(get_state() != State::PreInit || get_state() == State::Init || _once == getpid())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
_once = true;
|
||||
_once = getpid();
|
||||
|
||||
ROCPROFSYS_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
|
||||
@@ -460,6 +530,12 @@ rocprofsys_init_tooling_hidden(void)
|
||||
auto _dtor = scope::destructor{ []() {
|
||||
// if set to finalized, don't continue
|
||||
if(get_state() > State::Active) return;
|
||||
|
||||
#if !(ROCPROFSYS_USE_ROCM > 0)
|
||||
rocprofsys_preinit_cpu_agents();
|
||||
#endif
|
||||
if(get_use_rocpd()) rocprofsys_preinit_rocpd();
|
||||
|
||||
if(get_use_process_sampling())
|
||||
{
|
||||
ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
@@ -681,7 +757,6 @@ rocprofsys_finalize_hidden(void)
|
||||
threading::remove_callback(&ensure_initialization);
|
||||
|
||||
bool _is_child = is_child_process();
|
||||
|
||||
set_thread_state(ThreadState::Completed);
|
||||
|
||||
// return if not active
|
||||
@@ -693,6 +768,18 @@ rocprofsys_finalize_hidden(void)
|
||||
}
|
||||
else if(_is_child)
|
||||
{
|
||||
#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0
|
||||
// Flush buffered traces in case of child process
|
||||
if(get_use_rocm())
|
||||
{
|
||||
ROCPROFSYS_VERBOSE_F(1, "Shutting down ROCm...\n");
|
||||
rocprofiler_sdk::shutdown();
|
||||
}
|
||||
#endif
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd::data_processor::get_instance().flush();
|
||||
}
|
||||
set_state(State::Finalized);
|
||||
std::quick_exit(EXIT_SUCCESS);
|
||||
return;
|
||||
@@ -983,6 +1070,10 @@ rocprofsys_finalize_hidden(void)
|
||||
[](int) {});
|
||||
|
||||
common::destroy_static_objects();
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd::data_processor::get_instance().flush();
|
||||
}
|
||||
}
|
||||
|
||||
//======================================================================================//
|
||||
|
||||
@@ -26,18 +26,22 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/agent.hpp"
|
||||
#if defined(NDEBUG)
|
||||
# undef NDEBUG
|
||||
#endif
|
||||
|
||||
#include "library/amd_smi.hpp"
|
||||
#include "core/agent_manager.hpp"
|
||||
#include "core/common.hpp"
|
||||
#include "core/components/fwd.hpp"
|
||||
#include "core/config.hpp"
|
||||
#include "core/debug.hpp"
|
||||
#include "core/gpu.hpp"
|
||||
#include "core/node_info.hpp"
|
||||
#include "core/perfetto.hpp"
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "core/state.hpp"
|
||||
#include "library/amd_smi.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
#include "library/thread_info.hpp"
|
||||
|
||||
@@ -49,13 +53,11 @@
|
||||
#include <timemory/utility/locking.hpp>
|
||||
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <ios>
|
||||
#include <optional>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <sys/resource.h>
|
||||
#include <thread>
|
||||
|
||||
#define ROCPROFSYS_AMD_SMI_CALL(...) \
|
||||
::rocprofsys::amd_smi::check_error(__FILE__, __LINE__, __VA_ARGS__)
|
||||
@@ -69,6 +71,115 @@ using sampler_instances = thread_data<bundle_t, category::amd_smi>;
|
||||
|
||||
namespace
|
||||
{
|
||||
int64_t
|
||||
get_tid()
|
||||
{
|
||||
static thread_local auto _v = threading::get_id();
|
||||
return _v;
|
||||
}
|
||||
|
||||
rocpd::data_processor&
|
||||
get_data_processor()
|
||||
{
|
||||
return rocpd::data_processor::get_instance();
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_category()
|
||||
{
|
||||
get_data_processor().insert_category(ROCPROFSYS_CATEGORY_AMD_SMI,
|
||||
trait::name<category::amd_smi>::value);
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_smi_tracks()
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
const auto thread_id = std::nullopt; // Internal thread ID for amd-smi
|
||||
|
||||
data_processor.insert_track(trait::name<category::amd_smi_mm_busy>::value, n_info.id,
|
||||
getpid(), thread_id);
|
||||
data_processor.insert_track(trait::name<category::amd_smi_power>::value, n_info.id,
|
||||
getpid(), thread_id);
|
||||
data_processor.insert_track(trait::name<category::amd_smi_temp>::value, n_info.id,
|
||||
getpid(), thread_id);
|
||||
data_processor.insert_track(trait::name<category::amd_smi_memory_usage>::value,
|
||||
n_info.id, getpid(), thread_id);
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_smi_pmc(size_t gpu_id)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
// find the proper values for a following definitions
|
||||
size_t EVENT_CODE = 0;
|
||||
size_t INSTANCE_ID = 0;
|
||||
const char* LONG_DESCRIPTION = "";
|
||||
const char* COMPONENT = "";
|
||||
const char* BLOCK = "";
|
||||
const char* EXPRESSION = "";
|
||||
const char* CELSIUS_DEGREES = "\u00B0C";
|
||||
auto ni = node_info::get_instance();
|
||||
const auto* TARGET_ARCH = "GPU";
|
||||
|
||||
auto& _agent_manager = agent_manager::get_instance();
|
||||
auto base_id = _agent_manager.get_agent_by_id(gpu_id, agent_type::GPU).base_id;
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::amd_smi_mm_busy>::value, "Busy",
|
||||
trait::name<category::amd_smi_mm_busy>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
"%", "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::amd_smi_temp>::value, "Temp",
|
||||
trait::name<category::amd_smi_temp>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
CELSIUS_DEGREES, "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::amd_smi_power>::value, "Pow",
|
||||
trait::name<category::amd_smi_power>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
"w", "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::amd_smi_memory_usage>::value, "MemUsg",
|
||||
trait::name<category::amd_smi_memory_usage>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, "MB", "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_process_smi_pmc_events(const uint32_t device_id, const amd_smi::settings& settings,
|
||||
uint64_t timestamp, double busy, double temp, double power,
|
||||
double usage)
|
||||
{
|
||||
if(!(settings.busy || settings.temp || settings.power || settings.mem_usage)) return;
|
||||
|
||||
auto& data_processor = get_data_processor();
|
||||
auto event_id = data_processor.insert_event(ROCPROFSYS_CATEGORY_AMD_SMI, 0, 0, 0);
|
||||
|
||||
auto& _agent_manager = agent_manager::get_instance();
|
||||
auto base_id = _agent_manager.get_agent_by_id(device_id, agent_type::GPU).base_id;
|
||||
|
||||
auto insert_event_and_sample = [&](bool enabled, const char* name, double value) {
|
||||
if(!enabled) return;
|
||||
data_processor.insert_pmc_event(event_id, base_id, name, value);
|
||||
data_processor.insert_sample(name, timestamp, event_id);
|
||||
};
|
||||
|
||||
insert_event_and_sample(settings.busy, trait::name<category::amd_smi_mm_busy>::value,
|
||||
busy);
|
||||
insert_event_and_sample(settings.temp, trait::name<category::amd_smi_temp>::value,
|
||||
temp);
|
||||
insert_event_and_sample(settings.power, trait::name<category::amd_smi_power>::value,
|
||||
power);
|
||||
insert_event_and_sample(settings.mem_usage,
|
||||
trait::name<category::amd_smi_memory_usage>::value, usage);
|
||||
}
|
||||
|
||||
auto&
|
||||
get_settings(uint32_t _dev_id)
|
||||
{
|
||||
@@ -140,6 +251,8 @@ data::data(uint32_t _dev_id) { sample(_dev_id); }
|
||||
void
|
||||
data::sample(uint32_t _dev_id)
|
||||
{
|
||||
if(is_child_process()) return;
|
||||
|
||||
auto _ts = tim::get_clock_real_now<size_t, std::nano>();
|
||||
assert(_ts < std::numeric_limits<int64_t>::max());
|
||||
amdsmi_gpu_metrics_t _gpu_metrics;
|
||||
@@ -168,7 +281,6 @@ data::sample(uint32_t _dev_id)
|
||||
}
|
||||
|
||||
amdsmi_processor_handle sample_handle = gpu::get_handle_from_id(_dev_id);
|
||||
|
||||
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).busy, amdsmi_get_gpu_activity,
|
||||
sample_handle, &m_busy_perc);
|
||||
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).temp, amdsmi_get_temp_metric,
|
||||
@@ -276,10 +388,15 @@ config()
|
||||
*_bundle_data.at(i) = unique_ptr_t<bundle_t>{ new bundle_t{} };
|
||||
}
|
||||
}
|
||||
|
||||
data::get_initial().resize(data::device_count);
|
||||
for(auto itr : data::device_list)
|
||||
data::get_initial().at(itr).sample(itr);
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_category();
|
||||
rocpd_initialize_smi_tracks();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@@ -363,192 +480,194 @@ data::post_process(uint32_t _dev_id)
|
||||
|
||||
auto _settings = get_settings(_dev_id);
|
||||
|
||||
auto _process_perfetto = [&]() {
|
||||
constexpr uint8_t AMD_SMI_METRICS_COUNT = 8;
|
||||
auto _idx = std::array<uint64_t, AMD_SMI_METRICS_COUNT>{};
|
||||
auto use_perfetto = get_use_perfetto();
|
||||
auto use_rocpd = get_use_rocpd();
|
||||
|
||||
if(use_rocpd)
|
||||
{
|
||||
rocpd_initialize_smi_pmc(_dev_id);
|
||||
}
|
||||
|
||||
for(auto& itr : _amd_smi)
|
||||
{
|
||||
using counter_track = perfetto_counter_track<data>;
|
||||
if(itr.m_dev_id != _dev_id) continue;
|
||||
|
||||
uint64_t _ts = itr.m_ts;
|
||||
if(!_thread_info->is_valid_time(_ts)) continue;
|
||||
|
||||
double _gfxbusy = itr.m_busy_perc.gfx_activity;
|
||||
double _umcbusy = itr.m_busy_perc.umc_activity;
|
||||
double _mmbusy = itr.m_busy_perc.mm_activity;
|
||||
double _temp = itr.m_temp;
|
||||
double _power = itr.m_power.current_socket_power;
|
||||
double _usage = itr.m_mem_usage / static_cast<double>(units::megabyte);
|
||||
|
||||
auto setup_perfetto_counter_tracks = [&]() {
|
||||
if(counter_track::exists(_dev_id)) return;
|
||||
|
||||
auto addendum = [&](const char* _v) {
|
||||
return JOIN(" ", "GPU", _v, JOIN("", '[', _dev_id, ']'), "(S)");
|
||||
};
|
||||
|
||||
auto addendum_blk = [&](std::size_t _i, const char* _metric,
|
||||
std::size_t xcp_idx = SIZE_MAX) {
|
||||
if(xcp_idx != SIZE_MAX)
|
||||
{
|
||||
return JOIN(
|
||||
" ", "GPU", JOIN("", '[', _dev_id, ']'), _metric,
|
||||
JOIN("", "XCP_", xcp_idx, ": [", (_i < 10 ? "0" : ""), _i, ']'),
|
||||
"(S)");
|
||||
}
|
||||
else
|
||||
{
|
||||
return JOIN(" ", "GPU", JOIN("", '[', _dev_id, ']'), _metric,
|
||||
JOIN("", "[", (_i < 10 ? "0" : ""), _i, ']'), "(S)");
|
||||
}
|
||||
};
|
||||
|
||||
{
|
||||
_idx.fill(_idx.size());
|
||||
uint64_t nidx = 0;
|
||||
if(_settings.busy)
|
||||
{
|
||||
_idx.at(0) = nidx++; // GFX Busy
|
||||
_idx.at(1) = nidx++; // UMC Busy
|
||||
_idx.at(2) = nidx++; // MM Busy
|
||||
counter_track::emplace(_dev_id, addendum("GFX Busy"), "%");
|
||||
counter_track::emplace(_dev_id, addendum("UMC Busy"), "%");
|
||||
counter_track::emplace(_dev_id, addendum("MM Busy"), "%");
|
||||
}
|
||||
if(_settings.temp) _idx.at(3) = nidx++;
|
||||
if(_settings.power) _idx.at(4) = nidx++;
|
||||
if(_settings.mem_usage) _idx.at(5) = nidx++;
|
||||
if(_settings.vcn_activity) _idx.at(6) = nidx++;
|
||||
if(_settings.jpeg_activity) _idx.at(7) = nidx++;
|
||||
}
|
||||
|
||||
for(auto& itr : _amd_smi)
|
||||
{
|
||||
using counter_track = perfetto_counter_track<data>;
|
||||
if(itr.m_dev_id != _dev_id) continue;
|
||||
if(!counter_track::exists(_dev_id))
|
||||
if(_settings.temp)
|
||||
{
|
||||
auto addendum = [&](const char* _v) {
|
||||
return JOIN(" ", "GPU", _v, JOIN("", '[', _dev_id, ']'), "(S)");
|
||||
};
|
||||
auto addendum_blk = [&](std::size_t _i, const char* _metric,
|
||||
std::size_t xcp_idx = SIZE_MAX) {
|
||||
if(xcp_idx != SIZE_MAX)
|
||||
{
|
||||
return JOIN(" ", "GPU", JOIN("", '[', _dev_id, ']'), _metric,
|
||||
JOIN("", "XCP_", xcp_idx, ": [", (_i < 10 ? "0" : ""),
|
||||
_i, ']'),
|
||||
"(S)");
|
||||
}
|
||||
else
|
||||
{
|
||||
return JOIN(" ", "GPU", JOIN("", '[', _dev_id, ']'), _metric,
|
||||
JOIN("", "[", (_i < 10 ? "0" : ""), _i, ']'), "(S)");
|
||||
}
|
||||
};
|
||||
|
||||
if(_settings.busy)
|
||||
counter_track::emplace(_dev_id, addendum("Temperature"), "deg C");
|
||||
}
|
||||
if(_settings.power)
|
||||
{
|
||||
counter_track::emplace(_dev_id, addendum("Current Power"), "watts");
|
||||
}
|
||||
if(_settings.mem_usage)
|
||||
{
|
||||
counter_track::emplace(_dev_id, addendum("Memory Usage"), "megabytes");
|
||||
}
|
||||
if(_settings.vcn_activity)
|
||||
{
|
||||
if(itr.m_xcp_metrics.empty())
|
||||
{
|
||||
counter_track::emplace(_dev_id, addendum("GFX Busy"), "%");
|
||||
counter_track::emplace(_dev_id, addendum("UMC Busy"), "%");
|
||||
counter_track::emplace(_dev_id, addendum("MM Busy"), "%");
|
||||
ROCPROFSYS_VERBOSE(
|
||||
1, "No VCN activity data collected from device %u\n", _dev_id);
|
||||
}
|
||||
if(_settings.temp)
|
||||
counter_track::emplace(_dev_id, addendum("Temperature"), "deg C");
|
||||
if(_settings.power)
|
||||
counter_track::emplace(_dev_id, addendum("Current Power"), "watts");
|
||||
if(_settings.mem_usage)
|
||||
counter_track::emplace(_dev_id, addendum("Memory Usage"),
|
||||
"megabytes");
|
||||
if(_settings.vcn_activity)
|
||||
else if(gpu::is_vcn_activity_supported(_dev_id))
|
||||
{
|
||||
if(itr.m_xcp_metrics.empty())
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(
|
||||
1, "No VCN activity data collected from device %u\n",
|
||||
_dev_id);
|
||||
}
|
||||
else if(gpu::is_vcn_activity_supported(_dev_id))
|
||||
{
|
||||
// For VCN activity, use simple indexing
|
||||
for(std::size_t i = 0;
|
||||
i < std::size(itr.m_xcp_metrics[0].vcn_busy); ++i)
|
||||
counter_track::emplace(_dev_id,
|
||||
addendum_blk(i, "VCN Activity"), "%");
|
||||
}
|
||||
else
|
||||
{
|
||||
for(std::size_t xcp = 0; xcp < std::size(itr.m_xcp_metrics);
|
||||
++xcp)
|
||||
{
|
||||
for(std::size_t i = 0;
|
||||
i < std::size(itr.m_xcp_metrics[xcp].vcn_busy); ++i)
|
||||
{
|
||||
counter_track::emplace(
|
||||
_dev_id, addendum_blk(i, "VCN Activity", xcp), "%");
|
||||
}
|
||||
}
|
||||
}
|
||||
// For VCN activity, use simple indexing
|
||||
for(std::size_t i = 0; i < std::size(itr.m_xcp_metrics[0].vcn_busy);
|
||||
++i)
|
||||
counter_track::emplace(_dev_id, addendum_blk(i, "VCN Activity"),
|
||||
"%");
|
||||
}
|
||||
if(_settings.jpeg_activity)
|
||||
else
|
||||
{
|
||||
if(itr.m_xcp_metrics.empty())
|
||||
for(std::size_t xcp = 0; xcp < std::size(itr.m_xcp_metrics); ++xcp)
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(
|
||||
1, "No JPEG activity data collected from device %u\n",
|
||||
_dev_id);
|
||||
}
|
||||
else if(gpu::is_jpeg_activity_supported(_dev_id))
|
||||
{
|
||||
// For JPEG activity, use simple indexing
|
||||
for(std::size_t i = 0;
|
||||
i < std::size(itr.m_xcp_metrics[0].jpeg_busy); ++i)
|
||||
counter_track::emplace(_dev_id,
|
||||
addendum_blk(i, "JPEG Activity"), "%");
|
||||
}
|
||||
else
|
||||
{
|
||||
for(std::size_t xcp = 0; xcp < std::size(itr.m_xcp_metrics);
|
||||
++xcp)
|
||||
i < std::size(itr.m_xcp_metrics[xcp].vcn_busy); ++i)
|
||||
{
|
||||
for(std::size_t i = 0;
|
||||
i < std::size(itr.m_xcp_metrics[xcp].jpeg_busy); ++i)
|
||||
counter_track::emplace(
|
||||
_dev_id, addendum_blk(i, "JPEG Activity", xcp), "%");
|
||||
counter_track::emplace(
|
||||
_dev_id, addendum_blk(i, "VCN Activity", xcp), "%");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
uint64_t _ts = itr.m_ts;
|
||||
if(!_thread_info->is_valid_time(_ts)) continue;
|
||||
if(_settings.jpeg_activity)
|
||||
{
|
||||
if(itr.m_xcp_metrics.empty())
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(
|
||||
1, "No JPEG activity data collected from device %u\n", _dev_id);
|
||||
}
|
||||
else if(gpu::is_jpeg_activity_supported(_dev_id))
|
||||
{
|
||||
for(std::size_t i = 0; i < std::size(itr.m_xcp_metrics[0].jpeg_busy);
|
||||
++i)
|
||||
counter_track::emplace(_dev_id, addendum_blk(i, "JPEG Activity"),
|
||||
"%");
|
||||
}
|
||||
else
|
||||
{
|
||||
for(std::size_t xcp = 0; xcp < std::size(itr.m_xcp_metrics); ++xcp)
|
||||
{
|
||||
for(std::size_t i = 0;
|
||||
i < std::size(itr.m_xcp_metrics[xcp].jpeg_busy); ++i)
|
||||
counter_track::emplace(
|
||||
_dev_id, addendum_blk(i, "JPEG Activity", xcp), "%");
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
double _gfxbusy = itr.m_busy_perc.gfx_activity;
|
||||
double _umcbusy = itr.m_busy_perc.umc_activity;
|
||||
double _mmbusy = itr.m_busy_perc.mm_activity;
|
||||
double _temp = itr.m_temp;
|
||||
double _power = itr.m_power.current_socket_power;
|
||||
double _usage = itr.m_mem_usage / static_cast<double>(units::megabyte);
|
||||
auto write_perfetto_metrics = [&]() {
|
||||
size_t track_index = 0;
|
||||
|
||||
if(_settings.busy)
|
||||
{
|
||||
TRACE_COUNTER("device_busy_gfx", counter_track::at(_dev_id, _idx.at(0)),
|
||||
_ts, _gfxbusy);
|
||||
TRACE_COUNTER("device_busy_umc", counter_track::at(_dev_id, _idx.at(1)),
|
||||
_ts, _umcbusy);
|
||||
TRACE_COUNTER("device_busy_mm", counter_track::at(_dev_id, _idx.at(2)),
|
||||
TRACE_COUNTER("device_busy_gfx",
|
||||
counter_track::at(_dev_id, track_index++), _ts, _gfxbusy);
|
||||
TRACE_COUNTER("device_busy_umc",
|
||||
counter_track::at(_dev_id, track_index++), _ts, _umcbusy);
|
||||
TRACE_COUNTER("device_busy_mm", counter_track::at(_dev_id, track_index++),
|
||||
_ts, _mmbusy);
|
||||
}
|
||||
if(_settings.temp)
|
||||
TRACE_COUNTER("device_temp", counter_track::at(_dev_id, _idx.at(3)), _ts,
|
||||
_temp);
|
||||
{
|
||||
TRACE_COUNTER("device_temp", counter_track::at(_dev_id, track_index++),
|
||||
_ts, _temp);
|
||||
}
|
||||
if(_settings.power)
|
||||
TRACE_COUNTER("device_power", counter_track::at(_dev_id, _idx.at(4)), _ts,
|
||||
_power);
|
||||
{
|
||||
TRACE_COUNTER("device_power", counter_track::at(_dev_id, track_index++),
|
||||
_ts, _power);
|
||||
}
|
||||
if(_settings.mem_usage)
|
||||
{
|
||||
TRACE_COUNTER("device_memory_usage",
|
||||
counter_track::at(_dev_id, _idx.at(5)), _ts, _usage);
|
||||
counter_track::at(_dev_id, track_index++), _ts, _usage);
|
||||
}
|
||||
|
||||
if(_settings.vcn_activity && !itr.m_xcp_metrics.empty())
|
||||
{
|
||||
uint64_t idx = _idx.at(6);
|
||||
// Iterate over all XCPs and their VCN busy/activity values
|
||||
for(const auto& metrics : itr.m_xcp_metrics)
|
||||
{
|
||||
for(const auto& vcn_val : metrics.vcn_busy)
|
||||
{
|
||||
TRACE_COUNTER("device_vcn_activity",
|
||||
counter_track::at(_dev_id, idx), _ts, vcn_val);
|
||||
++idx;
|
||||
counter_track::at(_dev_id, track_index++), _ts,
|
||||
vcn_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(_settings.jpeg_activity && !itr.m_xcp_metrics.empty())
|
||||
{
|
||||
uint64_t idx = _idx.at(7);
|
||||
// Calculate total VCN metrics to properly offset JPEG metrics index
|
||||
if(_settings.vcn_activity)
|
||||
{
|
||||
size_t total_vcn_metrics = 0;
|
||||
for(const auto& metrics : itr.m_xcp_metrics)
|
||||
total_vcn_metrics += metrics.vcn_busy.size();
|
||||
if(total_vcn_metrics > 0) idx += (total_vcn_metrics - 1);
|
||||
}
|
||||
// Iterate over all XCPs and their JPEG busy/activity values
|
||||
for(const auto& metrics : itr.m_xcp_metrics)
|
||||
{
|
||||
for(const auto& jpeg_val : metrics.jpeg_busy)
|
||||
{
|
||||
TRACE_COUNTER("device_jpeg_activity",
|
||||
counter_track::at(_dev_id, idx), _ts, jpeg_val);
|
||||
++idx;
|
||||
counter_track::at(_dev_id, track_index++), _ts,
|
||||
jpeg_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
if(get_use_perfetto()) _process_perfetto();
|
||||
if(use_perfetto)
|
||||
{
|
||||
setup_perfetto_counter_tracks();
|
||||
write_perfetto_metrics();
|
||||
}
|
||||
|
||||
if(use_rocpd)
|
||||
{
|
||||
rocpd_process_smi_pmc_events(_dev_id, _settings, _ts, _mmbusy, _temp, _power,
|
||||
_usage);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
@@ -573,7 +692,7 @@ setup()
|
||||
ROCPROFSYS_VERBOSE_F(0, "AMD SMI version: %u.%u.%u - str: %s.\n", _version.major,
|
||||
_version.minor, _version.release, _version.build);
|
||||
|
||||
data::device_count = gpu::get_processor_count();
|
||||
data::device_count = gpu::device_count();
|
||||
|
||||
auto _devices_v = get_sampling_gpus();
|
||||
for(auto& itr : _devices_v)
|
||||
@@ -668,7 +787,6 @@ setup()
|
||||
}
|
||||
|
||||
is_initialized() = true;
|
||||
|
||||
data::setup();
|
||||
} catch(std::runtime_error& _e)
|
||||
{
|
||||
@@ -705,7 +823,10 @@ void
|
||||
post_process()
|
||||
{
|
||||
for(auto itr : data::device_list)
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(2, "Post-processing amd-smi data for device: %d", itr);
|
||||
data::post_process(itr);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t
|
||||
|
||||
+269
@@ -21,10 +21,14 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#include "library/components/backtrace_metrics.hpp"
|
||||
#include "core/agent.hpp"
|
||||
#include "core/agent_manager.hpp"
|
||||
#include "core/components/fwd.hpp"
|
||||
#include "core/config.hpp"
|
||||
#include "core/debug.hpp"
|
||||
#include "core/node_info.hpp"
|
||||
#include "core/perfetto.hpp"
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "library/components/ensure_storage.hpp"
|
||||
#include "library/ptl.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
@@ -137,6 +141,12 @@ backtrace_metrics::get_hw_counter_labels(int64_t _tid)
|
||||
return (_v) ? *_v : std::vector<std::string>{};
|
||||
}
|
||||
|
||||
rocpd::data_processor&
|
||||
get_data_processor()
|
||||
{
|
||||
return rocpd::data_processor::get_instance();
|
||||
}
|
||||
|
||||
void
|
||||
backtrace_metrics::start()
|
||||
{}
|
||||
@@ -327,6 +337,228 @@ backtrace_metrics::fini_perfetto(int64_t _tid, valid_array_t _valid)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_init_categories()
|
||||
{
|
||||
static bool _is_initialized = false;
|
||||
if(_is_initialized) return;
|
||||
|
||||
get_data_processor().insert_category(
|
||||
category_enum_id<category::thread_cpu_time>::value,
|
||||
trait::name<category::thread_cpu_time>::value);
|
||||
get_data_processor().insert_category(
|
||||
category_enum_id<category::thread_peak_memory>::value,
|
||||
trait::name<category::thread_peak_memory>::value);
|
||||
get_data_processor().insert_category(
|
||||
category_enum_id<category::thread_context_switch>::value,
|
||||
trait::name<category::thread_context_switch>::value);
|
||||
get_data_processor().insert_category(
|
||||
category_enum_id<category::thread_page_fault>::value,
|
||||
trait::name<category::thread_page_fault>::value);
|
||||
get_data_processor().insert_category(
|
||||
category_enum_id<category::thread_hardware_counter>::value,
|
||||
trait::name<category::thread_hardware_counter>::value);
|
||||
|
||||
_is_initialized = true;
|
||||
}
|
||||
|
||||
template <typename Category>
|
||||
void
|
||||
rocpd_init_tracks(int64_t _tid)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
const auto& t_info = thread_info::get(_tid, SequentTID);
|
||||
auto _tid_name = JOIN("", '[', _tid, ']');
|
||||
|
||||
auto thread_idx = data_processor.insert_thread_info(
|
||||
n_info.id, getppid(), getpid(), t_info->index_data->system_value,
|
||||
JOIN(" ", "Thread", _tid).c_str(), t_info->get_start(), t_info->get_stop(), "{}");
|
||||
|
||||
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
|
||||
{
|
||||
// Initialize hw_counter_tracks and create one track for each hardware counter
|
||||
auto _hw_cnt_labels = *get_papi_labels(_tid);
|
||||
for(auto& itr : _hw_cnt_labels)
|
||||
{
|
||||
std::string _desc = tim::papi::get_event_info(itr).short_descr;
|
||||
if(_desc.empty()) _desc = itr;
|
||||
ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str());
|
||||
|
||||
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
|
||||
data_processor.insert_track(track_name.c_str(), n_info.id, getpid(),
|
||||
thread_idx, "{}");
|
||||
}
|
||||
}
|
||||
else
|
||||
data_processor.insert_track(
|
||||
JOIN('_', trait::name<Category>::value, _tid_name).c_str(), n_info.id,
|
||||
getpid(), thread_idx, "{}");
|
||||
}
|
||||
|
||||
template <typename Category>
|
||||
void
|
||||
rocpd_initialize_backtrace_metrics_pmc(size_t dev_id, const char* units, int64_t _tid)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto _tid_name = JOIN("", '[', _tid, ']');
|
||||
|
||||
size_t EVENT_CODE = 0;
|
||||
size_t INSTANCE_ID = 0;
|
||||
const char* LONG_DESCRIPTION = "";
|
||||
const char* COMPONENT = "";
|
||||
const char* BLOCK = "";
|
||||
const char* EXPRESSION = "";
|
||||
auto ni = node_info::get_instance();
|
||||
const auto* TARGET_ARCH = "CPU";
|
||||
|
||||
auto& _agent_manager = agent_manager::get_instance();
|
||||
auto _base_id = _agent_manager.get_agent_by_id(dev_id, agent_type::CPU).base_id;
|
||||
|
||||
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
|
||||
{
|
||||
auto _hw_cnt_labels = *get_papi_labels(_tid);
|
||||
for(auto& itr : _hw_cnt_labels)
|
||||
{
|
||||
std::string _desc = tim::papi::get_event_info(itr).short_descr;
|
||||
if(_desc.empty()) _desc = itr;
|
||||
ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str());
|
||||
|
||||
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), _base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
track_name.c_str(), trait::name<Category>::value,
|
||||
trait::name<Category>::description, LONG_DESCRIPTION, COMPONENT, units,
|
||||
"ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), _base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
JOIN("_", trait::name<Category>::value, _tid_name).c_str(),
|
||||
trait::name<Category>::value, trait::name<Category>::description,
|
||||
LONG_DESCRIPTION, COMPONENT, units, "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
}
|
||||
|
||||
template <typename Category, typename Value>
|
||||
void
|
||||
rocpd_process_backtrace_metrics_events(const uint32_t device_id, uint64_t timestamp,
|
||||
Value value, int64_t _tid)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto _tid_name = JOIN("", '[', _tid, ']');
|
||||
auto event_id =
|
||||
data_processor.insert_event(category_enum_id<Category>::value, 0, 0, 0);
|
||||
auto& agent_mngr = agent_manager::get_instance();
|
||||
auto base_id = agent_mngr.get_agent_by_id(device_id, agent_type::CPU).base_id;
|
||||
|
||||
auto insert_event_and_sample = [&](const char* name, double _value) {
|
||||
data_processor.insert_pmc_event(event_id, base_id, name, _value);
|
||||
data_processor.insert_sample(name, timestamp, event_id);
|
||||
};
|
||||
|
||||
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
|
||||
{
|
||||
auto _hw_cnt_labels = *get_papi_labels(_tid);
|
||||
const auto& _hw_counters =
|
||||
static_cast<backtrace_metrics::hw_counter_data_t>(value);
|
||||
for(size_t i = 0; i < _hw_cnt_labels.size() && i < _hw_counters.size(); ++i)
|
||||
{
|
||||
std::string _desc = tim::papi::get_event_info(_hw_cnt_labels[i]).short_descr;
|
||||
if(_desc.empty()) _desc = _hw_cnt_labels[i];
|
||||
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
|
||||
|
||||
insert_event_and_sample(track_name.c_str(), _hw_counters.at(i));
|
||||
}
|
||||
}
|
||||
else
|
||||
insert_event_and_sample(
|
||||
JOIN("_", trait::name<Category>::value, _tid_name).c_str(), value);
|
||||
}
|
||||
|
||||
void
|
||||
backtrace_metrics::init_rocpd(int64_t _tid, valid_array_t _valid)
|
||||
{
|
||||
rocpd_init_categories();
|
||||
if(get_valid(category::thread_cpu_time{}, _valid))
|
||||
{
|
||||
rocpd_init_tracks<category::thread_cpu_time>(_tid);
|
||||
rocpd_initialize_backtrace_metrics_pmc<category::thread_cpu_time>(0, "sec", _tid);
|
||||
}
|
||||
if(get_valid(category::thread_peak_memory{}, _valid))
|
||||
{
|
||||
rocpd_init_tracks<category::thread_peak_memory>(_tid);
|
||||
rocpd_initialize_backtrace_metrics_pmc<category::thread_peak_memory>(0, "MB",
|
||||
_tid);
|
||||
}
|
||||
if(get_valid(category::thread_context_switch{}, _valid))
|
||||
{
|
||||
rocpd_init_tracks<category::thread_context_switch>(_tid);
|
||||
rocpd_initialize_backtrace_metrics_pmc<category::thread_context_switch>(0, "",
|
||||
_tid);
|
||||
}
|
||||
if(get_valid(category::thread_page_fault{}, _valid))
|
||||
{
|
||||
rocpd_init_tracks<category::thread_page_fault>(_tid);
|
||||
rocpd_initialize_backtrace_metrics_pmc<category::thread_page_fault>(0, "", _tid);
|
||||
}
|
||||
if(get_valid(type_list<hw_counters>{}, _valid) &&
|
||||
get_valid(category::thread_hardware_counter{}, _valid))
|
||||
{
|
||||
rocpd_init_tracks<category::thread_hardware_counter>(_tid);
|
||||
rocpd_initialize_backtrace_metrics_pmc<category::thread_hardware_counter>(0, "",
|
||||
_tid);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
backtrace_metrics::fini_rocpd(int64_t _tid, valid_array_t _valid)
|
||||
{
|
||||
const auto& _thread_info = thread_info::get(_tid, SequentTID);
|
||||
|
||||
ROCPROFSYS_CI_THROW(!_thread_info, "Error! missing thread info for tid=%li\n", _tid);
|
||||
if(!_thread_info) return;
|
||||
|
||||
uint64_t _ts = _thread_info->get_stop();
|
||||
|
||||
if(get_valid(category::thread_cpu_time{}, _valid))
|
||||
{
|
||||
rocpd_process_backtrace_metrics_events<category::thread_cpu_time, double>(
|
||||
0, _ts, 0, _tid);
|
||||
}
|
||||
|
||||
if(get_valid(category::thread_peak_memory{}, _valid))
|
||||
{
|
||||
rocpd_process_backtrace_metrics_events<category::thread_peak_memory, double>(
|
||||
0, _ts, 0, _tid);
|
||||
}
|
||||
|
||||
if(get_valid(category::thread_context_switch{}, _valid))
|
||||
{
|
||||
rocpd_process_backtrace_metrics_events<category::thread_context_switch, int64_t>(
|
||||
0, _ts, 0, _tid);
|
||||
}
|
||||
|
||||
if(get_valid(category::thread_page_fault{}, _valid))
|
||||
{
|
||||
rocpd_process_backtrace_metrics_events<category::thread_page_fault, int64_t>(
|
||||
0, _ts, 0, _tid);
|
||||
}
|
||||
|
||||
if(get_valid(type_list<hw_counters>{}, _valid) &&
|
||||
get_valid(category::thread_hardware_counter{}, _valid))
|
||||
{
|
||||
auto _hw_cnt_labels = *get_papi_labels(_tid);
|
||||
hw_counter_data_t zero_counters{};
|
||||
zero_counters.fill(0.0);
|
||||
|
||||
rocpd_process_backtrace_metrics_events<category::thread_hardware_counter,
|
||||
hw_counter_data_t>(0, _ts, zero_counters,
|
||||
_tid);
|
||||
}
|
||||
}
|
||||
|
||||
backtrace_metrics&
|
||||
backtrace_metrics::operator-=(const backtrace_metrics& _rhs)
|
||||
{
|
||||
@@ -407,6 +639,43 @@ backtrace_metrics::post_process_perfetto(int64_t _tid, uint64_t _ts) const
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
backtrace_metrics::post_process_rocpd(int64_t _tid, uint64_t _ts) const
|
||||
{
|
||||
auto is_category_enabled = [&](const auto& _category) { return (*this)(_category); };
|
||||
|
||||
if(is_category_enabled(category::thread_cpu_time{}))
|
||||
{
|
||||
rocpd_process_backtrace_metrics_events<category::thread_cpu_time, double>(
|
||||
0, _ts, m_cpu / units::sec, _tid);
|
||||
}
|
||||
|
||||
if(is_category_enabled(category::thread_peak_memory{}))
|
||||
{
|
||||
rocpd_process_backtrace_metrics_events<category::thread_peak_memory, double>(
|
||||
0, _ts, m_mem_peak / units::megabyte, _tid);
|
||||
}
|
||||
|
||||
if(is_category_enabled(category::thread_context_switch{}))
|
||||
{
|
||||
rocpd_process_backtrace_metrics_events<category::thread_context_switch, int64_t>(
|
||||
0, _ts, m_ctx_swch, _tid);
|
||||
}
|
||||
|
||||
if(is_category_enabled(category::thread_page_fault{}))
|
||||
{
|
||||
rocpd_process_backtrace_metrics_events<category::thread_page_fault, int64_t>(
|
||||
0, _ts, m_page_flt, _tid);
|
||||
}
|
||||
if(is_category_enabled(type_list<hw_counters>{}) &&
|
||||
is_category_enabled(category::thread_hardware_counter{}))
|
||||
{
|
||||
rocpd_process_backtrace_metrics_events<category::thread_hardware_counter,
|
||||
hw_counter_data_t>(0, _ts, m_hw_counter,
|
||||
_tid);
|
||||
}
|
||||
}
|
||||
} // namespace component
|
||||
} // namespace rocprofsys
|
||||
|
||||
|
||||
+3
@@ -83,6 +83,8 @@ struct backtrace_metrics : comp::empty_base
|
||||
static void configure(bool, int64_t _tid = threading::get_id());
|
||||
static void init_perfetto(int64_t _tid, valid_array_t);
|
||||
static void fini_perfetto(int64_t _tid, valid_array_t);
|
||||
static void init_rocpd(int64_t _tid, valid_array_t);
|
||||
static void fini_rocpd(int64_t _tid, valid_array_t);
|
||||
static std::vector<std::string> get_hw_counter_labels(int64_t);
|
||||
|
||||
template <typename Tp>
|
||||
@@ -113,6 +115,7 @@ struct backtrace_metrics : comp::empty_base
|
||||
const auto& get_hw_counters() const { return m_hw_counter; }
|
||||
|
||||
void post_process_perfetto(int64_t _tid, uint64_t _ts) const;
|
||||
void post_process_rocpd(int64_t _tid, uint64_t _ts) const;
|
||||
|
||||
backtrace_metrics& operator-=(const backtrace_metrics&);
|
||||
|
||||
|
||||
+346
-113
@@ -21,15 +21,15 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#include "library/components/comm_data.hpp"
|
||||
#include "core/agent_manager.hpp"
|
||||
#include "core/components/fwd.hpp"
|
||||
#include "core/config.hpp"
|
||||
#include "core/node_info.hpp"
|
||||
#include "core/perfetto.hpp"
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "library/tracing.hpp"
|
||||
|
||||
#include <timemory/backends/mpi.hpp>
|
||||
#include <timemory/manager.hpp>
|
||||
#include <timemory/units.hpp>
|
||||
#include <timemory/utility/locking.hpp>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
@@ -74,6 +74,138 @@ write_perfetto_counter_track(uint64_t _val)
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace
|
||||
{
|
||||
rocpd::data_processor&
|
||||
get_data_processor()
|
||||
{
|
||||
return rocpd::data_processor::get_instance();
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_comm_data_categories()
|
||||
{
|
||||
static bool _is_initialized = false;
|
||||
if(_is_initialized) return;
|
||||
|
||||
get_data_processor().insert_category(category_enum_id<category::comm_data>::value,
|
||||
trait::name<category::comm_data>::value);
|
||||
#if defined(ROCPROFSYS_USE_MPI)
|
||||
get_data_processor().insert_category(category_enum_id<category::mpi>::value,
|
||||
trait::name<category::mpi>::value);
|
||||
#endif
|
||||
#if defined(ROCPROFSYS_USE_RCCL)
|
||||
get_data_processor().insert_category(category_enum_id<category::rocm_rccl>::value,
|
||||
trait::name<category::rocm_rccl>::value);
|
||||
#endif
|
||||
_is_initialized = true;
|
||||
}
|
||||
|
||||
template <typename Track>
|
||||
void
|
||||
rocpd_initialize_track()
|
||||
{
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto thread_id = std::nullopt;
|
||||
auto _init_track = [&](const char* label) {
|
||||
ROCPROFSYS_VERBOSE(3, "INSERT_TRACK label: %s, node ID: %d, Process ID: %d",
|
||||
label, n_info.id, getpid());
|
||||
get_data_processor().insert_track(label, n_info.id, getpid(), thread_id);
|
||||
};
|
||||
|
||||
static std::once_flag _once{};
|
||||
std::call_once(_once, _init_track, Track::label);
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_comm_data_pmc()
|
||||
{
|
||||
[[maybe_unused]] auto& data_processor = get_data_processor();
|
||||
// find the proper values for a following definitions
|
||||
[[maybe_unused]] size_t EVENT_CODE = 0;
|
||||
[[maybe_unused]] size_t INSTANCE_ID = 0;
|
||||
[[maybe_unused]] constexpr const char* LONG_DESCRIPTION = "";
|
||||
[[maybe_unused]] constexpr const char* COMPONENT = "";
|
||||
[[maybe_unused]] constexpr const char* BLOCK = "";
|
||||
[[maybe_unused]] constexpr const char* EXPRESSION = "";
|
||||
[[maybe_unused]] constexpr const char* MSG = "bytes";
|
||||
[[maybe_unused]] constexpr const auto* TARGET_ARCH = "CPU";
|
||||
auto ni = node_info::get_instance();
|
||||
constexpr const auto DEVICE_ID = 0; // Assuming CPU device ID is 0
|
||||
|
||||
auto& _agent_manager = agent_manager::get_instance();
|
||||
[[maybe_unused]] auto base_id =
|
||||
_agent_manager.get_agent_by_id(DEVICE_ID, agent_type::CPU).base_id;
|
||||
|
||||
#if defined(ROCPROFSYS_USE_MPI)
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
comm_data::mpi_send::label, "Tracks MPI Send communication data sizes",
|
||||
trait::name<category::mpi>::description, LONG_DESCRIPTION, COMPONENT, MSG, "ABS",
|
||||
BLOCK, EXPRESSION, 0, 0);
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
comm_data::mpi_recv::label, "Tracks MPI Receive communication data sizes",
|
||||
trait::name<category::mpi>::description, LONG_DESCRIPTION, COMPONENT, MSG, "ABS",
|
||||
BLOCK, EXPRESSION, 0, 0);
|
||||
#endif
|
||||
#if defined(ROCPROFSYS_USE_RCCL)
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, rccl_send::label,
|
||||
"Tracks RCCL Send communication data sizes",
|
||||
trait::name<category::rocm_rccl>::description, LONG_DESCRIPTION, COMPONENT, MSG,
|
||||
"ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, rccl_recv::label,
|
||||
"Tracks RCCL Receive communication data sizes",
|
||||
trait::name<category::rocm_rccl>::description, LONG_DESCRIPTION, COMPONENT, MSG,
|
||||
"ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename Track>
|
||||
void
|
||||
rocpd_process_cpu_usage_events(const uint32_t device_id, int bytes)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto event_id = data_processor.insert_event(
|
||||
category_enum_id<category::comm_data>::value, 0, 0, 0);
|
||||
|
||||
auto& agents = agent_manager::get_instance();
|
||||
auto agent = agents.get_agent_by_id(device_id, agent_type::CPU);
|
||||
|
||||
auto insert_event_and_sample = [&](const char* name, uint64_t timestamp,
|
||||
double value) {
|
||||
data_processor.insert_pmc_event(event_id, agent.device_id, name, value);
|
||||
data_processor.insert_sample(name, timestamp, event_id);
|
||||
};
|
||||
|
||||
static std::mutex _mutex{};
|
||||
static uint64_t value = 0;
|
||||
uint64_t _now = 0;
|
||||
{
|
||||
std::unique_lock<std::mutex> _lk{ _mutex };
|
||||
_now = rocprofsys::tracing::now<uint64_t>();
|
||||
bytes = (value += bytes);
|
||||
}
|
||||
|
||||
insert_event_and_sample(Track::label, _now, bytes);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void
|
||||
comm_data::start()
|
||||
{
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_comm_data_categories();
|
||||
rocpd_initialize_comm_data_pmc();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
comm_data::preinit()
|
||||
{
|
||||
@@ -116,13 +248,22 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int cou
|
||||
|
||||
write_perfetto_counter_track<mpi_send>(count * _size);
|
||||
|
||||
if(!rocprofsys::get_use_timemory()) return;
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _a{ _name };
|
||||
add(_a, count * _size);
|
||||
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
|
||||
add(_b, count * _size);
|
||||
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)), count * _size);
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_track<mpi_send>();
|
||||
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
{
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _a{ _name };
|
||||
add(_a, count * _size);
|
||||
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
|
||||
add(_b, count * _size);
|
||||
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)),
|
||||
count * _size);
|
||||
}
|
||||
}
|
||||
|
||||
// MPI_Recv
|
||||
@@ -133,15 +274,24 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count,
|
||||
int _size = mpi_type_size(datatype);
|
||||
if(_size == 0) return;
|
||||
|
||||
write_perfetto_counter_track<mpi_recv>(count * _size);
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<mpi_recv>(count * _size);
|
||||
|
||||
if(!rocprofsys::get_use_timemory()) return;
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _a{ _name };
|
||||
add(_a, count * _size);
|
||||
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
|
||||
add(_b, count * _size);
|
||||
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)), count * _size);
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_track<mpi_recv>();
|
||||
rocpd_process_cpu_usage_events<mpi_recv>(0, count * _size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
{
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _a{ _name };
|
||||
add(_a, count * _size);
|
||||
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
|
||||
add(_b, count * _size);
|
||||
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)),
|
||||
count * _size);
|
||||
}
|
||||
}
|
||||
|
||||
// MPI_Isend
|
||||
@@ -152,15 +302,24 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int cou
|
||||
int _size = mpi_type_size(datatype);
|
||||
if(_size == 0) return;
|
||||
|
||||
write_perfetto_counter_track<mpi_send>(count * _size);
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<mpi_send>(count * _size);
|
||||
|
||||
if(!rocprofsys::get_use_timemory()) return;
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _a{ _name };
|
||||
add(_a, count * _size);
|
||||
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
|
||||
add(_b, count * _size);
|
||||
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)), count * _size);
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_track<mpi_send>();
|
||||
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
{
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _a{ _name };
|
||||
add(_a, count * _size);
|
||||
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
|
||||
add(_b, count * _size);
|
||||
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)),
|
||||
count * _size);
|
||||
}
|
||||
}
|
||||
|
||||
// MPI_Irecv
|
||||
@@ -171,15 +330,24 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count,
|
||||
int _size = mpi_type_size(datatype);
|
||||
if(_size == 0) return;
|
||||
|
||||
write_perfetto_counter_track<mpi_recv>(count * _size);
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<mpi_recv>(count * _size);
|
||||
|
||||
if(!rocprofsys::get_use_timemory()) return;
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _a{ _name };
|
||||
add(_a, count * _size);
|
||||
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
|
||||
add(_b, count * _size);
|
||||
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)), count * _size);
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_track<mpi_recv>();
|
||||
rocpd_process_cpu_usage_events<mpi_recv>(0, count * _size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
{
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _a{ _name };
|
||||
add(_a, count * _size);
|
||||
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
|
||||
add(_b, count * _size);
|
||||
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)),
|
||||
count * _size);
|
||||
}
|
||||
}
|
||||
|
||||
// MPI_Bcast
|
||||
@@ -190,13 +358,21 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count,
|
||||
int _size = mpi_type_size(datatype);
|
||||
if(_size == 0) return;
|
||||
|
||||
write_perfetto_counter_track<mpi_send>(count * _size);
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<mpi_send>(count * _size);
|
||||
|
||||
if(!rocprofsys::get_use_timemory()) return;
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _t{ _name };
|
||||
add(_t, count * _size);
|
||||
add(JOIN('/', _name, JOIN('=', "root", root)), count * _size);
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_track<mpi_send>();
|
||||
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
{
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _t{ _name };
|
||||
add(_t, count * _size);
|
||||
add(JOIN('/', _name, JOIN('=', "root", root)), count * _size);
|
||||
}
|
||||
}
|
||||
|
||||
// MPI_Allreduce
|
||||
@@ -207,11 +383,21 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, void*,
|
||||
int _size = mpi_type_size(datatype);
|
||||
if(_size == 0) return;
|
||||
|
||||
write_perfetto_counter_track<mpi_recv>(count * _size);
|
||||
write_perfetto_counter_track<mpi_send>(count * _size);
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
write_perfetto_counter_track<mpi_recv>(count * _size);
|
||||
write_perfetto_counter_track<mpi_send>(count * _size);
|
||||
}
|
||||
|
||||
if(!rocprofsys::get_use_timemory()) return;
|
||||
add(_data, count * _size);
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_track<mpi_send>();
|
||||
rocpd_initialize_track<mpi_recv>();
|
||||
rocpd_process_cpu_usage_events<mpi_recv>(0, count * _size);
|
||||
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory()) add(_data, count * _size);
|
||||
}
|
||||
|
||||
// MPI_Sendrecv
|
||||
@@ -224,30 +410,43 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen
|
||||
int _recv_size = mpi_type_size(recvtype);
|
||||
if(_send_size == 0 || _recv_size == 0) return;
|
||||
|
||||
write_perfetto_counter_track<mpi_send>(sendcount * _send_size);
|
||||
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
|
||||
|
||||
if(!rocprofsys::get_use_timemory()) return;
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _t{ _name };
|
||||
add(_t, sendcount * _send_size + recvcount * _recv_size);
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
tracker_t _b{ JOIN('/', _name, "send") };
|
||||
add(_b, sendcount * _send_size);
|
||||
tracker_t _c{ JOIN('/', _name, JOIN('=', "send", dst)) };
|
||||
add(_b, sendcount * _send_size);
|
||||
add(JOIN('/', _name, "send", JOIN('=', "tag", sendtag)), sendcount * _send_size);
|
||||
add(JOIN('/', _name, JOIN('=', "send", dst), JOIN('=', "tag", sendtag)),
|
||||
sendcount * _send_size);
|
||||
write_perfetto_counter_track<mpi_send>(sendcount * _send_size);
|
||||
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
|
||||
}
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
tracker_t _b{ JOIN('/', _name, "recv") };
|
||||
add(_b, recvcount * _recv_size);
|
||||
tracker_t _c{ JOIN('/', _name, JOIN('=', "recv", src)) };
|
||||
add(_b, recvcount * _recv_size);
|
||||
add(JOIN('/', _name, "recv", JOIN('=', "tag", recvtag)), recvcount * _recv_size);
|
||||
add(JOIN('/', _name, JOIN('=', "recv", src), JOIN('=', "tag", recvtag)),
|
||||
recvcount * _recv_size);
|
||||
rocpd_process_cpu_usage_events<mpi_send>(0, sendcount * _send_size);
|
||||
rocpd_process_cpu_usage_events<mpi_recv>(0, recvcount * _send_size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
{
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _t{ _name };
|
||||
add(_t, sendcount * _send_size + recvcount * _recv_size);
|
||||
{
|
||||
tracker_t _b{ JOIN('/', _name, "send") };
|
||||
add(_b, sendcount * _send_size);
|
||||
tracker_t _c{ JOIN('/', _name, JOIN('=', "send", dst)) };
|
||||
add(_b, sendcount * _send_size);
|
||||
add(JOIN('/', _name, "send", JOIN('=', "tag", sendtag)),
|
||||
sendcount * _send_size);
|
||||
add(JOIN('/', _name, JOIN('=', "send", dst), JOIN('=', "tag", sendtag)),
|
||||
sendcount * _send_size);
|
||||
}
|
||||
{
|
||||
tracker_t _b{ JOIN('/', _name, "recv") };
|
||||
add(_b, recvcount * _recv_size);
|
||||
tracker_t _c{ JOIN('/', _name, JOIN('=', "recv", src)) };
|
||||
add(_b, recvcount * _recv_size);
|
||||
add(JOIN('/', _name, "recv", JOIN('=', "tag", recvtag)),
|
||||
recvcount * _recv_size);
|
||||
add(JOIN('/', _name, JOIN('=', "recv", src), JOIN('=', "tag", recvtag)),
|
||||
recvcount * _recv_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -262,17 +461,28 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen
|
||||
int _recv_size = mpi_type_size(recvtype);
|
||||
if(_send_size == 0 || _recv_size == 0) return;
|
||||
|
||||
write_perfetto_counter_track<mpi_send>(sendcount * _send_size);
|
||||
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
write_perfetto_counter_track<mpi_send>(sendcount * _send_size);
|
||||
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
|
||||
}
|
||||
|
||||
if(!rocprofsys::get_use_timemory()) return;
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _t{ _name };
|
||||
add(_t, sendcount * _send_size + recvcount * _recv_size);
|
||||
tracker_t _r(JOIN('/', _name, JOIN('=', "root", root)));
|
||||
add(_r, sendcount * _send_size + recvcount * _recv_size);
|
||||
add(JOIN('/', _name, JOIN('=', "root", root), "send"), sendcount * _send_size);
|
||||
add(JOIN('/', _name, JOIN('=', "root", root), "recv"), recvcount * _recv_size);
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_process_cpu_usage_events<mpi_send>(0, sendcount * _send_size);
|
||||
rocpd_process_cpu_usage_events<mpi_recv>(0, recvcount * _send_size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
{
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _t{ _name };
|
||||
add(_t, sendcount * _send_size + recvcount * _recv_size);
|
||||
tracker_t _r(JOIN('/', _name, JOIN('=', "root", root)));
|
||||
add(_r, sendcount * _send_size + recvcount * _recv_size);
|
||||
add(JOIN('/', _name, JOIN('=', "root", root), "send"), sendcount * _send_size);
|
||||
add(JOIN('/', _name, JOIN('=', "root", root), "recv"), recvcount * _recv_size);
|
||||
}
|
||||
}
|
||||
|
||||
// MPI_Alltoall
|
||||
@@ -285,15 +495,26 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen
|
||||
int _recv_size = mpi_type_size(recvtype);
|
||||
if(_send_size == 0 || _recv_size == 0) return;
|
||||
|
||||
write_perfetto_counter_track<mpi_send>(sendcount * _send_size);
|
||||
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
write_perfetto_counter_track<mpi_send>(sendcount * _send_size);
|
||||
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
|
||||
}
|
||||
|
||||
if(!rocprofsys::get_use_timemory()) return;
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _t{ _name };
|
||||
add(_t, sendcount * _send_size + recvcount * _recv_size);
|
||||
add(JOIN('/', _name, "send"), sendcount * _send_size);
|
||||
add(JOIN('/', _name, "recv"), recvcount * _recv_size);
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_process_cpu_usage_events<mpi_send>(0, sendcount * _send_size);
|
||||
rocpd_process_cpu_usage_events<mpi_recv>(0, recvcount * _recv_size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
{
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _t{ _name };
|
||||
add(_t, sendcount * _send_size + recvcount * _recv_size);
|
||||
add(JOIN('/', _name, "send"), sendcount * _send_size);
|
||||
add(JOIN('/', _name, "recv"), recvcount * _recv_size);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -309,13 +530,17 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, const v
|
||||
int _size = rccl_type_size(datatype);
|
||||
if(_size <= 0) return;
|
||||
|
||||
write_perfetto_counter_track<rccl_recv>(count * _size);
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<rccl_recv>(count * _size);
|
||||
|
||||
if(!rocprofsys::get_use_timemory()) return;
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _t{ _name };
|
||||
add(_t, count * _size);
|
||||
add(JOIN('/', _name, JOIN('=', "root", root)), count * _size);
|
||||
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_recv>(0, count * _size);
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
{
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _t{ _name };
|
||||
add(_t, count * _size);
|
||||
add(JOIN('/', _name, JOIN('=', "root", root)), count * _size);
|
||||
}
|
||||
}
|
||||
|
||||
// ncclSend
|
||||
@@ -334,27 +559,32 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, size_t
|
||||
|
||||
if(_send_types.count(_data.tool_id) > 0)
|
||||
{
|
||||
write_perfetto_counter_track<rccl_send>(count * _size);
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<rccl_send>(count * _size);
|
||||
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_send>(0, count * _size);
|
||||
}
|
||||
else if(_recv_types.count(_data.tool_id) > 0)
|
||||
{
|
||||
write_perfetto_counter_track<rccl_recv>(count * _size);
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<rccl_recv>(count * _size);
|
||||
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_recv>(0, count * _size);
|
||||
}
|
||||
else
|
||||
{
|
||||
ROCPROFSYS_CI_THROW(true, "RCCL function not handled: %s", _data.tool_id.c_str());
|
||||
}
|
||||
|
||||
write_perfetto_counter_track<rccl_recv>(count * _size);
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<rccl_recv>(count * _size);
|
||||
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_recv>(0, count * _size);
|
||||
|
||||
if(!rocprofsys::get_use_timemory()) return;
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
std::string _label = "root";
|
||||
if(_name.find("Send") != std::string::npos) _label = "peer";
|
||||
if(rocprofsys::get_use_timemory())
|
||||
{
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
std::string _label = "root";
|
||||
if(_name.find("Send") != std::string::npos) _label = "peer";
|
||||
|
||||
tracker_t _t{ _name };
|
||||
add(_t, count * _size);
|
||||
add(JOIN('/', _name, JOIN('=', _label, peer)), count * _size);
|
||||
tracker_t _t{ _name };
|
||||
add(_t, count * _size);
|
||||
add(JOIN('/', _name, JOIN('=', _label, peer)), count * _size);
|
||||
}
|
||||
}
|
||||
|
||||
// ncclBroadcast
|
||||
@@ -365,13 +595,16 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, const v
|
||||
int _size = rccl_type_size(datatype);
|
||||
if(_size <= 0) return;
|
||||
|
||||
write_perfetto_counter_track<rccl_send>(count * _size);
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<rccl_send>(count * _size);
|
||||
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_send>(0, count * _size);
|
||||
|
||||
if(!rocprofsys::get_use_timemory()) return;
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _t{ _name };
|
||||
add(_t, count * _size);
|
||||
add(JOIN('/', _data.tool_id, JOIN('=', "root", root)), count * _size);
|
||||
if(rocprofsys::get_use_timemory())
|
||||
{
|
||||
auto _name = std::string_view{ _data.tool_id };
|
||||
tracker_t _t{ _name };
|
||||
add(_t, count * _size);
|
||||
add(JOIN('/', _data.tool_id, JOIN('=', "root", root)), count * _size);
|
||||
}
|
||||
}
|
||||
|
||||
// ncclAllReduce
|
||||
@@ -389,19 +622,20 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, const v
|
||||
|
||||
if(_send_types.count(_data.tool_id) > 0)
|
||||
{
|
||||
write_perfetto_counter_track<rccl_send>(count * _size);
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<rccl_send>(count * _size);
|
||||
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_send>(0, count * _size);
|
||||
}
|
||||
else if(_recv_types.count(_data.tool_id) > 0)
|
||||
{
|
||||
write_perfetto_counter_track<rccl_recv>(count * _size);
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<rccl_recv>(count * _size);
|
||||
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_recv>(0, count * _size);
|
||||
}
|
||||
else
|
||||
{
|
||||
ROCPROFSYS_CI_THROW(true, "RCCL function not handled: %s", _data.tool_id.c_str());
|
||||
}
|
||||
|
||||
if(!rocprofsys::get_use_timemory()) return;
|
||||
add(_data, count * _size);
|
||||
if(rocprofsys::get_use_timemory()) add(_data, count * _size);
|
||||
}
|
||||
|
||||
// ncclAllGather
|
||||
@@ -413,10 +647,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, const v
|
||||
int _size = rccl_type_size(datatype);
|
||||
if(_size <= 0) return;
|
||||
|
||||
write_perfetto_counter_track<rccl_recv>(count * _size);
|
||||
|
||||
if(!rocprofsys::get_use_timemory()) return;
|
||||
add(_data, count * _size);
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<rccl_recv>(count * _size);
|
||||
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_recv>(0, count * _size);
|
||||
if(rocprofsys::get_use_timemory()) add(_data, count * _size);
|
||||
}
|
||||
#endif
|
||||
} // namespace component
|
||||
|
||||
+1
-1
@@ -82,7 +82,7 @@ struct comm_data : base<comm_data, void>
|
||||
static void preinit();
|
||||
static void configure();
|
||||
static void global_finalize();
|
||||
static void start() {}
|
||||
static void start();
|
||||
static void stop() {}
|
||||
|
||||
#if defined(ROCPROFSYS_USE_MPI)
|
||||
|
||||
@@ -21,15 +21,15 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#include "library/cpu_freq.hpp"
|
||||
#include "core/agent.hpp"
|
||||
#include "core/agent_manager.hpp"
|
||||
#include "core/common.hpp"
|
||||
#include "core/components/fwd.hpp"
|
||||
#include "core/config.hpp"
|
||||
#include "core/debug.hpp"
|
||||
#include "core/defines.hpp"
|
||||
#include "core/node_info.hpp"
|
||||
#include "core/perfetto.hpp"
|
||||
#include "core/timemory.hpp"
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "library/components/cpu_freq.hpp"
|
||||
#include "library/thread_data.hpp"
|
||||
#include "library/thread_info.hpp"
|
||||
|
||||
#include <timemory/components/rusage/backends.hpp>
|
||||
@@ -44,7 +44,6 @@
|
||||
#include <sys/resource.h>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
@@ -65,6 +64,181 @@ init_perfetto_counter_tracks(type_list<Types...>)
|
||||
{
|
||||
(perfetto_counter_track<Types>::init(), ...);
|
||||
}
|
||||
|
||||
template <typename Category>
|
||||
inline std::string
|
||||
get_cpu_freq_track_name(uint64_t cpu_id)
|
||||
{
|
||||
return std::string(trait::name<Category>::value) + " [" + std::to_string(cpu_id) +
|
||||
"]";
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
void
|
||||
do_for_enabled_cpus(Func&& func)
|
||||
{
|
||||
const auto& enabled_cpus = component::cpu_freq::get_enabled_cpus();
|
||||
for(const auto& cpu : enabled_cpus)
|
||||
{
|
||||
func(cpu);
|
||||
}
|
||||
}
|
||||
|
||||
rocpd::data_processor&
|
||||
get_data_processor()
|
||||
{
|
||||
return rocpd::data_processor::get_instance();
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_cpu_freq_category()
|
||||
{
|
||||
get_data_processor().insert_category(ROCPROFSYS_CATEGORY_CPU_FREQ,
|
||||
trait::name<category::cpu_freq>::value);
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_cpu_freq_tracks()
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
const auto thread_idx = std::nullopt; // Internal thread ID for cpu-freq
|
||||
|
||||
do_for_enabled_cpus([&](size_t cpu_id) {
|
||||
data_processor.insert_track(
|
||||
get_cpu_freq_track_name<category::cpu_freq>(cpu_id).c_str(), n_info.id,
|
||||
getpid(), thread_idx);
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_cpu_usage_tracks()
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
const auto thread_idx = std::nullopt; // Internal thread ID for cpu-freq
|
||||
|
||||
data_processor.insert_track(trait::name<category::process_page>::value, n_info.id,
|
||||
getpid(), thread_idx);
|
||||
data_processor.insert_track(trait::name<category::process_virt>::value, n_info.id,
|
||||
getpid(), thread_idx);
|
||||
data_processor.insert_track(trait::name<category::process_peak>::value, n_info.id,
|
||||
getpid(), thread_idx);
|
||||
data_processor.insert_track(trait::name<category::process_context_switch>::value,
|
||||
n_info.id, getpid(), thread_idx);
|
||||
data_processor.insert_track(trait::name<category::process_page_fault>::value,
|
||||
n_info.id, getpid(), thread_idx);
|
||||
data_processor.insert_track(trait::name<category::process_user_mode_time>::value,
|
||||
n_info.id, getpid(), thread_idx);
|
||||
data_processor.insert_track(trait::name<category::process_kernel_mode_time>::value,
|
||||
n_info.id, getpid(), thread_idx);
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_cpu_freq_pmc(size_t dev_id)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
// find the proper values for a following definitions
|
||||
size_t EVENT_CODE = 0;
|
||||
size_t INSTANCE_ID = 0;
|
||||
const char* LONG_DESCRIPTION = "";
|
||||
const char* COMPONENT = "";
|
||||
const char* BLOCK = "";
|
||||
const char* EXPRESSION = "";
|
||||
const char* MEMORY = "MB";
|
||||
const char* TIME = "sec";
|
||||
auto ni = node_info::get_instance();
|
||||
const auto* TARGET_ARCH = "CPU";
|
||||
|
||||
auto& _agent_manager = agent_manager::get_instance();
|
||||
auto base_id = _agent_manager.get_agent_by_id(dev_id, agent_type::CPU).base_id;
|
||||
|
||||
do_for_enabled_cpus([&](size_t cpu_id) {
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
get_cpu_freq_track_name<category::cpu_freq>(cpu_id).c_str(), "Frequency",
|
||||
trait::name<category::cpu_freq>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
component::cpu_freq::display_unit().c_str(), "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
});
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_page>::value, "Memory Usage",
|
||||
trait::name<category::process_page>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
MEMORY, "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_virt>::value, "Virtual Memory Usage",
|
||||
trait::name<category::process_virt>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
MEMORY, "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_peak>::value, "Peak Memory",
|
||||
trait::name<category::process_peak>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
MEMORY, "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_context_switch>::value, "Context Switches",
|
||||
trait::name<category::process_context_switch>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, "", "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_page_fault>::value, "Page Faults",
|
||||
trait::name<category::process_page_fault>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, "", "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_user_mode_time>::value, "User Time",
|
||||
trait::name<category::process_user_mode_time>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, TIME, "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_kernel_mode_time>::value, "Kernel Time",
|
||||
trait::name<category::process_kernel_mode_time>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, TIME, "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_process_cpu_usage_events(const uint32_t device_id, uint64_t timestamp,
|
||||
const component::cpu_freq& freq, double mem_page,
|
||||
double virt_mem_page, double peak_mem,
|
||||
double context_switch, double page_fault, double user_time,
|
||||
double kernel_time)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto event_id = data_processor.insert_event(ROCPROFSYS_CATEGORY_CPU_FREQ, 0, 0, 0);
|
||||
|
||||
auto& agent_mngr = agent_manager::get_instance();
|
||||
auto base_id = agent_mngr.get_agent_by_id(device_id, agent_type::CPU).base_id;
|
||||
|
||||
auto insert_event_and_sample = [&](const char* name, double value) {
|
||||
data_processor.insert_pmc_event(event_id, base_id, name, value);
|
||||
data_processor.insert_sample(name, timestamp, event_id);
|
||||
};
|
||||
|
||||
do_for_enabled_cpus([&](size_t cpu_id) {
|
||||
insert_event_and_sample(
|
||||
get_cpu_freq_track_name<category::cpu_freq>(cpu_id).c_str(), freq.at(cpu_id));
|
||||
});
|
||||
|
||||
insert_event_and_sample(trait::name<category::process_page>::value, mem_page);
|
||||
insert_event_and_sample(trait::name<category::process_virt>::value, virt_mem_page);
|
||||
insert_event_and_sample(trait::name<category::process_peak>::value, peak_mem);
|
||||
insert_event_and_sample(trait::name<category::process_context_switch>::value,
|
||||
context_switch);
|
||||
insert_event_and_sample(trait::name<category::process_page_fault>::value, page_fault);
|
||||
insert_event_and_sample(trait::name<category::process_user_mode_time>::value,
|
||||
user_time);
|
||||
insert_event_and_sample(trait::name<category::process_kernel_mode_time>::value,
|
||||
kernel_time);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace cpu_freq
|
||||
} // namespace rocprofsys
|
||||
@@ -76,11 +250,14 @@ namespace cpu_freq
|
||||
void
|
||||
setup()
|
||||
{
|
||||
init_perfetto_counter_tracks(
|
||||
type_list<category::cpu_freq, category::process_page, category::process_virt,
|
||||
category::process_peak, category::process_context_switch,
|
||||
category::process_page_fault, category::process_user_mode_time,
|
||||
category::process_kernel_mode_time>{});
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
init_perfetto_counter_tracks(
|
||||
type_list<category::cpu_freq, category::process_page, category::process_virt,
|
||||
category::process_peak, category::process_context_switch,
|
||||
category::process_page_fault, category::process_user_mode_time,
|
||||
category::process_kernel_mode_time>{});
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@@ -163,6 +340,28 @@ post_process()
|
||||
ROCPROFSYS_VERBOSE(1,
|
||||
"Post-processing %zu cpu frequency and memory usage entries...\n",
|
||||
data.size());
|
||||
|
||||
auto& enabled_cpus = component::cpu_freq::get_enabled_cpus();
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_cpu_freq_category();
|
||||
rocpd_initialize_cpu_usage_tracks();
|
||||
rocpd_initialize_cpu_freq_tracks();
|
||||
|
||||
// `get_enabled_cpus()` returns the number of cores enabled for monitoring but the
|
||||
// actually device_id is 0, since there is a single device available. And the
|
||||
// agents seems to be assigned per device basis not per core.
|
||||
// TODO: `get_enabled_cpus()` should be fixed in the future to align with GPU
|
||||
// implementation.
|
||||
auto cpu_agents =
|
||||
agent_manager::get_instance().get_agents_by_type(agent_type::CPU);
|
||||
for(auto& agent : cpu_agents)
|
||||
{
|
||||
rocpd_initialize_cpu_freq_pmc(agent->device_id);
|
||||
}
|
||||
}
|
||||
|
||||
auto _process_frequencies = [](size_t _idx, size_t _offset) {
|
||||
using freq_track = perfetto_counter_track<category::cpu_freq>;
|
||||
|
||||
@@ -191,14 +390,17 @@ post_process()
|
||||
};
|
||||
|
||||
auto _process_cpu_rusage = []() {
|
||||
config_perfetto_counter_tracks(
|
||||
type_list<category::process_page, category::process_virt,
|
||||
category::process_peak, category::process_context_switch,
|
||||
category::process_page_fault, category::process_user_mode_time,
|
||||
category::process_kernel_mode_time>{},
|
||||
{ "Memory Usage", "Virtual Memory Usage", "Peak Memory", "Context Switches",
|
||||
"Page Faults", "User Time", "Kernel Time" },
|
||||
{ "MB", "MB", "MB", "", "", "sec", "sec" });
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
config_perfetto_counter_tracks(
|
||||
type_list<category::process_page, category::process_virt,
|
||||
category::process_peak, category::process_context_switch,
|
||||
category::process_page_fault, category::process_user_mode_time,
|
||||
category::process_kernel_mode_time>{},
|
||||
{ "Memory Usage", "Virtual Memory Usage", "Peak Memory",
|
||||
"Context Switches", "Page Faults", "User Time", "Kernel Time" },
|
||||
{ "MB", "MB", "MB", "", "", "sec", "sec" });
|
||||
}
|
||||
|
||||
const auto& _thread_info = thread_info::get(0, InternalTID);
|
||||
ROCPROFSYS_CI_THROW(!_thread_info, "Missing thread info for thread 0");
|
||||
@@ -209,47 +411,61 @@ post_process()
|
||||
uint64_t _ts = std::get<0>(itr);
|
||||
if(!_thread_info->is_valid_time(_ts)) continue;
|
||||
|
||||
double _page = std::get<1>(itr);
|
||||
double _virt = std::get<2>(itr);
|
||||
double _peak = std::get<3>(itr);
|
||||
double _page = std::get<1>(itr) / units::megabyte;
|
||||
double _virt = std::get<2>(itr) / units::megabyte;
|
||||
double _peak = std::get<3>(itr) / units::megabyte;
|
||||
uint64_t _cntx = std::get<4>(itr);
|
||||
uint64_t _flts = std::get<5>(itr);
|
||||
double _user = std::get<6>(itr);
|
||||
double _kern = std::get<7>(itr);
|
||||
write_perfetto_counter_track<category::process_page>(_ts,
|
||||
_page / units::megabyte);
|
||||
write_perfetto_counter_track<category::process_virt>(_ts,
|
||||
_virt / units::megabyte);
|
||||
write_perfetto_counter_track<category::process_peak>(_ts,
|
||||
_peak / units::megabyte);
|
||||
write_perfetto_counter_track<category::process_context_switch>(_ts, _cntx);
|
||||
write_perfetto_counter_track<category::process_page_fault>(_ts, _flts);
|
||||
write_perfetto_counter_track<category::process_user_mode_time>(
|
||||
_ts, _user / units::sec);
|
||||
write_perfetto_counter_track<category::process_kernel_mode_time>(
|
||||
_ts, _kern / units::sec);
|
||||
double _user = std::get<6>(itr) / units::sec;
|
||||
double _kern = std::get<7>(itr) / units::sec;
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
write_perfetto_counter_track<category::process_page>(_ts, _page);
|
||||
write_perfetto_counter_track<category::process_virt>(_ts, _virt);
|
||||
write_perfetto_counter_track<category::process_peak>(_ts, _peak);
|
||||
write_perfetto_counter_track<category::process_context_switch>(_ts,
|
||||
_cntx);
|
||||
write_perfetto_counter_track<category::process_page_fault>(_ts, _flts);
|
||||
write_perfetto_counter_track<category::process_user_mode_time>(_ts,
|
||||
_user);
|
||||
write_perfetto_counter_track<category::process_kernel_mode_time>(_ts,
|
||||
_kern);
|
||||
}
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
const auto& freq_data = std::get<8>(itr);
|
||||
rocpd_process_cpu_usage_events(0, _ts, freq_data, _page, _virt, _peak,
|
||||
_cntx, _flts, _user, _kern);
|
||||
}
|
||||
}
|
||||
|
||||
auto _end_ts = _thread_info->get_stop();
|
||||
write_perfetto_counter_track<category::process_page>(_end_ts, 0.0);
|
||||
write_perfetto_counter_track<category::process_virt>(_end_ts, 0.0);
|
||||
write_perfetto_counter_track<category::process_peak>(_end_ts, 0.0);
|
||||
write_perfetto_counter_track<category::process_context_switch>(_end_ts, 0);
|
||||
write_perfetto_counter_track<category::process_page_fault>(_end_ts, 0);
|
||||
write_perfetto_counter_track<category::process_user_mode_time>(_end_ts, 0.0);
|
||||
write_perfetto_counter_track<category::process_kernel_mode_time>(_end_ts, 0.0);
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
auto _end_ts = _thread_info->get_stop();
|
||||
write_perfetto_counter_track<category::process_page>(_end_ts, 0.0);
|
||||
write_perfetto_counter_track<category::process_virt>(_end_ts, 0.0);
|
||||
write_perfetto_counter_track<category::process_peak>(_end_ts, 0.0);
|
||||
write_perfetto_counter_track<category::process_context_switch>(_end_ts, 0);
|
||||
write_perfetto_counter_track<category::process_page_fault>(_end_ts, 0);
|
||||
write_perfetto_counter_track<category::process_user_mode_time>(_end_ts, 0.0);
|
||||
write_perfetto_counter_track<category::process_kernel_mode_time>(_end_ts,
|
||||
0.0);
|
||||
}
|
||||
};
|
||||
|
||||
_process_cpu_rusage();
|
||||
|
||||
auto& enabled_cpu_freqs = component::cpu_freq::get_enabled_cpus();
|
||||
for(auto itr = enabled_cpu_freqs.begin(); itr != enabled_cpu_freqs.end(); ++itr)
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
auto _idx = *itr;
|
||||
auto _offset = std::distance(enabled_cpu_freqs.begin(), itr);
|
||||
_process_frequencies(_idx, _offset);
|
||||
for(auto itr = enabled_cpus.begin(); itr != enabled_cpus.end(); ++itr)
|
||||
{
|
||||
auto _idx = *itr;
|
||||
auto _offset = std::distance(enabled_cpus.begin(), itr);
|
||||
_process_frequencies(_idx, _offset);
|
||||
}
|
||||
}
|
||||
enabled_cpu_freqs.clear();
|
||||
enabled_cpus.clear();
|
||||
}
|
||||
|
||||
} // namespace cpu_freq
|
||||
} // namespace rocprofsys
|
||||
|
||||
@@ -20,14 +20,19 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include <optional>
|
||||
#define TIMEMORY_KOKKOSP_POSTFIX ROCPROFSYS_PUBLIC_API
|
||||
|
||||
#include "api.hpp"
|
||||
#include "core/agent_manager.hpp"
|
||||
#include "core/components/fwd.hpp"
|
||||
#include "core/config.hpp"
|
||||
#include "core/debug.hpp"
|
||||
#include "core/defines.hpp"
|
||||
#include "core/node_info.hpp"
|
||||
#include "core/perfetto.hpp"
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "core/rocpd/json.hpp"
|
||||
#include "library/components/category_region.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
|
||||
@@ -150,6 +155,52 @@ violates_name_rules(Arg&& _arg, Args&&... _args)
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace
|
||||
{
|
||||
rocprofsys::rocpd::data_processor&
|
||||
get_data_processor()
|
||||
{
|
||||
return rocprofsys::rocpd::data_processor::get_instance();
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_kokkos_category()
|
||||
{
|
||||
get_data_processor().insert_category(
|
||||
rocprofsys::category_enum_id<category::kokkos>::value,
|
||||
rocprofsys::trait::name<category::kokkos>::value);
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_kokkos_track()
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = rocprofsys::node_info::get_instance();
|
||||
auto thread_id = std::nullopt;
|
||||
|
||||
data_processor.insert_track(rocprofsys::trait::name<category::kokkos>::value,
|
||||
n_info.id, getpid(), thread_id);
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_process_kokkos_event(const char* name, const char* event_type, const char* target,
|
||||
uint64_t timestamp_ns)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto event_metadata = rocpd::json::create();
|
||||
|
||||
event_metadata->set("name", name);
|
||||
event_metadata->set("event_type", event_type);
|
||||
event_metadata->set("target", target);
|
||||
|
||||
auto event_id = data_processor.insert_event(
|
||||
rocprofsys::category_enum_id<category::kokkos>::value, 0, 0, 0, "{}", "{}",
|
||||
event_metadata->to_string().c_str());
|
||||
data_processor.insert_sample(rocprofsys::trait::name<category::kokkos>::value,
|
||||
timestamp_ns, event_id, "{}");
|
||||
}
|
||||
|
||||
} // namespace
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
extern "C"
|
||||
@@ -256,6 +307,12 @@ extern "C"
|
||||
rocprofsys_set_mpi_hidden(false, false);
|
||||
rocprofsys_init_hidden(_mode.c_str(), false, _arg0.c_str());
|
||||
rocprofsys_push_trace_hidden("kokkos_main");
|
||||
|
||||
if(rocprofsys::get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_kokkos_category();
|
||||
rocpd_initialize_kokkos_track();
|
||||
}
|
||||
}
|
||||
|
||||
setup_kernel_logger();
|
||||
@@ -545,6 +602,8 @@ extern "C"
|
||||
{
|
||||
if(violates_name_rules(label)) return;
|
||||
|
||||
auto timestamp = tim::get_clock_real_now<uint64_t, std::nano>();
|
||||
|
||||
ROCPROFSYS_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
if(rocprofsys::config::get_use_perfetto())
|
||||
{
|
||||
@@ -559,12 +618,20 @@ extern "C"
|
||||
"", label, " [dual_view_sync][", (is_device) ? "device" : "host", "]")));
|
||||
kokkosp::profiler_t<kokkosp_region>{ _name }.mark();
|
||||
}
|
||||
|
||||
if(rocprofsys::config::get_use_rocpd())
|
||||
{
|
||||
rocpd_process_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(),
|
||||
"[dual_view_sync]",
|
||||
(is_device) ? "device" : "host", timestamp);
|
||||
}
|
||||
}
|
||||
|
||||
void kokkosp_dual_view_modify(const char* label, const void* const, bool is_device)
|
||||
{
|
||||
if(violates_name_rules(label)) return;
|
||||
|
||||
auto timestamp = tim::get_clock_real_now<uint64_t, std::nano>();
|
||||
ROCPROFSYS_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
if(rocprofsys::config::get_use_perfetto())
|
||||
{
|
||||
@@ -580,6 +647,13 @@ extern "C"
|
||||
(is_device) ? "device" : "host", "]")));
|
||||
kokkosp::profiler_t<kokkosp_region>{ _name }.mark();
|
||||
}
|
||||
|
||||
if(rocprofsys::config::get_use_rocpd())
|
||||
{
|
||||
rocpd_process_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(),
|
||||
"[dual_view_modify]",
|
||||
(is_device) ? "device" : "host", timestamp);
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------//
|
||||
|
||||
+1
-17
@@ -21,26 +21,10 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#include "library/rocprofiler-sdk/counters.hpp"
|
||||
#include "common/synchronized.hpp"
|
||||
#include "core/debug.hpp"
|
||||
#include "core/timemory.hpp"
|
||||
#include "library/rocprofiler-sdk/fwd.hpp"
|
||||
|
||||
#include <timemory/utility/types.hpp>
|
||||
|
||||
#include <rocprofiler-sdk/agent.h>
|
||||
#include <rocprofiler-sdk/buffer_tracing.h>
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
#include <rocprofiler-sdk/cxx/hash.hpp>
|
||||
#include <rocprofiler-sdk/cxx/name_info.hpp>
|
||||
#include <rocprofiler-sdk/cxx/operators.hpp>
|
||||
#include <rocprofiler-sdk/dispatch_counting_service.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/registration.h>
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <timemory/utility/types.hpp>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
|
||||
@@ -26,7 +26,10 @@
|
||||
#include "core/config.hpp"
|
||||
#include "core/debug.hpp"
|
||||
#include "core/locking.hpp"
|
||||
#include "core/node_info.hpp"
|
||||
#include "core/perf.hpp"
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "core/rocpd/json.hpp"
|
||||
#include "core/state.hpp"
|
||||
#include "core/utility.hpp"
|
||||
#include "library/components/backtrace.hpp"
|
||||
@@ -153,6 +156,136 @@ namespace
|
||||
{
|
||||
using sampler_allocator_t = typename sampler_t::allocator_t;
|
||||
|
||||
template <typename Category>
|
||||
inline std::string
|
||||
get_category_track_name(uint64_t tid)
|
||||
{
|
||||
return std::string(trait::name<Category>::value) + "_" + std::to_string(tid);
|
||||
}
|
||||
|
||||
std::string
|
||||
generate_call_stack_json(const tim::unwind::processed_entry& stack_entry)
|
||||
{
|
||||
auto call_stack = ::rocpd::json::create();
|
||||
|
||||
call_stack->set("name", std::string(demangle(stack_entry.name)));
|
||||
call_stack->set("pc", as_hex(stack_entry.address));
|
||||
call_stack->set("file", std::string(stack_entry.location));
|
||||
|
||||
return call_stack->to_string();
|
||||
}
|
||||
|
||||
std::string
|
||||
generate_line_info_json(const tim::unwind::processed_entry& line_info_entry)
|
||||
{
|
||||
auto line_info = ::rocpd::json::create();
|
||||
line_info->set("line_address", as_hex(line_info_entry.line_address));
|
||||
line_info->set("name", std::string(demangle(line_info_entry.name)));
|
||||
|
||||
if(line_info_entry.lineinfo && !line_info_entry.lineinfo.lines.empty())
|
||||
{
|
||||
auto _lines = line_info_entry.lineinfo.lines;
|
||||
std::reverse(_lines.begin(), _lines.end());
|
||||
for(const auto& line : _lines)
|
||||
{
|
||||
auto inlined = ::rocpd::json::create();
|
||||
inlined->set("name", std::string(demangle(line.name)));
|
||||
inlined->set("location", std::string(line.location));
|
||||
inlined->set("line", std::to_string(line.line));
|
||||
line_info->set("inlined", inlined);
|
||||
}
|
||||
}
|
||||
|
||||
return line_info->to_string();
|
||||
}
|
||||
|
||||
std::string
|
||||
generate_hw_counter_json(int64_t _tid, const backtrace_metrics& metrics)
|
||||
{
|
||||
auto extdata = ::rocpd::json::create();
|
||||
|
||||
if(!metrics.get_hw_counters().empty())
|
||||
{
|
||||
auto _labels = backtrace_metrics::get_hw_counter_labels(_tid);
|
||||
auto _hw_cnt_vals = metrics.get_hw_counters();
|
||||
|
||||
auto hw_counters = ::rocpd::json::create();
|
||||
for(size_t i = 0; i < _labels.size(); ++i)
|
||||
{
|
||||
hw_counters->set(_labels.at(i), _hw_cnt_vals.at(i));
|
||||
}
|
||||
|
||||
extdata->set("hw_counters", hw_counters);
|
||||
}
|
||||
|
||||
return extdata->to_string();
|
||||
}
|
||||
|
||||
rocpd::data_processor&
|
||||
get_data_processor()
|
||||
{
|
||||
return rocpd::data_processor::get_instance();
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_sampling_category()
|
||||
{
|
||||
static bool _is_initialized = false;
|
||||
if(_is_initialized) return;
|
||||
|
||||
auto& data_processor = get_data_processor();
|
||||
data_processor.insert_category(ROCPROFSYS_CATEGORY_SAMPLING,
|
||||
trait::name<category::sampling>::value);
|
||||
data_processor.insert_category(ROCPROFSYS_CATEGORY_OVERFLOW_SAMPLING,
|
||||
trait::name<category::overflow_sampling>::value);
|
||||
data_processor.insert_category(ROCPROFSYS_CATEGORY_TIMER_SAMPLING,
|
||||
trait::name<category::timer_sampling>::value);
|
||||
|
||||
_is_initialized = true;
|
||||
}
|
||||
|
||||
size_t
|
||||
rocpd_initialize_thread_info(size_t tid)
|
||||
{
|
||||
const auto& _thread_info = thread_info::get(tid, SequentTID);
|
||||
ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", tid);
|
||||
if(!_thread_info) return -1;
|
||||
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
|
||||
return data_processor.insert_thread_info(
|
||||
n_info.id, getppid(), getpid(), _thread_info->index_data->system_value,
|
||||
threading::get_thread_name().c_str(), _thread_info->get_start(),
|
||||
_thread_info->get_stop(), "{}");
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_init_track(const char* track_name, int64_t tid)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
|
||||
data_processor.insert_track(track_name, n_info.id, getpid(), tid, "{}");
|
||||
}
|
||||
|
||||
template <typename Category>
|
||||
void
|
||||
rocpd_insert_region(size_t thread_id, size_t start_time, size_t end_time, size_t name_id,
|
||||
const char* track, const char* call_stack = "{}",
|
||||
const char* line_info = "{}", const char* extdata = "{}")
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
|
||||
auto event_id = data_processor.insert_event(category_enum_id<Category>::value, 0, 0,
|
||||
0, call_stack, line_info, extdata);
|
||||
|
||||
data_processor.insert_region(n_info.id, getpid(), thread_id, start_time, end_time,
|
||||
name_id, event_id);
|
||||
data_processor.insert_sample(track, start_time, event_id);
|
||||
}
|
||||
|
||||
auto&
|
||||
get_sampler_allocators()
|
||||
{
|
||||
@@ -810,6 +943,10 @@ void
|
||||
post_process_timemory(int64_t, const std::vector<timer_sampling_data>&,
|
||||
const std::vector<overflow_sampling_data>&);
|
||||
|
||||
void
|
||||
post_process_rocpd(int64_t _tid, const std::vector<timer_sampling_data>& _timer_data,
|
||||
const std::vector<overflow_sampling_data>& _overflow_data);
|
||||
|
||||
auto static_strings = std::set<std::string>{};
|
||||
|
||||
} // namespace
|
||||
@@ -939,15 +1076,15 @@ post_process()
|
||||
|
||||
auto _raw_data = _sampler->get_data();
|
||||
auto _loaded_data = load_offload_buffer(i);
|
||||
for(auto litr : _loaded_data)
|
||||
for(auto line : _loaded_data)
|
||||
{
|
||||
while(!litr.is_empty())
|
||||
while(!line.is_empty())
|
||||
{
|
||||
auto _v = sampler_bundle_t{};
|
||||
litr.read(&_v);
|
||||
line.read(&_v);
|
||||
_raw_data.emplace_back(std::move(_v));
|
||||
}
|
||||
litr.destroy();
|
||||
line.destroy();
|
||||
}
|
||||
|
||||
ROCPROFSYS_VERBOSE(2 || get_debug_sampling(),
|
||||
@@ -988,6 +1125,7 @@ post_process()
|
||||
|
||||
if(get_use_perfetto()) post_process_perfetto(i, _timer_data, _overflow_data);
|
||||
if(get_use_timemory()) post_process_timemory(i, _timer_data, _overflow_data);
|
||||
if(get_use_rocpd()) post_process_rocpd(i, _timer_data, _overflow_data);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1205,13 +1343,13 @@ post_process_perfetto(int64_t _tid, const std::vector<timer_sampling_data>& _tim
|
||||
auto _lines = iitr.lineinfo.lines;
|
||||
std::reverse(_lines.begin(), _lines.end());
|
||||
size_t _n = 0;
|
||||
for(const auto& litr : _lines)
|
||||
for(const auto& line : _lines)
|
||||
{
|
||||
auto _label = JOIN('-', "lineinfo", _n++);
|
||||
tracing::add_perfetto_annotation(
|
||||
ctx, _label.c_str(),
|
||||
JOIN('@', demangle(litr.name),
|
||||
JOIN(':', litr.location, litr.line)));
|
||||
JOIN('@', demangle(line.name),
|
||||
JOIN(':', line.location, line.line)));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1298,11 +1436,11 @@ post_process_perfetto(int64_t _tid, const std::vector<timer_sampling_data>& _tim
|
||||
auto _lines = iitr.lineinfo.lines;
|
||||
std::reverse(_lines.begin(), _lines.end());
|
||||
size_t _n = 0;
|
||||
for(const auto& litr : _lines)
|
||||
for(const auto& line : _lines)
|
||||
{
|
||||
const auto* _name =
|
||||
static_strings.emplace(demangle(litr.name)).first->c_str();
|
||||
auto _info = JOIN(':', litr.location, litr.line);
|
||||
static_strings.emplace(demangle(line.name)).first->c_str();
|
||||
auto _info = JOIN(':', line.location, line.line);
|
||||
tracing::push_perfetto_track(
|
||||
category::timer_sampling{}, _name, _track, _beg,
|
||||
[&](::perfetto::EventContext ctx) {
|
||||
@@ -1342,13 +1480,13 @@ post_process_perfetto(int64_t _tid, const std::vector<timer_sampling_data>& _tim
|
||||
auto _lines = iitr.lineinfo.lines;
|
||||
std::reverse(_lines.begin(), _lines.end());
|
||||
size_t _n = 0;
|
||||
for(const auto& litr : _lines)
|
||||
for(const auto& line : _lines)
|
||||
{
|
||||
auto _label = JOIN('-', "lineinfo", _n++);
|
||||
tracing::add_perfetto_annotation(
|
||||
ctx, _label.c_str(),
|
||||
JOIN('@', demangle(litr.name),
|
||||
JOIN(':', litr.location, litr.line)));
|
||||
JOIN('@', demangle(line.name),
|
||||
JOIN(':', line.location, line.line)));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1540,6 +1678,189 @@ post_process_timemory(int64_t _tid, const std::vector<timer_sampling_data>& _tim
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_post_process_overflow_data(
|
||||
int64_t _tid, const std::vector<overflow_sampling_data>& _overflow_data)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
|
||||
const auto& _thread_info = thread_info::get(_tid, SequentTID);
|
||||
ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", _tid);
|
||||
if(!_thread_info) return;
|
||||
|
||||
auto _overflow_event =
|
||||
get_setting_value<std::string>("ROCPROFSYS_SAMPLING_OVERFLOW_EVENT").value_or("");
|
||||
|
||||
if(!_overflow_event.empty() && !_overflow_data.empty())
|
||||
{
|
||||
auto _beg_ns = std::max(_overflow_data.front().m_beg, _thread_info->get_start());
|
||||
auto _end_ns = std::min(_overflow_data.back().m_end, _thread_info->get_stop());
|
||||
|
||||
const auto _overflow_prefix = std::string_view{ "PERF_COUNT_" };
|
||||
const auto _overflow_pos = _overflow_event.find(_overflow_prefix);
|
||||
if(_overflow_pos != std::string::npos)
|
||||
_overflow_event =
|
||||
_overflow_event.substr(_overflow_pos + _overflow_prefix.length());
|
||||
|
||||
const auto* _main_name =
|
||||
static_strings.emplace(join(" ", _overflow_event, "samples [rocprof-sys]"))
|
||||
.first->c_str();
|
||||
auto main_name_id = data_processor.insert_string(_main_name);
|
||||
|
||||
const auto& _track_name =
|
||||
JOIN(" ", "Thread", _thread_info->index_data->sequent_value, "Overflow",
|
||||
"(S)", _thread_info->index_data->system_value);
|
||||
|
||||
auto thread_idx = rocpd_initialize_thread_info(_tid);
|
||||
rocpd_init_track(_track_name.c_str(), thread_idx);
|
||||
rocpd_insert_region<category::overflow_sampling>(
|
||||
thread_idx, _beg_ns, _end_ns, main_name_id, _track_name.c_str());
|
||||
|
||||
for(const auto& itr : _overflow_data)
|
||||
{
|
||||
auto _beg = itr.m_beg;
|
||||
auto _end = itr.m_end;
|
||||
|
||||
if(!_thread_info->is_valid_lifetime({ _beg, _end })) continue;
|
||||
|
||||
for(const auto& iitr : itr.m_stack)
|
||||
{
|
||||
const auto* _name =
|
||||
static_strings.emplace(demangle(iitr.name)).first->c_str();
|
||||
auto name_id = data_processor.insert_string(_name);
|
||||
rocpd_insert_region<category::overflow_sampling>(
|
||||
thread_idx, _beg, _end, name_id, _track_name.c_str(),
|
||||
generate_call_stack_json(iitr).c_str(),
|
||||
generate_line_info_json(iitr).c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_post_process_backtrace_metrics(int64_t _tid,
|
||||
const std::vector<timer_sampling_data>& _timer_data)
|
||||
{
|
||||
auto _valid_metrics = backtrace_metrics::valid_array_t{};
|
||||
|
||||
for(const auto& itr : _timer_data)
|
||||
{
|
||||
_valid_metrics |= itr.m_metrics.get_valid();
|
||||
}
|
||||
|
||||
if(trait::runtime_enabled<backtrace_metrics>::get() && get_use_rocpd())
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(3 || get_debug_sampling(),
|
||||
"[%li] Post-processing metrics for rocpd...\n", _tid);
|
||||
backtrace_metrics::init_rocpd(_tid, _valid_metrics);
|
||||
for(const auto& itr : _timer_data)
|
||||
itr.m_metrics.post_process_rocpd(_tid, 0.5 * (itr.m_beg + itr.m_end));
|
||||
backtrace_metrics::fini_rocpd(_tid, _valid_metrics);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_post_process_timer_data(int64_t _tid,
|
||||
const std::vector<timer_sampling_data>& _timer_data)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
|
||||
const auto& _thread_info = thread_info::get(_tid, SequentTID);
|
||||
ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", _tid);
|
||||
if(!_thread_info) return;
|
||||
|
||||
if(!_timer_data.empty())
|
||||
{
|
||||
rocpd_post_process_backtrace_metrics(_tid, _timer_data);
|
||||
|
||||
auto _beg_ns = std::max(_timer_data.front().m_beg, _thread_info->get_start());
|
||||
auto _end_ns = std::min(_timer_data.back().m_end, _thread_info->get_stop());
|
||||
|
||||
const auto _track_name =
|
||||
JOIN(" ", "Thread", _thread_info->index_data->sequent_value, "(S)",
|
||||
_thread_info->index_data->system_value);
|
||||
|
||||
auto thread_idx = rocpd_initialize_thread_info(_tid);
|
||||
rocpd_init_track(_track_name.c_str(), thread_idx);
|
||||
const auto main_name_id = data_processor.insert_string("samples [rocprof-sys]");
|
||||
rocpd_insert_region<category::timer_sampling>(thread_idx, _beg_ns, _end_ns,
|
||||
main_name_id, _track_name.c_str());
|
||||
|
||||
auto _labels = backtrace_metrics::get_hw_counter_labels(_tid);
|
||||
for(const auto& itr : _timer_data)
|
||||
{
|
||||
size_t _ncount = 0;
|
||||
uint64_t _beg = itr.m_beg;
|
||||
uint64_t _end = itr.m_end;
|
||||
if(!_thread_info->is_valid_lifetime({ _beg, _end })) continue;
|
||||
|
||||
for(const auto& iitr : itr.m_stack)
|
||||
{
|
||||
auto _ncur = _ncount++;
|
||||
// the begin/end + HW counters will be same for entire call-stack so only
|
||||
// annotate the top and the bottom functons to keep the data consumption
|
||||
// low
|
||||
bool _include_common = (_ncur == 0 || _ncur + 1 == itr.m_stack.size());
|
||||
|
||||
// Only annotate HW counters when first or last and HW counters are not
|
||||
// empty
|
||||
bool _include_hw =
|
||||
_include_common && !itr.m_metrics.get_hw_counters().empty();
|
||||
|
||||
std::string hw_counter_json = "{}";
|
||||
if(_include_hw)
|
||||
{
|
||||
// current values when read
|
||||
hw_counter_json = generate_hw_counter_json(_tid, itr.m_metrics);
|
||||
}
|
||||
|
||||
if(get_sampling_include_inlines() && iitr.lineinfo)
|
||||
{
|
||||
auto _lines = iitr.lineinfo.lines;
|
||||
std::reverse(_lines.begin(), _lines.end());
|
||||
size_t _n = 0;
|
||||
for(const auto& line : _lines)
|
||||
{
|
||||
const auto* _name =
|
||||
static_strings.emplace(demangle(line.name)).first->c_str();
|
||||
auto inlined_name_id = data_processor.insert_string(_name);
|
||||
|
||||
auto inlined_call_stack = ::rocpd::json::create();
|
||||
inlined_call_stack->set("name", std::string(demangle(line.name)));
|
||||
inlined_call_stack->set("location", std::string(line.location));
|
||||
inlined_call_stack->set("line", std::to_string(line.line));
|
||||
inlined_call_stack->set("inlined", "true");
|
||||
|
||||
rocpd_insert_region<category::timer_sampling>(
|
||||
thread_idx, _beg, _end, inlined_name_id, _track_name.c_str(),
|
||||
inlined_call_stack->to_string().c_str(), "{}",
|
||||
// Only include HW counters for first inlined function
|
||||
(_n == 0) ? hw_counter_json.c_str() : "{}");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto* _name = static_strings.emplace(iitr.name).first->c_str();
|
||||
const auto name_id = data_processor.insert_string(_name);
|
||||
rocpd_insert_region<category::timer_sampling>(
|
||||
thread_idx, _beg, _end, name_id, _track_name.c_str(),
|
||||
generate_call_stack_json(iitr).c_str(),
|
||||
generate_line_info_json(iitr).c_str(), hw_counter_json.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
post_process_rocpd(int64_t _tid, const std::vector<timer_sampling_data>& _timer_data,
|
||||
const std::vector<overflow_sampling_data>& _overflow_data)
|
||||
{
|
||||
rocpd_initialize_sampling_category();
|
||||
rocpd_post_process_overflow_data(_tid, _overflow_data);
|
||||
rocpd_post_process_timer_data(_tid, _timer_data);
|
||||
}
|
||||
|
||||
struct sampling_initialization
|
||||
{
|
||||
static void preinit()
|
||||
|
||||
Referencia en una nueva incidencia
Block a user