ROCpd support [Part 1] (#279)

- Add rocpd support for
 - cpu_frequency
 - amd_smi
 - sampling


[ROCm/rocprofiler-systems commit: 26ae543012]
This commit is contained in:
Aleksandar Djordjevic
2025-07-28 17:33:52 +02:00
committed by GitHub
parent e2fc692ee0
commit 166babf234
49 changed files with 6770 additions and 365 deletions
+3
View File
@@ -24,3 +24,6 @@
[submodule "external/pybind11"]
path = external/pybind11
url = https://github.com/jrmadsen/pybind11.git
[submodule "external/sqlite"]
path = external/sqlite
url = https://github.com/sqlite/sqlite.git
@@ -10,6 +10,7 @@ Full documentation for ROCm Systems Profiler is available at [https://rocm.docs.
- How-to document for VCN and JPEG activity sampling and tracing.
- Support for tracing Fortran applications.
- Support for tracing MPI API in Fortran.
- Initial support for rocPD database output with the `ROCPROFSYS_USE_ROCPD` configuration setting.
- By default, group "kernel dispatch" and "memory copy" events by HIP stream ID in Perfetto traces.
- Add the "ROCPROFSYS_ROCM_GROUP_BY_QUEUE" configuration setting to group events by queue, instead.
@@ -221,6 +221,12 @@ rocprofiler_systems_add_option(ROCPROFSYS_BUILD_CODECOV "Build for code coverage
rocprofiler_systems_add_option(ROCPROFSYS_INSTALL_PERFETTO_TOOLS
"Install perfetto tools (i.e. traced, perfetto, etc.)" OFF
)
rocprofiler_systems_add_option(ROCPROFILER_BUILD_SQLITE3
"Enable building sqlite3 library internally" OFF
)
rocprofiler_systems_add_option(ROCPROFSYS_ENABLE_BENCHMARK
"Enable performance benchmarking capabilities for the project" OFF
)
if(ROCPROFSYS_USE_PAPI)
rocprofiler_systems_add_option(ROCPROFSYS_BUILD_PAPI "Build PAPI from submodule" ON)
@@ -328,6 +334,10 @@ if(ROCPROFSYS_BUILD_TESTING OR "$ENV{ROCPROFSYS_CI}" MATCHES "[1-9]+|ON|on|y|yes
include(CTest)
endif()
if(ROCPROFSYS_ENABLE_BENCHMARK)
add_compile_definitions(-DROCPROFSYS_USE_BENCHMARK=1)
endif()
# ------------------------------------------------------------------------------#
#
# library and executables
@@ -53,6 +53,9 @@ rocprofiler_systems_add_interface_library(rocprofiler-systems-python
rocprofiler_systems_add_interface_library(rocprofiler-systems-perfetto
"Enables Perfetto support"
)
rocprofiler_systems_add_interface_library(rocprofiler-systems-sqlite3
"Use SQLite3 for rocpd data storage"
)
rocprofiler_systems_add_interface_library(rocprofiler-systems-timemory
"Provides timemory libraries"
)
@@ -532,6 +535,14 @@ rocprofiler_systems_checkout_git_submodule(
include(Perfetto)
# ----------------------------------------------------------------------------------------#
#
# SQLite3
#
# ----------------------------------------------------------------------------------------#
include(SQLite3)
# ----------------------------------------------------------------------------------------#
#
# ELFIO
@@ -0,0 +1,48 @@
include_guard(GLOBAL)
if(ROCPROFILER_BUILD_SQLITE3)
message(STATUS "Building SQLite3 from source!")
execute_process(
COMMAND ${CMAKE_COMMAND} -E make_directory ${PROJECT_BINARY_DIR}/external/sqlite
)
# checkout submodule if not already checked out or clone repo if no .gitmodules file
rocprofiler_systems_checkout_git_submodule(
RELATIVE_PATH external/sqlite
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
TEST_FILE configure
REPO_URL https://github.com/sqlite/sqlite.git
REPO_BRANCH "version-3.45.3"
)
find_program(MAKE_COMMAND NAMES make gmake PATH_SUFFIXES bin REQUIRED)
include(ExternalProject)
ExternalProject_Add(
rocprofiler-systems-sqlite-build
PREFIX ${PROJECT_BINARY_DIR}/external/sqlite/build
SOURCE_DIR ${PROJECT_SOURCE_DIR}/external/sqlite
BUILD_IN_SOURCE 0
CONFIGURE_COMMAND
<SOURCE_DIR>/configure --prefix=${PROJECT_BINARY_DIR}/external/sqlite/install
--libdir=${PROJECT_BINARY_DIR}/external/sqlite/install/lib --disable-shared
--with-tempstore=yes --enable-all --disable-tcl CFLAGS=-O3\ -g1
BUILD_COMMAND ${MAKE_COMMAND} install -s
INSTALL_COMMAND ""
)
target_link_libraries(
rocprofiler-systems-sqlite3
INTERFACE
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/external/sqlite/install/lib/libsqlite3.a>
)
target_include_directories(
rocprofiler-systems-sqlite3
SYSTEM
INTERFACE $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/external/sqlite/install/include>
)
add_dependencies(rocprofiler-systems-sqlite3 rocprofiler-systems-sqlite-build)
else()
message(STATUS "Using system SQLite3 library")
find_package(SQLite3 REQUIRED)
target_link_libraries(rocprofiler-systems-sqlite3 INTERFACE SQLite::SQLite3)
endif()
@@ -118,7 +118,7 @@ write_hw_counter_info(std::ostream&, const array_t<bool, N>& = {},
namespace
{
// initialize HIP before main so that librocprof-sys is not HSA_TOOLS_LIB
int gpu_count = rocprofsys::gpu::device_count();
int gpu_count = 0;
// statically allocated shared_ptrs to prevent use after free errors
auto timemory_manager = tim::manager::master_instance();
@@ -138,6 +138,7 @@ main(int argc, char** argv)
tim::unwind::set_bfd_verbose(3);
tim::set_env("ROCPROFSYS_INIT_TOOLING", "OFF", 1);
rocprofsys_init_library();
gpu_count = rocprofsys::gpu::device_count();
std::set<std::string> _category_options = component_categories{}();
{
@@ -40,6 +40,7 @@ target_link_libraries(
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-compile-options>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-compile-definitions>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-perfetto>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-sqlite3>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-timemory>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-elfutils>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-bfd>
@@ -26,6 +26,8 @@ target_sources(
${CMAKE_CURRENT_SOURCE_DIR}/invoke.hpp
${CMAKE_CURRENT_SOURCE_DIR}/join.hpp
${CMAKE_CURRENT_SOURCE_DIR}/setup.hpp
${CMAKE_CURRENT_SOURCE_DIR}/traits.hpp
${CMAKE_CURRENT_SOURCE_DIR}/md5sum.hpp
${CMAKE_CURRENT_SOURCE_DIR}/static_object.hpp
${CMAKE_CURRENT_SOURCE_DIR}/synchronized.hpp
)
@@ -0,0 +1,469 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <array>
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <string_view>
#include <type_traits>
#include "traits.hpp"
namespace rocprofsys
{
inline namespace common
{
class md5sum
{
public:
using size_type = uint32_t; // must be 32bit
using raw_digest_t = std::array<uint8_t, 16>;
static constexpr int blocksize = 64;
template <typename Tp, typename... Args>
explicit md5sum(Tp&& arg, Args&&... args);
md5sum() = default;
~md5sum() = default;
md5sum(const md5sum&) = default;
md5sum(md5sum&&) = default;
md5sum& operator=(const md5sum&) = default;
md5sum& operator=(md5sum&&) = default;
md5sum& update(std::string_view inp);
md5sum& update(const unsigned char* buf, size_type length);
md5sum& update(const char* buf, size_type length);
md5sum& finalize();
std::string hexdigest() const;
std::string hexliteral() const;
raw_digest_t rawdigest() const { return digest; }
template <typename Tp,
typename Up = std::enable_if_t<std::is_arithmetic<Tp>::value, int>>
md5sum& update(Tp inp);
friend std::ostream& operator<<(std::ostream&, md5sum md5);
private:
void transform(const uint8_t block[blocksize]);
bool finalized = false;
// 64bit counter for number of bits (lo, hi)
std::array<uint32_t, 2> count = { 0, 0 };
std::array<uint8_t, blocksize> buffer{}; // overflow bytes from last 64 byte chunk
// digest so far, initialized to magic initialization constants.
std::array<uint32_t, 4> state = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 };
std::array<uint8_t, 16> digest{}; // result
};
template <typename Tp, typename... Args>
md5sum::md5sum(Tp&& arg, Args&&... args)
{
auto _update = [&](auto&& _val) {
using value_type =
std::remove_reference_t<std::remove_cv_t<std::decay_t<decltype(_val)>>>;
static_assert(!std::is_pointer<value_type>::value,
"constructor cannot be called with pointer argument");
update(std::forward<decltype(_val)>(_val));
};
_update(std::forward<Tp>(arg));
(_update(std::forward<Args>(args)), ...);
finalize();
}
template <typename Tp, typename Up>
md5sum&
md5sum::update(Tp inp)
{
static_assert(std::is_arithmetic<Tp>::value, "expected arithmetic type");
return update(reinterpret_cast<const char*>(&inp), sizeof(Tp));
}
template <template <typename, typename...> class ContainerT, typename Tp,
typename... TailT>
std::string
compute_md5sum(const ContainerT<Tp, TailT...>& inp,
std::enable_if_t<traits::is_string_literal<Tp>(), int>)
{
auto _val = md5sum{};
for(const auto& itr : inp)
_val.update(std::string_view{ inp });
_val.finalize();
return _val.hexdigest();
}
namespace
{
using size_type = typename md5sum::size_type;
// Constants for md5sumTransform routine.
constexpr uint32_t S11 = 7;
constexpr uint32_t S12 = 12;
constexpr uint32_t S13 = 17;
constexpr uint32_t S14 = 22;
constexpr uint32_t S21 = 5;
constexpr uint32_t S22 = 9;
constexpr uint32_t S23 = 14;
constexpr uint32_t S24 = 20;
constexpr uint32_t S31 = 4;
constexpr uint32_t S32 = 11;
constexpr uint32_t S33 = 16;
constexpr uint32_t S34 = 23;
constexpr uint32_t S41 = 6;
constexpr uint32_t S42 = 10;
constexpr uint32_t S43 = 15;
constexpr uint32_t S44 = 21;
// low level logic operations
static inline uint32_t
F(uint32_t x, uint32_t y, uint32_t z);
static inline uint32_t
G(uint32_t x, uint32_t y, uint32_t z);
static inline uint32_t
H(uint32_t x, uint32_t y, uint32_t z);
static inline uint32_t
I(uint32_t x, uint32_t y, uint32_t z);
static inline uint32_t
rotate_left(uint32_t x, int n);
static inline void
FF(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac);
static inline void
GG(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac);
static inline void
HH(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac);
static inline void
II(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac);
// F, G, H and I are basic md5sum functions.
inline uint32_t
F(uint32_t x, uint32_t y, uint32_t z)
{
return (x & y) | (~x & z);
}
inline uint32_t
G(uint32_t x, uint32_t y, uint32_t z)
{
return (x & z) | (y & ~z);
}
inline uint32_t
H(uint32_t x, uint32_t y, uint32_t z)
{
return x ^ y ^ z;
}
inline uint32_t
I(uint32_t x, uint32_t y, uint32_t z)
{
return y ^ (x | ~z);
}
// rotate_left rotates x left n bits.
inline uint32_t
rotate_left(uint32_t x, int n)
{
return (x << n) | (x >> (32 - n));
}
// FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
// Rotation is separate from addition to prevent recomputation.
inline void
FF(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac)
{
a = rotate_left(a + F(b, c, d) + x + ac, s) + b;
}
inline void
GG(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac)
{
a = rotate_left(a + G(b, c, d) + x + ac, s) + b;
}
inline void
HH(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac)
{
a = rotate_left(a + H(b, c, d) + x + ac, s) + b;
}
inline void
II(uint32_t& a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac)
{
a = rotate_left(a + I(b, c, d) + x + ac, s) + b;
}
// decodes input (unsigned char) into output (uint32_t). Assumes len is a multiple of 4.
void
decode(uint32_t output[], const uint8_t input[], size_type len)
{
for(unsigned int i = 0, j = 0; j < len; i++, j += 4)
output[i] = ((uint32_t) input[j]) | (((uint32_t) input[j + 1]) << 8) |
(((uint32_t) input[j + 2]) << 16) | (((uint32_t) input[j + 3]) << 24);
}
// encodes input (uint32_t) into output (unsigned char). Assumes len is
// a multiple of 4.
void
encode(uint8_t output[], const uint32_t input[], size_type len)
{
for(size_type i = 0, j = 0; j < len; i++, j += 4)
{
output[j] = input[i] & 0xff;
output[j + 1] = (input[i] >> 8) & 0xff;
output[j + 2] = (input[i] >> 16) & 0xff;
output[j + 3] = (input[i] >> 24) & 0xff;
}
}
} // namespace
// apply md5sum algo on a block
void
md5sum::transform(const uint8_t block[blocksize])
{
uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16];
decode(x, block, blocksize);
/* Round 1 */
FF(a, b, c, d, x[0], S11, 0xd76aa478); /* 1 */
FF(d, a, b, c, x[1], S12, 0xe8c7b756); /* 2 */
FF(c, d, a, b, x[2], S13, 0x242070db); /* 3 */
FF(b, c, d, a, x[3], S14, 0xc1bdceee); /* 4 */
FF(a, b, c, d, x[4], S11, 0xf57c0faf); /* 5 */
FF(d, a, b, c, x[5], S12, 0x4787c62a); /* 6 */
FF(c, d, a, b, x[6], S13, 0xa8304613); /* 7 */
FF(b, c, d, a, x[7], S14, 0xfd469501); /* 8 */
FF(a, b, c, d, x[8], S11, 0x698098d8); /* 9 */
FF(d, a, b, c, x[9], S12, 0x8b44f7af); /* 10 */
FF(c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
FF(b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
FF(a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
FF(d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
FF(c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
FF(b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
/* Round 2 */
GG(a, b, c, d, x[1], S21, 0xf61e2562); /* 17 */
GG(d, a, b, c, x[6], S22, 0xc040b340); /* 18 */
GG(c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
GG(b, c, d, a, x[0], S24, 0xe9b6c7aa); /* 20 */
GG(a, b, c, d, x[5], S21, 0xd62f105d); /* 21 */
GG(d, a, b, c, x[10], S22, 0x2441453); /* 22 */
GG(c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
GG(b, c, d, a, x[4], S24, 0xe7d3fbc8); /* 24 */
GG(a, b, c, d, x[9], S21, 0x21e1cde6); /* 25 */
GG(d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
GG(c, d, a, b, x[3], S23, 0xf4d50d87); /* 27 */
GG(b, c, d, a, x[8], S24, 0x455a14ed); /* 28 */
GG(a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
GG(d, a, b, c, x[2], S22, 0xfcefa3f8); /* 30 */
GG(c, d, a, b, x[7], S23, 0x676f02d9); /* 31 */
GG(b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
/* Round 3 */
HH(a, b, c, d, x[5], S31, 0xfffa3942); /* 33 */
HH(d, a, b, c, x[8], S32, 0x8771f681); /* 34 */
HH(c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
HH(b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
HH(a, b, c, d, x[1], S31, 0xa4beea44); /* 37 */
HH(d, a, b, c, x[4], S32, 0x4bdecfa9); /* 38 */
HH(c, d, a, b, x[7], S33, 0xf6bb4b60); /* 39 */
HH(b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
HH(a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
HH(d, a, b, c, x[0], S32, 0xeaa127fa); /* 42 */
HH(c, d, a, b, x[3], S33, 0xd4ef3085); /* 43 */
HH(b, c, d, a, x[6], S34, 0x4881d05); /* 44 */
HH(a, b, c, d, x[9], S31, 0xd9d4d039); /* 45 */
HH(d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
HH(c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
HH(b, c, d, a, x[2], S34, 0xc4ac5665); /* 48 */
/* Round 4 */
II(a, b, c, d, x[0], S41, 0xf4292244); /* 49 */
II(d, a, b, c, x[7], S42, 0x432aff97); /* 50 */
II(c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
II(b, c, d, a, x[5], S44, 0xfc93a039); /* 52 */
II(a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
II(d, a, b, c, x[3], S42, 0x8f0ccc92); /* 54 */
II(c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
II(b, c, d, a, x[1], S44, 0x85845dd1); /* 56 */
II(a, b, c, d, x[8], S41, 0x6fa87e4f); /* 57 */
II(d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
II(c, d, a, b, x[6], S43, 0xa3014314); /* 59 */
II(b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
II(a, b, c, d, x[4], S41, 0xf7537e82); /* 61 */
II(d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
II(c, d, a, b, x[2], S43, 0x2ad7d2bb); /* 63 */
II(b, c, d, a, x[9], S44, 0xeb86d391); /* 64 */
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
// Zeroize sensitive information.
memset(x, 0, sizeof x);
}
md5sum&
md5sum::update(std::string_view inp)
{
return update(inp.data(), inp.length());
}
// md5sum block update operation. Continues an md5sum message-digest
// operation, processing another message block
md5sum&
md5sum::update(const unsigned char input[], size_type length)
{
// compute number of bytes mod 64
size_type index = count[0] / 8 % blocksize;
// Update number of bits
if((count[0] += (length << 3)) < (length << 3)) count[1]++;
count[1] += (length >> 29);
// number of bytes we need to fill in buffer
size_type firstpart = 64 - index;
size_type i = 0;
// transform as many times as possible.
if(length >= firstpart)
{
// fill buffer first, transform
memcpy(&buffer[index], input, firstpart);
transform(buffer.data());
// transform chunks of blocksize (64 bytes)
for(i = firstpart; i + blocksize <= length; i += blocksize)
transform(&input[i]);
index = 0;
}
// buffer remaining input
memcpy(&buffer[index], &input[i], length - i);
return *this;
}
// for convenience provide a verson with signed char
md5sum&
md5sum::update(const char input[], size_type length)
{
return update((const unsigned char*) input, length);
}
// md5sum finalization. Ends an md5sum message-digest operation, writing the
// the message digest and zeroizing the context.
md5sum&
md5sum::finalize()
{
static unsigned char padding[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
if(!finalized)
{
// Save number of bits
unsigned char bits[8];
encode(bits, count.data(), 8);
// pad out to 56 mod 64.
size_type index = count[0] / 8 % 64;
size_type padLen = (index < 56) ? (56 - index) : (120 - index);
update(padding, padLen);
// Append length (before padding)
update(bits, 8);
// Store state in digest
encode(digest.data(), state.data(), 16);
// Zeroize sensitive information.
memset(buffer.data(), 0, sizeof buffer);
memset(count.data(), 0, sizeof count);
finalized = true;
}
return *this;
}
// return hex representation of digest as string
std::string
md5sum::hexdigest() const
{
if(!finalized) return std::string{};
char buf[33];
for(int i = 0; i < 16; i++)
snprintf(buf + i * 2, 3, "%02x", digest[i]);
buf[32] = '\0';
return std::string(buf);
}
std::string
md5sum::hexliteral() const
{
if(!finalized) return std::string{};
auto _oss = std::ostringstream{};
_oss << "X'";
for(auto itr : rawdigest())
_oss << std::hex << std::setw(2) << std::setfill('0') << static_cast<int>(itr);
_oss << "'";
return _oss.str();
}
std::ostream&
operator<<(std::ostream& out, md5sum md5)
{
return out << md5.hexdigest();
}
std::string
compute_md5sum(std::string_view inp)
{
return md5sum{ inp }.finalize().hexdigest();
}
} // namespace common
} // namespace rocprofsys
@@ -0,0 +1,83 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <optional>
#include <string>
#include <type_traits>
namespace rocprofsys
{
inline namespace common
{
namespace traits
{
namespace
{
template <typename T>
struct is_string_literal_impl : std::false_type
{};
template <>
struct is_string_literal_impl<std::string_view> : std::true_type
{};
template <>
struct is_string_literal_impl<const char*> : std::true_type
{};
template <>
struct is_string_literal_impl<char*> : std::true_type
{};
template <>
struct is_string_literal_impl<std::string> : std::true_type
{};
template <typename T>
inline constexpr bool is_string_literal_impl_v = is_string_literal_impl<T>::value;
} // namespace
template <typename T>
constexpr bool
is_string_literal()
{
using Tp = std::decay_t<T>;
return is_string_literal_impl_v<Tp>;
}
template <typename T>
struct is_optional : std::false_type
{};
template <typename T>
struct is_optional<std::optional<T>> : std::true_type
{};
template <typename T>
inline constexpr bool is_optional_v = is_optional<T>::value;
} // namespace traits
} // namespace common
} // namespace rocprofsys
@@ -6,31 +6,38 @@ configure_file(
)
set(core_sources
${CMAKE_CURRENT_LIST_DIR}/agent_manager.cpp
${CMAKE_CURRENT_LIST_DIR}/amd_smi.cpp
${CMAKE_CURRENT_LIST_DIR}/argparse.cpp
${CMAKE_CURRENT_LIST_DIR}/categories.cpp
${CMAKE_CURRENT_LIST_DIR}/config.cpp
${CMAKE_CURRENT_LIST_DIR}/constraint.cpp
${CMAKE_CURRENT_LIST_DIR}/cpu.cpp
${CMAKE_CURRENT_LIST_DIR}/debug.cpp
${CMAKE_CURRENT_LIST_DIR}/dynamic_library.cpp
${CMAKE_CURRENT_LIST_DIR}/exception.cpp
${CMAKE_CURRENT_LIST_DIR}/gpu.cpp
${CMAKE_CURRENT_LIST_DIR}/mproc.cpp
${CMAKE_CURRENT_LIST_DIR}/node_info.cpp
${CMAKE_CURRENT_LIST_DIR}/perf.cpp
${CMAKE_CURRENT_LIST_DIR}/perfetto.cpp
${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.cpp
${CMAKE_CURRENT_LIST_DIR}/amd_smi.cpp
${CMAKE_CURRENT_LIST_DIR}/state.cpp
${CMAKE_CURRENT_LIST_DIR}/timemory.cpp
${CMAKE_CURRENT_LIST_DIR}/utility.cpp
)
set(core_headers
${CMAKE_CURRENT_LIST_DIR}/agent.hpp
${CMAKE_CURRENT_LIST_DIR}/agent_manager.hpp
${CMAKE_CURRENT_LIST_DIR}/amd_smi.hpp
${CMAKE_CURRENT_LIST_DIR}/argparse.hpp
${CMAKE_CURRENT_LIST_DIR}/categories.hpp
${CMAKE_CURRENT_LIST_DIR}/common.hpp
${CMAKE_CURRENT_LIST_DIR}/concepts.hpp
${CMAKE_CURRENT_LIST_DIR}/config.hpp
${CMAKE_CURRENT_LIST_DIR}/constraint.hpp
${CMAKE_CURRENT_LIST_DIR}/cpu.hpp
${CMAKE_CURRENT_LIST_DIR}/debug.hpp
${CMAKE_CURRENT_LIST_DIR}/dynamic_library.hpp
${CMAKE_CURRENT_LIST_DIR}/exception.hpp
@@ -38,11 +45,11 @@ set(core_headers
${CMAKE_CURRENT_LIST_DIR}/locking.hpp
${CMAKE_CURRENT_LIST_DIR}/mpi.hpp
${CMAKE_CURRENT_LIST_DIR}/mproc.hpp
${CMAKE_CURRENT_LIST_DIR}/node_info.hpp
${CMAKE_CURRENT_LIST_DIR}/perf.hpp
${CMAKE_CURRENT_LIST_DIR}/perfetto.hpp
${CMAKE_CURRENT_LIST_DIR}/redirect.hpp
${CMAKE_CURRENT_LIST_DIR}/rocprofiler-sdk.hpp
${CMAKE_CURRENT_LIST_DIR}/amd_smi.hpp
${CMAKE_CURRENT_LIST_DIR}/state.hpp
${CMAKE_CURRENT_LIST_DIR}/timemory.hpp
${CMAKE_CURRENT_LIST_DIR}/utility.hpp
@@ -61,6 +68,7 @@ target_sources(
add_subdirectory(binary)
add_subdirectory(components)
add_subdirectory(containers)
add_subdirectory(rocpd)
target_include_directories(
rocprofiler-systems-core-library
@@ -95,6 +103,22 @@ target_link_libraries(
$<BUILD_INTERFACE:$<IF:$<BOOL:${ROCPROFSYS_BUILD_LTO}>,rocprofiler-systems::rocprofiler-systems-lto,>>
)
file(GLOB ROCPD_SCHEMA_FILES "${CMAKE_CURRENT_LIST_DIR}/rocpd/data_storage/schema/*.sql")
foreach(_SRC ${ROCPD_SCHEMA_FILES})
cmake_path(GET _SRC FILENAME _BASE)
configure_file(
${_SRC}
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/${_BASE}
COPYONLY
)
install(
FILES ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/${_BASE}
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}
COMPONENT core
)
endforeach()
set_target_properties(
rocprofiler-systems-core-library
PROPERTIES OUTPUT_NAME ${BINARY_NAME_PREFIX}-core
@@ -0,0 +1,62 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <cstddef>
#include <cstdint>
#include <string>
#if ROCPROFSYS_USE_ROCM > 0
# include <amd_smi/amdsmi.h>
# include <rocprofiler-sdk/agent.h>
#endif
namespace rocprofsys
{
enum class agent_type : uint8_t
{
CPU, ///< Agent type is a CPU
GPU ///< Agent type is a GPU
};
struct agent
{
agent_type type;
uint64_t id;
uint32_t node_id;
int32_t logical_node_id;
int32_t logical_node_type_id;
std::string name;
std::string model_name;
std::string vendor_name;
std::string product_name;
size_t device_id{ 0 };
size_t base_id{ 0 };
#if ROCPROFSYS_USE_ROCM > 0
amdsmi_processor_handle smi_handle = nullptr;
#endif
};
} // namespace rocprofsys
@@ -0,0 +1,137 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "agent_manager.hpp"
#include "debug.hpp"
#include <algorithm>
#include <iterator>
namespace rocprofsys
{
agent_manager&
agent_manager::get_instance()
{
static agent_manager instance;
return instance;
}
void
agent_manager::insert_agent(agent& _agent)
{
ROCPROFSYS_VERBOSE(
3, "Inserting agent with device handle: %lu, and agent id: %ld, device type: %s",
_agent.device_id,
(_agent.type == agent_type::GPU ? _gpu_agents_cnt : _cpu_agents_cnt),
(_agent.type == agent_type::GPU ? "GPU" : "CPU"));
_agent.device_id =
(_agent.type == agent_type::GPU ? _gpu_agents_cnt++ : _cpu_agents_cnt++);
_agents.emplace_back(std::make_shared<agent>(_agent));
}
const agent&
agent_manager::get_agent_by_id(size_t device_id, agent_type type) const
{
ROCPROFSYS_VERBOSE(3, "Getting agent for device id: %ld, type %s\n", device_id,
(type == agent_type::GPU) ? "GPU" : "CPU");
auto _agent =
std::find_if(_agents.begin(), _agents.end(), [&](const auto& agent_ptr) {
return agent_ptr->type == type && agent_ptr->device_id == device_id;
});
if(_agent == _agents.end())
{
std::ostringstream oss;
oss << "Agent not found for device id: " << device_id
<< ", type: " << (type == agent_type::GPU ? "GPU" : "CPU");
throw std::out_of_range(oss.str());
}
return **_agent;
}
const agent&
agent_manager::get_agent_by_handle(uint64_t device_handle, agent_type type) const
{
ROCPROFSYS_VERBOSE(3, "Getting agent for device handle: %ld, type %s\n",
device_handle, (type == agent_type::GPU ? "GPU" : "CPU"));
auto _agent =
std::find_if(_agents.begin(), _agents.end(), [&](const auto& agent_ptr) {
return agent_ptr->type == type && agent_ptr->id == device_handle;
});
if(_agent == _agents.end())
{
std::ostringstream oss;
oss << "Agent not found for device handle: " << device_handle
<< ", type: " << (type == agent_type::GPU ? "GPU" : "CPU");
throw std::out_of_range(oss.str());
}
return **_agent;
}
const agent&
agent_manager::get_agent_by_handle(size_t device_handle) const
{
ROCPROFSYS_VERBOSE(3, "Getting agent for device handle: %ld\n", device_handle);
auto _agent =
std::find_if(_agents.begin(), _agents.end(), [&](const auto& agent_ptr) {
return agent_ptr->id == device_handle;
});
if(_agent == _agents.end())
{
std::ostringstream oss;
oss << "Agent not found for device handle: " << device_handle;
throw std::out_of_range(oss.str());
}
return **_agent;
}
std::vector<std::shared_ptr<agent>>
agent_manager::get_agents_by_type(agent_type type) const
{
ROCPROFSYS_VERBOSE(3, "Getting agent for device type: %s\n",
type == agent_type::GPU ? "GPU" : "CPU");
std::vector<std::shared_ptr<agent>> agents;
std::copy_if(std::begin(_agents), std::end(_agents), std::back_inserter(agents),
[&type](const auto& agent_ptr) { return agent_ptr->type == type; });
return agents;
}
std::vector<std::shared_ptr<agent>>
agent_manager::get_agents() const
{
return _agents;
}
size_t
agent_manager::get_gpu_agents_count() const
{
return _gpu_agents_cnt;
}
size_t
agent_manager::get_cpu_agents_count() const
{
return _cpu_agents_cnt;
}
} // namespace rocprofsys
@@ -0,0 +1,62 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <cstddef>
#include <memory>
#include <vector>
#include "agent.hpp"
namespace rocprofsys
{
struct agent_manager
{
static agent_manager& get_instance();
agent_manager(const agent_manager&) = delete;
agent_manager& operator=(const agent_manager&) = delete;
agent_manager(agent_manager&&) = delete;
agent_manager& operator=(agent_manager&&) = delete;
~agent_manager() = default;
void insert_agent(agent& agent);
const agent& get_agent_by_id(size_t device_id, agent_type type) const;
const agent& get_agent_by_handle(size_t device_id, agent_type type) const;
const agent& get_agent_by_handle(size_t device_handle) const;
std::vector<std::shared_ptr<agent>> get_agents_by_type(agent_type type) const;
std::vector<std::shared_ptr<agent>> get_agents() const;
size_t get_gpu_agents_count() const;
size_t get_cpu_agents_count() const;
private:
std::vector<std::shared_ptr<agent>> _agents;
size_t _gpu_agents_cnt{ 0 };
size_t _cpu_agents_cnt{ 0 };
agent_manager() = default;
};
} // namespace rocprofsys
@@ -0,0 +1,353 @@
// MIT License
//
// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <algorithm>
#include <array>
#include <bitset>
#include <chrono>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <limits>
#include <mutex>
#include <sstream>
#include <string>
#include <type_traits>
#include <unistd.h>
#include <unordered_map>
#include <vector>
#include "core/benchmark/category.hpp"
#include "core/debug.hpp"
namespace rocprofsys
{
namespace benchmark
{
namespace
{
template <bool enabled, typename category_enum, category_enum... enabled_categories>
struct benchmark_impl
{
template <category_enum... categories>
struct scope
{
scope(const scope&) = delete;
scope& operator=(const scope&) = delete;
~scope() = default;
protected:
scope() = default;
scope(scope&&) = default;
scope& operator=(scope&&) = default;
};
template <category_enum... categories>
static void start()
{}
template <category_enum... categories>
static void end()
{}
template <category_enum... categories>
[[nodiscard]] static scope<categories...> scoped_trace()
{
return scope<categories...>{};
}
static void init_from_env(const char* = nullptr) {}
static void show_results() {}
};
using tid_t = __pid_t;
struct indexed_category
{
size_t category;
tid_t thread_id;
friend bool operator==(const indexed_category& lhs, const indexed_category& rhs)
{
return lhs.category == rhs.category && lhs.thread_id == rhs.thread_id;
}
};
struct indexed_category_hash
{
size_t operator()(const indexed_category& p) const noexcept
{
std::size_t hash1 = std::hash<size_t>{}(p.category);
std::size_t hash2 = std::hash<size_t>{}(p.thread_id);
return hash1 ^ (hash2 << 1);
}
};
template <typename category_enum, category_enum... enabled_categories>
struct benchmark_impl<true, category_enum, enabled_categories...>
{
static_assert(std::is_enum_v<category_enum>, "category_enum must be an enum");
public:
using clock = std::chrono::high_resolution_clock;
using time_point = clock::time_point;
static constexpr size_t _max_categories = static_cast<size_t>(category_enum::count);
template <category_enum... categories>
struct scope
{
friend benchmark_impl;
public:
scope(const scope&) = delete;
scope& operator=(const scope&) = delete;
~scope() { end<categories...>(); }
protected:
scope() { start<categories...>(); }
scope(scope&&) = default;
scope& operator=(scope&&) = default;
};
template <category_enum... categories>
static void start()
{
static const thread_local auto _thread_id = gettid();
const auto now = clock::now();
std::lock_guard lock(m_mutex);
(..., (is_category_defined<categories>([&] {
if(m_enabled.test(to_index(categories)))
m_started[{ to_index(categories), _thread_id }] = now;
})));
}
template <category_enum... categories>
static void end()
{
static const thread_local auto _thread_id = getpid();
const auto _end_time = clock::now();
std::lock_guard lock(m_mutex);
(..., (is_category_defined<categories>([&] {
if(m_enabled.test(to_index(categories)))
end_category(_end_time, categories, _thread_id);
})));
}
template <category_enum... categories>
[[nodiscard]] static scope<categories...> scoped_trace()
{
return scope<categories...>{};
}
static void init_from_env(const char* envVar = "ROCPROFSYS_BENCHMARK_CATEGORIES")
{
std::lock_guard lock(m_mutex);
const auto* env = std::getenv(envVar);
if(env == nullptr || std::string(env).empty())
{
ROCPROFSYS_WARNING(1, "No BENCHMARK categories specified in environment "
"variable ROCPROFSYS_BENCHMARK_CATEGORIES.\n");
return;
}
std::string _str(env);
std::istringstream ss(_str);
std::string token;
while(std::getline(ss, token, ','))
{
token.erase(0, token.find_first_not_of(" \t"));
token.erase(token.find_last_not_of(" \t") + 1);
for(category_enum cat : compiledCategories)
{
if(to_string(cat) == token)
{
m_enabled.set(to_index(cat));
}
}
}
}
static void show_results()
{
std::lock_guard lock(m_mutex);
std::vector<std::pair<category_enum, result_data>> sorted;
for(category_enum cat : compiledCategories)
{
const auto& data = m_results[to_index(cat)];
if(data.count > 0)
{
sorted.emplace_back(cat, data);
}
}
std::sort(sorted.begin(), sorted.end(), [](const auto& a, const auto& b) {
return a.second.total_time > b.second.total_time;
});
constexpr uint32_t _category = 30;
constexpr uint32_t _calls = 8;
constexpr uint32_t _total = 12;
constexpr uint32_t _avg = 10;
constexpr uint32_t _min = 10;
constexpr uint32_t _max = 10;
std::cout << "\033[32m"
<< std::string(_category + _calls + _total + _avg + _min + _max, '=')
<< "\n";
std::cout << "Benchmark Results (Sorted by Total Time):\n";
std::cout << std::string(_category + _calls + _total + _avg + _min + _max, '-')
<< "\n";
std::cout << std::left << std::setw(_category) << "Category" << std::right
<< std::setw(_calls) << "Calls" << std::setw(_total) << "Total(ms)"
<< std::setw(_avg) << "Avg(us)" << std::setw(_min) << "Min(us)"
<< std::setw(_max) << "Max(us)" << "\n";
std::cout << std::string(_category + _calls + _total + _avg + _min + _max, '-')
<< "\n";
for(const auto& [cat, data] : sorted)
{
double totalMs = static_cast<double>(data.total_time) / 1000.0;
double avgUs = static_cast<double>(data.total_time) / data.count;
std::cout << std::left << std::setw(_category) << to_string(cat) << std::right
<< std::setw(_calls) << data.count << std::setw(_total)
<< std::fixed << std::setprecision(3) << totalMs << std::setw(_avg)
<< std::fixed << std::setprecision(1) << avgUs << std::setw(_min)
<< data.min_time << std::setw(_max) << data.max_time << "\n";
}
std::cout << std::string(_category + _calls + _total + _avg + _min + _max, '=')
<< "\033[0m" << "\n\n";
}
private:
struct result_data
{
uint64_t total_time = 0;
size_t count = 0;
uint64_t min_time = std::numeric_limits<uint64_t>::max();
uint64_t max_time = std::numeric_limits<uint64_t>::min();
void update(uint64_t duration)
{
total_time += duration;
count += 1;
if(duration < min_time) min_time = duration;
if(duration > max_time) max_time = duration;
}
};
static constexpr size_t to_index(category_enum cat)
{
return static_cast<size_t>(cat);
}
static void end_category(const time_point& end_time, category_enum cat,
const tid_t thread_id)
{
const size_t _idx = to_index(cat);
auto _it = m_started.find({ _idx, thread_id });
if(_it == m_started.end())
{
ROCPROFSYS_WARNING(1, "Benchmark error: missing start time for category!\n");
return;
}
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end_time - _it->second)
.count();
m_started.erase(_it);
m_results[_idx].update(duration);
}
template <category_enum Cat, typename Func>
static constexpr void is_category_defined(Func&& f)
{
if constexpr(((Cat == enabled_categories) || ...))
{
f();
}
}
static constexpr std::array<category_enum, sizeof...(enabled_categories)>
compiledCategories = { enabled_categories... };
static inline std::unordered_map<indexed_category, time_point, indexed_category_hash>
m_started;
static inline std::array<result_data, _max_categories> m_results{};
static inline std::bitset<_max_categories> m_enabled;
static inline std::mutex m_mutex;
};
#ifdef ROCPROFSYS_ENABLE_BENCHMARK
using _benchmark_impl = benchmark::benchmark_impl<
static_cast<bool>(ROCPROFSYS_ENABLE_BENCHMARK), benchmark::category,
benchmark::category::kernel_dispatch, benchmark::category::memory_copy,
benchmark::category::memory_allocate, benchmark::category::db_entry_kernel_dispatch,
benchmark::category::db_entry_memory_copy,
benchmark::category::db_entry_memory_allocate,
benchmark::category::perfetto_kernel_dispatch,
benchmark::category::sdk_tool_buffered_tracing>;
#else
using _benchmark_impl = benchmark::benchmark_impl<false, benchmark::category>;
#endif
} // namespace
template <category... categories>
void
start()
{
_benchmark_impl::template start<categories...>();
}
template <category... categories>
void
end()
{
_benchmark_impl::template end<categories...>();
}
template <category... categories>
[[nodiscard]] auto
scoped_trace()
{
return _benchmark_impl::template scoped_trace<categories...>();
}
inline void
init_from_env(const char* envVar = "BENCHMARK_CATEGORIES")
{
_benchmark_impl::init_from_env(envVar);
}
inline void
show_results()
{
_benchmark_impl::show_results();
}
} // namespace benchmark
} // namespace rocprofsys
@@ -0,0 +1,68 @@
// Copyright (c) 2018-2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#include <string_view>
namespace rocprofsys
{
namespace benchmark
{
enum class category
{
kernel_dispatch,
db_entry_kernel_dispatch,
memory_copy,
db_entry_memory_copy,
memory_allocate,
db_entry_memory_allocate,
perfetto_kernel_dispatch,
sdk_tool_buffered_tracing,
count
};
constexpr std::string_view
to_string(category cat)
{
switch(cat)
{
case category::kernel_dispatch: return "kernel_dispatch";
case category::db_entry_kernel_dispatch: return "db_entry_kernel_dispatch";
case category::memory_copy: return "memory_copy";
case category::memory_allocate: return "memory_allocate";
case category::db_entry_memory_copy: return "db_entry_memory_copy";
case category::db_entry_memory_allocate: return "db_entry_memory_allocate";
case category::perfetto_kernel_dispatch: return "perfetto_kernel_dispatch";
case category::sdk_tool_buffered_tracing: return "sdk_tool_buffered_tracing";
default: return "unknown";
}
}
} // namespace benchmark
} // namespace rocprofsys
@@ -312,6 +312,9 @@ configure_settings(bool _init)
"Enable causal profiling analysis", false, "backend",
"causal", "analysis");
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_ROCPD", "Enable rocpd backend", false,
"backend", "rocpd");
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_ROCM",
"Enable ROCm API and kernel tracing", true, "backend",
"rocm");
@@ -1246,10 +1249,17 @@ configure_signal_handler(const std::shared_ptr<settings>& _config)
static auto _dyninst_trampoline_signal =
getenv("DYNINST_SIGNAL_TRAMPOLINE_SIGILL") ? SIGILL : SIGTRAP;
static auto root_pid =
get_env<pid_t>("ROCPROFSYS_ROOT_PROCESS", process::get_id(), false);
if(_config->get_enable_signal_handler())
{
tim::signals::disable_signal_detection();
signal_settings::enable(sys_signal::Interrupt);
auto is_child_process = root_pid != getpid();
if(is_child_process)
{
signal_settings::enable(sys_signal::Terminate);
}
signal_settings::set_exit_action(rocprofsys_exit_action);
signal_settings::check_environment();
auto default_signals = signal_settings::get_default();
@@ -2347,6 +2357,40 @@ get_tmpdir()
return static_cast<tim::tsettings<std::string>&>(*_v->second).get();
}
std::string
get_database_absolute_path(std::string_view database_name)
{
const auto* _existing_path = std::getenv("ROCPROFSYS_DATABASE_DIR");
auto _dir = _existing_path ? std::string{ _existing_path } : std::string{};
auto _ext = std::string{ "db" };
auto _cfg = settings::compose_filename_config{ settings::use_output_suffix(),
settings::default_process_suffix(),
false, _dir };
const auto get_path = [](const std::string& path) {
size_t last_slash = path.find_last_of("/\\");
return (last_slash != std::string::npos) ? path.substr(0, last_slash + 1)
: std::string{};
};
auto _val = settings::compose_output_filename(std::string(database_name), _ext, _cfg);
_dir = get_path(_val);
setenv("ROCPROFSYS_DATABASE_DIR", _dir.c_str(), 1);
if(!_val.empty() && _val.at(0) != '/')
return settings::format(JOIN('/', "%env{PWD}%", _val), get_config()->get_tag());
return _val;
}
bool&
get_use_rocpd()
{
static auto _v = get_config()->at("ROCPROFSYS_USE_ROCPD");
return static_cast<tim::tsettings<bool>&>(*_v).get();
}
tmp_file::tmp_file(std::string _v)
: filename{ std::move(_v) }
{}
@@ -358,6 +358,12 @@ get_use_tmp_files();
std::string
get_tmpdir();
std::string
get_database_absolute_path(std::string_view database_name);
bool&
get_use_rocpd() ROCPROFSYS_HOT;
struct tmp_file
{
tmp_file(std::string);
@@ -0,0 +1,168 @@
#include "cpu.hpp"
#include "agent_manager.hpp"
#include <algorithm>
#include <cstdint>
#include <fstream>
#include <functional>
#include <unordered_map>
namespace rocprofsys
{
namespace cpu
{
std::vector<cpu_info>
process_cpu_info_data()
{
std::vector<cpu_info> cpu_data;
std::ifstream cpuinfo_file("/proc/cpuinfo");
if(!cpuinfo_file.is_open())
{
return cpu_data;
}
std::string line;
cpu_info current_cpu;
bool has_processor_entry = false;
auto parse_long = [](const std::string& value) -> long {
try
{
return std::stol(value);
} catch(const std::exception&)
{
return -1;
}
};
auto trim_whitespace = [](const std::string& str) -> std::string {
size_t start = str.find_first_not_of(" \t");
if(start == std::string::npos) return "";
size_t end = str.find_last_not_of(" \t");
return str.substr(start, end - start + 1);
};
static const std::unordered_map<std::string,
std::function<void(cpu_info&, const std::string&)>>
field_parsers = {
{ "processor",
[&parse_long](cpu_info& cpu, const std::string& val) {
cpu.processor = parse_long(val);
} },
{ "cpu family",
[&parse_long](cpu_info& cpu, const std::string& val) {
cpu.family = parse_long(val);
} },
{ "model",
[&parse_long](cpu_info& cpu, const std::string& val) {
cpu.model = parse_long(val);
} },
{ "physical id",
[&parse_long](cpu_info& cpu, const std::string& val) {
cpu.physical_id = parse_long(val);
} },
{ "core id",
[&parse_long](cpu_info& cpu, const std::string& val) {
cpu.core_id = parse_long(val);
} },
{ "apicid",
[&parse_long](cpu_info& cpu, const std::string& val) {
cpu.apicid = parse_long(val);
} },
{ "vendor_id",
[](cpu_info& cpu, const std::string& val) { cpu.vendor_id = val; } },
{ "model name",
[](cpu_info& cpu, const std::string& val) { cpu.model_name = val; } }
};
while(std::getline(cpuinfo_file, line))
{
if(line.empty())
{
if(has_processor_entry)
{
cpu_data.push_back(current_cpu);
return cpu_data; // Return immediately after first core
}
continue;
}
size_t colon_pos = line.find(':');
if(colon_pos == std::string::npos)
{
continue;
}
std::string key = trim_whitespace(line.substr(0, colon_pos));
std::string value = trim_whitespace(line.substr(colon_pos + 1));
std::transform(key.begin(), key.end(), key.begin(), ::tolower);
auto it = field_parsers.find(key);
if(it != field_parsers.end())
{
it->second(current_cpu, value);
if(key == "processor")
{
has_processor_entry = true;
}
}
}
if(has_processor_entry)
{
cpu_data.push_back(current_cpu);
}
return cpu_data;
}
std::vector<cpu_info>
get_cpu_info()
{
static auto _v = process_cpu_info_data();
return _v;
}
size_t
device_count()
{
auto cpu_data = get_cpu_info();
return cpu_data.size();
}
void
query_cpu_agents()
{
int32_t id_count = 0;
uint32_t node_count = 0;
uint32_t cpu_count = 0;
if(device_count() == 0)
{
return;
}
auto& _agent_manager = agent_manager::get_instance();
auto cpu_data = get_cpu_info();
for(auto& cpu : cpu_data)
{
auto node_id = node_count++;
auto logical_id = id_count++;
auto id = cpu_count++;
auto cur_agent = agent{ agent_type::CPU,
id,
node_id,
logical_id,
static_cast<int32_t>(id),
cpu.model_name,
cpu.model_name,
cpu.vendor_id,
"" };
_agent_manager.insert_agent(cur_agent);
}
}
} // namespace cpu
} // namespace rocprofsys
@@ -0,0 +1,57 @@
// MIT License
//
// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <string>
#include <vector>
namespace rocprofsys
{
namespace cpu
{
struct cpu_info
{
long processor = -1;
long family = -1;
long model = -1;
long physical_id = -1;
long core_id = -1;
long apicid = -1;
std::string vendor_id = {};
std::string model_name = {};
};
std::vector<cpu_info>
process_cpu_info_data();
std::vector<cpu_info>
get_cpu_info();
size_t
device_count();
void
query_cpu_agents();
} // namespace cpu
} // namespace rocprofsys
@@ -20,6 +20,7 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "agent.hpp"
#define ROCPROFILER_SDK_CEREAL_NAMESPACE_BEGIN \
namespace tim \
{ \
@@ -41,6 +42,10 @@
#include <timemory/manager.hpp>
#include <string>
#include "core/agent_manager.hpp"
#if ROCPROFSYS_USE_ROCM > 0
# include <amd_smi/amdsmi.h>
# include <rocprofiler-sdk/agent.h>
@@ -108,18 +113,31 @@ amdsmi_init()
}
#endif // ROCPROFSYS_USE_ROCM > 0
int32_t
query_rocm_gpu_agents()
size_t
query_rocm_agents()
{
int32_t _dev_cnt = 0;
size_t _dev_cnt = 0;
#if ROCPROFSYS_USE_ROCM > 0
auto iterator = [](rocprofiler_agent_version_t /*version*/, const void** agents,
size_t num_agents, void* user_data) -> rocprofiler_status_t {
auto* _cnt = static_cast<int32_t*>(user_data);
auto iterator = []([[maybe_unused]] rocprofiler_agent_version_t version,
const void** agents, size_t num_agents,
[[maybe_unused]] void* user_data) -> rocprofiler_status_t {
auto& _agent_manager = agent_manager::get_instance();
for(size_t i = 0; i < num_agents; ++i)
{
const auto* _agent = static_cast<const rocprofiler_agent_v0_t*>(agents[i]);
if(_agent && _agent->type == ROCPROFILER_AGENT_TYPE_GPU) *_cnt += 1;
const auto* _agent = static_cast<const rocprofiler_agent_v0_t*>(agents[i]);
auto cur_agent = agent{
(_agent->type == ROCPROFILER_AGENT_TYPE_GPU ? agent_type::GPU
: agent_type::CPU),
_agent->device_id,
_agent->node_id,
_agent->logical_node_id,
_agent->logical_node_type_id,
std::string(_agent->name),
std::string(_agent->vendor_name),
std::string(_agent->product_name),
std::string(_agent->model_name),
};
_agent_manager.insert_agent(cur_agent);
}
return ROCPROFILER_STATUS_SUCCESS;
};
@@ -127,15 +145,14 @@ query_rocm_gpu_agents()
try
{
rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, iterator,
sizeof(rocprofiler_agent_v0_t), &_dev_cnt);
sizeof(rocprofiler_agent_v0_t), nullptr);
} catch(std::exception& _e)
{
ROCPROFSYS_BASIC_VERBOSE(
1, "Exception thrown getting the rocm agents: %s. _dev_cnt=%d\n", _e.what(),
1, "Exception thrown getting the rocm agents: %s. _dev_cnt=%ld\n", _e.what(),
_dev_cnt);
}
// rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, iterator,
// sizeof(rocprofiler_agent_v0_t), &_dev_cnt);
_dev_cnt = agent_manager::get_instance().get_gpu_agents_count();
#endif
return _dev_cnt;
}
@@ -145,7 +162,7 @@ int
device_count()
{
#if ROCPROFSYS_USE_ROCM > 0
static int _num_devices = query_rocm_gpu_agents();
static int _num_devices = query_rocm_agents();
return _num_devices;
#else
return 0;
@@ -174,20 +191,31 @@ add_device_metadata(ArchiveT& ar)
#if ROCPROFSYS_USE_ROCM > 0
using agent_vec_t = std::vector<rocprofiler_agent_v0_t>;
auto _agents_vec = agent_vec_t{};
auto iterator = [](rocprofiler_agent_version_t /*version*/, const void** agents,
size_t num_agents, void* user_data) -> rocprofiler_status_t {
auto* _agents_vec_v = static_cast<agent_vec_t*>(user_data);
_agents_vec_v->reserve(num_agents);
auto iterator_cb = []([[maybe_unused]] rocprofiler_agent_version_t version,
const void** agents, size_t num_agents,
[[maybe_unused]] void* user_data) -> rocprofiler_status_t {
auto* agents_vec = static_cast<agent_vec_t*>(user_data);
for(size_t i = 0; i < num_agents; ++i)
{
const auto* _agent = static_cast<const rocprofiler_agent_v0_t*>(agents[i]);
if(_agent) _agents_vec_v->emplace_back(*_agent);
if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
{
agents_vec->push_back(*_agent);
}
}
return ROCPROFILER_STATUS_SUCCESS;
};
rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, iterator,
sizeof(rocprofiler_agent_v0_t), &_agents_vec);
auto _agents_vec = agent_vec_t{};
try
{
rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, iterator_cb,
sizeof(rocprofiler_agent_v0_t), &_agents_vec);
} catch(std::exception& _e)
{
ROCPROFSYS_BASIC_VERBOSE(1, "Exception thrown getting the rocm agents: %s.\n",
_e.what());
}
ar(make_nvp("rocm_agents", _agents_vec));
#else
@@ -228,6 +256,7 @@ get_processor_handles()
{
uint32_t socket_count;
uint32_t processor_count;
processors::processors_list.clear();
// Passing nullptr will return us the number of sockets available for read in this
// system
@@ -0,0 +1,72 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "node_info.hpp"
#include "debug.hpp"
#include <fstream>
#include <iostream>
#include <limits>
#include <sys/utsname.h>
namespace rocprofsys
{
node_info::node_info()
{
auto ifs = std::ifstream{ "/etc/machine-id" };
if(!ifs.is_open())
{
ROCPROFSYS_WARNING(0, "Error: Unable to open /etc/machine-id!");
return;
}
if(!(ifs >> machine_id) || machine_id.empty())
{
ROCPROFSYS_WARNING(0, "Error: Unable to read machine ID from /etc/machine-id!");
}
hash = std::hash<std::string>{}(machine_id) % std::numeric_limits<int64_t>::max();
id = hash % std::numeric_limits<size_t>::max();
struct utsname _sys_info;
if(uname(&_sys_info))
{
ROCPROFSYS_WARNING(0, "Error: Unable to get system information!");
return;
}
system_name = _sys_info.sysname;
node_name = _sys_info.nodename;
release = _sys_info.release;
version = _sys_info.version;
machine = _sys_info.machine;
domain_name = _sys_info.domainname;
}
node_info&
node_info::get_instance()
{
static node_info instance;
return instance;
}
} // namespace rocprofsys
@@ -0,0 +1,58 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <cstdint>
#include <string>
namespace rocprofsys
{
struct node_info
{
private:
node_info();
public:
~node_info() = default;
node_info(const node_info&) = default;
node_info(node_info&&) noexcept = default;
node_info& operator=(const node_info&) = default;
node_info& operator=(node_info&&) noexcept = default;
static node_info& get_instance();
uint64_t id = 0;
uint64_t hash = 0;
std::string machine_id = {};
std::string system_name = {};
std::string node_name = {};
std::string release = {};
std::string version = {};
std::string machine = {};
std::string domain_name = {};
};
const node_info&
get_node_info();
} // namespace rocprofsys
@@ -0,0 +1,13 @@
set(rocpd_sources
${CMAKE_CURRENT_LIST_DIR}/data_processor.cpp
${CMAKE_CURRENT_LIST_DIR}/json.cpp
)
set(rocpd_headers
${CMAKE_CURRENT_LIST_DIR}/data_processor.hpp
${CMAKE_CURRENT_LIST_DIR}/json.hpp
)
target_sources(rocprofiler-systems-core-library PRIVATE ${rocpd_sources} ${rocpd_headers})
add_subdirectory(data_storage)
@@ -0,0 +1,674 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "data_processor.hpp"
#include "core/rocpd/data_storage/database.hpp"
#include "core/rocpd/data_storage/table_insert_query.hpp"
#include "debug.hpp"
namespace rocprofsys
{
namespace rocpd
{
data_processor::data_processor()
{
data_storage::database::get_instance().initialize_schema();
_upid = data_storage::database::get_instance().get_upid();
// Initialize event statement
initialize_event_stmt();
initialize_pmc_event_stmt();
initialize_sample_stmt();
initialize_region_stmt();
initialize_kernel_dispatch_stmt();
initialize_memory_copy_stmt();
initialize_code_object_stmt();
initialize_kernel_symbol_stmt();
initialize_metadata();
initialize_args_stmt();
initialize_memory_alloc_stmt();
}
data_processor&
data_processor::get_instance()
{
static data_processor _instance;
return _instance;
}
void
data_processor::initialize_metadata()
{
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
query.set_table_name("rocpd_metadata_" + _upid)
.set_columns("tag", "value")
.set_values("upid", _upid)
.get_query_string());
}
size_t
data_processor::insert_string(const char* str)
{
std::lock_guard<std::mutex> lock(_data_mutex);
auto it = _string_map.find(str);
if(it != _string_map.end()) return _string_map.at(str);
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
query.set_table_name("rocpd_string_" + _upid)
.set_columns("guid", "string")
.set_values(_upid, str)
.get_query_string());
const auto string_id = data_storage::database::get_instance().get_last_insert_id();
_string_map.emplace(str, string_id);
return string_id;
}
void
data_processor::insert_node_info(size_t node_id, size_t hash, const char* machine_id,
const char* system_name, const char* hostname,
const char* release, const char* version,
const char* hardware_name, const char* domain_name)
{
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
query.set_table_name("rocpd_info_node_" + _upid)
.set_columns("id", "guid", "hash", "machine_id", "system_name", "hostname",
"release", "version", "hardware_name", "domain_name")
.set_values(node_id, _upid, hash, machine_id, system_name, hostname, release,
version, hardware_name, domain_name)
.get_query_string());
}
void
data_processor::insert_process_info(size_t nid, size_t ppid, size_t pid, size_t init,
size_t fini, size_t start, size_t end,
const char* command, const char* environment,
const char* extdata)
{
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
query.set_table_name("rocpd_info_process_" + _upid)
.set_columns("id", "guid", "nid", "ppid", "pid", "init", "fini", "start",
"end", "command", "environment", "extdata")
.set_values(pid, _upid, nid, ppid, pid, init, fini, start, end, command,
environment, extdata)
.get_query_string());
}
size_t
data_processor::insert_agent(size_t node_id, size_t pid, const char* agent_type,
size_t absolute_index, size_t logical_index,
size_t type_index, uint64_t uuid, const char* name,
const char* model_name, const char* vendor_name,
const char* product_name, const char* user_name,
const char* extdata)
{
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
query.set_table_name("rocpd_info_agent_" + _upid)
.set_columns("guid", "nid", "pid", "type", "absolute_index", "logical_index",
"type_index", "uuid", "name", "model_name", "vendor_name",
"product_name", "user_name", "extdata")
.set_values(_upid, node_id, pid, agent_type, absolute_index, logical_index,
type_index, uuid, name, model_name, vendor_name, product_name,
user_name, extdata)
.get_query_string());
return data_storage::database::get_instance().get_last_insert_id();
}
void
data_processor::insert_track(const char* track_name, size_t node_id, size_t process_id,
std::optional<size_t> thread_id, const char* extdata)
{
if(_tracks.find(track_name) != _tracks.end())
{
ROCPROFSYS_WARNING(2, "Fail to add track %s, already exist!\n", track_name);
return;
}
auto name_id = insert_string(track_name);
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
query.set_table_name("rocpd_track_" + _upid)
.set_columns("guid", "nid", "pid", "tid", "name_id", "extdata")
.set_values(_upid, node_id, process_id, thread_id, name_id, extdata)
.get_query_string());
auto track_id = data_storage::database::get_instance().get_last_insert_id();
_tracks[track_name] = track_name_map{ track_id, name_id };
}
void
data_processor::insert_pmc_description(
size_t node_id, size_t process_id, size_t agent_id, const char* target_arch,
size_t event_code, size_t instance_id, const char* name, const char* symbol,
const char* description, const char* long_description, const char* component,
const char* units, const char* value_type, const char* block, const char* expression,
uint32_t is_constant, uint32_t is_derived, const char* extdata)
{
auto it = _pmc_descriptor_map.find({ agent_id, name });
if(it != _pmc_descriptor_map.end())
{
ROCPROFSYS_WARNING(0,
"Insert PMC description failed! Error: PMC descriptor "
"(name:%s) (ID:%lu) already exist!\n",
name, agent_id);
return;
}
data_storage::queries::table_insert_query query_builder;
auto query =
query_builder.set_table_name("rocpd_info_pmc_" + _upid)
.set_columns("guid", "nid", "pid", "agent_id", "target_arch", "event_code",
"instance_id", "name", "symbol", "description",
"long_description", "component", "units", "value_type", "block",
"expression", "is_constant", "is_derived", "extdata")
.set_values(_upid, node_id, process_id, agent_id, target_arch, event_code,
instance_id, name, symbol, description, long_description,
component, units, value_type, block, expression, is_constant,
is_derived, extdata)
.get_query_string();
data_storage::database::get_instance().execute_query(query);
auto pmc_id = data_storage::database::get_instance().get_last_insert_id();
_pmc_descriptor_map.emplace(
std::pair<pmc_identifier, size_t>{ { agent_id, name }, pmc_id });
}
void
data_processor::insert_pmc_event(size_t event_id, size_t agent_id, const char* pmc_name,
double value, const char* extdata)
{
ROCPROFSYS_VERBOSE(2,
"Insert PMC event: id %ld, agent id: %ld, pmc name: %s, value: "
"%lf, extdata: %s\n",
event_id, agent_id, pmc_name, value, extdata);
auto it = _pmc_descriptor_map.find({ agent_id, pmc_name });
if(it == _pmc_descriptor_map.end())
{
ROCPROFSYS_WARNING(0,
"Insert PMC event failed! Error: non-existing PMC description "
"agent id: %ld, pmc name: %s !\n",
agent_id, pmc_name);
return;
}
const auto pmc_description_id = it->second;
_insert_pmc_event_statement(_upid.c_str(), event_id, pmc_description_id, value,
extdata);
}
void
data_processor::insert_sample(const char* track, uint64_t timestamp, size_t event_id,
const char* extdata)
{
ROCPROFSYS_VERBOSE(
3, "Insert sample: track: %s, timestamp: %lu, event id: %ld, extdata: %s\n",
track, timestamp, event_id, extdata);
auto it = _tracks.find(track);
if(it == _tracks.end())
{
ROCPROFSYS_WARNING(0, "Insert sample failed! Error: Unexisting track %s!\n",
track);
return;
}
auto track_info = it->second;
_insert_sample_statement(_upid.c_str(), track_info.track_id, timestamp, event_id,
extdata);
}
size_t
data_processor::insert_event(size_t category_id, size_t stack_id, size_t parent_stack_id,
size_t correlation_id, const char* call_stack,
const char* line_info, const char* extdata)
{
std::lock_guard<std::mutex> lock(_data_mutex);
auto it = _category_map.find(category_id);
if(it == _category_map.end())
{
std::ostringstream oss;
oss << "Insert event failed! Error: Unknown category id: " << category_id
<< " for UPID: " << _upid;
throw std::runtime_error(oss.str());
}
ROCPROFSYS_VERBOSE(3, "Insert event category id: %ld, string id: %ld\n", category_id,
it->second);
_insert_event_statement(_upid.c_str(), it->second, stack_id, parent_stack_id,
correlation_id, call_stack, line_info, extdata);
return data_storage::database::get_instance().get_last_insert_id();
}
void
data_processor::initialize_event_stmt()
{
data_storage::queries::table_insert_query query_builder;
auto query = query_builder.set_table_name("rocpd_event_" + _upid)
.set_columns("guid", "category_id", "stack_id", "parent_stack_id",
"correlation_id", "call_stack", "line_info", "extdata")
.set_values('?', '?', '?', '?', '?', '?', '?', '?')
.get_query_string();
_insert_event_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, size_t, size_t, size_t,
const char*, const char*, const char*>(query);
}
void
data_processor::initialize_pmc_event_stmt()
{
data_storage::queries::table_insert_query query_builder;
auto query = query_builder.set_table_name("rocpd_pmc_event_" + _upid)
.set_columns("guid", "event_id", "pmc_id", "value", "extdata")
.set_values('?', '?', '?', '?', '?')
.get_query_string();
_insert_pmc_event_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, size_t, double, const char*>(
query);
}
void
data_processor::initialize_sample_stmt()
{
data_storage::queries::table_insert_query query_builder;
auto query = query_builder.set_table_name("rocpd_sample_" + _upid)
.set_columns("guid", "track_id", "timestamp", "event_id", "extdata")
.set_values('?', '?', '?', '?', '?')
.get_query_string();
_insert_sample_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, uint64_t, size_t,
const char*>(query);
}
void
data_processor::initialize_region_stmt()
{
data_storage::queries::table_insert_query query_builder;
auto query = query_builder.set_table_name("rocpd_region_" + _upid)
.set_columns("guid", "nid", "pid", "tid", "start", "end", "name_id",
"event_id", "extdata")
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?')
.get_query_string();
_insert_region_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, size_t, size_t, uint64_t,
uint64_t, size_t, size_t, const char*>(query);
}
void
data_processor::initialize_kernel_dispatch_stmt()
{
data_storage::queries::table_insert_query query_builder;
auto query = query_builder.set_table_name("rocpd_kernel_dispatch_" + _upid)
.set_columns("guid", "nid", "pid", "tid", "agent_id", "kernel_id",
"dispatch_id", "queue_id", "stream_id", "start", "end",
"private_segment_size", "group_segment_size",
"workgroup_size_x", "workgroup_size_y",
"workgroup_size_z", "grid_size_x", "grid_size_y",
"grid_size_z", "region_name_id", "event_id", "extdata")
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
'?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?')
.get_query_string();
_insert_kernel_dispatch_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, size_t, size_t, size_t,
size_t, size_t, size_t, size_t, uint64_t, uint64_t,
size_t, size_t, size_t, size_t, size_t, size_t,
size_t, size_t, size_t, size_t, const char*>(
query);
}
void
data_processor::initialize_memory_copy_stmt()
{
data_storage::queries::table_insert_query query_builder;
auto query = query_builder.set_table_name("rocpd_memory_copy_" + _upid)
.set_columns("guid", "nid", "pid", "tid", "start", "end", "name_id",
"dst_agent_id", "dst_address", "src_agent_id",
"src_address", "size", "queue_id", "stream_id",
"region_name_id", "event_id", "extdata")
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
'?', '?', '?', '?', '?', '?')
.get_query_string();
_insert_memory_copy_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, size_t, size_t, uint64_t,
uint64_t, size_t, size_t, size_t, size_t, size_t,
size_t, size_t, size_t, size_t, size_t,
const char*>(query);
}
void
data_processor::initialize_kernel_symbol_stmt()
{
data_storage::queries::table_insert_query query_builder;
auto query =
query_builder.set_table_name("rocpd_info_kernel_symbol_" + _upid)
.set_columns("id", "guid", "nid", "pid", "code_object_id", "kernel_name",
"display_name", "kernel_object", "kernarg_segment_size",
"kernarg_segment_alignment", "group_segment_size",
"private_segment_size", "sgpr_count", "arch_vgpr_count",
"accum_vgpr_count", "extdata")
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
'?', '?', '?')
.get_query_string();
_insert_kernel_symbol_statement =
data_storage::database::get_instance()
.create_statement_executor<size_t, const char*, size_t, size_t, uint64_t,
const char*, const char*, uint64_t, uint32_t,
uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
uint32_t, const char*>(query);
}
void
data_processor::initialize_code_object_stmt()
{
data_storage::queries::table_insert_query query_builder;
auto query =
query_builder.set_table_name("rocpd_info_code_object_" + _upid)
.set_columns("id", "guid", "nid", "pid", "agent_id", "uri", "load_base",
"load_size", "load_delta", "storage_type", "extdata")
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?')
.get_query_string();
_insert_code_object_statement =
data_storage::database::get_instance()
.create_statement_executor<size_t, const char*, size_t, size_t, size_t,
const char*, uint64_t, uint64_t, uint64_t,
const char*, const char*>(query);
}
void
data_processor::initialize_args_stmt()
{
data_storage::queries::table_insert_query query_builder;
auto query = query_builder.set_table_name("rocpd_arg_" + _upid)
.set_columns("guid", "event_id", "position", "type", "name", "value",
"extdata")
.set_values('?', '?', '?', '?', '?', '?', '?')
.get_query_string();
_insert_args_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, size_t, const char*,
const char*, const char*, const char*>(query);
}
void
data_processor::initialize_memory_alloc_stmt()
{
data_storage::queries::table_insert_query query_builder;
auto query = query_builder.set_table_name("rocpd_memory_allocate_" + _upid)
.set_columns("guid", "nid", "pid", "tid", "agent_id", "type",
"level", "start", "end", "address", "size", "queue_id",
"stream_id", "event_id", "extdata")
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
'?', '?', '?', '?')
.get_query_string();
_insert_memory_alloc_statement =
data_storage::database::get_instance()
.create_statement_executor<
const char*, size_t, size_t, size_t, size_t, const char*, const char*,
uint64_t, uint64_t, size_t, size_t, size_t, size_t, size_t, const char*>(
query);
// Statement without agent_id
query = query_builder.set_table_name("rocpd_memory_allocate_" + _upid)
.set_columns("guid", "nid", "pid", "tid", "type", "level", "start", "end",
"address", "size", "queue_id", "stream_id", "event_id",
"extdata")
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
'?', '?')
.get_query_string();
_insert_memory_alloc_no_agent_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, size_t, size_t, const char*,
const char*, uint64_t, uint64_t, size_t, size_t,
size_t, size_t, size_t, const char*>(query);
}
void
data_processor::insert_args(size_t event_id, size_t position, const char* type,
const char* name, const char* value, const char* extdata)
{
std::lock_guard<std::mutex> lock(_data_mutex);
_insert_args_statement(_upid.c_str(), event_id, position, type, name, value, extdata);
}
void
data_processor::insert_stream_info(size_t stream_id, size_t node_id, size_t process_id,
const char* name, const char* extdata)
{
if(_stream_ids.count(stream_id) > 0)
{
// ROCPROFSYS_WARNING(
// 1, "Insert stream info failed! Error: Stream ID %ld already exists!\n",
// stream_id);
return;
}
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
query.set_table_name("rocpd_info_stream_" + _upid)
.set_columns("id", "guid", "nid", "pid", "name", "extdata")
.set_values(stream_id, _upid, node_id, process_id, name, extdata)
.get_query_string());
_stream_ids.insert(stream_id);
}
void
data_processor::insert_queue_info(size_t queue_id, size_t node_id, size_t process_id,
const char* name, const char* extdata)
{
if(_queue_ids.count(queue_id) > 0)
{
// ROCPROFSYS_WARNING(
// 1, "Insert queue info failed! Error: Queue ID %ld already exists!\n",
// queue_id);
return;
}
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
query.set_table_name("rocpd_info_queue_" + _upid)
.set_columns("id", "guid", "nid", "pid", "name", "extdata")
.set_values(queue_id, _upid, node_id, process_id, name, extdata)
.get_query_string());
_queue_ids.insert(queue_id);
}
void
data_processor::insert_code_object(size_t id, size_t node_id, size_t process_id,
size_t agent_id, const char* uri, uint64_t ld_base,
uint64_t ld_size, uint64_t ld_delta,
const char* storage_type, const char* extdata)
{
if(_code_object_ids.count(id) > 0)
{
// ROCPROFSYS_WARNING(
// 1,
// "Insert code object info failed! Error: Code object ID %ld already
// exists!\n", id);
return;
}
ROCPROFSYS_VERBOSE(2, "Insert code object with ID: %ld\n", id);
std::lock_guard<std::mutex> lock(_data_mutex);
_insert_code_object_statement(id, _upid.c_str(), node_id, process_id, agent_id, uri,
ld_base, ld_size, ld_delta, storage_type, extdata);
_code_object_ids.insert(id);
}
void
data_processor::insert_kernel_symbol(
size_t id, size_t node_id, size_t process_id, uint64_t code_obj_id, const char* name,
const char* display_name, uint32_t kernel_obj, uint32_t kernarg_segmnt_size,
uint32_t kernarg_segment_alignment, uint32_t group_segment_size,
uint32_t private_segment_size, uint32_t sgrp_count, uint32_t arch_vgrp_count,
uint32_t accum_vgrp_count, const char* extdata)
{
if(_kernel_sym_ids.count(id) > 0)
{
// ROCPROFSYS_WARNING(
// 1,
// "Insert kernel symbol failed! Error: Kernel symbol ID %ld already
// exists!\n", id);
return;
}
ROCPROFSYS_VERBOSE(2, "Insert kernel symbol: %s with ID: %ld\n", name, id);
std::lock_guard<std::mutex> lock(_data_mutex);
_insert_kernel_symbol_statement(
id, _upid.c_str(), node_id, process_id, code_obj_id, name, display_name,
kernel_obj, kernarg_segmnt_size, kernarg_segment_alignment, group_segment_size,
private_segment_size, sgrp_count, arch_vgrp_count, accum_vgrp_count, extdata);
_kernel_sym_ids.insert(id);
}
void
data_processor::insert_category(size_t category_id, const char* name)
{
auto it = _category_map.find(category_id);
if(it != _category_map.end())
{
// ROCPROFSYS_WARNING(
// 1, "Insert category failed! Error: Category %s already exist!\n", name);
return;
}
auto name_id = insert_string(name);
std::lock_guard<std::mutex> lock(_data_mutex);
ROCPROFSYS_VERBOSE(2, "Insert category: name: %s, id: %ld, name id: %ld\n", name,
category_id, name_id);
_category_map.emplace(category_id, name_id);
}
void
data_processor::insert_region(size_t node_id, size_t process_id, size_t thread_id,
uint64_t start, uint64_t end, size_t name_id,
size_t event_id, const char* extdata)
{
std::lock_guard<std::mutex> lock(_data_mutex);
ROCPROFSYS_VERBOSE(2, "Insert region for event id: %ld\n", event_id);
_insert_region_statement(_upid.c_str(), node_id, process_id, thread_id, start, end,
name_id, event_id, extdata);
}
void
data_processor::insert_kernel_dispatch(
size_t node_id, size_t process_id, size_t thread_id, size_t agent_id,
size_t kernel_id, size_t dispatch_id, size_t queue_id, size_t stream_id,
uint64_t start, uint64_t end, size_t private_segment_size, size_t group_segment_size,
size_t workgroup_size_x, size_t workgroup_size_y, size_t workgroup_size_z,
size_t grid_size_x, size_t grid_size_y, size_t grid_size_z, size_t region_name_id,
size_t event_id, const char* extdata)
{
std::lock_guard<std::mutex> lock(_data_mutex);
ROCPROFSYS_VERBOSE(2, "Insert kernel dispatch for event id: %ld\n", event_id);
_insert_kernel_dispatch_statement(
_upid.c_str(), node_id, process_id, thread_id, agent_id, kernel_id, dispatch_id,
queue_id, stream_id, start, end, private_segment_size, group_segment_size,
workgroup_size_x, workgroup_size_y, workgroup_size_z, grid_size_x, grid_size_y,
grid_size_z, region_name_id, event_id, extdata);
}
void
data_processor::insert_memory_copy(size_t node_id, size_t process_id, size_t thread_id,
uint64_t start, uint64_t end, size_t name_id,
size_t dst_agent_id, size_t dst_addr,
size_t src_agent_id, size_t src_addr, size_t size,
size_t queue_id, size_t stream_id,
size_t region_name_id, size_t event_id,
const char* extdata)
{
std::lock_guard<std::mutex> lock(_data_mutex);
_insert_memory_copy_statement(_upid.c_str(), node_id, process_id, thread_id, start,
end, name_id, dst_agent_id, dst_addr, src_agent_id,
src_addr, size, queue_id, stream_id, region_name_id,
event_id, extdata);
}
void
data_processor::insert_memory_alloc(size_t node_id, size_t process_id, size_t thread_id,
std::optional<size_t> agent_id, const char* type,
const char* level, uint64_t start, uint64_t end,
size_t address, size_t size, size_t queue_id,
size_t stream_id, size_t event_id,
const char* extdata)
{
if(agent_id.has_value())
{
_insert_memory_alloc_statement(_upid.c_str(), node_id, process_id, thread_id,
agent_id.value(), type, level, start, end, address,
size, queue_id, stream_id, event_id, extdata);
}
else
{
_insert_memory_alloc_no_agent_statement(
_upid.c_str(), node_id, process_id, thread_id, type, level, start, end,
address, size, queue_id, stream_id, event_id, extdata);
}
}
size_t
data_processor::insert_thread_info(size_t node_id, size_t parent_process_id,
size_t process_id, size_t thread_id, const char* name,
uint64_t start, uint64_t end, const char* extdata)
{
auto it = _thread_id_map.find(thread_id);
if(it != _thread_id_map.end())
{
return _thread_id_map.at(thread_id);
}
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
query.set_table_name("rocpd_info_thread_" + _upid)
.set_columns("guid", "nid", "ppid", "pid", "tid", "name", "start", "end",
"extdata")
.set_values(_upid.c_str(), node_id, parent_process_id, process_id, thread_id,
name, start, end, extdata)
.get_query_string());
auto thread_idx = data_storage::database::get_instance().get_last_insert_id();
_thread_id_map.emplace(thread_id, thread_idx);
return thread_idx;
}
void
data_processor::flush()
{
// Flush all pending data to the database
data_storage::database::get_instance().flush();
}
} // namespace rocpd
} // namespace rocprofsys
@@ -0,0 +1,252 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <cstdint>
#include <functional>
#include <mutex>
#include <optional>
#include <set>
#include <string>
#include <unordered_map>
namespace rocprofsys
{
namespace rocpd
{
struct data_processor
{
using insert_event_stmt =
std::function<void(const char*, size_t, size_t, size_t, size_t, const char*,
const char*, const char*)>;
using insert_pmc_event_stms =
std::function<void(const char*, size_t, size_t, double, const char*)>;
using insert_sample_stmt =
std::function<void(const char*, size_t, uint64_t, size_t, const char*)>;
using insert_region_stmt =
std::function<void(const char*, size_t, size_t, size_t, uint64_t, uint64_t,
size_t, size_t, const char*)>;
using insert_kernel_dispatch_stmt = std::function<void(
const char*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t,
uint64_t, uint64_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t,
size_t, size_t, size_t, const char*)>;
using insert_memory_copy_stmt = std::function<void(
const char*, size_t, size_t, size_t, uint64_t, uint64_t, size_t, size_t, size_t,
size_t, size_t, size_t, size_t, size_t, size_t, size_t, const char*)>;
using insert_memory_alloc_stmt = std::function<void(
const char*, size_t, size_t, size_t, size_t, const char*, const char*, uint64_t,
uint64_t, size_t, size_t, size_t, size_t, size_t, const char*)>;
using insert_memory_alloc_no_agent_stmt = std::function<void(
const char*, size_t, size_t, size_t, const char*, const char*, uint64_t, uint64_t,
size_t, size_t, size_t, size_t, size_t, const char*)>;
using insert_kernel_symbol_stmt =
std::function<void(size_t, const char*, size_t, size_t, uint64_t, const char*,
const char*, uint64_t, uint32_t, uint32_t, uint32_t, uint32_t,
uint32_t, uint32_t, uint32_t, const char*)>;
using insert_code_object_stmt =
std::function<void(size_t, const char*, size_t, size_t, size_t, const char*,
uint64_t, uint64_t, uint64_t, const char*, const char*)>;
using insert_args_stmt = std::function<void(const char*, size_t, size_t, const char*,
const char*, const char*, const char*)>;
private:
struct track_name_map
{
size_t track_id;
size_t name_id;
};
struct pmc_identifier
{
size_t agent_id;
std::string name;
};
struct pmc_identifier_hash
{
std::size_t operator()(const pmc_identifier& pmc) const noexcept
{
std::size_t h1 = std::hash<size_t>{}(pmc.agent_id);
std::size_t h2 = std::hash<std::string>{}(pmc.name);
return h1 ^ (h2 << 1);
}
};
struct pmc_identifier_equal
{
bool operator()(const pmc_identifier& lhs,
const pmc_identifier& rhs) const noexcept
{
return lhs.agent_id == rhs.agent_id && lhs.name == rhs.name;
}
};
public:
static data_processor& get_instance();
size_t insert_string(const char* str);
void insert_node_info(size_t node_id, size_t hash, const char* machine_id,
const char* system_name, const char* hostname,
const char* release, const char* version,
const char* hardware_name, const char* domain_name);
void insert_process_info(size_t node_id, size_t ppid, size_t pid, size_t init,
size_t fini, size_t start, size_t end, const char* command,
const char* environment = "{}", const char* extdata = "{}");
size_t insert_agent(size_t node_id, size_t pid, const char* agent_type,
size_t absolute_index, size_t logical_index, size_t type_index,
uint64_t uuid, const char* name, const char* model_name,
const char* vendor_name, const char* product_name,
const char* user_name, const char* extdata = "{}");
void insert_track(const char* track_name, size_t node_id, size_t process_id,
std::optional<size_t> thread_id, const char* extdata = "{}");
size_t insert_event(size_t category_id, size_t stack_id, size_t parent_stack_id,
size_t correlation_id, const char* call_stack = "{}",
const char* line_info = "{}", const char* extdata = "{}");
void insert_pmc_event(size_t event_id, size_t agent_id, const char* pmc_descriptor,
double value, const char* extdata = "{}");
void insert_pmc_description(size_t node_id, size_t process_id, size_t agent_id,
const char* target_arch, size_t event_code,
size_t instance_id, const char* name, const char* symbol,
const char* description, const char* long_description,
const char* component, const char* units,
const char* value_type, const char* block,
const char* expression, uint32_t is_constant,
uint32_t is_derived, const char* extdata = "{}");
void insert_sample(const char* track, uint64_t timestamp, size_t event_id,
const char* extdata = "{}");
void insert_category(size_t category_id, const char* name);
void insert_region(size_t node_id, size_t process_id, size_t thread_id,
uint64_t start, uint64_t end, size_t name_id, size_t event_id,
const char* extdata = "{}");
size_t insert_thread_info(size_t node_id, size_t parent_process_id, size_t process_id,
size_t thread_id, const char* name, uint64_t start = 0,
uint64_t end = 0, const char* extdata = "{}");
void insert_stream_info(size_t stream_id, size_t node_id, size_t process_id,
const char* name, const char* extdata = "{}");
void insert_queue_info(size_t queue_id, size_t node_id, size_t process_id,
const char* name, const char* extdata = "{}");
void insert_kernel_dispatch(size_t node_id, size_t process_id, size_t thread_id,
size_t agent_id, size_t kernel_id, size_t dispatch_id,
size_t queue_id, size_t stream_id, uint64_t start,
uint64_t end, size_t private_segment_size,
size_t group_segment_size, size_t workgroup_size_x,
size_t workgroup_size_y, size_t workgroup_size_z,
size_t grid_size_x, size_t grid_size_y,
size_t grid_size_z, size_t region_name_id,
size_t event_id, const char* extdata = "{}");
void insert_memory_copy(size_t node_id, size_t process_id, size_t thread_id,
uint64_t start, uint64_t end, size_t name_id,
size_t dst_agent_id, size_t dst_addr, size_t src_agent_id,
size_t src_addr, size_t size, size_t queue_id,
size_t stream_id, size_t region_name_id, size_t event_id,
const char* extdata = "{}");
void insert_kernel_symbol(size_t id, size_t node_id, size_t process_id,
uint64_t code_obj_id, const char* name,
const char* display_name, uint32_t kernel_obj,
uint32_t kernarg_segmnt_size,
uint32_t kernarg_segment_alignment,
uint32_t group_segment_size, uint32_t private_segment_size,
uint32_t sgrp_count, uint32_t arch_vgrp_count,
uint32_t accum_vgrp_count, const char* extdata = "{}");
void insert_code_object(size_t id, size_t node_id, size_t process_id, size_t agent_id,
const char* uri, uint64_t ld_base, uint64_t ld_size,
uint64_t ld_delta, const char* storage_type,
const char* extdata = "{}");
void insert_args(size_t event_id, size_t position, const char* type, const char* name,
const char* value, const char* extdata = "{}");
void insert_memory_alloc(size_t node_id, size_t process_id, size_t thread_id,
std::optional<size_t> agent_id, const char* type,
const char* level, uint64_t start, uint64_t end,
size_t address, size_t size, size_t queue_id,
size_t stream_id, size_t event_id,
const char* extdata = "{}");
void flush();
private:
data_processor();
data_processor(data_processor&) = delete;
data_processor& operator=(const data_processor&) = delete;
void initialize_pmc_event_stmt();
void initialize_event_stmt();
void initialize_sample_stmt();
void initialize_region_stmt();
void initialize_kernel_dispatch_stmt();
void initialize_memory_copy_stmt();
void initialize_kernel_symbol_stmt();
void initialize_code_object_stmt();
void initialize_metadata();
void initialize_args_stmt();
void initialize_memory_alloc_stmt();
private:
std::unordered_map<std::string, track_name_map> _tracks;
std::unordered_map<pmc_identifier, size_t, pmc_identifier_hash, pmc_identifier_equal>
_pmc_descriptor_map;
std::unordered_map<size_t, size_t> _thread_id_map;
std::unordered_map<size_t, size_t> _category_map;
std::unordered_map<std::string, size_t> _string_map;
std::set<uint64_t> _code_object_ids;
std::set<uint64_t> _kernel_sym_ids;
std::set<uint64_t> _stream_ids;
std::set<uint64_t> _queue_ids;
insert_event_stmt _insert_event_statement;
insert_pmc_event_stms _insert_pmc_event_statement;
insert_sample_stmt _insert_sample_statement;
insert_region_stmt _insert_region_statement;
insert_kernel_dispatch_stmt _insert_kernel_dispatch_statement;
insert_memory_copy_stmt _insert_memory_copy_statement;
insert_kernel_symbol_stmt _insert_kernel_symbol_statement;
insert_code_object_stmt _insert_code_object_statement;
insert_args_stmt _insert_args_statement;
insert_memory_alloc_stmt _insert_memory_alloc_statement;
insert_memory_alloc_no_agent_stmt _insert_memory_alloc_no_agent_statement;
std::string _upid{};
std::mutex _data_mutex;
};
} // namespace rocpd
} // namespace rocprofsys
@@ -0,0 +1,14 @@
set(data_storage_sources ${CMAKE_CURRENT_LIST_DIR}/database.cpp)
set(data_storage_headers
${CMAKE_CURRENT_LIST_DIR}/database.hpp
${CMAKE_CURRENT_LIST_DIR}/insert_query_builders.hpp
${CMAKE_CURRENT_LIST_DIR}/table_insert_query.hpp
)
target_sources(rocprofiler-systems-core-library PRIVATE ${data_storage_sources})
target_link_libraries(
rocprofiler-systems-core-library
PRIVATE $<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-sqlite3>
)
@@ -0,0 +1,170 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "database.hpp"
#include "common/md5sum.hpp"
#include "debug.hpp"
#include "node_info.hpp"
#include <config.hpp>
#include <fstream>
#include <regex>
#include <timemory/environment/types.hpp>
#include <timemory/utility/filepath.hpp>
#include <unistd.h>
namespace
{
void
create_directory_for_database_file(const std::string& db_file)
{
auto _db_dirname = tim::filepath::dirname(db_file);
if(!tim::filepath::direxists(_db_dirname))
{
tim::filepath::makedir(_db_dirname);
}
}
} // namespace
namespace rocprofsys
{
namespace rocpd
{
namespace data_storage
{
database&
database::get_instance()
{
static database _instance;
return _instance;
}
database::database()
{
auto db_name = std::string_view{ "rocpd.db" };
auto abs_db_path = rocprofsys::get_database_absolute_path(db_name);
create_directory_for_database_file(abs_db_path);
ROCPROFSYS_VERBOSE(0, "Database: %s\r\n", abs_db_path.c_str());
validate_sqlite3_result(sqlite3_open(":memory:", &_sqlite3_db_temp), "",
"database open failed!");
validate_sqlite3_result(sqlite3_open(abs_db_path.c_str(), &_sqlite3_db), "",
"database open failed!");
}
database::~database()
{
sqlite3_close(_sqlite3_db_temp);
sqlite3_close(_sqlite3_db);
}
void
database::initialize_schema()
{
auto get_file_path = [](const std::string_view filename) {
auto _rocprofsys_root = tim::get_env<std::string>(
"rocprofiler_systems_ROOT", tim::get_env<std::string>("ROCPROFSYS_ROOT", ""));
if(!_rocprofsys_root.empty() &&
tim::filepath::direxists(std::string(_rocprofsys_root)))
{
auto new_file_path = std::string(_rocprofsys_root)
.append("/share/rocprofiler-systems/")
.append(filename);
if(tim::filepath::exists(new_file_path))
{
return new_file_path;
}
}
return std::string(
"rocprofiler-systems/source/lib/core/rocpd/data_storage/schema/")
.append(filename);
};
std::vector<std::string_view> schema_files = { "rocpd_tables.sql", "rocpd_views.sql",
"data_views.sql", "marker_views.sql",
"summary_views.sql" };
// Process each schema file
for(const auto& schema_file : schema_files)
{
auto file_path = get_file_path(schema_file);
std::ifstream file(file_path);
if(!file.is_open())
{
throw std::runtime_error(
std::string("Failed to open schema file ").append(file_path));
}
std::stringstream ss_query;
ss_query << file.rdbuf();
std::string query = ss_query.str();
std::regex upid_pattern("\\{\\{uuid\\}\\}");
std::regex view_upid_pattern("\\{\\{view_upid\\}\\}");
query = std::regex_replace(query, upid_pattern, "_" + get_upid());
query = std::regex_replace(query, view_upid_pattern, "");
validate_sqlite3_result(
sqlite3_exec(_sqlite3_db_temp, query.c_str(), 0, 0, 0), query.c_str(),
std::string("Invalid schema file, init database failed!").append(file_path));
file.close();
}
}
void
database::execute_query(const std::string& query)
{
validate_sqlite3_result(sqlite3_exec(_sqlite3_db_temp, query.c_str(), 0, 0, 0),
"Failed to execute query - ", query);
}
std::string
database::get_upid()
{
static std::string _upid = []() {
auto n_info = node_info::get_instance();
auto guid = common::md5sum{ n_info.id, getpid(), getppid() };
return guid.hexdigest();
}();
return _upid;
}
size_t
database::get_last_insert_id() const
{
return sqlite3_last_insert_rowid(_sqlite3_db_temp);
}
void
database::flush()
{
auto* backup = sqlite3_backup_init(_sqlite3_db, "main", _sqlite3_db_temp, "main");
if(backup)
{
sqlite3_backup_step(backup, -1); // Copy all pages
sqlite3_backup_finish(backup);
}
}
} // namespace data_storage
} // namespace rocpd
} // namespace rocprofsys
@@ -0,0 +1,204 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "common/traits.hpp"
#include <memory>
#include <mutex>
#include <sqlite3.h>
#include <sstream>
#include <stdexcept>
namespace rocprofsys
{
namespace rocpd
{
namespace data_storage
{
static std::mutex _mutex;
class database
{
public:
static database& get_instance();
database(database&) = delete;
database& operator=(database&) = delete;
void flush();
~database();
private:
database();
template <typename... Args>
inline void validate_sqlite3_result(int sqlite3_error_code, const char* query,
Args&&... args)
{
std::stringstream ss;
ss << "\n===========================================================\n";
ss << "Database Error\n";
((ss << args << " "), ...);
ss << "\nQuery: " << query << "\n";
switch(sqlite3_error_code)
{
case SQLITE_OK:
case SQLITE_DONE: return;
case SQLITE_CONSTRAINT:
{
sqlite3_stmt* stmt;
ss << "Constraint violation(s): " << "\n";
sqlite3_exec(_sqlite3_db_temp, "PRAGMA foreign_keys = OFF;", nullptr,
nullptr, nullptr);
sqlite3_exec(_sqlite3_db_temp, query, nullptr, nullptr, nullptr);
sqlite3_exec(_sqlite3_db_temp, "PRAGMA foreign_keys = ON;", nullptr,
nullptr, nullptr);
sqlite3_prepare_v2(_sqlite3_db_temp, "PRAGMA foreign_key_check", -1,
&stmt, nullptr);
int rc = 0;
while((rc = sqlite3_step(stmt)) == SQLITE_ROW)
{
const char* table = (const char*) sqlite3_column_text(stmt, 0);
int rowid = sqlite3_column_int(stmt, 1);
const char* parent = (const char*) sqlite3_column_text(stmt, 2);
int fkid = sqlite3_column_int(stmt, 3);
ss << " - " << "FK Violation - Table: " << (table ? table : "NULL")
<< ", RowID: " << rowid
<< ", Parent: " << (parent ? parent : "NULL") << ", FKID: " << fkid
<< "\n";
}
sqlite3_finalize(stmt);
}
break;
default:
{
}
break;
}
ss << " [Sqlite3 error: " << sqlite3_errstr(sqlite3_error_code);
ss << " (Extended error message: " << sqlite3_errmsg(_sqlite3_db_temp) << ")]";
throw std::runtime_error(ss.str());
}
template <typename T, std::enable_if_t<!(common::traits::is_string_literal<T>() ||
std::is_floating_point_v<std::decay_t<T>> ||
std::is_same_v<std::decay_t<T>, int64_t> ||
std::is_same_v<std::decay_t<T>, uint64_t> ||
std::is_same_v<std::decay_t<T>, int32_t> ||
std::is_same_v<std::decay_t<T>, uint32_t>),
int> = 0>
inline void bind_value([[maybe_unused]] sqlite3_stmt* stmt,
[[maybe_unused]] int position, [[maybe_unused]] T& _value,
[[maybe_unused]] const std::string& query)
{
throw std::runtime_error("Unsupported type for binding!");
}
template <typename T,
std::enable_if_t<common::traits::is_string_literal<T>(), int> = 0>
inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
const std::string& query)
{
validate_sqlite3_result(
sqlite3_bind_text(stmt, position, _value, -1, SQLITE_STATIC), query.c_str(),
"Failed to bind text! Position: ", position, ", Value: ", _value);
}
template <typename T,
std::enable_if_t<std::is_floating_point_v<std::decay_t<T>>, int> = 0>
inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
const std::string& query)
{
validate_sqlite3_result(
sqlite3_bind_double(stmt, position, _value), query.c_str(),
"Failed to bind double! Position: ", position, ", Value: ", _value);
}
template <typename T, std::enable_if_t<std::is_same_v<std::decay_t<T>, int64_t> ||
std::is_same_v<std::decay_t<T>, uint64_t>,
int> = 0>
inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
const std::string& query)
{
validate_sqlite3_result(sqlite3_bind_int64(stmt, position, _value), query.c_str(),
"Failed to bind int64_t/uint64_t! Position: ", position,
", Value: ", _value);
}
template <typename T, std::enable_if_t<std::is_same_v<std::decay_t<T>, int32_t> ||
std::is_same_v<std::decay_t<T>, uint32_t>,
int> = 0>
inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
const std::string& query)
{
validate_sqlite3_result(sqlite3_bind_int(stmt, position, _value), query.c_str(),
"Failed to bind int32_t/uint32_t! Position: ", position,
", Value: ", _value);
}
public:
void initialize_schema();
void execute_query(const std::string& query);
size_t get_last_insert_id() const;
/**
* This function prepares an SQLite statement based on the provided SQL query and
* returns a lambda that can execute the prepared statement, binding the provided
* values to the respective placeholders in the query.
*/
template <typename... Values>
auto create_statement_executor(const std::string& query)
{
sqlite3_stmt* p_stmt;
validate_sqlite3_result(
sqlite3_prepare_v2(_sqlite3_db_temp, query.c_str(), -1, &p_stmt, nullptr),
query.c_str(), "Failed to create statement!");
std::shared_ptr<sqlite3_stmt> stmt{ p_stmt, sqlite3_finalize };
return [stmt, query, this](Values... value) {
std::lock_guard lock{ _mutex };
int position = 1;
((bind_value(stmt.get(), position++, value, query)), ...);
validate_sqlite3_result(sqlite3_step(stmt.get()), query.c_str(),
"Failed to execute step!\n", "Values: ", value...);
sqlite3_reset(stmt.get());
};
}
static std::string get_upid();
private:
sqlite3* _sqlite3_db{ nullptr };
sqlite3* _sqlite3_db_temp{ nullptr };
};
} // namespace data_storage
} // namespace rocpd
} // namespace rocprofsys
@@ -0,0 +1,126 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "common/traits.hpp"
#include <sstream>
#include <string>
#include <type_traits>
namespace rocprofsys
{
namespace rocpd
{
namespace data_storage
{
namespace queries
{
namespace query_builders
{
struct query_value_builder
{
query_value_builder(std::stringstream& ss)
: _ss{ ss }
{}
template <typename... Values>
query_value_builder& set_values(Values&&... values)
{
auto i = sizeof...(values);
_ss << "( ";
((process_value(values) << (i-- > 1 ? ", " : " ")), ...);
_ss << ")";
return *this;
}
std::string get_query_string() { return _ss.str(); }
private:
template <typename T>
std::enable_if_t<common::traits::is_string_literal<T>(), std::stringstream&>
process_value(T& value)
{
_ss << "\"" << value << "\"";
return _ss;
}
template <typename T>
std::enable_if_t<common::traits::is_optional_v<std::decay_t<T>>, std::stringstream&>
process_value(T& value)
{
if(value.has_value())
{
_ss << value.value();
}
else
{
_ss << "NULL";
}
return _ss;
}
template <typename T>
std::enable_if_t<!common::traits::is_string_literal<T>() &&
!common::traits::is_optional_v<std::decay_t<T>>,
std::stringstream&>
process_value(T& value)
{
_ss << value;
return _ss;
}
private:
std::stringstream& _ss;
};
struct query_columns_builder
{
query_columns_builder(std::stringstream& ss)
: _ss{ ss }
, _query_value_builder{ _ss }
{}
template <typename... Columns,
typename =
std::enable_if_t<(common::traits::is_string_literal<Columns>() && ...)>>
query_value_builder& set_columns(Columns&... columns)
{
auto i = sizeof...(columns);
_ss << "( ";
((_ss << columns << (i-- > 1 ? ", " : " ")), ...) << ") VALUES ";
return _query_value_builder;
}
private:
std::stringstream& _ss;
query_value_builder _query_value_builder;
};
} // namespace query_builders
} // namespace queries
} // namespace data_storage
} // namespace rocpd
} // namespace rocprofsys
@@ -0,0 +1,722 @@
--
-- Useful views
--
-- Code objects
CREATE VIEW IF NOT EXISTS
`code_objects` AS
SELECT
CO.id,
CO.guid,
CO.nid,
P.pid,
A.absolute_index AS agent_abs_index,
CO.uri,
CO.load_base,
CO.load_size,
CO.load_delta,
CO.storage_type AS storage_type_str,
JSON_EXTRACT(CO.extdata, '$.size') AS code_object_size,
JSON_EXTRACT(CO.extdata, '$.storage_type') AS storage_type,
JSON_EXTRACT(CO.extdata, '$.memory_base') AS memory_base,
JSON_EXTRACT(CO.extdata, '$.memory_size') AS memory_size
FROM
`rocpd_info_code_object` CO
INNER JOIN `rocpd_info_agent` A ON CO.agent_id = A.id
AND CO.guid = A.guid
INNER JOIN `rocpd_info_process` P ON CO.pid = P.id
AND CO.guid = P.guid;
CREATE VIEW IF NOT EXISTS
`kernel_symbols` AS
SELECT
KS.id,
KS.guid,
KS.nid,
P.pid,
KS.code_object_id,
KS.kernel_name,
KS.display_name,
KS.kernel_object,
KS.kernarg_segment_size,
KS.kernarg_segment_alignment,
KS.group_segment_size,
KS.private_segment_size,
KS.sgpr_count,
KS.arch_vgpr_count,
KS.accum_vgpr_count,
JSON_EXTRACT(KS.extdata, '$.size') AS kernel_symbol_size,
JSON_EXTRACT(KS.extdata, '$.kernel_id') AS kernel_id,
JSON_EXTRACT(KS.extdata, '$.kernel_code_entry_byte_offset') AS kernel_code_entry_byte_offset,
JSON_EXTRACT(KS.extdata, '$.formatted_kernel_name') AS formatted_kernel_name,
JSON_EXTRACT(KS.extdata, '$.demangled_kernel_name') AS demangled_kernel_name,
JSON_EXTRACT(KS.extdata, '$.truncated_kernel_name') AS truncated_kernel_name,
JSON_EXTRACT(KS.extdata, '$.kernel_address.handle') AS kernel_address
FROM
`rocpd_info_kernel_symbol` KS
INNER JOIN `rocpd_info_process` P ON KS.pid = P.id
AND KS.guid = P.guid;
-- Processes
CREATE VIEW IF NOT EXISTS
`processes` AS
SELECT
N.id AS nid,
N.machine_id,
N.system_name,
N.hostname,
N.release AS system_release,
N.version AS system_version,
P.guid,
P.ppid,
P.pid,
P.init,
P.start,
P.end,
P.fini,
P.command
FROM
`rocpd_info_process` P
INNER JOIN `rocpd_info_node` N ON N.id = P.nid
AND N.guid = P.guid;
-- Threads
CREATE VIEW IF NOT EXISTS
`threads` AS
SELECT
N.id AS nid,
N.machine_id,
N.system_name,
N.hostname,
N.release AS system_release,
N.version AS system_version,
P.guid,
P.ppid,
P.pid,
T.tid,
T.start,
T.end,
T.name
FROM
`rocpd_info_thread` T
INNER JOIN `rocpd_info_process` P ON P.id = T.pid
AND N.guid = T.guid
INNER JOIN `rocpd_info_node` N ON N.id = T.nid
AND N.guid = T.guid;
-- CPU regions
CREATE VIEW IF NOT EXISTS
`regions` AS
SELECT
R.id,
R.guid,
(
SELECT
string
FROM
`rocpd_string` RS
WHERE
RS.id = E.category_id
AND RS.guid = E.guid
) AS category,
S.string AS name,
R.nid,
P.pid,
T.tid,
R.start,
R.end,
(R.end - R.start) AS duration,
R.event_id,
E.stack_id,
E.parent_stack_id,
E.correlation_id AS corr_id,
E.extdata,
E.call_stack,
E.line_info
FROM
`rocpd_region` R
INNER JOIN `rocpd_event` E ON E.id = R.event_id
AND E.guid = R.guid
INNER JOIN `rocpd_string` S ON S.id = R.name_id
AND S.guid = R.guid
INNER JOIN `rocpd_info_process` P ON P.id = R.pid
AND P.guid = R.guid
INNER JOIN `rocpd_info_thread` T ON T.id = R.tid
AND T.guid = R.guid;
CREATE VIEW IF NOT EXISTS
`region_args` AS
SELECT
R.id,
R.guid,
R.nid,
P.pid,
A.type,
A.name,
A.value
FROM
`rocpd_region` R
INNER JOIN `rocpd_event` E ON E.id = R.event_id
AND E.guid = R.guid
INNER JOIN `rocpd_arg` A ON A.event_id = E.id
AND A.guid = R.guid
INNER JOIN `rocpd_info_process` P ON P.id = R.pid
AND P.guid = R.guid;
--
-- Samples
CREATE VIEW IF NOT EXISTS
`samples` AS
SELECT
R.id,
R.guid,
(
SELECT
string
FROM
`rocpd_string` RS
WHERE
RS.id = E.category_id
AND RS.guid = E.guid
) AS category,
(
SELECT
string
FROM
`rocpd_string` RS
WHERE
RS.id = T.name_id
AND RS.guid = T.guid
) AS name,
T.nid,
P.pid,
TH.tid,
R.timestamp,
R.event_id,
E.stack_id AS stack_id,
E.parent_stack_id AS parent_stack_id,
E.correlation_id AS corr_id,
E.extdata AS extdata,
E.call_stack AS call_stack,
E.line_info AS line_info
FROM
`rocpd_sample` R
INNER JOIN `rocpd_track` T ON T.id = R.track_id
AND T.guid = R.guid
INNER JOIN `rocpd_event` E ON E.id = R.event_id
AND E.guid = R.guid
INNER JOIN `rocpd_info_process` P ON P.id = T.pid
AND P.guid = T.guid
INNER JOIN `rocpd_info_thread` TH ON TH.id = T.tid
AND TH.guid = T.guid;
--
-- Provides samples view with the same columns as regions view
CREATE VIEW IF NOT EXISTS
`sample_regions` AS
SELECT
R.id,
R.guid,
(
SELECT
string
FROM
`rocpd_string` RS
WHERE
RS.id = E.category_id
AND RS.guid = E.guid
) AS category,
(
SELECT
string
FROM
`rocpd_string` RS
WHERE
RS.id = T.name_id
AND RS.guid = T.guid
) AS name,
T.nid,
P.pid,
TH.tid,
R.timestamp AS start,
R.timestamp AS END,
(R.timestamp - R.timestamp) AS duration,
R.event_id,
E.stack_id AS stack_id,
E.parent_stack_id AS parent_stack_id,
E.correlation_id AS corr_id,
E.extdata AS extdata,
E.call_stack AS call_stack,
E.line_info AS line_info
FROM
`rocpd_sample` R
INNER JOIN `rocpd_track` T ON T.id = R.track_id
AND T.guid = R.guid
INNER JOIN `rocpd_event` E ON E.id = R.event_id
AND E.guid = R.guid
INNER JOIN `rocpd_info_process` P ON P.id = T.pid
AND P.guid = T.guid
INNER JOIN `rocpd_info_thread` TH ON TH.id = T.tid
AND TH.guid = T.guid;
--
-- Provides a unified view of the regions and samples
CREATE VIEW IF NOT EXISTS
`regions_and_samples` AS
SELECT
*
FROM
`regions`
UNION ALL
SELECT
*
FROM
`sample_regions`;
--
-- Kernel information
CREATE VIEW
`kernels` AS
SELECT
K.id,
K.guid,
T.tid,
(
SELECT
string
FROM
`rocpd_string` RS
WHERE
RS.id = E.category_id
AND RS.guid = E.guid
) AS category,
R.string AS region,
S.display_name AS name,
K.nid,
P.pid,
A.absolute_index AS agent_abs_index,
A.logical_index AS agent_log_index,
A.type_index AS agent_type_index,
A.type AS agent_type,
S.code_object_id AS code_object_id,
K.kernel_id,
K.dispatch_id,
K.stream_id,
K.queue_id,
Q.name AS queue,
ST.name AS stream,
K.start,
K.end,
(K.end - K.start) AS duration,
K.grid_size_x AS grid_x,
K.grid_size_y AS grid_y,
K.grid_size_z AS grid_z,
K.workgroup_size_x AS workgroup_x,
K.workgroup_size_y AS workgroup_y,
K.workgroup_size_z AS workgroup_z,
K.group_segment_size AS lds_size,
K.private_segment_size AS scratch_size,
S.group_segment_size AS static_lds_size,
S.private_segment_size AS static_scratch_size,
E.stack_id,
E.parent_stack_id,
E.correlation_id AS corr_id
FROM
`rocpd_kernel_dispatch` K
INNER JOIN `rocpd_info_agent` A ON A.id = K.agent_id
AND A.guid = K.guid
INNER JOIN `rocpd_event` E ON E.id = K.event_id
AND E.guid = K.guid
INNER JOIN `rocpd_string` R ON R.id = K.region_name_id
AND R.guid = K.guid
INNER JOIN `rocpd_info_kernel_symbol` S ON S.id = K.kernel_id
AND S.guid = K.guid
LEFT JOIN `rocpd_info_stream` ST ON ST.id = K.stream_id
AND ST.guid = K.guid
LEFT JOIN `rocpd_info_queue` Q ON Q.id = K.queue_id
AND Q.guid = K.guid
INNER JOIN `rocpd_info_process` P ON P.id = Q.pid
AND P.guid = Q.guid
INNER JOIN `rocpd_info_thread` T ON T.id = K.tid
AND T.guid = K.guid;
--
-- Performance Monitoring Counters (PMC)
CREATE VIEW IF NOT EXISTS
`pmc_info` AS
SELECT
PMC_I.id,
PMC_I.guid,
PMC_I.nid,
P.pid,
A.absolute_index AS agent_abs_index,
PMC_I.is_constant,
PMC_I.is_derived,
PMC_I.name,
PMC_I.description,
PMC_I.block,
PMC_I.expression
FROM
`rocpd_info_pmc` PMC_I
INNER JOIN `rocpd_info_agent` A ON PMC_I.agent_id = A.id
AND PMC_I.guid = A.guid
INNER JOIN `rocpd_info_process` P ON P.id = PMC_I.pid
AND PMC_I.guid = P.guid;
CREATE VIEW IF NOT EXISTS
`pmc_events` AS
SELECT
PMC_E.id,
PMC_E.guid,
PMC_E.pmc_id,
E.id AS event_id,
(
SELECT
string
FROM
`rocpd_string` RS
WHERE
RS.id = E.category_id
AND RS.guid = E.guid
) AS category,
(
SELECT
display_name
FROM
`rocpd_info_kernel_symbol` KS
WHERE
KS.id = K.kernel_id
AND KS.guid = K.guid
) AS name,
K.nid,
P.pid,
K.dispatch_id,
K.start,
K.end,
(K.end - K.start) AS duration,
PMC_I.name AS counter_name,
PMC_E.value AS counter_value
FROM
`rocpd_pmc_event` PMC_E
INNER JOIN `rocpd_info_pmc` PMC_I ON PMC_I.id = PMC_E.pmc_id
AND PMC_I.guid = PMC_E.guid
INNER JOIN `rocpd_event` E ON E.id = PMC_E.event_id
AND E.guid = PMC_E.guid
INNER JOIN `rocpd_kernel_dispatch` K ON K.event_id = PMC_E.event_id
AND K.guid = PMC_E.guid
INNER JOIN `rocpd_info_process` P ON P.id = K.pid
AND P.guid = K.guid;
-- events with arguments ---
CREATE VIEW IF NOT EXISTS
`events_args` AS
SELECT
E.id AS event_id,
(
SELECT
string
FROM
`rocpd_string` RS
WHERE
RS.id = E.category_id
AND RS.guid = E.guid
) AS category,
E.stack_id,
E.parent_stack_id,
E.correlation_id,
A.position AS arg_position,
A.type AS arg_type,
A.name AS arg_name,
A.value AS arg_value,
E.call_stack,
E.line_info,
A.extdata
FROM
`rocpd_event` E
INNER JOIN `rocpd_arg` A ON A.event_id = E.id
AND A.guid = E.guid;
-- list of astream arguments enriched by the corresponding stream descriptions
CREATE VIEW IF NOT EXISTS
`stream_args` AS
SELECT
A.id AS argument_id,
A.event_id AS event_id,
A.position AS arg_position,
A.type AS arg_type,
A.value AS arg_value,
JSON_EXTRACT(A.extdata, '$.stream_id') AS stream_id,
S.nid,
P.pid,
S.name AS stream_name,
S.extdata AS extdata
FROM
`rocpd_arg` A
INNER JOIN `rocpd_info_stream` S ON JSON_EXTRACT(A.extdata, '$.stream_id') = S.id
AND A.guid = S.guid
INNER JOIN `rocpd_info_process` P ON P.id = S.pid
AND P.guid = S.guid
WHERE
A.name = 'stream';
--
--
CREATE VIEW IF NOT EXISTS
`memory_copies` AS
SELECT
M.id,
M.guid,
(
SELECT
string
FROM
`rocpd_string` RS
WHERE
RS.id = E.category_id
AND RS.guid = E.guid
) AS category,
M.nid,
P.pid,
T.tid,
M.start,
M.end,
(M.end - M.start) AS duration,
S.string AS name,
R.string AS region_name,
M.stream_id,
M.queue_id,
ST.name AS stream_name,
Q.name AS queue_name,
M.size,
dst_agent.name AS dst_device,
dst_agent.absolute_index AS dst_agent_abs_index,
dst_agent.logical_index AS dst_agent_log_index,
dst_agent.type_index AS dst_agent_type_index,
dst_agent.type AS dst_agent_type,
M.dst_address,
src_agent.name AS src_device,
src_agent.absolute_index AS src_agent_abs_index,
src_agent.logical_index AS src_agent_log_index,
src_agent.type_index AS src_agent_type_index,
src_agent.type AS src_agent_type,
M.src_address,
E.stack_id,
E.parent_stack_id,
E.correlation_id AS corr_id
FROM
`rocpd_memory_copy` M
INNER JOIN `rocpd_string` S ON S.id = M.name_id
AND S.guid = M.guid
LEFT JOIN `rocpd_string` R ON R.id = M.region_name_id
AND R.guid = M.guid
INNER JOIN `rocpd_info_agent` dst_agent ON dst_agent.id = M.dst_agent_id
AND dst_agent.guid = M.guid
INNER JOIN `rocpd_info_agent` src_agent ON src_agent.id = M.src_agent_id
AND src_agent.guid = M.guid
LEFT JOIN `rocpd_info_queue` Q ON Q.id = M.queue_id
AND Q.guid = M.guid
LEFT JOIN `rocpd_info_stream` ST ON ST.id = M.stream_id
AND ST.guid = M.guid
INNER JOIN `rocpd_event` E ON E.id = M.event_id
AND E.guid = M.guid
INNER JOIN `rocpd_info_process` P ON P.id = M.pid
AND P.guid = M.guid
INNER JOIN `rocpd_info_thread` T ON T.id = M.tid
AND T.guid = M.guid;
--
--
CREATE VIEW IF NOT EXISTS
`memory_allocations` AS
SELECT
M.id,
M.guid,
(
SELECT
string
FROM
`rocpd_string` RS
WHERE
RS.id = E.category_id
AND RS.guid = E.guid
) AS category,
M.nid,
P.pid,
T.tid,
M.start,
M.end,
(M.end - M.start) AS duration,
M.type,
M.level,
A.name AS agent_name,
A.absolute_index AS agent_abs_index,
A.logical_index AS agent_log_index,
A.type_index AS agent_type_index,
A.type AS agent_type,
M.address,
M.size,
M.queue_id,
Q.name AS queue_name,
M.stream_id,
ST.name AS stream_name,
E.stack_id,
E.parent_stack_id,
E.correlation_id AS corr_id
FROM
`rocpd_memory_allocate` M
LEFT JOIN `rocpd_info_agent` A ON M.agent_id = A.id
AND M.guid = A.guid
LEFT JOIN `rocpd_info_queue` Q ON Q.id = M.queue_id
AND Q.guid = M.guid
LEFT JOIN `rocpd_info_stream` ST ON ST.id = M.stream_id
AND ST.guid = M.guid
INNER JOIN `rocpd_event` E ON E.id = M.event_id
AND E.guid = M.guid
INNER JOIN `rocpd_info_process` P ON P.id = M.pid
AND P.guid = M.guid
INNER JOIN `rocpd_info_thread` T ON T.id = M.tid
AND P.guid = M.guid;
--
--
CREATE VIEW IF NOT EXISTS
`scratch_memory` AS
SELECT
M.id,
M.guid,
M.nid,
P.pid,
M.type AS operation,
A.name AS agent_name,
A.absolute_index AS agent_abs_index,
A.logical_index AS agent_log_index,
A.type_index AS agent_type_index,
A.type AS agent_type,
M.queue_id,
T.tid,
JSON_EXTRACT(M.extdata, '$.flags') AS alloc_flags,
M.start,
M.end,
M.size,
M.address,
E.correlation_id,
E.stack_id,
E.parent_stack_id,
E.correlation_id AS corr_id,
(
SELECT
string
FROM
`rocpd_string` RS
WHERE
RS.id = E.category_id
AND RS.guid = E.guid
) AS category,
E.extdata AS event_extdata
FROM
`rocpd_memory_allocate` M
LEFT JOIN `rocpd_info_agent` A ON M.agent_id = A.id
AND M.guid = A.guid
LEFT JOIN `rocpd_info_queue` Q ON Q.id = M.queue_id
AND Q.guid = M.guid
INNER JOIN `rocpd_event` E ON E.id = M.event_id
AND E.guid = M.guid
INNER JOIN `rocpd_info_process` P ON P.id = M.pid
AND P.guid = M.guid
INNER JOIN `rocpd_info_thread` T ON T.id = M.tid
AND T.guid = M.guid
WHERE
M.level = 'SCRATCH'
ORDER BY
M.start ASC;
--
--
CREATE VIEW IF NOT EXISTS
`counters_collection` AS
SELECT
MIN(PMC_E.id) AS id,
PMC_E.guid,
K.dispatch_id,
K.kernel_id,
E.id AS event_id,
E.correlation_id,
E.stack_id,
E.parent_stack_id,
P.pid,
T.tid,
K.agent_id,
A.absolute_index AS agent_abs_index,
A.logical_index AS agent_log_index,
A.type_index AS agent_type_index,
A.type AS agent_type,
K.queue_id,
k.grid_size_x AS grid_size_x,
k.grid_size_y AS grid_size_y,
k.grid_size_z AS grid_size_z,
(K.grid_size_x * K.grid_size_y * K.grid_size_z) AS grid_size,
S.display_name AS kernel_name,
(
SELECT
string
FROM
`rocpd_string` RS
WHERE
RS.id = K.region_name_id
AND RS.guid = K.guid
) AS kernel_region,
K.workgroup_size_x AS workgroup_size_x,
K.workgroup_size_y AS workgroup_size_y,
K.workgroup_size_z AS workgroup_size_z,
(K.workgroup_size_x * K.workgroup_size_y * K.workgroup_size_z) AS workgroup_size,
K.group_segment_size AS lds_block_size,
K.private_segment_size AS scratch_size,
S.arch_vgpr_count AS vgpr_count,
S.accum_vgpr_count,
S.sgpr_count,
PMC_I.name AS counter_name,
PMC_I.symbol AS counter_symbol,
PMC_I.component,
PMC_I.description,
PMC_I.block,
PMC_I.expression,
PMC_I.value_type,
PMC_I.id AS counter_id,
SUM(PMC_E.value) AS value,
K.start,
K.end,
PMC_I.is_constant,
PMC_I.is_derived,
(K.end - K.start) AS duration,
(
SELECT
string
FROM
`rocpd_string` RS
WHERE
RS.id = E.category_id
AND RS.guid = E.guid
) AS category,
K.nid,
E.extdata,
S.code_object_id
FROM
`rocpd_pmc_event` PMC_E
INNER JOIN `rocpd_info_pmc` PMC_I ON PMC_I.id = PMC_E.pmc_id
AND PMC_I.guid = PMC_E.guid
INNER JOIN `rocpd_event` E ON E.id = PMC_E.event_id
AND E.guid = PMC_E.guid
INNER JOIN `rocpd_kernel_dispatch` K ON K.event_id = PMC_E.event_id
AND K.guid = PMC_E.guid
INNER JOIN `rocpd_info_agent` A ON A.id = K.agent_id
AND A.guid = K.guid
INNER JOIN `rocpd_info_kernel_symbol` S ON S.id = K.kernel_id
AND S.guid = K.guid
INNER JOIN `rocpd_info_process` P ON P.id = K.pid
AND P.guid = K.guid
INNER JOIN `rocpd_info_thread` T ON T.id = K.tid
AND T.guid = K.guid
GROUP BY
PMC_E.guid,
K.dispatch_id,
PMC_I.name,
K.agent_id;
@@ -0,0 +1,3 @@
--
-- Views related to markers
--
@@ -0,0 +1,45 @@
--
-- Indexes for the various fields
--
-- string field
-- CREATE INDEX `rocpd_string{{uuid}}_string_idx` ON `rocpd_string{{uuid}}` ("string");
-- guid field
-- CREATE INDEX `rocpd_string{{uuid}}_guid_idx` ON `rocpd_string{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_info_node{{uuid}}_guid_idx` ON `rocpd_info_node{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_info_process{{uuid}}_guid_idx` ON `rocpd_info_process{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_info_thread{{uuid}}_guid_idx` ON `rocpd_info_thread{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_info_agent{{uuid}}_guid_idx` ON `rocpd_info_agent{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_info_queue{{uuid}}_guid_idx` ON `rocpd_info_queue{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_info_stream{{uuid}}_guid_idx` ON `rocpd_info_stream{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_info_pmc{{uuid}}_guid_idx` ON `rocpd_info_pmc{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_info_code_object{{uuid}}_guid_idx` ON `rocpd_info_code_object{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_info_kernel_symbol{{uuid}}_guid_idx` ON `rocpd_info_kernel_symbol{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_track{{uuid}}_guid_idx` ON `rocpd_track{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_event{{uuid}}_guid_idx` ON `rocpd_event{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_arg{{uuid}}_guid_idx` ON `rocpd_arg{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_pmc_event{{uuid}}_guid_idx` ON `rocpd_pmc_event{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_region{{uuid}}_guid_idx` ON `rocpd_region{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_sample{{uuid}}_guid_idx` ON `rocpd_sample{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_kernel_dispatch{{uuid}}_guid_idx` ON `rocpd_kernel_dispatch{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_memory_copy{{uuid}}_guid_idx` ON `rocpd_memory_copy{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_memory_allocate{{uuid}}_guid_idx` ON `rocpd_memory_allocate{{uuid}}` ("id", "guid");
-- CREATE INDEX `rocpd_event{{uuid}}_category_idx` ON `rocpd_event{{uuid}}` ("id", "guid", "category_id");
-- CREATE INDEX `rocpd_region{{uuid}}_event_idx` ON `rocpd_region{{uuid}}` ("id", "guid", "event_id");
-- CREATE INDEX `rocpd_region{{uuid}}_name_idx` ON `rocpd_region{{uuid}}` ("id", "guid", "name_id");
-- CREATE INDEX `rocpd_sample{{uuid}}_event_idx` ON `rocpd_sample{{uuid}}` ("id", "guid", "event_id");
-- CREATE INDEX `rocpd_sample{{uuid}}_track_idx` ON `rocpd_sample{{uuid}}` ("id", "guid", "track_id");
-- CREATE INDEX `rocpd_track{{uuid}}_name_idx` ON `rocpd_track{{uuid}}` ("id", "guid", "name_id");
-- CREATE INDEX `rocpd_memory_copy{{uuid}}_guid_nid_pid_idx` ON `rocpd_memory_copy{{uuid}}` ("guid", "nid", "pid");
-- CREATE INDEX `rocpd_kernel_dispatch{{uuid}}_guid_nid_pid_idx` ON `rocpd_kernel_dispatch{{uuid}}` ("guid", "nid", "pid");
-- CREATE INDEX `rocpd_region{{uuid}}_guid_idx` ON `rocpd_region{{uuid}}` ("guid", "nid", "pid");
-- CREATE INDEX `rocpd_sample{{uuid}}_guid_nid_pid_idx` ON `rocpd_sample{{uuid}}` ("guid", "nid", "pid");
-- CREATE INDEX `rocpd_region{{uuid}}_guid_idx` ON `rocpd_region{{uuid}}` ("guid");
-- CREATE INDEX `rocpd_region{{uuid}}_nid_idx` ON `rocpd_region{{uuid}}` ("nid");
-- CREATE INDEX `rocpd_region{{uuid}}_pid_idx` ON `rocpd_region{{uuid}}` ("pid");
-- CREATE INDEX `rocpd_region{{uuid}}_start_idx` ON `rocpd_region{{uuid}}` ("start");
-- CREATE INDEX `rocpd_region{{uuid}}_end_idx` ON `rocpd_region{{uuid}}` ("end");
@@ -0,0 +1,373 @@
-- Enable foreign key support for cascading
PRAGMA foreign_keys = ON;
CREATE TABLE IF NOT EXISTS
"rocpd_metadata{{uuid}}" (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"tag" TEXT NOT NULL,
"value" TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS
`rocpd_string{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"string" TEXT NOT NULL UNIQUE ON CONFLICT ABORT
);
CREATE TABLE IF NOT EXISTS
`rocpd_info_node{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"hash" BIGINT NOT NULL UNIQUE,
"machine_id" TEXT NOT NULL UNIQUE,
"system_name" TEXT,
"hostname" TEXT,
"release" TEXT,
"version" TEXT,
"hardware_name" TEXT,
"domain_name" TEXT
);
CREATE TABLE IF NOT EXISTS
`rocpd_info_process{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"nid" INTEGER NOT NULL,
"ppid" INTEGER,
"pid" INTEGER NOT NULL,
"init" BIGINT,
"fini" BIGINT,
"start" BIGINT,
"end" BIGINT,
"command" TEXT,
"environment" JSONB DEFAULT "{}" NOT NULL,
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE
);
CREATE TABLE IF NOT EXISTS
`rocpd_info_thread{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"nid" INTEGER NOT NULL,
"ppid" INTEGER,
"pid" INTEGER NOT NULL,
"tid" INTEGER NOT NULL,
"name" TEXT,
"start" BIGINT,
"end" BIGINT,
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE
);
CREATE TABLE IF NOT EXISTS
`rocpd_info_agent{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"nid" INTEGER NOT NULL,
"pid" INTEGER NOT NULL,
"type" TEXT CHECK ("type" IN ('CPU', 'GPU')),
"absolute_index" INTEGER,
"logical_index" INTEGER,
"type_index" INTEGER,
"uuid" INTEGER,
"name" TEXT,
"model_name" TEXT,
"vendor_name" TEXT,
"product_name" TEXT,
"user_name" TEXT,
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE
);
CREATE TABLE IF NOT EXISTS
`rocpd_info_queue{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"nid" INTEGER NOT NULL,
"pid" INTEGER NOT NULL,
"name" TEXT,
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE
);
CREATE TABLE IF NOT EXISTS
`rocpd_info_stream{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"nid" INTEGER NOT NULL,
"pid" INTEGER NOT NULL,
"name" TEXT,
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE
);
-- 2993533, 2269219937, 2993533
-- 2993533, 2269219937, 2993533
-- Performance monitoring counters (PMC) descriptions
CREATE TABLE IF NOT EXISTS
`rocpd_info_pmc{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"nid" INTEGER NOT NULL,
"pid" INTEGER NOT NULL,
"agent_id" INTEGER,
"target_arch" TEXT CHECK ("target_arch" IN ('CPU', 'GPU')),
"event_code" INT,
"instance_id" INTEGER,
"name" TEXT NOT NULL,
"symbol" TEXT NOT NULL,
"description" TEXT,
"long_description" TEXT DEFAULT "",
"component" TEXT,
"units" TEXT DEFAULT "",
"value_type" TEXT CHECK ("value_type" IN ('ABS', 'ACCUM', 'RELATIVE')),
"block" TEXT,
"expression" TEXT,
"is_constant" INTEGER,
"is_derived" INTEGER,
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE
);
CREATE TABLE IF NOT EXISTS
`rocpd_info_code_object{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"nid" INTEGER NOT NULL,
"pid" INTEGER NOT NULL,
"agent_id" INTEGER,
"uri" TEXT,
"load_base" BIGINT,
"load_size" BIGINT,
"load_delta" BIGINT,
"storage_type" TEXT CHECK ("storage_type" IN ('FILE', 'MEMORY')),
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE
);
CREATE TABLE IF NOT EXISTS
`rocpd_info_kernel_symbol{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"nid" INTEGER NOT NULL,
"pid" INTEGER NOT NULL,
"code_object_id" INTEGER NOT NULL,
"kernel_name" TEXT,
"display_name" TEXT,
"kernel_object" INTEGER,
"kernarg_segment_size" INTEGER,
"kernarg_segment_alignment" INTEGER,
"group_segment_size" INTEGER,
"private_segment_size" INTEGER,
"sgpr_count" INTEGER,
"arch_vgpr_count" INTEGER,
"accum_vgpr_count" INTEGER,
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (code_object_id) REFERENCES `rocpd_info_code_object{{uuid}}` (id) ON UPDATE CASCADE
);
-- Stores repetitive info for samples
CREATE TABLE IF NOT EXISTS
`rocpd_track{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"nid" INTEGER NOT NULL,
"pid" INTEGER,
"tid" INTEGER,
"name_id" INTEGER,
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (tid) REFERENCES `rocpd_info_thread{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE
);
-- Storage for a region, instant, and counter
CREATE TABLE IF NOT EXISTS
`rocpd_event{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"category_id" INTEGER,
"stack_id" INTEGER,
"parent_stack_id" INTEGER,
"correlation_id" INTEGER,
"call_stack" JSONB DEFAULT "{}" NOT NULL,
"line_info" JSONB DEFAULT "{}" NOT NULL,
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (category_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE
);
-- stores arguments for events
CREATE TABLE IF NOT EXISTS
`rocpd_arg{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"event_id" INTEGER NOT NULL,
"position" INTEGER NOT NULL,
"type" TEXT NOT NULL,
"name" TEXT NOT NULL,
"value" TEXT, -- TODO: discuss make it value_id and integer, refer to string table --
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE
);
-- Region with a start/stop on the same thread (CPU)
CREATE TABLE IF NOT EXISTS
`rocpd_pmc_event{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"event_id" INTEGER,
"pmc_id" INTEGER NOT NULL,
"value" REAL DEFAULT 0.0,
"extdata" JSONB DEFAULT "{}",
FOREIGN KEY (pmc_id) REFERENCES `rocpd_info_pmc{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE
);
-- Region with a start/stop on the same thread (CPU)
CREATE TABLE IF NOT EXISTS
`rocpd_region{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"nid" INTEGER NOT NULL,
"pid" INTEGER NOT NULL,
"tid" INTEGER NOT NULL,
"start" BIGINT NOT NULL,
"end" BIGINT NOT NULL,
"name_id" INTEGER NOT NULL,
"event_id" INTEGER,
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (tid) REFERENCES `rocpd_info_thread{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE
);
-- Instantaneous sample
CREATE TABLE IF NOT EXISTS
`rocpd_sample{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"track_id" INTEGER NOT NULL,
"timestamp" BIGINT NOT NULL,
"event_id" INTEGER,
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (track_id) REFERENCES `rocpd_track{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE
);
CREATE TABLE IF NOT EXISTS
`rocpd_kernel_dispatch{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"nid" INTEGER NOT NULL,
"pid" INTEGER NOT NULL,
"tid" INTEGER,
"agent_id" INTEGER NOT NULL,
"kernel_id" INTEGER NOT NULL,
"dispatch_id" INTEGER NOT NULL,
"queue_id" INTEGER NOT NULL,
"stream_id" INTEGER NOT NULL,
"start" BIGINT NOT NULL,
"end" BIGINT NOT NULL,
"private_segment_size" INTEGER,
"group_segment_size" INTEGER,
"workgroup_size_x" INTEGER NOT NULL,
"workgroup_size_y" INTEGER NOT NULL,
"workgroup_size_z" INTEGER NOT NULL,
"grid_size_x" INTEGER NOT NULL,
"grid_size_y" INTEGER NOT NULL,
"grid_size_z" INTEGER NOT NULL,
"region_name_id" INTEGER,
"event_id" INTEGER,
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (tid) REFERENCES `rocpd_info_thread{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (kernel_id) REFERENCES `rocpd_info_kernel_symbol{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (queue_id) REFERENCES `rocpd_info_queue{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (stream_id) REFERENCES `rocpd_info_stream{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (region_name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE
);
CREATE TABLE IF NOT EXISTS
`rocpd_memory_copy{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"nid" INTEGER NOT NULL,
"pid" INTEGER NOT NULL,
"tid" INTEGER,
"start" BIGINT NOT NULL,
"end" BIGINT NOT NULL,
"name_id" INTEGER NOT NULL,
"dst_agent_id" INTEGER,
"dst_address" INTEGER,
"src_agent_id" INTEGER,
"src_address" INTEGER,
"size" INTEGER NOT NULL,
"queue_id" INTEGER,
"stream_id" INTEGER,
"region_name_id" INTEGER,
"event_id" INTEGER,
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (tid) REFERENCES `rocpd_info_thread{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (dst_agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (src_agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (stream_id) REFERENCES `rocpd_info_stream{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (queue_id) REFERENCES `rocpd_info_queue{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (region_name_id) REFERENCES `rocpd_string{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE
);
-- Memory allocations (real memory, virtual memory, and scratch memory)
CREATE TABLE IF NOT EXISTS
`rocpd_memory_allocate{{uuid}}` (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"guid" TEXT DEFAULT "{{guid}}" NOT NULL,
"nid" INTEGER NOT NULL,
"pid" INTEGER NOT NULL,
"tid" INTEGER,
"agent_id" INTEGER,
"type" TEXT CHECK ("type" IN ('ALLOC', 'FREE', 'REALLOC', 'RECLAIM')),
"level" TEXT CHECK ("level" IN ('REAL', 'VIRTUAL', 'SCRATCH')),
"start" BIGINT NOT NULL,
"end" BIGINT NOT NULL,
"address" INTEGER,
"size" INTEGER NOT NULL,
"queue_id" INTEGER,
"stream_id" INTEGER,
"event_id" INTEGER,
"extdata" JSONB DEFAULT "{}" NOT NULL,
FOREIGN KEY (nid) REFERENCES `rocpd_info_node{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (pid) REFERENCES `rocpd_info_process{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (tid) REFERENCES `rocpd_info_thread{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (agent_id) REFERENCES `rocpd_info_agent{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (stream_id) REFERENCES `rocpd_info_stream{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (queue_id) REFERENCES `rocpd_info_queue{{uuid}}` (id) ON UPDATE CASCADE,
FOREIGN KEY (event_id) REFERENCES `rocpd_event{{uuid}}` (id) ON UPDATE CASCADE
);
INSERT INTO
`rocpd_metadata{{uuid}}` ("tag", "value")
VALUES
("schema_version", "3"),
("uuid", "{{uuid}}"),
("guid", "{{guid}}");
@@ -0,0 +1,139 @@
CREATE VIEW IF NOT EXISTS
`rocpd_metadata` AS
SELECT
*
FROM
`rocpd_metadata{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_string` AS
SELECT
*
FROM
`rocpd_string{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_info_node` AS
SELECT
*
FROM
`rocpd_info_node{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_info_process` AS
SELECT
*
FROM
`rocpd_info_process{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_info_thread` AS
SELECT
*
FROM
`rocpd_info_thread{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_info_agent` AS
SELECT
*
FROM
`rocpd_info_agent{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_info_queue` AS
SELECT
*
FROM
`rocpd_info_queue{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_info_stream` AS
SELECT
*
FROM
`rocpd_info_stream{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_info_pmc` AS
SELECT
*
FROM
`rocpd_info_pmc{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_info_code_object` AS
SELECT
*
FROM
`rocpd_info_code_object{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_info_kernel_symbol` AS
SELECT
*
FROM
`rocpd_info_kernel_symbol{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_track` AS
SELECT
*
FROM
`rocpd_track{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_event` AS
SELECT
*
FROM
`rocpd_event{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_arg` AS
SELECT
*
FROM
`rocpd_arg{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_pmc_event` AS
SELECT
*
FROM
`rocpd_pmc_event{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_region` AS
SELECT
*
FROM
`rocpd_region{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_sample` AS
SELECT
*
FROM
`rocpd_sample{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_kernel_dispatch` AS
SELECT
*
FROM
`rocpd_kernel_dispatch{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_memory_copy` AS
SELECT
*
FROM
`rocpd_memory_copy{{uuid}}`;
CREATE VIEW IF NOT EXISTS
`rocpd_memory_allocate` AS
SELECT
*
FROM
`rocpd_memory_allocate{{uuid}}`;
@@ -0,0 +1,376 @@
--
-- Useful summary views
--
--
-- Sorted list of kernels which consume the most overall time
CREATE VIEW IF NOT EXISTS
`top_kernels` AS
SELECT
S.display_name AS name,
COUNT(K.kernel_id) AS total_calls,
SUM(K.end - K.start) / 1000.0 AS total_duration,
(SUM(K.end - K.start) / COUNT(K.kernel_id)) / 1000.0 AS average,
SUM(K.end - K.start) * 100.0 / (
SELECT
SUM(A.end - A.start)
FROM
`rocpd_kernel_dispatch` A
) AS percentage
FROM
`rocpd_kernel_dispatch` K
INNER JOIN `rocpd_info_kernel_symbol` S ON S.id = K.kernel_id
AND S.guid = K.guid
GROUP BY
name
ORDER BY
total_duration DESC;
--
-- GPU utilization metrics including kernels and memory copy operations
CREATE VIEW IF NOT EXISTS
`busy` AS
SELECT
A.agent_id,
AG.type,
GpuTime,
WallTime,
GpuTime * 1.0 / WallTime AS Busy
FROM
(
SELECT
agent_id,
guid,
SUM(END - start) AS GpuTime
FROM
(
SELECT
agent_id,
guid,
END,
start
FROM
`rocpd_kernel_dispatch`
UNION ALL
SELECT
dst_agent_id AS agent_id,
guid,
END,
start
FROM
`rocpd_memory_copy`
)
GROUP BY
agent_id,
guid
) A
INNER JOIN (
SELECT
MAX(END) - MIN(start) AS WallTime
FROM
(
SELECT
END,
start
FROM
`rocpd_kernel_dispatch`
UNION ALL
SELECT
END,
start
FROM
`rocpd_memory_copy`
)
) W ON 1 = 1
INNER JOIN `rocpd_info_agent` AG ON AG.id = A.agent_id
AND AG.guid = A.guid;
--
-- Overall performance summary including kernels and memory copy operations
CREATE VIEW
`top` AS
SELECT
name,
COUNT(*) AS total_calls,
SUM(duration) / 1000.0 AS total_duration,
(SUM(duration) / COUNT(*)) / 1000.0 AS average,
SUM(duration) * 100.0 / total_time AS percentage
FROM
(
-- Kernel operations
SELECT
ks.display_name AS name,
(kd.end - kd.start) AS duration
FROM
`rocpd_kernel_dispatch` kd
INNER JOIN `rocpd_info_kernel_symbol` ks ON kd.kernel_id = ks.id
AND kd.guid = ks.guid
UNION ALL
-- Memory operations
SELECT
rs.string AS name,
(END - start) AS duration
FROM
`rocpd_memory_copy` mc
INNER JOIN `rocpd_string` rs ON rs.id = mc.name_id
AND rs.guid = mc.guid
UNION ALL
-- Regions
SELECT
rs.string AS name,
(END - start) AS duration
FROM
`rocpd_region` rr
INNER JOIN `rocpd_string` rs ON rs.id = rr.name_id
AND rs.guid = rr.guid
) operations
CROSS JOIN (
SELECT
SUM(END - start) AS total_time
FROM
(
SELECT
END,
start
FROM
`rocpd_kernel_dispatch`
UNION ALL
SELECT
END,
start
FROM
`rocpd_memory_copy`
UNION ALL
SELECT
END,
start
FROM
`rocpd_region`
)
) TOTAL
GROUP BY
name
ORDER BY
total_duration DESC;
-- Kernel summary by name
CREATE VIEW
`kernel_summary` AS
WITH
avg_data AS (
SELECT
name,
AVG(duration) AS avg_duration
FROM
`kernels`
GROUP BY
name
),
aggregated_data AS (
SELECT
K.name,
COUNT(*) AS calls,
SUM(K.duration) AS total_duration,
SUM(CAST(K.duration AS REAL) * CAST(K.duration AS REAL)) AS sqr_duration,
A.avg_duration AS average_duration,
MIN(K.duration) AS min_duration,
MAX(K.duration) AS max_duration,
SUM(CAST((K.duration - A.avg_duration) AS REAL) * CAST((K.duration - A.avg_duration) AS REAL)) / (COUNT(*) - 1) AS variance_duration,
SQRT(
SUM(CAST((K.duration - A.avg_duration) AS REAL) * CAST((K.duration - A.avg_duration) AS REAL)) / (COUNT(*) - 1)
) AS std_dev_duration
FROM
`kernels` K
JOIN avg_data A ON K.name = A.name
GROUP BY
K.name
),
total_duration AS (
SELECT
SUM(total_duration) AS grand_total_duration
FROM
aggregated_data
)
SELECT
AD.name AS name,
AD.calls,
AD.total_duration AS "DURATION (nsec)",
AD.sqr_duration AS "SQR (nsec)",
AD.average_duration AS "AVERAGE (nsec)",
(CAST(AD.total_duration AS REAL) / TD.grand_total_duration) * 100 AS "PERCENT (INC)",
AD.min_duration AS "MIN (nsec)",
AD.max_duration AS "MAX (nsec)",
AD.variance_duration AS "VARIANCE",
AD.std_dev_duration AS "STD_DEV"
FROM
aggregated_data AD
CROSS JOIN total_duration TD;
--
-- Kernel summary by region name
CREATE VIEW
`kernel_summary_region` AS
WITH
avg_data AS (
SELECT
region,
AVG(duration) AS avg_duration
FROM
`kernels`
GROUP BY
region
),
aggregated_data AS (
SELECT
K.region AS name,
COUNT(*) AS calls,
SUM(K.duration) AS total_duration,
SUM(CAST(K.duration AS REAL) * CAST(K.duration AS REAL)) AS sqr_duration,
A.avg_duration AS average_duration,
MIN(K.duration) AS min_duration,
MAX(K.duration) AS max_duration,
SUM(CAST((K.duration - A.avg_duration) AS REAL) * CAST((K.duration - A.avg_duration) AS REAL)) / (COUNT(*) - 1) AS variance_duration,
SQRT(
SUM(CAST((K.duration - A.avg_duration) AS REAL) * CAST((K.duration - A.avg_duration) AS REAL)) / (COUNT(*) - 1)
) AS std_dev_duration
FROM
`kernels` K
JOIN avg_data A ON K.region = A.region
GROUP BY
K.region
),
total_duration AS (
SELECT
SUM(total_duration) AS grand_total_duration
FROM
aggregated_data
)
SELECT
AD.name AS name,
AD.calls,
AD.total_duration AS "DURATION (nsec)",
AD.sqr_duration AS "SQR (nsec)",
AD.average_duration AS "AVERAGE (nsec)",
(CAST(AD.total_duration AS REAL) / TD.grand_total_duration) * 100 AS "PERCENT (INC)",
AD.min_duration AS "MIN (nsec)",
AD.max_duration AS "MAX (nsec)",
AD.variance_duration AS "VARIANCE",
AD.std_dev_duration AS "STD_DEV"
FROM
aggregated_data AD
CROSS JOIN total_duration TD;
--
-- Memory copy summary
CREATE VIEW
`memory_copy_summary` AS
WITH
avg_data AS (
SELECT
name,
AVG(duration) AS avg_duration
FROM
`memory_copies`
GROUP BY
name
),
aggregated_data AS (
SELECT
MC.name,
COUNT(*) AS calls,
SUM(MC.duration) AS total_duration,
SUM(CAST(MC.duration AS REAL) * CAST(MC.duration AS REAL)) AS sqr_duration,
A.avg_duration AS average_duration,
MIN(MC.duration) AS min_duration,
MAX(MC.duration) AS max_duration,
SUM(
CAST((MC.duration - A.avg_duration) AS REAL) * CAST((MC.duration - A.avg_duration) AS REAL)
) / (COUNT(*) - 1) AS variance_duration,
SQRT(
SUM(
CAST((MC.duration - A.avg_duration) AS REAL) * CAST((MC.duration - A.avg_duration) AS REAL)
) / (COUNT(*) - 1)
) AS std_dev_duration
FROM
`memory_copies` MC
JOIN avg_data A ON MC.name = A.name
GROUP BY
MC.name
),
total_duration AS (
SELECT
SUM(total_duration) AS grand_total_duration
FROM
aggregated_data
)
SELECT
AD.name AS name,
AD.calls,
AD.total_duration AS "DURATION (nsec)",
AD.sqr_duration AS "SQR (nsec)",
AD.average_duration AS "AVERAGE (nsec)",
(CAST(AD.total_duration AS REAL) / TD.grand_total_duration) * 100 AS "PERCENT (INC)",
AD.min_duration AS "MIN (nsec)",
AD.max_duration AS "MAX (nsec)",
AD.variance_duration AS "VARIANCE",
AD.std_dev_duration AS "STD_DEV"
FROM
aggregated_data AD
CROSS JOIN total_duration TD;
--
-- Memory allocation summary
CREATE VIEW
`memory_allocation_summary` AS
WITH
avg_data AS (
SELECT
type AS name,
AVG(duration) AS avg_duration
FROM
`memory_allocations`
GROUP BY
type
),
aggregated_data AS (
SELECT
MA.type AS name,
COUNT(*) AS calls,
SUM(MA.duration) AS total_duration,
SUM(CAST(MA.duration AS REAL) * CAST(MA.duration AS REAL)) AS sqr_duration,
A.avg_duration AS average_duration,
MIN(MA.duration) AS min_duration,
MAX(MA.duration) AS max_duration,
SUM(
CAST((MA.duration - A.avg_duration) AS REAL) * CAST((MA.duration - A.avg_duration) AS REAL)
) / (COUNT(*) - 1) AS variance_duration,
SQRT(
SUM(
CAST((MA.duration - A.avg_duration) AS REAL) * CAST((MA.duration - A.avg_duration) AS REAL)
) / (COUNT(*) - 1)
) AS std_dev_duration
FROM
`memory_allocations` MA
JOIN avg_data A ON MA.type = A.name
GROUP BY
MA.type
),
total_duration AS (
SELECT
SUM(total_duration) AS grand_total_duration
FROM
aggregated_data
)
SELECT
'MEMORY_ALLOCATION_' || AD.name AS name,
AD.calls,
AD.total_duration AS "DURATION (nsec)",
AD.sqr_duration AS "SQR (nsec)",
AD.average_duration AS "AVERAGE (nsec)",
(CAST(AD.total_duration AS REAL) / TD.grand_total_duration) * 100 AS "PERCENT (INC)",
AD.min_duration AS "MIN (nsec)",
AD.max_duration AS "MAX (nsec)",
AD.variance_duration AS "VARIANCE",
AD.std_dev_duration AS "STD_DEV"
FROM
aggregated_data AD
CROSS JOIN total_duration TD;
@@ -0,0 +1,57 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "insert_query_builders.hpp"
namespace rocprofsys
{
namespace rocpd
{
namespace data_storage
{
namespace queries
{
struct table_insert_query
{
table_insert_query()
: _query_columns_builder{ _ss }
{}
query_builders::query_columns_builder& set_table_name(const std::string& tableName)
{
_ss.str("");
_ss << "INSERT INTO " << tableName << " ";
return _query_columns_builder;
}
private:
std::stringstream _ss;
query_builders::query_columns_builder _query_columns_builder;
};
} // namespace queries
} // namespace data_storage
} // namespace rocpd
} // namespace rocprofsys
@@ -0,0 +1,99 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "json.hpp"
#include <sstream>
namespace rocpd
{
std::shared_ptr<json>
json::create()
{
return std::shared_ptr<json>(new json());
}
void
json::set(const std::string& key, const json_value& value)
{
data[key] = std::make_shared<json_value>(value);
}
std::string
json::to_string() const
{
std::ostringstream oss;
oss << "{";
bool first = true;
for(const auto& [key, value] : data)
{
if(!first) oss << ", ";
first = false;
oss << "\"" << key << "\": " << stringify(value);
}
oss << "}";
return oss.str();
}
std::string
json::stringify(const std::shared_ptr<json_value>& value)
{
std::ostringstream oss;
std::visit(
[&oss](auto&& arg) {
using T = std::decay_t<decltype(arg)>;
if constexpr(std::is_same_v<T, std::string>)
oss << "\"" << arg << "\"";
else if constexpr(std::is_same_v<T, bool>)
oss << (arg ? "true" : "false");
else if constexpr(std::is_same_v<T, std::nullptr_t>)
oss << "null";
else if constexpr(std::is_same_v<T, std::vector<json>>)
{
oss << "[";
bool first = true;
for(const auto& item : arg)
{
if(!first) oss << ", ";
first = false;
oss << item.to_string();
}
oss << "]";
}
else if constexpr(std::is_same_v<T, std::shared_ptr<json>>)
{
oss << arg->to_string();
}
else
{
// handle int + double
oss << arg;
}
},
*value);
return oss.str();
}
} // namespace rocpd
@@ -0,0 +1,57 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <variant>
#include <vector>
namespace rocpd
{
class json
{
public:
static std::shared_ptr<json> create();
using json_value =
std::variant<std::string, int, double, long long, bool, std::vector<json>,
std::nullptr_t, std::shared_ptr<json>>;
void set(const std::string& key, const json_value& value);
std::string to_string() const;
private:
json() = default;
private:
static std::string stringify(const std::shared_ptr<json_value>& value);
private:
std::unordered_map<std::string, std::shared_ptr<json_value>> data;
};
} // namespace rocpd
@@ -27,17 +27,22 @@
#include "api.hpp"
#include "common/setup.hpp"
#include "common/static_object.hpp"
#include "core/agent.hpp"
#include "core/agent_manager.hpp"
#include "core/categories.hpp"
#include "core/components/fwd.hpp"
#include "core/concepts.hpp"
#include "core/config.hpp"
#include "core/constraint.hpp"
#include "core/cpu.hpp"
#include "core/debug.hpp"
#include "core/defines.hpp"
#include "core/dynamic_library.hpp"
#include "core/gpu.hpp"
#include "core/locking.hpp"
#include "core/node_info.hpp"
#include "core/perfetto_fwd.hpp"
#include "core/rocpd/data_processor.hpp"
#include "core/timemory.hpp"
#include "core/utility.hpp"
#include "library/causal/data.hpp"
@@ -77,6 +82,10 @@
#include <timemory/utility/join.hpp>
#include <timemory/utility/procfs/maps.hpp>
#if ROCPROFSYS_USE_ROCM > 0
# include <rocprofiler-sdk/agent.h>
#endif
#include <atomic>
#include <chrono>
#include <csignal>
@@ -86,6 +95,7 @@
#include <pthread.h>
#include <stdexcept>
#include <string_view>
#include <unistd.h>
#include <utility>
using namespace rocprofsys;
@@ -297,6 +307,66 @@ namespace
bool _set_mpi_called = false;
std::function<void()> _preinit_callback = []() { get_preinit_bundle()->start(); };
std::vector<std::string>
read_command_line(pid_t _pid)
{
auto _cmdline = std::vector<std::string>{};
auto fcmdline = std::stringstream{};
fcmdline << "/proc/" << _pid << "/cmdline";
auto ifs = std::ifstream{ fcmdline.str().c_str() };
if(ifs)
{
std::string sarg;
while(std::getline(ifs, sarg, '\0'))
{
_cmdline.push_back(sarg);
}
ifs.close();
}
return _cmdline;
}
void
rocprofsys_preinit_rocpd()
{
auto& _data_processor = rocpd::data_processor::get_instance();
const auto& _n_info = node_info::get_instance();
auto _cmd_line = read_command_line(getpid());
auto& _agent_manager = agent_manager::get_instance();
if(_cmd_line.empty())
{
_cmd_line.emplace_back("rocprofiler-systems");
}
_data_processor.insert_node_info(
_n_info.id, _n_info.hash, _n_info.machine_id.c_str(), _n_info.system_name.c_str(),
_n_info.node_name.c_str(), _n_info.release.c_str(), _n_info.version.c_str(),
_n_info.machine.c_str(), _n_info.domain_name.c_str());
_data_processor.insert_process_info(_n_info.id, getppid(), getpid(), 0, 0, 0, 0,
_cmd_line[0].c_str(), "{}");
const auto& agents = _agent_manager.get_agents();
for(const auto& rocpd_agent : agents)
{
auto _base_id = rocpd::data_processor::get_instance().insert_agent(
_n_info.id, getpid(),
((rocpd_agent->type == agent_type::GPU) ? "GPU" : "CPU"),
rocpd_agent->node_id, rocpd_agent->logical_node_id,
rocpd_agent->logical_node_type_id, rocpd_agent->id, rocpd_agent->name.c_str(),
rocpd_agent->model_name.c_str(), rocpd_agent->vendor_name.c_str(),
rocpd_agent->product_name.c_str(), "");
rocpd_agent->base_id = _base_id;
}
}
void
rocprofsys_preinit_cpu_agents()
{
cpu::query_cpu_agents();
}
void
rocprofsys_preinit_hidden()
{
@@ -423,17 +493,17 @@ rocprofsys_init_tooling_hidden(void)
{ ROCPROFSYS_DEFAULT_ROCM_PATH }) };
#endif
static bool _once = false;
static auto _debug_init = get_debug_init();
static pid_t _once = 0;
static auto _debug_init = get_debug_init();
ROCPROFSYS_CONDITIONAL_BASIC_PRINT_F(_debug_init, "State is %s...\n",
std::to_string(get_state()).c_str());
if(get_state() != State::PreInit || get_state() == State::Init || _once)
if(get_state() != State::PreInit || get_state() == State::Init || _once == getpid())
{
return false;
}
_once = true;
_once = getpid();
ROCPROFSYS_SCOPED_THREAD_STATE(ThreadState::Internal);
@@ -460,6 +530,12 @@ rocprofsys_init_tooling_hidden(void)
auto _dtor = scope::destructor{ []() {
// if set to finalized, don't continue
if(get_state() > State::Active) return;
#if !(ROCPROFSYS_USE_ROCM > 0)
rocprofsys_preinit_cpu_agents();
#endif
if(get_use_rocpd()) rocprofsys_preinit_rocpd();
if(get_use_process_sampling())
{
ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
@@ -681,7 +757,6 @@ rocprofsys_finalize_hidden(void)
threading::remove_callback(&ensure_initialization);
bool _is_child = is_child_process();
set_thread_state(ThreadState::Completed);
// return if not active
@@ -693,6 +768,18 @@ rocprofsys_finalize_hidden(void)
}
else if(_is_child)
{
#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0
// Flush buffered traces in case of child process
if(get_use_rocm())
{
ROCPROFSYS_VERBOSE_F(1, "Shutting down ROCm...\n");
rocprofiler_sdk::shutdown();
}
#endif
if(get_use_rocpd())
{
rocpd::data_processor::get_instance().flush();
}
set_state(State::Finalized);
std::quick_exit(EXIT_SUCCESS);
return;
@@ -983,6 +1070,10 @@ rocprofsys_finalize_hidden(void)
[](int) {});
common::destroy_static_objects();
if(get_use_rocpd())
{
rocpd::data_processor::get_instance().flush();
}
}
//======================================================================================//
@@ -26,18 +26,22 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#include "core/agent.hpp"
#if defined(NDEBUG)
# undef NDEBUG
#endif
#include "library/amd_smi.hpp"
#include "core/agent_manager.hpp"
#include "core/common.hpp"
#include "core/components/fwd.hpp"
#include "core/config.hpp"
#include "core/debug.hpp"
#include "core/gpu.hpp"
#include "core/node_info.hpp"
#include "core/perfetto.hpp"
#include "core/rocpd/data_processor.hpp"
#include "core/state.hpp"
#include "library/amd_smi.hpp"
#include "library/runtime.hpp"
#include "library/thread_info.hpp"
@@ -49,13 +53,11 @@
#include <timemory/utility/locking.hpp>
#include <cassert>
#include <chrono>
#include <ios>
#include <optional>
#include <sstream>
#include <stdexcept>
#include <string>
#include <sys/resource.h>
#include <thread>
#define ROCPROFSYS_AMD_SMI_CALL(...) \
::rocprofsys::amd_smi::check_error(__FILE__, __LINE__, __VA_ARGS__)
@@ -69,6 +71,115 @@ using sampler_instances = thread_data<bundle_t, category::amd_smi>;
namespace
{
int64_t
get_tid()
{
static thread_local auto _v = threading::get_id();
return _v;
}
rocpd::data_processor&
get_data_processor()
{
return rocpd::data_processor::get_instance();
}
void
rocpd_initialize_category()
{
get_data_processor().insert_category(ROCPROFSYS_CATEGORY_AMD_SMI,
trait::name<category::amd_smi>::value);
}
void
rocpd_initialize_smi_tracks()
{
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
const auto thread_id = std::nullopt; // Internal thread ID for amd-smi
data_processor.insert_track(trait::name<category::amd_smi_mm_busy>::value, n_info.id,
getpid(), thread_id);
data_processor.insert_track(trait::name<category::amd_smi_power>::value, n_info.id,
getpid(), thread_id);
data_processor.insert_track(trait::name<category::amd_smi_temp>::value, n_info.id,
getpid(), thread_id);
data_processor.insert_track(trait::name<category::amd_smi_memory_usage>::value,
n_info.id, getpid(), thread_id);
}
void
rocpd_initialize_smi_pmc(size_t gpu_id)
{
auto& data_processor = get_data_processor();
// find the proper values for a following definitions
size_t EVENT_CODE = 0;
size_t INSTANCE_ID = 0;
const char* LONG_DESCRIPTION = "";
const char* COMPONENT = "";
const char* BLOCK = "";
const char* EXPRESSION = "";
const char* CELSIUS_DEGREES = "\u00B0C";
auto ni = node_info::get_instance();
const auto* TARGET_ARCH = "GPU";
auto& _agent_manager = agent_manager::get_instance();
auto base_id = _agent_manager.get_agent_by_id(gpu_id, agent_type::GPU).base_id;
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::amd_smi_mm_busy>::value, "Busy",
trait::name<category::amd_smi_mm_busy>::description, LONG_DESCRIPTION, COMPONENT,
"%", "ABS", BLOCK, EXPRESSION, 0, 0);
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::amd_smi_temp>::value, "Temp",
trait::name<category::amd_smi_temp>::description, LONG_DESCRIPTION, COMPONENT,
CELSIUS_DEGREES, "ABS", BLOCK, EXPRESSION, 0, 0);
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::amd_smi_power>::value, "Pow",
trait::name<category::amd_smi_power>::description, LONG_DESCRIPTION, COMPONENT,
"w", "ABS", BLOCK, EXPRESSION, 0, 0);
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::amd_smi_memory_usage>::value, "MemUsg",
trait::name<category::amd_smi_memory_usage>::description, LONG_DESCRIPTION,
COMPONENT, "MB", "ABS", BLOCK, EXPRESSION, 0, 0);
}
void
rocpd_process_smi_pmc_events(const uint32_t device_id, const amd_smi::settings& settings,
uint64_t timestamp, double busy, double temp, double power,
double usage)
{
if(!(settings.busy || settings.temp || settings.power || settings.mem_usage)) return;
auto& data_processor = get_data_processor();
auto event_id = data_processor.insert_event(ROCPROFSYS_CATEGORY_AMD_SMI, 0, 0, 0);
auto& _agent_manager = agent_manager::get_instance();
auto base_id = _agent_manager.get_agent_by_id(device_id, agent_type::GPU).base_id;
auto insert_event_and_sample = [&](bool enabled, const char* name, double value) {
if(!enabled) return;
data_processor.insert_pmc_event(event_id, base_id, name, value);
data_processor.insert_sample(name, timestamp, event_id);
};
insert_event_and_sample(settings.busy, trait::name<category::amd_smi_mm_busy>::value,
busy);
insert_event_and_sample(settings.temp, trait::name<category::amd_smi_temp>::value,
temp);
insert_event_and_sample(settings.power, trait::name<category::amd_smi_power>::value,
power);
insert_event_and_sample(settings.mem_usage,
trait::name<category::amd_smi_memory_usage>::value, usage);
}
auto&
get_settings(uint32_t _dev_id)
{
@@ -140,6 +251,8 @@ data::data(uint32_t _dev_id) { sample(_dev_id); }
void
data::sample(uint32_t _dev_id)
{
if(is_child_process()) return;
auto _ts = tim::get_clock_real_now<size_t, std::nano>();
assert(_ts < std::numeric_limits<int64_t>::max());
amdsmi_gpu_metrics_t _gpu_metrics;
@@ -168,7 +281,6 @@ data::sample(uint32_t _dev_id)
}
amdsmi_processor_handle sample_handle = gpu::get_handle_from_id(_dev_id);
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).busy, amdsmi_get_gpu_activity,
sample_handle, &m_busy_perc);
ROCPROFSYS_AMDSMI_GET(get_settings(m_dev_id).temp, amdsmi_get_temp_metric,
@@ -276,10 +388,15 @@ config()
*_bundle_data.at(i) = unique_ptr_t<bundle_t>{ new bundle_t{} };
}
}
data::get_initial().resize(data::device_count);
for(auto itr : data::device_list)
data::get_initial().at(itr).sample(itr);
if(get_use_rocpd())
{
rocpd_initialize_category();
rocpd_initialize_smi_tracks();
}
}
void
@@ -363,192 +480,194 @@ data::post_process(uint32_t _dev_id)
auto _settings = get_settings(_dev_id);
auto _process_perfetto = [&]() {
constexpr uint8_t AMD_SMI_METRICS_COUNT = 8;
auto _idx = std::array<uint64_t, AMD_SMI_METRICS_COUNT>{};
auto use_perfetto = get_use_perfetto();
auto use_rocpd = get_use_rocpd();
if(use_rocpd)
{
rocpd_initialize_smi_pmc(_dev_id);
}
for(auto& itr : _amd_smi)
{
using counter_track = perfetto_counter_track<data>;
if(itr.m_dev_id != _dev_id) continue;
uint64_t _ts = itr.m_ts;
if(!_thread_info->is_valid_time(_ts)) continue;
double _gfxbusy = itr.m_busy_perc.gfx_activity;
double _umcbusy = itr.m_busy_perc.umc_activity;
double _mmbusy = itr.m_busy_perc.mm_activity;
double _temp = itr.m_temp;
double _power = itr.m_power.current_socket_power;
double _usage = itr.m_mem_usage / static_cast<double>(units::megabyte);
auto setup_perfetto_counter_tracks = [&]() {
if(counter_track::exists(_dev_id)) return;
auto addendum = [&](const char* _v) {
return JOIN(" ", "GPU", _v, JOIN("", '[', _dev_id, ']'), "(S)");
};
auto addendum_blk = [&](std::size_t _i, const char* _metric,
std::size_t xcp_idx = SIZE_MAX) {
if(xcp_idx != SIZE_MAX)
{
return JOIN(
" ", "GPU", JOIN("", '[', _dev_id, ']'), _metric,
JOIN("", "XCP_", xcp_idx, ": [", (_i < 10 ? "0" : ""), _i, ']'),
"(S)");
}
else
{
return JOIN(" ", "GPU", JOIN("", '[', _dev_id, ']'), _metric,
JOIN("", "[", (_i < 10 ? "0" : ""), _i, ']'), "(S)");
}
};
{
_idx.fill(_idx.size());
uint64_t nidx = 0;
if(_settings.busy)
{
_idx.at(0) = nidx++; // GFX Busy
_idx.at(1) = nidx++; // UMC Busy
_idx.at(2) = nidx++; // MM Busy
counter_track::emplace(_dev_id, addendum("GFX Busy"), "%");
counter_track::emplace(_dev_id, addendum("UMC Busy"), "%");
counter_track::emplace(_dev_id, addendum("MM Busy"), "%");
}
if(_settings.temp) _idx.at(3) = nidx++;
if(_settings.power) _idx.at(4) = nidx++;
if(_settings.mem_usage) _idx.at(5) = nidx++;
if(_settings.vcn_activity) _idx.at(6) = nidx++;
if(_settings.jpeg_activity) _idx.at(7) = nidx++;
}
for(auto& itr : _amd_smi)
{
using counter_track = perfetto_counter_track<data>;
if(itr.m_dev_id != _dev_id) continue;
if(!counter_track::exists(_dev_id))
if(_settings.temp)
{
auto addendum = [&](const char* _v) {
return JOIN(" ", "GPU", _v, JOIN("", '[', _dev_id, ']'), "(S)");
};
auto addendum_blk = [&](std::size_t _i, const char* _metric,
std::size_t xcp_idx = SIZE_MAX) {
if(xcp_idx != SIZE_MAX)
{
return JOIN(" ", "GPU", JOIN("", '[', _dev_id, ']'), _metric,
JOIN("", "XCP_", xcp_idx, ": [", (_i < 10 ? "0" : ""),
_i, ']'),
"(S)");
}
else
{
return JOIN(" ", "GPU", JOIN("", '[', _dev_id, ']'), _metric,
JOIN("", "[", (_i < 10 ? "0" : ""), _i, ']'), "(S)");
}
};
if(_settings.busy)
counter_track::emplace(_dev_id, addendum("Temperature"), "deg C");
}
if(_settings.power)
{
counter_track::emplace(_dev_id, addendum("Current Power"), "watts");
}
if(_settings.mem_usage)
{
counter_track::emplace(_dev_id, addendum("Memory Usage"), "megabytes");
}
if(_settings.vcn_activity)
{
if(itr.m_xcp_metrics.empty())
{
counter_track::emplace(_dev_id, addendum("GFX Busy"), "%");
counter_track::emplace(_dev_id, addendum("UMC Busy"), "%");
counter_track::emplace(_dev_id, addendum("MM Busy"), "%");
ROCPROFSYS_VERBOSE(
1, "No VCN activity data collected from device %u\n", _dev_id);
}
if(_settings.temp)
counter_track::emplace(_dev_id, addendum("Temperature"), "deg C");
if(_settings.power)
counter_track::emplace(_dev_id, addendum("Current Power"), "watts");
if(_settings.mem_usage)
counter_track::emplace(_dev_id, addendum("Memory Usage"),
"megabytes");
if(_settings.vcn_activity)
else if(gpu::is_vcn_activity_supported(_dev_id))
{
if(itr.m_xcp_metrics.empty())
{
ROCPROFSYS_VERBOSE(
1, "No VCN activity data collected from device %u\n",
_dev_id);
}
else if(gpu::is_vcn_activity_supported(_dev_id))
{
// For VCN activity, use simple indexing
for(std::size_t i = 0;
i < std::size(itr.m_xcp_metrics[0].vcn_busy); ++i)
counter_track::emplace(_dev_id,
addendum_blk(i, "VCN Activity"), "%");
}
else
{
for(std::size_t xcp = 0; xcp < std::size(itr.m_xcp_metrics);
++xcp)
{
for(std::size_t i = 0;
i < std::size(itr.m_xcp_metrics[xcp].vcn_busy); ++i)
{
counter_track::emplace(
_dev_id, addendum_blk(i, "VCN Activity", xcp), "%");
}
}
}
// For VCN activity, use simple indexing
for(std::size_t i = 0; i < std::size(itr.m_xcp_metrics[0].vcn_busy);
++i)
counter_track::emplace(_dev_id, addendum_blk(i, "VCN Activity"),
"%");
}
if(_settings.jpeg_activity)
else
{
if(itr.m_xcp_metrics.empty())
for(std::size_t xcp = 0; xcp < std::size(itr.m_xcp_metrics); ++xcp)
{
ROCPROFSYS_VERBOSE(
1, "No JPEG activity data collected from device %u\n",
_dev_id);
}
else if(gpu::is_jpeg_activity_supported(_dev_id))
{
// For JPEG activity, use simple indexing
for(std::size_t i = 0;
i < std::size(itr.m_xcp_metrics[0].jpeg_busy); ++i)
counter_track::emplace(_dev_id,
addendum_blk(i, "JPEG Activity"), "%");
}
else
{
for(std::size_t xcp = 0; xcp < std::size(itr.m_xcp_metrics);
++xcp)
i < std::size(itr.m_xcp_metrics[xcp].vcn_busy); ++i)
{
for(std::size_t i = 0;
i < std::size(itr.m_xcp_metrics[xcp].jpeg_busy); ++i)
counter_track::emplace(
_dev_id, addendum_blk(i, "JPEG Activity", xcp), "%");
counter_track::emplace(
_dev_id, addendum_blk(i, "VCN Activity", xcp), "%");
}
}
}
}
uint64_t _ts = itr.m_ts;
if(!_thread_info->is_valid_time(_ts)) continue;
if(_settings.jpeg_activity)
{
if(itr.m_xcp_metrics.empty())
{
ROCPROFSYS_VERBOSE(
1, "No JPEG activity data collected from device %u\n", _dev_id);
}
else if(gpu::is_jpeg_activity_supported(_dev_id))
{
for(std::size_t i = 0; i < std::size(itr.m_xcp_metrics[0].jpeg_busy);
++i)
counter_track::emplace(_dev_id, addendum_blk(i, "JPEG Activity"),
"%");
}
else
{
for(std::size_t xcp = 0; xcp < std::size(itr.m_xcp_metrics); ++xcp)
{
for(std::size_t i = 0;
i < std::size(itr.m_xcp_metrics[xcp].jpeg_busy); ++i)
counter_track::emplace(
_dev_id, addendum_blk(i, "JPEG Activity", xcp), "%");
}
}
}
};
double _gfxbusy = itr.m_busy_perc.gfx_activity;
double _umcbusy = itr.m_busy_perc.umc_activity;
double _mmbusy = itr.m_busy_perc.mm_activity;
double _temp = itr.m_temp;
double _power = itr.m_power.current_socket_power;
double _usage = itr.m_mem_usage / static_cast<double>(units::megabyte);
auto write_perfetto_metrics = [&]() {
size_t track_index = 0;
if(_settings.busy)
{
TRACE_COUNTER("device_busy_gfx", counter_track::at(_dev_id, _idx.at(0)),
_ts, _gfxbusy);
TRACE_COUNTER("device_busy_umc", counter_track::at(_dev_id, _idx.at(1)),
_ts, _umcbusy);
TRACE_COUNTER("device_busy_mm", counter_track::at(_dev_id, _idx.at(2)),
TRACE_COUNTER("device_busy_gfx",
counter_track::at(_dev_id, track_index++), _ts, _gfxbusy);
TRACE_COUNTER("device_busy_umc",
counter_track::at(_dev_id, track_index++), _ts, _umcbusy);
TRACE_COUNTER("device_busy_mm", counter_track::at(_dev_id, track_index++),
_ts, _mmbusy);
}
if(_settings.temp)
TRACE_COUNTER("device_temp", counter_track::at(_dev_id, _idx.at(3)), _ts,
_temp);
{
TRACE_COUNTER("device_temp", counter_track::at(_dev_id, track_index++),
_ts, _temp);
}
if(_settings.power)
TRACE_COUNTER("device_power", counter_track::at(_dev_id, _idx.at(4)), _ts,
_power);
{
TRACE_COUNTER("device_power", counter_track::at(_dev_id, track_index++),
_ts, _power);
}
if(_settings.mem_usage)
{
TRACE_COUNTER("device_memory_usage",
counter_track::at(_dev_id, _idx.at(5)), _ts, _usage);
counter_track::at(_dev_id, track_index++), _ts, _usage);
}
if(_settings.vcn_activity && !itr.m_xcp_metrics.empty())
{
uint64_t idx = _idx.at(6);
// Iterate over all XCPs and their VCN busy/activity values
for(const auto& metrics : itr.m_xcp_metrics)
{
for(const auto& vcn_val : metrics.vcn_busy)
{
TRACE_COUNTER("device_vcn_activity",
counter_track::at(_dev_id, idx), _ts, vcn_val);
++idx;
counter_track::at(_dev_id, track_index++), _ts,
vcn_val);
}
}
}
if(_settings.jpeg_activity && !itr.m_xcp_metrics.empty())
{
uint64_t idx = _idx.at(7);
// Calculate total VCN metrics to properly offset JPEG metrics index
if(_settings.vcn_activity)
{
size_t total_vcn_metrics = 0;
for(const auto& metrics : itr.m_xcp_metrics)
total_vcn_metrics += metrics.vcn_busy.size();
if(total_vcn_metrics > 0) idx += (total_vcn_metrics - 1);
}
// Iterate over all XCPs and their JPEG busy/activity values
for(const auto& metrics : itr.m_xcp_metrics)
{
for(const auto& jpeg_val : metrics.jpeg_busy)
{
TRACE_COUNTER("device_jpeg_activity",
counter_track::at(_dev_id, idx), _ts, jpeg_val);
++idx;
counter_track::at(_dev_id, track_index++), _ts,
jpeg_val);
}
}
}
}
};
};
if(get_use_perfetto()) _process_perfetto();
if(use_perfetto)
{
setup_perfetto_counter_tracks();
write_perfetto_metrics();
}
if(use_rocpd)
{
rocpd_process_smi_pmc_events(_dev_id, _settings, _ts, _mmbusy, _temp, _power,
_usage);
}
}
}
//--------------------------------------------------------------------------------------//
@@ -573,7 +692,7 @@ setup()
ROCPROFSYS_VERBOSE_F(0, "AMD SMI version: %u.%u.%u - str: %s.\n", _version.major,
_version.minor, _version.release, _version.build);
data::device_count = gpu::get_processor_count();
data::device_count = gpu::device_count();
auto _devices_v = get_sampling_gpus();
for(auto& itr : _devices_v)
@@ -668,7 +787,6 @@ setup()
}
is_initialized() = true;
data::setup();
} catch(std::runtime_error& _e)
{
@@ -705,7 +823,10 @@ void
post_process()
{
for(auto itr : data::device_list)
{
ROCPROFSYS_VERBOSE(2, "Post-processing amd-smi data for device: %d", itr);
data::post_process(itr);
}
}
uint32_t
@@ -21,10 +21,14 @@
// SOFTWARE.
#include "library/components/backtrace_metrics.hpp"
#include "core/agent.hpp"
#include "core/agent_manager.hpp"
#include "core/components/fwd.hpp"
#include "core/config.hpp"
#include "core/debug.hpp"
#include "core/node_info.hpp"
#include "core/perfetto.hpp"
#include "core/rocpd/data_processor.hpp"
#include "library/components/ensure_storage.hpp"
#include "library/ptl.hpp"
#include "library/runtime.hpp"
@@ -137,6 +141,12 @@ backtrace_metrics::get_hw_counter_labels(int64_t _tid)
return (_v) ? *_v : std::vector<std::string>{};
}
rocpd::data_processor&
get_data_processor()
{
return rocpd::data_processor::get_instance();
}
void
backtrace_metrics::start()
{}
@@ -327,6 +337,228 @@ backtrace_metrics::fini_perfetto(int64_t _tid, valid_array_t _valid)
}
}
void
rocpd_init_categories()
{
static bool _is_initialized = false;
if(_is_initialized) return;
get_data_processor().insert_category(
category_enum_id<category::thread_cpu_time>::value,
trait::name<category::thread_cpu_time>::value);
get_data_processor().insert_category(
category_enum_id<category::thread_peak_memory>::value,
trait::name<category::thread_peak_memory>::value);
get_data_processor().insert_category(
category_enum_id<category::thread_context_switch>::value,
trait::name<category::thread_context_switch>::value);
get_data_processor().insert_category(
category_enum_id<category::thread_page_fault>::value,
trait::name<category::thread_page_fault>::value);
get_data_processor().insert_category(
category_enum_id<category::thread_hardware_counter>::value,
trait::name<category::thread_hardware_counter>::value);
_is_initialized = true;
}
template <typename Category>
void
rocpd_init_tracks(int64_t _tid)
{
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
const auto& t_info = thread_info::get(_tid, SequentTID);
auto _tid_name = JOIN("", '[', _tid, ']');
auto thread_idx = data_processor.insert_thread_info(
n_info.id, getppid(), getpid(), t_info->index_data->system_value,
JOIN(" ", "Thread", _tid).c_str(), t_info->get_start(), t_info->get_stop(), "{}");
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
{
// Initialize hw_counter_tracks and create one track for each hardware counter
auto _hw_cnt_labels = *get_papi_labels(_tid);
for(auto& itr : _hw_cnt_labels)
{
std::string _desc = tim::papi::get_event_info(itr).short_descr;
if(_desc.empty()) _desc = itr;
ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str());
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
data_processor.insert_track(track_name.c_str(), n_info.id, getpid(),
thread_idx, "{}");
}
}
else
data_processor.insert_track(
JOIN('_', trait::name<Category>::value, _tid_name).c_str(), n_info.id,
getpid(), thread_idx, "{}");
}
template <typename Category>
void
rocpd_initialize_backtrace_metrics_pmc(size_t dev_id, const char* units, int64_t _tid)
{
auto& data_processor = get_data_processor();
auto _tid_name = JOIN("", '[', _tid, ']');
size_t EVENT_CODE = 0;
size_t INSTANCE_ID = 0;
const char* LONG_DESCRIPTION = "";
const char* COMPONENT = "";
const char* BLOCK = "";
const char* EXPRESSION = "";
auto ni = node_info::get_instance();
const auto* TARGET_ARCH = "CPU";
auto& _agent_manager = agent_manager::get_instance();
auto _base_id = _agent_manager.get_agent_by_id(dev_id, agent_type::CPU).base_id;
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
{
auto _hw_cnt_labels = *get_papi_labels(_tid);
for(auto& itr : _hw_cnt_labels)
{
std::string _desc = tim::papi::get_event_info(itr).short_descr;
if(_desc.empty()) _desc = itr;
ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str());
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
data_processor.insert_pmc_description(
ni.id, getpid(), _base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
track_name.c_str(), trait::name<Category>::value,
trait::name<Category>::description, LONG_DESCRIPTION, COMPONENT, units,
"ABS", BLOCK, EXPRESSION, 0, 0);
}
}
else
data_processor.insert_pmc_description(
ni.id, getpid(), _base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
JOIN("_", trait::name<Category>::value, _tid_name).c_str(),
trait::name<Category>::value, trait::name<Category>::description,
LONG_DESCRIPTION, COMPONENT, units, "ABS", BLOCK, EXPRESSION, 0, 0);
}
template <typename Category, typename Value>
void
rocpd_process_backtrace_metrics_events(const uint32_t device_id, uint64_t timestamp,
Value value, int64_t _tid)
{
auto& data_processor = get_data_processor();
auto _tid_name = JOIN("", '[', _tid, ']');
auto event_id =
data_processor.insert_event(category_enum_id<Category>::value, 0, 0, 0);
auto& agent_mngr = agent_manager::get_instance();
auto base_id = agent_mngr.get_agent_by_id(device_id, agent_type::CPU).base_id;
auto insert_event_and_sample = [&](const char* name, double _value) {
data_processor.insert_pmc_event(event_id, base_id, name, _value);
data_processor.insert_sample(name, timestamp, event_id);
};
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
{
auto _hw_cnt_labels = *get_papi_labels(_tid);
const auto& _hw_counters =
static_cast<backtrace_metrics::hw_counter_data_t>(value);
for(size_t i = 0; i < _hw_cnt_labels.size() && i < _hw_counters.size(); ++i)
{
std::string _desc = tim::papi::get_event_info(_hw_cnt_labels[i]).short_descr;
if(_desc.empty()) _desc = _hw_cnt_labels[i];
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
insert_event_and_sample(track_name.c_str(), _hw_counters.at(i));
}
}
else
insert_event_and_sample(
JOIN("_", trait::name<Category>::value, _tid_name).c_str(), value);
}
void
backtrace_metrics::init_rocpd(int64_t _tid, valid_array_t _valid)
{
rocpd_init_categories();
if(get_valid(category::thread_cpu_time{}, _valid))
{
rocpd_init_tracks<category::thread_cpu_time>(_tid);
rocpd_initialize_backtrace_metrics_pmc<category::thread_cpu_time>(0, "sec", _tid);
}
if(get_valid(category::thread_peak_memory{}, _valid))
{
rocpd_init_tracks<category::thread_peak_memory>(_tid);
rocpd_initialize_backtrace_metrics_pmc<category::thread_peak_memory>(0, "MB",
_tid);
}
if(get_valid(category::thread_context_switch{}, _valid))
{
rocpd_init_tracks<category::thread_context_switch>(_tid);
rocpd_initialize_backtrace_metrics_pmc<category::thread_context_switch>(0, "",
_tid);
}
if(get_valid(category::thread_page_fault{}, _valid))
{
rocpd_init_tracks<category::thread_page_fault>(_tid);
rocpd_initialize_backtrace_metrics_pmc<category::thread_page_fault>(0, "", _tid);
}
if(get_valid(type_list<hw_counters>{}, _valid) &&
get_valid(category::thread_hardware_counter{}, _valid))
{
rocpd_init_tracks<category::thread_hardware_counter>(_tid);
rocpd_initialize_backtrace_metrics_pmc<category::thread_hardware_counter>(0, "",
_tid);
}
}
void
backtrace_metrics::fini_rocpd(int64_t _tid, valid_array_t _valid)
{
const auto& _thread_info = thread_info::get(_tid, SequentTID);
ROCPROFSYS_CI_THROW(!_thread_info, "Error! missing thread info for tid=%li\n", _tid);
if(!_thread_info) return;
uint64_t _ts = _thread_info->get_stop();
if(get_valid(category::thread_cpu_time{}, _valid))
{
rocpd_process_backtrace_metrics_events<category::thread_cpu_time, double>(
0, _ts, 0, _tid);
}
if(get_valid(category::thread_peak_memory{}, _valid))
{
rocpd_process_backtrace_metrics_events<category::thread_peak_memory, double>(
0, _ts, 0, _tid);
}
if(get_valid(category::thread_context_switch{}, _valid))
{
rocpd_process_backtrace_metrics_events<category::thread_context_switch, int64_t>(
0, _ts, 0, _tid);
}
if(get_valid(category::thread_page_fault{}, _valid))
{
rocpd_process_backtrace_metrics_events<category::thread_page_fault, int64_t>(
0, _ts, 0, _tid);
}
if(get_valid(type_list<hw_counters>{}, _valid) &&
get_valid(category::thread_hardware_counter{}, _valid))
{
auto _hw_cnt_labels = *get_papi_labels(_tid);
hw_counter_data_t zero_counters{};
zero_counters.fill(0.0);
rocpd_process_backtrace_metrics_events<category::thread_hardware_counter,
hw_counter_data_t>(0, _ts, zero_counters,
_tid);
}
}
backtrace_metrics&
backtrace_metrics::operator-=(const backtrace_metrics& _rhs)
{
@@ -407,6 +639,43 @@ backtrace_metrics::post_process_perfetto(int64_t _tid, uint64_t _ts) const
}
}
}
void
backtrace_metrics::post_process_rocpd(int64_t _tid, uint64_t _ts) const
{
auto is_category_enabled = [&](const auto& _category) { return (*this)(_category); };
if(is_category_enabled(category::thread_cpu_time{}))
{
rocpd_process_backtrace_metrics_events<category::thread_cpu_time, double>(
0, _ts, m_cpu / units::sec, _tid);
}
if(is_category_enabled(category::thread_peak_memory{}))
{
rocpd_process_backtrace_metrics_events<category::thread_peak_memory, double>(
0, _ts, m_mem_peak / units::megabyte, _tid);
}
if(is_category_enabled(category::thread_context_switch{}))
{
rocpd_process_backtrace_metrics_events<category::thread_context_switch, int64_t>(
0, _ts, m_ctx_swch, _tid);
}
if(is_category_enabled(category::thread_page_fault{}))
{
rocpd_process_backtrace_metrics_events<category::thread_page_fault, int64_t>(
0, _ts, m_page_flt, _tid);
}
if(is_category_enabled(type_list<hw_counters>{}) &&
is_category_enabled(category::thread_hardware_counter{}))
{
rocpd_process_backtrace_metrics_events<category::thread_hardware_counter,
hw_counter_data_t>(0, _ts, m_hw_counter,
_tid);
}
}
} // namespace component
} // namespace rocprofsys
@@ -83,6 +83,8 @@ struct backtrace_metrics : comp::empty_base
static void configure(bool, int64_t _tid = threading::get_id());
static void init_perfetto(int64_t _tid, valid_array_t);
static void fini_perfetto(int64_t _tid, valid_array_t);
static void init_rocpd(int64_t _tid, valid_array_t);
static void fini_rocpd(int64_t _tid, valid_array_t);
static std::vector<std::string> get_hw_counter_labels(int64_t);
template <typename Tp>
@@ -113,6 +115,7 @@ struct backtrace_metrics : comp::empty_base
const auto& get_hw_counters() const { return m_hw_counter; }
void post_process_perfetto(int64_t _tid, uint64_t _ts) const;
void post_process_rocpd(int64_t _tid, uint64_t _ts) const;
backtrace_metrics& operator-=(const backtrace_metrics&);
@@ -21,15 +21,15 @@
// SOFTWARE.
#include "library/components/comm_data.hpp"
#include "core/agent_manager.hpp"
#include "core/components/fwd.hpp"
#include "core/config.hpp"
#include "core/node_info.hpp"
#include "core/perfetto.hpp"
#include "core/rocpd/data_processor.hpp"
#include "library/tracing.hpp"
#include <timemory/backends/mpi.hpp>
#include <timemory/manager.hpp>
#include <timemory/units.hpp>
#include <timemory/utility/locking.hpp>
namespace rocprofsys
{
@@ -74,6 +74,138 @@ write_perfetto_counter_track(uint64_t _val)
}
} // namespace
namespace
{
rocpd::data_processor&
get_data_processor()
{
return rocpd::data_processor::get_instance();
}
void
rocpd_initialize_comm_data_categories()
{
static bool _is_initialized = false;
if(_is_initialized) return;
get_data_processor().insert_category(category_enum_id<category::comm_data>::value,
trait::name<category::comm_data>::value);
#if defined(ROCPROFSYS_USE_MPI)
get_data_processor().insert_category(category_enum_id<category::mpi>::value,
trait::name<category::mpi>::value);
#endif
#if defined(ROCPROFSYS_USE_RCCL)
get_data_processor().insert_category(category_enum_id<category::rocm_rccl>::value,
trait::name<category::rocm_rccl>::value);
#endif
_is_initialized = true;
}
template <typename Track>
void
rocpd_initialize_track()
{
auto& n_info = node_info::get_instance();
auto thread_id = std::nullopt;
auto _init_track = [&](const char* label) {
ROCPROFSYS_VERBOSE(3, "INSERT_TRACK label: %s, node ID: %d, Process ID: %d",
label, n_info.id, getpid());
get_data_processor().insert_track(label, n_info.id, getpid(), thread_id);
};
static std::once_flag _once{};
std::call_once(_once, _init_track, Track::label);
}
void
rocpd_initialize_comm_data_pmc()
{
[[maybe_unused]] auto& data_processor = get_data_processor();
// find the proper values for a following definitions
[[maybe_unused]] size_t EVENT_CODE = 0;
[[maybe_unused]] size_t INSTANCE_ID = 0;
[[maybe_unused]] constexpr const char* LONG_DESCRIPTION = "";
[[maybe_unused]] constexpr const char* COMPONENT = "";
[[maybe_unused]] constexpr const char* BLOCK = "";
[[maybe_unused]] constexpr const char* EXPRESSION = "";
[[maybe_unused]] constexpr const char* MSG = "bytes";
[[maybe_unused]] constexpr const auto* TARGET_ARCH = "CPU";
auto ni = node_info::get_instance();
constexpr const auto DEVICE_ID = 0; // Assuming CPU device ID is 0
auto& _agent_manager = agent_manager::get_instance();
[[maybe_unused]] auto base_id =
_agent_manager.get_agent_by_id(DEVICE_ID, agent_type::CPU).base_id;
#if defined(ROCPROFSYS_USE_MPI)
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
comm_data::mpi_send::label, "Tracks MPI Send communication data sizes",
trait::name<category::mpi>::description, LONG_DESCRIPTION, COMPONENT, MSG, "ABS",
BLOCK, EXPRESSION, 0, 0);
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
comm_data::mpi_recv::label, "Tracks MPI Receive communication data sizes",
trait::name<category::mpi>::description, LONG_DESCRIPTION, COMPONENT, MSG, "ABS",
BLOCK, EXPRESSION, 0, 0);
#endif
#if defined(ROCPROFSYS_USE_RCCL)
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, rccl_send::label,
"Tracks RCCL Send communication data sizes",
trait::name<category::rocm_rccl>::description, LONG_DESCRIPTION, COMPONENT, MSG,
"ABS", BLOCK, EXPRESSION, 0, 0);
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, rccl_recv::label,
"Tracks RCCL Receive communication data sizes",
trait::name<category::rocm_rccl>::description, LONG_DESCRIPTION, COMPONENT, MSG,
"ABS", BLOCK, EXPRESSION, 0, 0);
#endif
}
template <typename Track>
void
rocpd_process_cpu_usage_events(const uint32_t device_id, int bytes)
{
auto& data_processor = get_data_processor();
auto event_id = data_processor.insert_event(
category_enum_id<category::comm_data>::value, 0, 0, 0);
auto& agents = agent_manager::get_instance();
auto agent = agents.get_agent_by_id(device_id, agent_type::CPU);
auto insert_event_and_sample = [&](const char* name, uint64_t timestamp,
double value) {
data_processor.insert_pmc_event(event_id, agent.device_id, name, value);
data_processor.insert_sample(name, timestamp, event_id);
};
static std::mutex _mutex{};
static uint64_t value = 0;
uint64_t _now = 0;
{
std::unique_lock<std::mutex> _lk{ _mutex };
_now = rocprofsys::tracing::now<uint64_t>();
bytes = (value += bytes);
}
insert_event_and_sample(Track::label, _now, bytes);
}
} // namespace
void
comm_data::start()
{
if(get_use_rocpd())
{
rocpd_initialize_comm_data_categories();
rocpd_initialize_comm_data_pmc();
}
}
void
comm_data::preinit()
{
@@ -116,13 +248,22 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int cou
write_perfetto_counter_track<mpi_send>(count * _size);
if(!rocprofsys::get_use_timemory()) return;
auto _name = std::string_view{ _data.tool_id };
tracker_t _a{ _name };
add(_a, count * _size);
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
add(_b, count * _size);
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)), count * _size);
if(get_use_rocpd())
{
rocpd_initialize_track<mpi_send>();
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
}
if(rocprofsys::get_use_timemory())
{
auto _name = std::string_view{ _data.tool_id };
tracker_t _a{ _name };
add(_a, count * _size);
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
add(_b, count * _size);
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)),
count * _size);
}
}
// MPI_Recv
@@ -133,15 +274,24 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count,
int _size = mpi_type_size(datatype);
if(_size == 0) return;
write_perfetto_counter_track<mpi_recv>(count * _size);
if(get_use_perfetto()) write_perfetto_counter_track<mpi_recv>(count * _size);
if(!rocprofsys::get_use_timemory()) return;
auto _name = std::string_view{ _data.tool_id };
tracker_t _a{ _name };
add(_a, count * _size);
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
add(_b, count * _size);
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)), count * _size);
if(get_use_rocpd())
{
rocpd_initialize_track<mpi_recv>();
rocpd_process_cpu_usage_events<mpi_recv>(0, count * _size);
}
if(rocprofsys::get_use_timemory())
{
auto _name = std::string_view{ _data.tool_id };
tracker_t _a{ _name };
add(_a, count * _size);
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
add(_b, count * _size);
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)),
count * _size);
}
}
// MPI_Isend
@@ -152,15 +302,24 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int cou
int _size = mpi_type_size(datatype);
if(_size == 0) return;
write_perfetto_counter_track<mpi_send>(count * _size);
if(get_use_perfetto()) write_perfetto_counter_track<mpi_send>(count * _size);
if(!rocprofsys::get_use_timemory()) return;
auto _name = std::string_view{ _data.tool_id };
tracker_t _a{ _name };
add(_a, count * _size);
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
add(_b, count * _size);
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)), count * _size);
if(get_use_rocpd())
{
rocpd_initialize_track<mpi_send>();
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
}
if(rocprofsys::get_use_timemory())
{
auto _name = std::string_view{ _data.tool_id };
tracker_t _a{ _name };
add(_a, count * _size);
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
add(_b, count * _size);
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)),
count * _size);
}
}
// MPI_Irecv
@@ -171,15 +330,24 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count,
int _size = mpi_type_size(datatype);
if(_size == 0) return;
write_perfetto_counter_track<mpi_recv>(count * _size);
if(get_use_perfetto()) write_perfetto_counter_track<mpi_recv>(count * _size);
if(!rocprofsys::get_use_timemory()) return;
auto _name = std::string_view{ _data.tool_id };
tracker_t _a{ _name };
add(_a, count * _size);
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
add(_b, count * _size);
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)), count * _size);
if(get_use_rocpd())
{
rocpd_initialize_track<mpi_recv>();
rocpd_process_cpu_usage_events<mpi_recv>(0, count * _size);
}
if(rocprofsys::get_use_timemory())
{
auto _name = std::string_view{ _data.tool_id };
tracker_t _a{ _name };
add(_a, count * _size);
tracker_t _b{ JOIN('/', _name, JOIN('=', "dst", dst)) };
add(_b, count * _size);
add(JOIN('/', _name, JOIN('=', "dst", dst), JOIN('=', "tag", tag)),
count * _size);
}
}
// MPI_Bcast
@@ -190,13 +358,21 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count,
int _size = mpi_type_size(datatype);
if(_size == 0) return;
write_perfetto_counter_track<mpi_send>(count * _size);
if(get_use_perfetto()) write_perfetto_counter_track<mpi_send>(count * _size);
if(!rocprofsys::get_use_timemory()) return;
auto _name = std::string_view{ _data.tool_id };
tracker_t _t{ _name };
add(_t, count * _size);
add(JOIN('/', _name, JOIN('=', "root", root)), count * _size);
if(get_use_rocpd())
{
rocpd_initialize_track<mpi_send>();
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
}
if(rocprofsys::get_use_timemory())
{
auto _name = std::string_view{ _data.tool_id };
tracker_t _t{ _name };
add(_t, count * _size);
add(JOIN('/', _name, JOIN('=', "root", root)), count * _size);
}
}
// MPI_Allreduce
@@ -207,11 +383,21 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, void*,
int _size = mpi_type_size(datatype);
if(_size == 0) return;
write_perfetto_counter_track<mpi_recv>(count * _size);
write_perfetto_counter_track<mpi_send>(count * _size);
if(get_use_perfetto())
{
write_perfetto_counter_track<mpi_recv>(count * _size);
write_perfetto_counter_track<mpi_send>(count * _size);
}
if(!rocprofsys::get_use_timemory()) return;
add(_data, count * _size);
if(get_use_rocpd())
{
rocpd_initialize_track<mpi_send>();
rocpd_initialize_track<mpi_recv>();
rocpd_process_cpu_usage_events<mpi_recv>(0, count * _size);
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
}
if(rocprofsys::get_use_timemory()) add(_data, count * _size);
}
// MPI_Sendrecv
@@ -224,30 +410,43 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen
int _recv_size = mpi_type_size(recvtype);
if(_send_size == 0 || _recv_size == 0) return;
write_perfetto_counter_track<mpi_send>(sendcount * _send_size);
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
if(!rocprofsys::get_use_timemory()) return;
auto _name = std::string_view{ _data.tool_id };
tracker_t _t{ _name };
add(_t, sendcount * _send_size + recvcount * _recv_size);
if(get_use_perfetto())
{
tracker_t _b{ JOIN('/', _name, "send") };
add(_b, sendcount * _send_size);
tracker_t _c{ JOIN('/', _name, JOIN('=', "send", dst)) };
add(_b, sendcount * _send_size);
add(JOIN('/', _name, "send", JOIN('=', "tag", sendtag)), sendcount * _send_size);
add(JOIN('/', _name, JOIN('=', "send", dst), JOIN('=', "tag", sendtag)),
sendcount * _send_size);
write_perfetto_counter_track<mpi_send>(sendcount * _send_size);
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
}
if(get_use_rocpd())
{
tracker_t _b{ JOIN('/', _name, "recv") };
add(_b, recvcount * _recv_size);
tracker_t _c{ JOIN('/', _name, JOIN('=', "recv", src)) };
add(_b, recvcount * _recv_size);
add(JOIN('/', _name, "recv", JOIN('=', "tag", recvtag)), recvcount * _recv_size);
add(JOIN('/', _name, JOIN('=', "recv", src), JOIN('=', "tag", recvtag)),
recvcount * _recv_size);
rocpd_process_cpu_usage_events<mpi_send>(0, sendcount * _send_size);
rocpd_process_cpu_usage_events<mpi_recv>(0, recvcount * _send_size);
}
if(rocprofsys::get_use_timemory())
{
auto _name = std::string_view{ _data.tool_id };
tracker_t _t{ _name };
add(_t, sendcount * _send_size + recvcount * _recv_size);
{
tracker_t _b{ JOIN('/', _name, "send") };
add(_b, sendcount * _send_size);
tracker_t _c{ JOIN('/', _name, JOIN('=', "send", dst)) };
add(_b, sendcount * _send_size);
add(JOIN('/', _name, "send", JOIN('=', "tag", sendtag)),
sendcount * _send_size);
add(JOIN('/', _name, JOIN('=', "send", dst), JOIN('=', "tag", sendtag)),
sendcount * _send_size);
}
{
tracker_t _b{ JOIN('/', _name, "recv") };
add(_b, recvcount * _recv_size);
tracker_t _c{ JOIN('/', _name, JOIN('=', "recv", src)) };
add(_b, recvcount * _recv_size);
add(JOIN('/', _name, "recv", JOIN('=', "tag", recvtag)),
recvcount * _recv_size);
add(JOIN('/', _name, JOIN('=', "recv", src), JOIN('=', "tag", recvtag)),
recvcount * _recv_size);
}
}
}
@@ -262,17 +461,28 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen
int _recv_size = mpi_type_size(recvtype);
if(_send_size == 0 || _recv_size == 0) return;
write_perfetto_counter_track<mpi_send>(sendcount * _send_size);
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
if(get_use_perfetto())
{
write_perfetto_counter_track<mpi_send>(sendcount * _send_size);
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
}
if(!rocprofsys::get_use_timemory()) return;
auto _name = std::string_view{ _data.tool_id };
tracker_t _t{ _name };
add(_t, sendcount * _send_size + recvcount * _recv_size);
tracker_t _r(JOIN('/', _name, JOIN('=', "root", root)));
add(_r, sendcount * _send_size + recvcount * _recv_size);
add(JOIN('/', _name, JOIN('=', "root", root), "send"), sendcount * _send_size);
add(JOIN('/', _name, JOIN('=', "root", root), "recv"), recvcount * _recv_size);
if(get_use_rocpd())
{
rocpd_process_cpu_usage_events<mpi_send>(0, sendcount * _send_size);
rocpd_process_cpu_usage_events<mpi_recv>(0, recvcount * _send_size);
}
if(rocprofsys::get_use_timemory())
{
auto _name = std::string_view{ _data.tool_id };
tracker_t _t{ _name };
add(_t, sendcount * _send_size + recvcount * _recv_size);
tracker_t _r(JOIN('/', _name, JOIN('=', "root", root)));
add(_r, sendcount * _send_size + recvcount * _recv_size);
add(JOIN('/', _name, JOIN('=', "root", root), "send"), sendcount * _send_size);
add(JOIN('/', _name, JOIN('=', "root", root), "recv"), recvcount * _recv_size);
}
}
// MPI_Alltoall
@@ -285,15 +495,26 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen
int _recv_size = mpi_type_size(recvtype);
if(_send_size == 0 || _recv_size == 0) return;
write_perfetto_counter_track<mpi_send>(sendcount * _send_size);
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
if(get_use_perfetto())
{
write_perfetto_counter_track<mpi_send>(sendcount * _send_size);
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
}
if(!rocprofsys::get_use_timemory()) return;
auto _name = std::string_view{ _data.tool_id };
tracker_t _t{ _name };
add(_t, sendcount * _send_size + recvcount * _recv_size);
add(JOIN('/', _name, "send"), sendcount * _send_size);
add(JOIN('/', _name, "recv"), recvcount * _recv_size);
if(get_use_rocpd())
{
rocpd_process_cpu_usage_events<mpi_send>(0, sendcount * _send_size);
rocpd_process_cpu_usage_events<mpi_recv>(0, recvcount * _recv_size);
}
if(rocprofsys::get_use_timemory())
{
auto _name = std::string_view{ _data.tool_id };
tracker_t _t{ _name };
add(_t, sendcount * _send_size + recvcount * _recv_size);
add(JOIN('/', _name, "send"), sendcount * _send_size);
add(JOIN('/', _name, "recv"), recvcount * _recv_size);
}
}
#endif
@@ -309,13 +530,17 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, const v
int _size = rccl_type_size(datatype);
if(_size <= 0) return;
write_perfetto_counter_track<rccl_recv>(count * _size);
if(get_use_perfetto()) write_perfetto_counter_track<rccl_recv>(count * _size);
if(!rocprofsys::get_use_timemory()) return;
auto _name = std::string_view{ _data.tool_id };
tracker_t _t{ _name };
add(_t, count * _size);
add(JOIN('/', _name, JOIN('=', "root", root)), count * _size);
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_recv>(0, count * _size);
if(rocprofsys::get_use_timemory())
{
auto _name = std::string_view{ _data.tool_id };
tracker_t _t{ _name };
add(_t, count * _size);
add(JOIN('/', _name, JOIN('=', "root", root)), count * _size);
}
}
// ncclSend
@@ -334,27 +559,32 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, size_t
if(_send_types.count(_data.tool_id) > 0)
{
write_perfetto_counter_track<rccl_send>(count * _size);
if(get_use_perfetto()) write_perfetto_counter_track<rccl_send>(count * _size);
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_send>(0, count * _size);
}
else if(_recv_types.count(_data.tool_id) > 0)
{
write_perfetto_counter_track<rccl_recv>(count * _size);
if(get_use_perfetto()) write_perfetto_counter_track<rccl_recv>(count * _size);
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_recv>(0, count * _size);
}
else
{
ROCPROFSYS_CI_THROW(true, "RCCL function not handled: %s", _data.tool_id.c_str());
}
write_perfetto_counter_track<rccl_recv>(count * _size);
if(get_use_perfetto()) write_perfetto_counter_track<rccl_recv>(count * _size);
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_recv>(0, count * _size);
if(!rocprofsys::get_use_timemory()) return;
auto _name = std::string_view{ _data.tool_id };
std::string _label = "root";
if(_name.find("Send") != std::string::npos) _label = "peer";
if(rocprofsys::get_use_timemory())
{
auto _name = std::string_view{ _data.tool_id };
std::string _label = "root";
if(_name.find("Send") != std::string::npos) _label = "peer";
tracker_t _t{ _name };
add(_t, count * _size);
add(JOIN('/', _name, JOIN('=', _label, peer)), count * _size);
tracker_t _t{ _name };
add(_t, count * _size);
add(JOIN('/', _name, JOIN('=', _label, peer)), count * _size);
}
}
// ncclBroadcast
@@ -365,13 +595,16 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, const v
int _size = rccl_type_size(datatype);
if(_size <= 0) return;
write_perfetto_counter_track<rccl_send>(count * _size);
if(get_use_perfetto()) write_perfetto_counter_track<rccl_send>(count * _size);
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_send>(0, count * _size);
if(!rocprofsys::get_use_timemory()) return;
auto _name = std::string_view{ _data.tool_id };
tracker_t _t{ _name };
add(_t, count * _size);
add(JOIN('/', _data.tool_id, JOIN('=', "root", root)), count * _size);
if(rocprofsys::get_use_timemory())
{
auto _name = std::string_view{ _data.tool_id };
tracker_t _t{ _name };
add(_t, count * _size);
add(JOIN('/', _data.tool_id, JOIN('=', "root", root)), count * _size);
}
}
// ncclAllReduce
@@ -389,19 +622,20 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, const v
if(_send_types.count(_data.tool_id) > 0)
{
write_perfetto_counter_track<rccl_send>(count * _size);
if(get_use_perfetto()) write_perfetto_counter_track<rccl_send>(count * _size);
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_send>(0, count * _size);
}
else if(_recv_types.count(_data.tool_id) > 0)
{
write_perfetto_counter_track<rccl_recv>(count * _size);
if(get_use_perfetto()) write_perfetto_counter_track<rccl_recv>(count * _size);
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_recv>(0, count * _size);
}
else
{
ROCPROFSYS_CI_THROW(true, "RCCL function not handled: %s", _data.tool_id.c_str());
}
if(!rocprofsys::get_use_timemory()) return;
add(_data, count * _size);
if(rocprofsys::get_use_timemory()) add(_data, count * _size);
}
// ncclAllGather
@@ -413,10 +647,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, const v
int _size = rccl_type_size(datatype);
if(_size <= 0) return;
write_perfetto_counter_track<rccl_recv>(count * _size);
if(!rocprofsys::get_use_timemory()) return;
add(_data, count * _size);
if(get_use_perfetto()) write_perfetto_counter_track<rccl_recv>(count * _size);
if(get_use_rocpd()) rocpd_process_cpu_usage_events<rccl_recv>(0, count * _size);
if(rocprofsys::get_use_timemory()) add(_data, count * _size);
}
#endif
} // namespace component
@@ -82,7 +82,7 @@ struct comm_data : base<comm_data, void>
static void preinit();
static void configure();
static void global_finalize();
static void start() {}
static void start();
static void stop() {}
#if defined(ROCPROFSYS_USE_MPI)
@@ -21,15 +21,15 @@
// SOFTWARE.
#include "library/cpu_freq.hpp"
#include "core/agent.hpp"
#include "core/agent_manager.hpp"
#include "core/common.hpp"
#include "core/components/fwd.hpp"
#include "core/config.hpp"
#include "core/debug.hpp"
#include "core/defines.hpp"
#include "core/node_info.hpp"
#include "core/perfetto.hpp"
#include "core/timemory.hpp"
#include "core/rocpd/data_processor.hpp"
#include "library/components/cpu_freq.hpp"
#include "library/thread_data.hpp"
#include "library/thread_info.hpp"
#include <timemory/components/rusage/backends.hpp>
@@ -44,7 +44,6 @@
#include <sys/resource.h>
#include <tuple>
#include <utility>
#include <vector>
namespace rocprofsys
{
@@ -65,6 +64,181 @@ init_perfetto_counter_tracks(type_list<Types...>)
{
(perfetto_counter_track<Types>::init(), ...);
}
template <typename Category>
inline std::string
get_cpu_freq_track_name(uint64_t cpu_id)
{
return std::string(trait::name<Category>::value) + " [" + std::to_string(cpu_id) +
"]";
}
template <typename Func>
void
do_for_enabled_cpus(Func&& func)
{
const auto& enabled_cpus = component::cpu_freq::get_enabled_cpus();
for(const auto& cpu : enabled_cpus)
{
func(cpu);
}
}
rocpd::data_processor&
get_data_processor()
{
return rocpd::data_processor::get_instance();
}
void
rocpd_initialize_cpu_freq_category()
{
get_data_processor().insert_category(ROCPROFSYS_CATEGORY_CPU_FREQ,
trait::name<category::cpu_freq>::value);
}
void
rocpd_initialize_cpu_freq_tracks()
{
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
const auto thread_idx = std::nullopt; // Internal thread ID for cpu-freq
do_for_enabled_cpus([&](size_t cpu_id) {
data_processor.insert_track(
get_cpu_freq_track_name<category::cpu_freq>(cpu_id).c_str(), n_info.id,
getpid(), thread_idx);
});
}
void
rocpd_initialize_cpu_usage_tracks()
{
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
const auto thread_idx = std::nullopt; // Internal thread ID for cpu-freq
data_processor.insert_track(trait::name<category::process_page>::value, n_info.id,
getpid(), thread_idx);
data_processor.insert_track(trait::name<category::process_virt>::value, n_info.id,
getpid(), thread_idx);
data_processor.insert_track(trait::name<category::process_peak>::value, n_info.id,
getpid(), thread_idx);
data_processor.insert_track(trait::name<category::process_context_switch>::value,
n_info.id, getpid(), thread_idx);
data_processor.insert_track(trait::name<category::process_page_fault>::value,
n_info.id, getpid(), thread_idx);
data_processor.insert_track(trait::name<category::process_user_mode_time>::value,
n_info.id, getpid(), thread_idx);
data_processor.insert_track(trait::name<category::process_kernel_mode_time>::value,
n_info.id, getpid(), thread_idx);
}
void
rocpd_initialize_cpu_freq_pmc(size_t dev_id)
{
auto& data_processor = get_data_processor();
// find the proper values for a following definitions
size_t EVENT_CODE = 0;
size_t INSTANCE_ID = 0;
const char* LONG_DESCRIPTION = "";
const char* COMPONENT = "";
const char* BLOCK = "";
const char* EXPRESSION = "";
const char* MEMORY = "MB";
const char* TIME = "sec";
auto ni = node_info::get_instance();
const auto* TARGET_ARCH = "CPU";
auto& _agent_manager = agent_manager::get_instance();
auto base_id = _agent_manager.get_agent_by_id(dev_id, agent_type::CPU).base_id;
do_for_enabled_cpus([&](size_t cpu_id) {
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
get_cpu_freq_track_name<category::cpu_freq>(cpu_id).c_str(), "Frequency",
trait::name<category::cpu_freq>::description, LONG_DESCRIPTION, COMPONENT,
component::cpu_freq::display_unit().c_str(), "ABS", BLOCK, EXPRESSION, 0, 0);
});
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_page>::value, "Memory Usage",
trait::name<category::process_page>::description, LONG_DESCRIPTION, COMPONENT,
MEMORY, "ABS", BLOCK, EXPRESSION, 0, 0);
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_virt>::value, "Virtual Memory Usage",
trait::name<category::process_virt>::description, LONG_DESCRIPTION, COMPONENT,
MEMORY, "ABS", BLOCK, EXPRESSION, 0, 0);
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_peak>::value, "Peak Memory",
trait::name<category::process_peak>::description, LONG_DESCRIPTION, COMPONENT,
MEMORY, "ABS", BLOCK, EXPRESSION, 0, 0);
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_context_switch>::value, "Context Switches",
trait::name<category::process_context_switch>::description, LONG_DESCRIPTION,
COMPONENT, "", "ABS", BLOCK, EXPRESSION, 0, 0);
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_page_fault>::value, "Page Faults",
trait::name<category::process_page_fault>::description, LONG_DESCRIPTION,
COMPONENT, "", "ABS", BLOCK, EXPRESSION, 0, 0);
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_user_mode_time>::value, "User Time",
trait::name<category::process_user_mode_time>::description, LONG_DESCRIPTION,
COMPONENT, TIME, "ABS", BLOCK, EXPRESSION, 0, 0);
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_kernel_mode_time>::value, "Kernel Time",
trait::name<category::process_kernel_mode_time>::description, LONG_DESCRIPTION,
COMPONENT, TIME, "ABS", BLOCK, EXPRESSION, 0, 0);
}
void
rocpd_process_cpu_usage_events(const uint32_t device_id, uint64_t timestamp,
const component::cpu_freq& freq, double mem_page,
double virt_mem_page, double peak_mem,
double context_switch, double page_fault, double user_time,
double kernel_time)
{
auto& data_processor = get_data_processor();
auto event_id = data_processor.insert_event(ROCPROFSYS_CATEGORY_CPU_FREQ, 0, 0, 0);
auto& agent_mngr = agent_manager::get_instance();
auto base_id = agent_mngr.get_agent_by_id(device_id, agent_type::CPU).base_id;
auto insert_event_and_sample = [&](const char* name, double value) {
data_processor.insert_pmc_event(event_id, base_id, name, value);
data_processor.insert_sample(name, timestamp, event_id);
};
do_for_enabled_cpus([&](size_t cpu_id) {
insert_event_and_sample(
get_cpu_freq_track_name<category::cpu_freq>(cpu_id).c_str(), freq.at(cpu_id));
});
insert_event_and_sample(trait::name<category::process_page>::value, mem_page);
insert_event_and_sample(trait::name<category::process_virt>::value, virt_mem_page);
insert_event_and_sample(trait::name<category::process_peak>::value, peak_mem);
insert_event_and_sample(trait::name<category::process_context_switch>::value,
context_switch);
insert_event_and_sample(trait::name<category::process_page_fault>::value, page_fault);
insert_event_and_sample(trait::name<category::process_user_mode_time>::value,
user_time);
insert_event_and_sample(trait::name<category::process_kernel_mode_time>::value,
kernel_time);
}
} // namespace
} // namespace cpu_freq
} // namespace rocprofsys
@@ -76,11 +250,14 @@ namespace cpu_freq
void
setup()
{
init_perfetto_counter_tracks(
type_list<category::cpu_freq, category::process_page, category::process_virt,
category::process_peak, category::process_context_switch,
category::process_page_fault, category::process_user_mode_time,
category::process_kernel_mode_time>{});
if(get_use_perfetto())
{
init_perfetto_counter_tracks(
type_list<category::cpu_freq, category::process_page, category::process_virt,
category::process_peak, category::process_context_switch,
category::process_page_fault, category::process_user_mode_time,
category::process_kernel_mode_time>{});
}
}
void
@@ -163,6 +340,28 @@ post_process()
ROCPROFSYS_VERBOSE(1,
"Post-processing %zu cpu frequency and memory usage entries...\n",
data.size());
auto& enabled_cpus = component::cpu_freq::get_enabled_cpus();
if(get_use_rocpd())
{
rocpd_initialize_cpu_freq_category();
rocpd_initialize_cpu_usage_tracks();
rocpd_initialize_cpu_freq_tracks();
// `get_enabled_cpus()` returns the number of cores enabled for monitoring but the
// actually device_id is 0, since there is a single device available. And the
// agents seems to be assigned per device basis not per core.
// TODO: `get_enabled_cpus()` should be fixed in the future to align with GPU
// implementation.
auto cpu_agents =
agent_manager::get_instance().get_agents_by_type(agent_type::CPU);
for(auto& agent : cpu_agents)
{
rocpd_initialize_cpu_freq_pmc(agent->device_id);
}
}
auto _process_frequencies = [](size_t _idx, size_t _offset) {
using freq_track = perfetto_counter_track<category::cpu_freq>;
@@ -191,14 +390,17 @@ post_process()
};
auto _process_cpu_rusage = []() {
config_perfetto_counter_tracks(
type_list<category::process_page, category::process_virt,
category::process_peak, category::process_context_switch,
category::process_page_fault, category::process_user_mode_time,
category::process_kernel_mode_time>{},
{ "Memory Usage", "Virtual Memory Usage", "Peak Memory", "Context Switches",
"Page Faults", "User Time", "Kernel Time" },
{ "MB", "MB", "MB", "", "", "sec", "sec" });
if(get_use_perfetto())
{
config_perfetto_counter_tracks(
type_list<category::process_page, category::process_virt,
category::process_peak, category::process_context_switch,
category::process_page_fault, category::process_user_mode_time,
category::process_kernel_mode_time>{},
{ "Memory Usage", "Virtual Memory Usage", "Peak Memory",
"Context Switches", "Page Faults", "User Time", "Kernel Time" },
{ "MB", "MB", "MB", "", "", "sec", "sec" });
}
const auto& _thread_info = thread_info::get(0, InternalTID);
ROCPROFSYS_CI_THROW(!_thread_info, "Missing thread info for thread 0");
@@ -209,47 +411,61 @@ post_process()
uint64_t _ts = std::get<0>(itr);
if(!_thread_info->is_valid_time(_ts)) continue;
double _page = std::get<1>(itr);
double _virt = std::get<2>(itr);
double _peak = std::get<3>(itr);
double _page = std::get<1>(itr) / units::megabyte;
double _virt = std::get<2>(itr) / units::megabyte;
double _peak = std::get<3>(itr) / units::megabyte;
uint64_t _cntx = std::get<4>(itr);
uint64_t _flts = std::get<5>(itr);
double _user = std::get<6>(itr);
double _kern = std::get<7>(itr);
write_perfetto_counter_track<category::process_page>(_ts,
_page / units::megabyte);
write_perfetto_counter_track<category::process_virt>(_ts,
_virt / units::megabyte);
write_perfetto_counter_track<category::process_peak>(_ts,
_peak / units::megabyte);
write_perfetto_counter_track<category::process_context_switch>(_ts, _cntx);
write_perfetto_counter_track<category::process_page_fault>(_ts, _flts);
write_perfetto_counter_track<category::process_user_mode_time>(
_ts, _user / units::sec);
write_perfetto_counter_track<category::process_kernel_mode_time>(
_ts, _kern / units::sec);
double _user = std::get<6>(itr) / units::sec;
double _kern = std::get<7>(itr) / units::sec;
if(get_use_perfetto())
{
write_perfetto_counter_track<category::process_page>(_ts, _page);
write_perfetto_counter_track<category::process_virt>(_ts, _virt);
write_perfetto_counter_track<category::process_peak>(_ts, _peak);
write_perfetto_counter_track<category::process_context_switch>(_ts,
_cntx);
write_perfetto_counter_track<category::process_page_fault>(_ts, _flts);
write_perfetto_counter_track<category::process_user_mode_time>(_ts,
_user);
write_perfetto_counter_track<category::process_kernel_mode_time>(_ts,
_kern);
}
if(get_use_rocpd())
{
const auto& freq_data = std::get<8>(itr);
rocpd_process_cpu_usage_events(0, _ts, freq_data, _page, _virt, _peak,
_cntx, _flts, _user, _kern);
}
}
auto _end_ts = _thread_info->get_stop();
write_perfetto_counter_track<category::process_page>(_end_ts, 0.0);
write_perfetto_counter_track<category::process_virt>(_end_ts, 0.0);
write_perfetto_counter_track<category::process_peak>(_end_ts, 0.0);
write_perfetto_counter_track<category::process_context_switch>(_end_ts, 0);
write_perfetto_counter_track<category::process_page_fault>(_end_ts, 0);
write_perfetto_counter_track<category::process_user_mode_time>(_end_ts, 0.0);
write_perfetto_counter_track<category::process_kernel_mode_time>(_end_ts, 0.0);
if(get_use_perfetto())
{
auto _end_ts = _thread_info->get_stop();
write_perfetto_counter_track<category::process_page>(_end_ts, 0.0);
write_perfetto_counter_track<category::process_virt>(_end_ts, 0.0);
write_perfetto_counter_track<category::process_peak>(_end_ts, 0.0);
write_perfetto_counter_track<category::process_context_switch>(_end_ts, 0);
write_perfetto_counter_track<category::process_page_fault>(_end_ts, 0);
write_perfetto_counter_track<category::process_user_mode_time>(_end_ts, 0.0);
write_perfetto_counter_track<category::process_kernel_mode_time>(_end_ts,
0.0);
}
};
_process_cpu_rusage();
auto& enabled_cpu_freqs = component::cpu_freq::get_enabled_cpus();
for(auto itr = enabled_cpu_freqs.begin(); itr != enabled_cpu_freqs.end(); ++itr)
if(get_use_perfetto())
{
auto _idx = *itr;
auto _offset = std::distance(enabled_cpu_freqs.begin(), itr);
_process_frequencies(_idx, _offset);
for(auto itr = enabled_cpus.begin(); itr != enabled_cpus.end(); ++itr)
{
auto _idx = *itr;
auto _offset = std::distance(enabled_cpus.begin(), itr);
_process_frequencies(_idx, _offset);
}
}
enabled_cpu_freqs.clear();
enabled_cpus.clear();
}
} // namespace cpu_freq
} // namespace rocprofsys
@@ -20,14 +20,19 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <optional>
#define TIMEMORY_KOKKOSP_POSTFIX ROCPROFSYS_PUBLIC_API
#include "api.hpp"
#include "core/agent_manager.hpp"
#include "core/components/fwd.hpp"
#include "core/config.hpp"
#include "core/debug.hpp"
#include "core/defines.hpp"
#include "core/node_info.hpp"
#include "core/perfetto.hpp"
#include "core/rocpd/data_processor.hpp"
#include "core/rocpd/json.hpp"
#include "library/components/category_region.hpp"
#include "library/runtime.hpp"
@@ -150,6 +155,52 @@ violates_name_rules(Arg&& _arg, Args&&... _args)
}
} // namespace
namespace
{
rocprofsys::rocpd::data_processor&
get_data_processor()
{
return rocprofsys::rocpd::data_processor::get_instance();
}
void
rocpd_initialize_kokkos_category()
{
get_data_processor().insert_category(
rocprofsys::category_enum_id<category::kokkos>::value,
rocprofsys::trait::name<category::kokkos>::value);
}
void
rocpd_initialize_kokkos_track()
{
auto& data_processor = get_data_processor();
auto& n_info = rocprofsys::node_info::get_instance();
auto thread_id = std::nullopt;
data_processor.insert_track(rocprofsys::trait::name<category::kokkos>::value,
n_info.id, getpid(), thread_id);
}
void
rocpd_process_kokkos_event(const char* name, const char* event_type, const char* target,
uint64_t timestamp_ns)
{
auto& data_processor = get_data_processor();
auto event_metadata = rocpd::json::create();
event_metadata->set("name", name);
event_metadata->set("event_type", event_type);
event_metadata->set("target", target);
auto event_id = data_processor.insert_event(
rocprofsys::category_enum_id<category::kokkos>::value, 0, 0, 0, "{}", "{}",
event_metadata->to_string().c_str());
data_processor.insert_sample(rocprofsys::trait::name<category::kokkos>::value,
timestamp_ns, event_id, "{}");
}
} // namespace
//--------------------------------------------------------------------------------------//
extern "C"
@@ -256,6 +307,12 @@ extern "C"
rocprofsys_set_mpi_hidden(false, false);
rocprofsys_init_hidden(_mode.c_str(), false, _arg0.c_str());
rocprofsys_push_trace_hidden("kokkos_main");
if(rocprofsys::get_use_rocpd())
{
rocpd_initialize_kokkos_category();
rocpd_initialize_kokkos_track();
}
}
setup_kernel_logger();
@@ -545,6 +602,8 @@ extern "C"
{
if(violates_name_rules(label)) return;
auto timestamp = tim::get_clock_real_now<uint64_t, std::nano>();
ROCPROFSYS_SCOPED_THREAD_STATE(ThreadState::Internal);
if(rocprofsys::config::get_use_perfetto())
{
@@ -559,12 +618,20 @@ extern "C"
"", label, " [dual_view_sync][", (is_device) ? "device" : "host", "]")));
kokkosp::profiler_t<kokkosp_region>{ _name }.mark();
}
if(rocprofsys::config::get_use_rocpd())
{
rocpd_process_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(),
"[dual_view_sync]",
(is_device) ? "device" : "host", timestamp);
}
}
void kokkosp_dual_view_modify(const char* label, const void* const, bool is_device)
{
if(violates_name_rules(label)) return;
auto timestamp = tim::get_clock_real_now<uint64_t, std::nano>();
ROCPROFSYS_SCOPED_THREAD_STATE(ThreadState::Internal);
if(rocprofsys::config::get_use_perfetto())
{
@@ -580,6 +647,13 @@ extern "C"
(is_device) ? "device" : "host", "]")));
kokkosp::profiler_t<kokkosp_region>{ _name }.mark();
}
if(rocprofsys::config::get_use_rocpd())
{
rocpd_process_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(),
"[dual_view_modify]",
(is_device) ? "device" : "host", timestamp);
}
}
//----------------------------------------------------------------------------------//
@@ -21,26 +21,10 @@
// SOFTWARE.
#include "library/rocprofiler-sdk/counters.hpp"
#include "common/synchronized.hpp"
#include "core/debug.hpp"
#include "core/timemory.hpp"
#include "library/rocprofiler-sdk/fwd.hpp"
#include <timemory/utility/types.hpp>
#include <rocprofiler-sdk/agent.h>
#include <rocprofiler-sdk/buffer_tracing.h>
#include <rocprofiler-sdk/callback_tracing.h>
#include <rocprofiler-sdk/cxx/hash.hpp>
#include <rocprofiler-sdk/cxx/name_info.hpp>
#include <rocprofiler-sdk/cxx/operators.hpp>
#include <rocprofiler-sdk/dispatch_counting_service.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/registration.h>
#include <memory>
#include <unordered_map>
#include <vector>
#include <timemory/utility/types.hpp>
namespace rocprofsys
{
@@ -26,7 +26,10 @@
#include "core/config.hpp"
#include "core/debug.hpp"
#include "core/locking.hpp"
#include "core/node_info.hpp"
#include "core/perf.hpp"
#include "core/rocpd/data_processor.hpp"
#include "core/rocpd/json.hpp"
#include "core/state.hpp"
#include "core/utility.hpp"
#include "library/components/backtrace.hpp"
@@ -153,6 +156,136 @@ namespace
{
using sampler_allocator_t = typename sampler_t::allocator_t;
template <typename Category>
inline std::string
get_category_track_name(uint64_t tid)
{
return std::string(trait::name<Category>::value) + "_" + std::to_string(tid);
}
std::string
generate_call_stack_json(const tim::unwind::processed_entry& stack_entry)
{
auto call_stack = ::rocpd::json::create();
call_stack->set("name", std::string(demangle(stack_entry.name)));
call_stack->set("pc", as_hex(stack_entry.address));
call_stack->set("file", std::string(stack_entry.location));
return call_stack->to_string();
}
std::string
generate_line_info_json(const tim::unwind::processed_entry& line_info_entry)
{
auto line_info = ::rocpd::json::create();
line_info->set("line_address", as_hex(line_info_entry.line_address));
line_info->set("name", std::string(demangle(line_info_entry.name)));
if(line_info_entry.lineinfo && !line_info_entry.lineinfo.lines.empty())
{
auto _lines = line_info_entry.lineinfo.lines;
std::reverse(_lines.begin(), _lines.end());
for(const auto& line : _lines)
{
auto inlined = ::rocpd::json::create();
inlined->set("name", std::string(demangle(line.name)));
inlined->set("location", std::string(line.location));
inlined->set("line", std::to_string(line.line));
line_info->set("inlined", inlined);
}
}
return line_info->to_string();
}
std::string
generate_hw_counter_json(int64_t _tid, const backtrace_metrics& metrics)
{
auto extdata = ::rocpd::json::create();
if(!metrics.get_hw_counters().empty())
{
auto _labels = backtrace_metrics::get_hw_counter_labels(_tid);
auto _hw_cnt_vals = metrics.get_hw_counters();
auto hw_counters = ::rocpd::json::create();
for(size_t i = 0; i < _labels.size(); ++i)
{
hw_counters->set(_labels.at(i), _hw_cnt_vals.at(i));
}
extdata->set("hw_counters", hw_counters);
}
return extdata->to_string();
}
rocpd::data_processor&
get_data_processor()
{
return rocpd::data_processor::get_instance();
}
void
rocpd_initialize_sampling_category()
{
static bool _is_initialized = false;
if(_is_initialized) return;
auto& data_processor = get_data_processor();
data_processor.insert_category(ROCPROFSYS_CATEGORY_SAMPLING,
trait::name<category::sampling>::value);
data_processor.insert_category(ROCPROFSYS_CATEGORY_OVERFLOW_SAMPLING,
trait::name<category::overflow_sampling>::value);
data_processor.insert_category(ROCPROFSYS_CATEGORY_TIMER_SAMPLING,
trait::name<category::timer_sampling>::value);
_is_initialized = true;
}
size_t
rocpd_initialize_thread_info(size_t tid)
{
const auto& _thread_info = thread_info::get(tid, SequentTID);
ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", tid);
if(!_thread_info) return -1;
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
return data_processor.insert_thread_info(
n_info.id, getppid(), getpid(), _thread_info->index_data->system_value,
threading::get_thread_name().c_str(), _thread_info->get_start(),
_thread_info->get_stop(), "{}");
}
void
rocpd_init_track(const char* track_name, int64_t tid)
{
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
data_processor.insert_track(track_name, n_info.id, getpid(), tid, "{}");
}
template <typename Category>
void
rocpd_insert_region(size_t thread_id, size_t start_time, size_t end_time, size_t name_id,
const char* track, const char* call_stack = "{}",
const char* line_info = "{}", const char* extdata = "{}")
{
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
auto event_id = data_processor.insert_event(category_enum_id<Category>::value, 0, 0,
0, call_stack, line_info, extdata);
data_processor.insert_region(n_info.id, getpid(), thread_id, start_time, end_time,
name_id, event_id);
data_processor.insert_sample(track, start_time, event_id);
}
auto&
get_sampler_allocators()
{
@@ -810,6 +943,10 @@ void
post_process_timemory(int64_t, const std::vector<timer_sampling_data>&,
const std::vector<overflow_sampling_data>&);
void
post_process_rocpd(int64_t _tid, const std::vector<timer_sampling_data>& _timer_data,
const std::vector<overflow_sampling_data>& _overflow_data);
auto static_strings = std::set<std::string>{};
} // namespace
@@ -939,15 +1076,15 @@ post_process()
auto _raw_data = _sampler->get_data();
auto _loaded_data = load_offload_buffer(i);
for(auto litr : _loaded_data)
for(auto line : _loaded_data)
{
while(!litr.is_empty())
while(!line.is_empty())
{
auto _v = sampler_bundle_t{};
litr.read(&_v);
line.read(&_v);
_raw_data.emplace_back(std::move(_v));
}
litr.destroy();
line.destroy();
}
ROCPROFSYS_VERBOSE(2 || get_debug_sampling(),
@@ -988,6 +1125,7 @@ post_process()
if(get_use_perfetto()) post_process_perfetto(i, _timer_data, _overflow_data);
if(get_use_timemory()) post_process_timemory(i, _timer_data, _overflow_data);
if(get_use_rocpd()) post_process_rocpd(i, _timer_data, _overflow_data);
}
else
{
@@ -1205,13 +1343,13 @@ post_process_perfetto(int64_t _tid, const std::vector<timer_sampling_data>& _tim
auto _lines = iitr.lineinfo.lines;
std::reverse(_lines.begin(), _lines.end());
size_t _n = 0;
for(const auto& litr : _lines)
for(const auto& line : _lines)
{
auto _label = JOIN('-', "lineinfo", _n++);
tracing::add_perfetto_annotation(
ctx, _label.c_str(),
JOIN('@', demangle(litr.name),
JOIN(':', litr.location, litr.line)));
JOIN('@', demangle(line.name),
JOIN(':', line.location, line.line)));
}
}
}
@@ -1298,11 +1436,11 @@ post_process_perfetto(int64_t _tid, const std::vector<timer_sampling_data>& _tim
auto _lines = iitr.lineinfo.lines;
std::reverse(_lines.begin(), _lines.end());
size_t _n = 0;
for(const auto& litr : _lines)
for(const auto& line : _lines)
{
const auto* _name =
static_strings.emplace(demangle(litr.name)).first->c_str();
auto _info = JOIN(':', litr.location, litr.line);
static_strings.emplace(demangle(line.name)).first->c_str();
auto _info = JOIN(':', line.location, line.line);
tracing::push_perfetto_track(
category::timer_sampling{}, _name, _track, _beg,
[&](::perfetto::EventContext ctx) {
@@ -1342,13 +1480,13 @@ post_process_perfetto(int64_t _tid, const std::vector<timer_sampling_data>& _tim
auto _lines = iitr.lineinfo.lines;
std::reverse(_lines.begin(), _lines.end());
size_t _n = 0;
for(const auto& litr : _lines)
for(const auto& line : _lines)
{
auto _label = JOIN('-', "lineinfo", _n++);
tracing::add_perfetto_annotation(
ctx, _label.c_str(),
JOIN('@', demangle(litr.name),
JOIN(':', litr.location, litr.line)));
JOIN('@', demangle(line.name),
JOIN(':', line.location, line.line)));
}
}
}
@@ -1540,6 +1678,189 @@ post_process_timemory(int64_t _tid, const std::vector<timer_sampling_data>& _tim
}
}
void
rocpd_post_process_overflow_data(
int64_t _tid, const std::vector<overflow_sampling_data>& _overflow_data)
{
auto& data_processor = get_data_processor();
const auto& _thread_info = thread_info::get(_tid, SequentTID);
ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", _tid);
if(!_thread_info) return;
auto _overflow_event =
get_setting_value<std::string>("ROCPROFSYS_SAMPLING_OVERFLOW_EVENT").value_or("");
if(!_overflow_event.empty() && !_overflow_data.empty())
{
auto _beg_ns = std::max(_overflow_data.front().m_beg, _thread_info->get_start());
auto _end_ns = std::min(_overflow_data.back().m_end, _thread_info->get_stop());
const auto _overflow_prefix = std::string_view{ "PERF_COUNT_" };
const auto _overflow_pos = _overflow_event.find(_overflow_prefix);
if(_overflow_pos != std::string::npos)
_overflow_event =
_overflow_event.substr(_overflow_pos + _overflow_prefix.length());
const auto* _main_name =
static_strings.emplace(join(" ", _overflow_event, "samples [rocprof-sys]"))
.first->c_str();
auto main_name_id = data_processor.insert_string(_main_name);
const auto& _track_name =
JOIN(" ", "Thread", _thread_info->index_data->sequent_value, "Overflow",
"(S)", _thread_info->index_data->system_value);
auto thread_idx = rocpd_initialize_thread_info(_tid);
rocpd_init_track(_track_name.c_str(), thread_idx);
rocpd_insert_region<category::overflow_sampling>(
thread_idx, _beg_ns, _end_ns, main_name_id, _track_name.c_str());
for(const auto& itr : _overflow_data)
{
auto _beg = itr.m_beg;
auto _end = itr.m_end;
if(!_thread_info->is_valid_lifetime({ _beg, _end })) continue;
for(const auto& iitr : itr.m_stack)
{
const auto* _name =
static_strings.emplace(demangle(iitr.name)).first->c_str();
auto name_id = data_processor.insert_string(_name);
rocpd_insert_region<category::overflow_sampling>(
thread_idx, _beg, _end, name_id, _track_name.c_str(),
generate_call_stack_json(iitr).c_str(),
generate_line_info_json(iitr).c_str());
}
}
}
}
void
rocpd_post_process_backtrace_metrics(int64_t _tid,
const std::vector<timer_sampling_data>& _timer_data)
{
auto _valid_metrics = backtrace_metrics::valid_array_t{};
for(const auto& itr : _timer_data)
{
_valid_metrics |= itr.m_metrics.get_valid();
}
if(trait::runtime_enabled<backtrace_metrics>::get() && get_use_rocpd())
{
ROCPROFSYS_VERBOSE(3 || get_debug_sampling(),
"[%li] Post-processing metrics for rocpd...\n", _tid);
backtrace_metrics::init_rocpd(_tid, _valid_metrics);
for(const auto& itr : _timer_data)
itr.m_metrics.post_process_rocpd(_tid, 0.5 * (itr.m_beg + itr.m_end));
backtrace_metrics::fini_rocpd(_tid, _valid_metrics);
}
}
void
rocpd_post_process_timer_data(int64_t _tid,
const std::vector<timer_sampling_data>& _timer_data)
{
auto& data_processor = get_data_processor();
const auto& _thread_info = thread_info::get(_tid, SequentTID);
ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", _tid);
if(!_thread_info) return;
if(!_timer_data.empty())
{
rocpd_post_process_backtrace_metrics(_tid, _timer_data);
auto _beg_ns = std::max(_timer_data.front().m_beg, _thread_info->get_start());
auto _end_ns = std::min(_timer_data.back().m_end, _thread_info->get_stop());
const auto _track_name =
JOIN(" ", "Thread", _thread_info->index_data->sequent_value, "(S)",
_thread_info->index_data->system_value);
auto thread_idx = rocpd_initialize_thread_info(_tid);
rocpd_init_track(_track_name.c_str(), thread_idx);
const auto main_name_id = data_processor.insert_string("samples [rocprof-sys]");
rocpd_insert_region<category::timer_sampling>(thread_idx, _beg_ns, _end_ns,
main_name_id, _track_name.c_str());
auto _labels = backtrace_metrics::get_hw_counter_labels(_tid);
for(const auto& itr : _timer_data)
{
size_t _ncount = 0;
uint64_t _beg = itr.m_beg;
uint64_t _end = itr.m_end;
if(!_thread_info->is_valid_lifetime({ _beg, _end })) continue;
for(const auto& iitr : itr.m_stack)
{
auto _ncur = _ncount++;
// the begin/end + HW counters will be same for entire call-stack so only
// annotate the top and the bottom functons to keep the data consumption
// low
bool _include_common = (_ncur == 0 || _ncur + 1 == itr.m_stack.size());
// Only annotate HW counters when first or last and HW counters are not
// empty
bool _include_hw =
_include_common && !itr.m_metrics.get_hw_counters().empty();
std::string hw_counter_json = "{}";
if(_include_hw)
{
// current values when read
hw_counter_json = generate_hw_counter_json(_tid, itr.m_metrics);
}
if(get_sampling_include_inlines() && iitr.lineinfo)
{
auto _lines = iitr.lineinfo.lines;
std::reverse(_lines.begin(), _lines.end());
size_t _n = 0;
for(const auto& line : _lines)
{
const auto* _name =
static_strings.emplace(demangle(line.name)).first->c_str();
auto inlined_name_id = data_processor.insert_string(_name);
auto inlined_call_stack = ::rocpd::json::create();
inlined_call_stack->set("name", std::string(demangle(line.name)));
inlined_call_stack->set("location", std::string(line.location));
inlined_call_stack->set("line", std::to_string(line.line));
inlined_call_stack->set("inlined", "true");
rocpd_insert_region<category::timer_sampling>(
thread_idx, _beg, _end, inlined_name_id, _track_name.c_str(),
inlined_call_stack->to_string().c_str(), "{}",
// Only include HW counters for first inlined function
(_n == 0) ? hw_counter_json.c_str() : "{}");
}
}
else
{
const auto* _name = static_strings.emplace(iitr.name).first->c_str();
const auto name_id = data_processor.insert_string(_name);
rocpd_insert_region<category::timer_sampling>(
thread_idx, _beg, _end, name_id, _track_name.c_str(),
generate_call_stack_json(iitr).c_str(),
generate_line_info_json(iitr).c_str(), hw_counter_json.c_str());
}
}
}
}
}
void
post_process_rocpd(int64_t _tid, const std::vector<timer_sampling_data>& _timer_data,
const std::vector<overflow_sampling_data>& _overflow_data)
{
rocpd_initialize_sampling_category();
rocpd_post_process_overflow_data(_tid, _overflow_data);
rocpd_post_process_timer_data(_tid, _timer_data);
}
struct sampling_initialization
{
static void preinit()