rocprofiler library unit tests (#81)

* Update CI and linting workflows

- delete linting workflow
- compile default CI job with clang-tidy
- split out code coverage matrix entry to separate job
- code coverage job runs code coverage 3x
  - once for total code coverage
  - once for unittests code coverage
  - once for samples code coverage

* Update PTL submodule

- improves handling of when thread pool is destroyed in atexit handler

* Update lib/rocprofiler/buffer

- buffer::instance::get_internal_buffer()
- allocate_buffer invokes internal_threading::initialize() on first entry
- update flush routine
 - if wait is false, does not wait for task group to finish syncing
 - checks for callback pointer

* Update lib/rocprofiler/internal_threading

- modifications to handle destruction of statics before atexit handler is invoked

* Update lib/rocprofiler/registration.cpp

- reorder atexit call in initialize()
- protect finalize from executing more than once

* Add unittests for rocprofiler buffer

* Update CI workflow

- disable fail-fast for sanitizers
- move AddressSanitizer job to top of the list

* Update lib/rocprofiler/tests/buffer/CMakeLists.txt

- do not set memcheck LD_PRELOAD for rocprofiler-lib-buffer-tests

* Update lib/rocprofiler/registration.{hpp,cpp}

- only invoke client finalizers if initialized
- remove invoke_client_initializer
- move invoke_client functions to anonymous namespace (no declaration in header)
- set fini status in finalize

* Update scripts/thread-sanitizer-suppr.txt

- suppress false positive for double mutex lock in external/ptl/source/PTL/TaskGroup.hh

* Restructure lib/rocprofiler/tests

* Update lib/common

- add utility.cpp
- move read_command_line to utility.{hpp,cpp}
  - was formerly in config.cpp

* Update lib/rocprofiler

- checks for init status return configuration locked if status is greater than -1
  - in other words, this prevents calling these functions directly (which was possible when the check was for greater than 0)

* Update lib/rocprofiler/context/context.{hpp,cpp}

- provide deactivate_client_contexts and deregister_client_contexts
  - these functions are used when the tool fails to configure

* Update lib/rocprofiler/registration.{hpp,cpp}

- internal "public" get_client_offset()
- client ids are offset by a random value to avoid default values behaving correctly

* Update lib/rocprofiler/tests

- fix rocprofiler_lib.registration_lambda_no_result

* Update lib/rocprofiler/tests

- fix rocprofiler_lib.registration_lambda_with_result

* Update lib/rocprofiler/tests

- remove deep bind from rocprofiler_lib.registration_lambda_with_result

* Update lib/rocprofiler/tests

- use RTLD_NOW when dlopen'ing in rocprofiler_lib.registration_lambda_with_result

* Update rocprofiler registration tests

- split registration tests into separate exe that links to shared library

* Formatting

* Update CI workflow

- always checkout submodules via actions/checkout

* Update lib/rocprofiler/buffer.{hpp,cpp}

- fix issue with buffer flushing not working when only called once

* Update rocprofiler lib registration test

- test for buffered callback

* Update include/rocprofiler/rocprofiler.h

- include internal_threading.h header

* Update rocprofiler lib registration test

- add in internal threading for buffered test

[ROCm/rocprofiler-sdk commit: a646c1546c]
Этот коммит содержится в:
Jonathan R. Madsen
2023-09-26 19:21:31 -05:00
коммит произвёл GitHub
родитель b66bcdd107
Коммит cfcfd280d0
25 изменённых файлов: 1157 добавлений и 313 удалений
+116 -25
Просмотреть файл
@@ -41,18 +41,12 @@ jobs:
max-parallel: 4
matrix:
include:
- os: 'ubuntu-22.04'
runner: 'renderD131'
device: '/renderD131'
build-type: 'RelWithDebInfo'
ci-flags: ''
name-tag: ''
- os: 'ubuntu-22.04'
runner: 'renderD131'
device: '/renderD131'
build-type: 'Release'
ci-flags: '--coverage'
name-tag: '-codecov'
- os: 'ubuntu-22.04'
runner: 'renderD131'
device: '/renderD131'
build-type: 'RelWithDebInfo'
ci-flags: '--linter clang-tidy'
name-tag: ''
runs-on: ${{ matrix.runner }}
@@ -69,6 +63,19 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install requirements
shell: bash
run: |
git config --global --add safe.directory '*'
apt-get update
apt-get install -y cmake clang-tidy-11 g++-11 g++-12 libgtest-dev python3-pip
update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-11 10
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 --slave /usr/bin/g++ g++ /usr/bin/g++-11
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 20 --slave /usr/bin/g++ g++ /usr/bin/g++-12
python3 -m pip install -r requirements.txt
- name: List Files
shell: bash
@@ -77,13 +84,6 @@ jobs:
for i in python python3 git cmake ctest; do which-realpath $i; done
ls -la
- name: Install requirements
shell: bash
run: |
pip3 install -r requirements.txt
apt install -y cmake libgtest-dev
git config --global --add safe.directory '*'
- name: Configure, Build, and Test
timeout-minutes: 30
shell: bash
@@ -126,11 +126,108 @@ jobs:
${{github.workspace}}/build/*.rpm
${{github.workspace}}/build/*.tgz
code-coverage:
strategy:
fail-fast: true
max-parallel: 4
matrix:
include:
- os: 'ubuntu-22.04'
runner: 'renderD131'
device: '/renderD131'
build-type: 'Release'
runs-on: ${{ matrix.runner }}
# define this for containers
env:
GIT_DISCOVERY_ACROSS_FILESYSTEM: 1
container:
image: compute-artifactory.amd.com:5000/rocm-plus-docker/compute-rocm-dkms-no-npi-hipclang:${{ needs.get_latest_mainline_build_number.outputs.LATEST_BUILD_NUMBER }}-${{ matrix.os }}-stg1
options: --memory=128g --cpus=32 --ipc=host --device=/dev/kfd --device=/dev/dri${{ matrix.device }} --group-add video --cap-add=SYS_PTRACE --cap-add CAP_SYS_PTRACE --cap-add CAP_SYS_ADMIN --security-opt seccomp=unconfined
if: ${{ always() }}
needs: get_latest_mainline_build_number
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: List Files
shell: bash
run: |
which-realpath() { echo "$1 resolves to $(realpath $(which $1))"; }
for i in python python3 git cmake ctest; do which-realpath $i; done
ls -la
- name: Install requirements
shell: bash
run: |
pip3 install -r requirements.txt
apt install -y cmake libgtest-dev
git config --global --add safe.directory '*'
- name: Configure, Build, and Test (Total Code Coverage)
timeout-minutes: 30
shell: bash
run:
python3 ./source/scripts/run-ci.py -B build
--name ${{ github.repository }}-${{ github.ref_name }}-mi200-${{ matrix.os }}-codecov
--build-jobs 8
--site mi200
--gpu-targets ${{ env.GPU_LIST }}
--coverage
--
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }}
-DPython3_EXECUTABLE=$(which python3)
- name: Configure, Build, and Test (Unit Tests Code Coverage)
timeout-minutes: 30
shell: bash
run:
find build -type f | egrep '\.gcda$' | xargs rm &&
python3 ./source/scripts/run-ci.py -B build
--name ${{ github.repository }}-${{ github.ref_name }}-mi200-${{ matrix.os }}-codecov-unittests
--build-jobs 8
--site mi200
--gpu-targets ${{ env.GPU_LIST }}
--coverage
--
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }}
-DPython3_EXECUTABLE=$(which python3)
--
-L unittests
- name: Configure, Build, and Test (Samples Code Coverage)
timeout-minutes: 30
shell: bash
run:
find build -type f | egrep '\.gcda$' | xargs rm &&
python3 ./source/scripts/run-ci.py -B build
--name ${{ github.repository }}-${{ github.ref_name }}-mi200-${{ matrix.os }}-codecov-samples
--build-jobs 8
--site mi200
--gpu-targets ${{ env.GPU_LIST }}
--coverage
--
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }}
-DPython3_EXECUTABLE=$(which python3)
--
-L samples
sanitizers:
strategy:
fail-fast: false
matrix:
include:
- os: 'ubuntu-22.04'
runner: 'renderD131'
device: '/renderD131'
build-type: 'RelWithDebInfo'
ci-flags: ''
sanitizer: 'AddressSanitizer'
- os: 'ubuntu-22.04'
runner: 'renderD131'
device: '/renderD131'
@@ -143,12 +240,6 @@ jobs:
build-type: 'RelWithDebInfo'
ci-flags: ''
sanitizer: 'LeakSanitizer'
- os: 'ubuntu-22.04'
runner: 'renderD131'
device: '/renderD131'
build-type: 'RelWithDebInfo'
ci-flags: ''
sanitizer: 'AddressSanitizer'
runs-on: ${{ matrix.runner }}
-86
Просмотреть файл
@@ -1,86 +0,0 @@
name: Linting
on:
workflow_dispatch:
push:
branches: [ "main" ]
paths-ignore:
- '*.md'
- 'source/docs/**'
pull_request:
branches: [ "main" ]
paths-ignore:
- '*.md'
- 'source/docs/**'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
# TODO(jrmadsen): replace LD_RUNPATH_FLAG, GPU_LIST, etc. with internal handling in cmake
ROCM_PATH: "/opt/rocm"
GPU_LIST: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"
PATH: "/usr/bin:$PATH"
jobs:
get-latest-mainline-build-number:
runs-on: mi200
outputs:
LATEST_BUILD_NUMBER: ${{ steps.get_build_number.outputs.LATEST_BUILD_NUMBER }}
steps:
- id: get_build_number
run: echo "LATEST_BUILD_NUMBER=$(wget -qO- 'http://rocm-ci.amd.com/job/compute-rocm-dkms-no-npi-hipclang/lastSuccessfulBuild/buildNumber')" >> $GITHUB_OUTPUT
clang-tidy:
strategy:
fail-fast: false
matrix:
os: ['ubuntu-22.04']
runner: ['renderD131']
device: ['/renderD131']
linter: ['clang-tidy']
runs-on: ${{ matrix.runner }}
# define this for containers
env:
GIT_DISCOVERY_ACROSS_FILESYSTEM: 1
container:
image: compute-artifactory.amd.com:5000/rocm-plus-docker/compute-rocm-dkms-no-npi-hipclang:${{ needs.get-latest-mainline-build-number.outputs.LATEST_BUILD_NUMBER }}-${{ matrix.os }}-stg1
options: --privileged --ipc=host --device=/dev/kfd --device=/dev/dri${{ matrix.device }} --group-add video --cap-add=SYS_PTRACE --cap-add CAP_SYS_PTRACE --cap-add CAP_SYS_ADMIN --security-opt seccomp=unconfined
if: ${{ always() }}
needs: get-latest-mainline-build-number
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Update container
run: |
apt-get update
apt-get install -y cmake clang-tidy-11 g++-11 g++-12
update-alternatives --install /usr/bin/clang-tidy clang-tidy /usr/bin/clang-tidy-11 10
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 --slave /usr/bin/g++ g++ /usr/bin/g++-11
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 20 --slave /usr/bin/g++ g++ /usr/bin/g++-12
- name: Configure, Build, and Test
timeout-minutes: 30
shell: bash
run:
python3 ./source/scripts/run-ci.py -B build
--name ${{ github.repository }}-${{ github.ref_name }}-mi200-${{ matrix.linter }}
--build-jobs 8
--site mi200
--gpu-targets ${{ env.GPU_LIST }}
--linter ${{ matrix.linter }}
--
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX="${{ env.ROCM_PATH }}"
-DPython3_EXECUTABLE=$(which python3)
--
-VV
+1 -1
Submodule projects/rocprofiler-sdk/external/ptl updated: 5de7a8a431...978dac37fe
+1
Просмотреть файл
@@ -75,6 +75,7 @@ ROCPROFILER_EXTERN_C_FINI
#include "rocprofiler/external_correlation.h"
#include "rocprofiler/hip.h"
#include "rocprofiler/hsa.h"
#include "rocprofiler/internal_threading.h"
#include "rocprofiler/marker.h"
#include "rocprofiler/pc_sampling.h"
#include "rocprofiler/profile_config.h"
+1 -1
Просмотреть файл
@@ -3,7 +3,7 @@
#
rocprofiler_activate_clang_tidy()
set(common_sources config.cpp environment.cpp demangle.cpp)
set(common_sources config.cpp environment.cpp demangle.cpp utility.cpp)
set(common_headers config.hpp defines.hpp environment.hpp demangle.hpp mpl.hpp
utility.hpp xml.hpp)
+1 -33
Просмотреть файл
@@ -22,6 +22,7 @@
#include "lib/common/config.hpp"
#include "lib/common/demangle.hpp"
#include "lib/common/environment.hpp"
#include "lib/common/utility.hpp"
#include <fmt/core.h>
@@ -44,39 +45,6 @@ namespace
{
std::time_t* launch_time = new std::time_t{std::time(nullptr)};
// Reads the command line of the process identified by `_pid` from
// `/proc/<pid>/cmdline` and splits it into its individual arguments.
//
// The file is treated as a sequence of NUL-separated strings: every '\0'
// terminates one argument. A trailing run of characters that is not followed
// by a '\0' is still returned as the final argument.
//
// _pid:    id of the process whose command line is read
// returns: the arguments in file order; an empty vector if the file could not
//          be opened or contained no data
std::vector<std::string>
read_command_line(pid_t _pid)
{
    auto _cmdline = std::vector<std::string>{};
    auto ifs      = std::ifstream{"/proc/" + std::to_string(_pid) + "/cmdline"};
    if(!ifs) return _cmdline;

    // Use unformatted, NUL-delimited reads. Formatted extraction (operator>>)
    // skips whitespace, which silently strips spaces, tabs, and newlines that
    // are embedded inside an argument.
    for(std::string sarg; std::getline(ifs, sarg, '\0');)
        _cmdline.push_back(sarg);

    return _cmdline;
}
std::string
get_local_datetime(const char* dt_format, std::time_t* dt_curr)
{
+4
Просмотреть файл
@@ -42,6 +42,7 @@ enum class config_context
int
get_mpi_size();
int
get_mpi_rank();
@@ -86,10 +87,13 @@ struct output_key
std::vector<output_key>
output_keys(std::string _tag = {});
std::string
compose_filename(const config&);
std::string
format(std::string _fpath, const std::string& _tag = {});
std::string
format_name(std::string_view _name, const config& = get_config<>());
+67
Просмотреть файл
@@ -0,0 +1,67 @@
// Copyright (c) 2023 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
#include "lib/common/utility.hpp"
#include <unistd.h>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
namespace rocprofiler
{
namespace common
{
// Reads the command line of the process identified by `_pid` from
// `/proc/<pid>/cmdline` and splits it into its individual arguments.
//
// The file is treated as a sequence of NUL-separated strings: every '\0'
// terminates one argument. A trailing run of characters that is not followed
// by a '\0' is still returned as the final argument.
//
// _pid:    id of the process whose command line is read
// returns: the arguments in file order; an empty vector if the file could not
//          be opened or contained no data
std::vector<std::string>
read_command_line(pid_t _pid)
{
    auto _cmdline = std::vector<std::string>{};
    auto ifs      = std::ifstream{"/proc/" + std::to_string(_pid) + "/cmdline"};
    if(!ifs) return _cmdline;

    // Use unformatted, NUL-delimited reads. Formatted extraction (operator>>)
    // skips whitespace, which silently strips spaces, tabs, and newlines that
    // are embedded inside an argument.
    for(std::string sarg; std::getline(ifs, sarg, '\0');)
        _cmdline.push_back(sarg);

    return _cmdline;
}
} // namespace common
} // namespace rocprofiler
+5
Просмотреть файл
@@ -26,6 +26,8 @@
#include <unistd.h>
#include <chrono>
#include <cstdint>
#include <string>
#include <vector>
namespace rocprofiler
{
@@ -46,6 +48,9 @@ timestamp_ns()
return std::chrono::steady_clock::now().time_since_epoch().count();
}
std::vector<std::string>
read_command_line(pid_t _pid);
template <class Container, typename Key = typename Container::key_type>
const auto*
get_val(const Container& map, const Key& key)
+10
Просмотреть файл
@@ -102,3 +102,13 @@ target_link_libraries(
set_target_properties(
rocprofiler-static-library PROPERTIES OUTPUT_NAME rocprofiler64 DEFINE_SYMBOL
rocprofiler_EXPORTS)
# ----------------------------------------------------------------------------------------#
#
# testing
#
# ----------------------------------------------------------------------------------------#
if(ROCPROFILER_BUILD_TESTS)
add_subdirectory(tests)
endif()
+44 -18
Просмотреть файл
@@ -34,6 +34,7 @@
#include <atomic>
#include <exception>
#include <mutex>
#include <vector>
namespace rocprofiler
@@ -59,9 +60,23 @@ get_buffers()
return _v;
}
// Looks up the buffer instance whose `buffer_id` field equals
// `buffer_id.handle`. Empty (null) entries in the buffer list are skipped.
// Returns nullptr when no entry matches.
instance*
get_buffer(rocprofiler_buffer_id_t buffer_id)
{
    const auto wanted = buffer_id.handle;
    for(auto& slot : get_buffers())
    {
        if(!slot) continue;
        if(slot->buffer_id == wanted) return slot.get();
    }
    return nullptr;
}
std::optional<rocprofiler_buffer_id_t>
allocate_buffer()
{
// ensure buffer has thread to handle flushing it
static auto _init_threads_once = std::once_flag{};
std::call_once(_init_threads_once, []() { internal_threading::initialize(); });
// ... allocate any internal space needed to handle another context ...
auto _lk = std::unique_lock<std::mutex>{get_buffers_mutex()};
@@ -84,51 +99,62 @@ allocate_buffer()
rocprofiler_status_t
flush(rocprofiler_buffer_id_t buffer_id, bool wait)
{
LOG(ERROR) << "flushing...";
if(buffer_id.handle >= get_buffers().size()) return ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND;
auto& buff = get_buffers().at(buffer_id.handle);
auto* task_group = rocprofiler::internal_threading::get_task_group(
rocprofiler_callback_thread_t{buff->task_group_id});
auto* task_group =
internal_threading::get_task_group(rocprofiler_callback_thread_t{buff->task_group_id});
if(task_group) task_group->wait();
if(wait && task_group) task_group->wait();
// buffer is currently being flushed or destroyed
if(buff->syncer.test_and_set()) return ROCPROFILER_STATUS_ERROR_BUFFER_BUSY;
auto buff_idx = buff->buffer_idx++;
auto idx = buff->buffer_idx++;
auto _task = [buff_idx, buffer_id]() {
auto& _buff = get_buffers().at(buffer_id.handle);
auto& buff_v = _buff->buffers.at(buff_idx % _buff->buffers.size());
auto _task = [buffer_id, idx]() {
LOG(ERROR) << "executing task...";
auto& buff_v = get_buffers().at(buffer_id.handle);
auto& buff_internal_v = buff_v->get_internal_buffer(idx);
if(!buff_v.is_empty())
if(!buff_internal_v.is_empty())
{
// get the array of record headers
auto buff_data = buff_v.get_record_headers();
auto buff_data = buff_internal_v.get_record_headers();
// invoke buffer callback
try
{
_buff->callback(rocprofiler_context_id_t{_buff->context_id},
rocprofiler_buffer_id_t{_buff->buffer_id},
buff_data.data(),
buff_data.size(),
_buff->callback_data,
_buff->drop_count);
if(buff_v->callback)
{
buff_v->callback(rocprofiler_context_id_t{buff_v->context_id},
rocprofiler_buffer_id_t{buff_v->buffer_id},
buff_data.data(),
buff_data.size(),
buff_v->callback_data,
buff_v->drop_count);
}
} catch(std::exception& e)
{
LOG(ERROR) << "buffer callback threw an exception: " << e.what();
}
// clear the buffer
buff_v.clear();
buff_internal_v.clear();
}
else
{
LOG(ERROR) << "buffer at " << buffer_id.handle << " is empty...";
}
_buff->syncer.clear();
buff_v->syncer.clear();
};
if(task_group)
{
LOG(ERROR) << "executing task...";
task_group->exec(_task);
if(wait) task_group->wait();
}
@@ -152,7 +178,7 @@ rocprofiler_create_buffer(rocprofiler_context_id_t context,
void* callback_data,
rocprofiler_buffer_id_t* buffer_id)
{
if(rocprofiler::registration::get_init_status() > 0)
if(rocprofiler::registration::get_init_status() > -1)
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
auto opt_buff_id = rocprofiler::buffer::allocate_buffer();
+47 -17
Просмотреть файл
@@ -22,7 +22,6 @@
#pragma once
#include <bits/stdint-uintn.h>
#include <rocprofiler/buffer.h>
#include <rocprofiler/fwd.h>
@@ -43,20 +42,23 @@ struct instance
{
using buffer_t = common::container::record_header_buffer;
mutable std::array<buffer_t, 2> buffers = {};
mutable std::atomic<unsigned short> buffer_idx = {};
mutable std::atomic_flag syncer = ATOMIC_FLAG_INIT;
mutable std::atomic<uint64_t> drop_count = {};
uint64_t watermark = 0;
uint64_t context_id = 0;
uint64_t buffer_id = 0;
uint64_t task_group_id = 0;
rocprofiler_buffer_tracing_cb_t callback = nullptr;
void* callback_data = nullptr;
rocprofiler_buffer_policy_t policy = ROCPROFILER_BUFFER_POLICY_NONE;
mutable std::array<buffer_t, 2> buffers = {};
mutable std::atomic_flag syncer = ATOMIC_FLAG_INIT;
mutable std::atomic<uint32_t> buffer_idx = {};
mutable std::atomic<uint64_t> drop_count = {};
uint64_t watermark = 0;
uint64_t context_id = 0;
uint64_t buffer_id = 0;
uint64_t task_group_id = 0;
rocprofiler_buffer_tracing_cb_t callback = nullptr;
void* callback_data = nullptr;
rocprofiler_buffer_policy_t policy = ROCPROFILER_BUFFER_POLICY_NONE;
template <typename Tp>
void emplace(uint32_t, uint32_t, Tp&);
buffer_t& get_internal_buffer();
buffer_t& get_internal_buffer(size_t);
};
using unique_buffer_vec_t = common::container::stable_vector<std::unique_ptr<instance>, 4>;
@@ -67,16 +69,44 @@ allocate_buffer();
unique_buffer_vec_t&
get_buffers();
instance*
get_buffer(rocprofiler_buffer_id_t buffer_id);
instance*
get_buffer(uint64_t buffer_idx);
rocprofiler_status_t
flush(rocprofiler_buffer_id_t buffer_id, bool wait);
rocprofiler_status_t
flush(uint64_t buffer_idx, bool wait);
} // namespace buffer
} // namespace rocprofiler
// Returns the internal buffer selected by the current value of `buffer_idx`,
// wrapped to the number of internal buffers.
inline rocprofiler::buffer::instance::buffer_t&
rocprofiler::buffer::instance::get_internal_buffer()
{
    const auto nbuffers = buffers.size();
    const auto current  = buffer_idx.load();
    return buffers.at(current % nbuffers);
}
// Returns the internal buffer at position `idx`, wrapped to the number of
// internal buffers so any index value is accepted.
inline rocprofiler::buffer::instance::buffer_t&
rocprofiler::buffer::instance::get_internal_buffer(size_t idx)
{
    const auto wrapped = idx % buffers.size();
    return buffers.at(wrapped);
}
// Convenience overload: wraps the raw handle value in a
// rocprofiler_buffer_id_t and forwards to the id-based lookup.
inline rocprofiler::buffer::instance*
rocprofiler::buffer::get_buffer(uint64_t buffer_idx)
{
    auto _id = rocprofiler_buffer_id_t{buffer_idx};
    return get_buffer(_id);
}
inline rocprofiler_status_t
flush(uint64_t buffer_idx, bool wait)
rocprofiler::buffer::flush(uint64_t buffer_idx, bool wait)
{
return flush(rocprofiler_buffer_id_t{buffer_idx}, wait);
}
} // namespace buffer
} // namespace rocprofiler
template <typename Tp>
inline void
@@ -88,10 +118,10 @@ rocprofiler::buffer::instance::emplace(uint32_t category, uint32_t kind, Tp& val
auto idx = get_idx();
if(!buffers.at(idx).emplace(category, kind, value))
{
if(buffers.at(idx).size() < sizeof(value))
if(buffers.at(idx).capacity() < sizeof(value))
{
auto msg = std::stringstream{};
msg << "buffer " << buffer_id << " to small (size=" << buffers.at(idx).size()
msg << "buffer " << buffer_id << " to small (size=" << buffers.at(idx).capacity()
<< ") to hold an object of type " << common::cxx_demangle(typeid(value).name())
<< " with size " << sizeof(value);
throw std::runtime_error(msg.str());
+1 -1
Просмотреть файл
@@ -48,7 +48,7 @@ rocprofiler_configure_buffer_tracing_service(rocprofiler_context_id_t
size_t operations_count,
rocprofiler_buffer_id_t buffer_id)
{
if(rocprofiler::registration::get_init_status() > 0)
if(rocprofiler::registration::get_init_status() > -1)
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
if(context_id.handle >= rocprofiler::context::get_registered_contexts().size())
+1 -1
Просмотреть файл
@@ -47,7 +47,7 @@ rocprofiler_configure_callback_tracing_service(rocprofiler_context_id_t context_
rocprofiler_callback_tracing_cb_t callback,
void* callback_args)
{
if(rocprofiler::registration::get_init_status() > 0)
if(rocprofiler::registration::get_init_status() > -1)
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
if(context_id.handle >= rocprofiler::context::get_registered_contexts().size())
+5 -1
Просмотреть файл
@@ -20,6 +20,7 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <rocprofiler/context.h>
#include <rocprofiler/rocprofiler.h>
#include "lib/rocprofiler/context/context.hpp"
@@ -34,7 +35,10 @@ extern "C" {
rocprofiler_status_t
rocprofiler_create_context(rocprofiler_context_id_t* context_id)
{
if(rocprofiler::registration::get_init_status() > 0)
// always set to none first
*context_id = ROCPROFILER_CONTEXT_NONE;
if(rocprofiler::registration::get_init_status() > -1)
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
auto cfg_id = rocprofiler::context::allocate_context();
+30
Просмотреть файл
@@ -24,6 +24,7 @@
#include <rocprofiler/rocprofiler.h>
#include "lib/common/container/stable_vector.hpp"
#include "lib/rocprofiler/buffer.hpp"
#include "lib/rocprofiler/context/context.hpp"
#include <glog/logging.h>
@@ -226,5 +227,34 @@ stop_context(rocprofiler_context_id_t idx)
return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; // compare exchange failed
}
// Clears every active-context slot that belongs to the given client.
//
// Each slot is loaded and, when its context's `client_idx` matches
// `client_id.handle`, nullptr is stored into the slot.
void
deactivate_client_contexts(rocprofiler_client_id_t client_id)
{
    for(auto& itr : get_active_contexts())
    {
        auto* itr_v = itr.load();
        // slots may already hold nullptr (this function itself stores nullptr),
        // so guard the dereference instead of assuming every slot is populated
        if(itr_v && itr_v->client_idx == client_id.handle)
        {
            itr.store(nullptr);
        }
    }
}
// Releases every registered context that belongs to the given client, along
// with all buffers attached to those contexts.
//
// For each registered context whose `client_idx` matches `client_id.handle`,
// every buffer whose `context_id` equals that context's `context_idx` is reset,
// then the context entry itself is reset.
void
deregister_client_contexts(rocprofiler_client_id_t client_id)
{
    for(auto& itr : get_registered_contexts())
    {
        // entries may be empty (this function resets them), so skip nulls
        // rather than dereferencing them
        if(!itr || itr->client_idx != client_id.handle) continue;

        for(auto& bitr : buffer::get_buffers())
        {
            // buffer entries are reset below and can therefore be null as well
            if(bitr && bitr->context_id == itr->context_idx) bitr.reset();
        }
        itr.reset();
    }
}
} // namespace context
} // namespace rocprofiler
+6
Просмотреть файл
@@ -23,6 +23,7 @@
#pragma once
#include <rocprofiler/fwd.h>
#include <rocprofiler/registration.h>
#include <rocprofiler/rocprofiler.h>
#include "lib/common/container/stable_vector.hpp"
@@ -126,5 +127,10 @@ get_registered_contexts();
active_context_vec_t&
get_active_contexts();
void deactivate_client_contexts(rocprofiler_client_id_t);
// should only be called if the client failed to initialize
void deregister_client_contexts(rocprofiler_client_id_t);
} // namespace context
} // namespace rocprofiler
+36 -23
Просмотреть файл
@@ -28,9 +28,11 @@
#include "lib/rocprofiler/buffer.hpp"
#include "lib/rocprofiler/context/context.hpp"
#include "lib/rocprofiler/internal_threading.hpp"
#include "lib/rocprofiler/registration.hpp"
#include <cstdint>
#include <mutex>
#include <stdexcept>
#include <string>
#include <vector>
@@ -139,17 +141,21 @@ execute_creation_notifiers(rocprofiler_internal_thread_library_t libs,
(execute(get_creation_notifier<Idx>()), ...);
}
auto&
auto*&
get_thread_pools()
{
static auto _v = thread_pool_vec_t{};
// use raw pointers here because of the access to this variable via atexit function call.
// if not a raw pointer, this may be destroyed automatically when the atexit handler is invoked
static auto* _v = new thread_pool_vec_t{};
return _v;
}
auto&
auto*&
get_task_groups()
{
static auto _v = task_group_vec_t{};
// use raw pointers here because of the access to this variable via atexit function call.
// if not a raw pointer, this may be destroyed automatically when the atexit handler is invoked
static auto* _v = new task_group_vec_t{};
return _v;
}
} // namespace
@@ -159,28 +165,34 @@ void
initialize()
{
static auto _once = std::once_flag{};
std::call_once(_once, create_callback_thread);
std::call_once(_once, []() {
atexit(&registration::finalize);
create_callback_thread();
});
}
// sync all the task groups and destroy the thread pools
void
finalize()
{
for(auto& itr : get_task_groups())
if(get_task_groups())
{
if(itr) itr->join();
for(auto& itr : *get_task_groups())
if(itr) itr->join();
get_task_groups()->clear();
}
for(auto& itr : get_thread_pools())
if(get_thread_pools())
{
if(itr) itr->destroy_threadpool();
for(auto& itr : *get_thread_pools())
if(itr) itr->destroy_threadpool();
get_thread_pools()->clear();
}
for(auto& itr : get_task_groups())
itr.reset();
for(auto& itr : get_thread_pools())
itr.reset();
delete get_task_groups();
delete get_thread_pools();
get_task_groups() = nullptr;
get_thread_pools() = nullptr;
}
void
@@ -198,20 +210,20 @@ notify_post_internal_thread_create(rocprofiler_internal_thread_library_t libs)
rocprofiler_callback_thread_t
create_callback_thread()
{
if(!get_thread_pools()) throw std::runtime_error{"thread pools already deleted"};
if(!get_task_groups()) throw std::runtime_error{"task groups already deleted"};
// notify that rocprofiler library is about to create an internal thread
notify_pre_internal_thread_create(ROCPROFILER_LIBRARY);
// this will be index after emplace_back
auto idx = get_thread_pools().size();
auto idx = get_thread_pools()->size();
auto& thr_pool = get_thread_pools().emplace_back(
new thread_pool_t{thread_pool_config_t{.pool_size = 1}}, [](thread_pool_t* v) {
v->destroy_threadpool();
delete v;
});
auto& thr_pool =
get_thread_pools()->emplace_back(new thread_pool_t{thread_pool_config_t{.pool_size = 1}});
// construct the task group to use the newly created thread pool
get_task_groups().emplace_back(new task_group_t{thr_pool.get()});
get_task_groups()->emplace_back(new task_group_t{thr_pool.get()});
// notify that rocprofiler library finished creating an internal thread
notify_post_internal_thread_create(ROCPROFILER_LIBRARY);
@@ -223,7 +235,7 @@ create_callback_thread()
task_group_t*
get_task_group(rocprofiler_callback_thread_t cb_tid)
{
return get_task_groups().at(cb_tid.handle).get();
return (get_task_groups()) ? get_task_groups()->at(cb_tid.handle).get() : nullptr;
}
} // namespace internal_threading
} // namespace rocprofiler
@@ -263,7 +275,8 @@ rocprofiler_status_t ROCPROFILER_API
rocprofiler_assign_callback_thread(rocprofiler_buffer_id_t buffer_id,
rocprofiler_callback_thread_t cb_thread_id)
{
if(cb_thread_id.handle >= rocprofiler::internal_threading::get_task_groups().size())
if(!rocprofiler::internal_threading::get_task_groups() ||
cb_thread_id.handle >= rocprofiler::internal_threading::get_task_groups()->size())
return ROCPROFILER_STATUS_ERROR_THREAD_NOT_FOUND;
for(auto& bitr : rocprofiler::buffer::get_buffers())
+4 -6
Просмотреть файл
@@ -38,12 +38,10 @@ namespace rocprofiler
{
namespace internal_threading
{
using thread_pool_t = PTL::ThreadPool;
using task_group_t = PTL::TaskGroup<void>;
using unique_thread_pool_t = std::unique_ptr<thread_pool_t, void (*)(thread_pool_t*)>;
using unique_task_group_t = std::unique_ptr<task_group_t>;
using thread_pool_vec_t = std::vector<unique_thread_pool_t>;
using task_group_vec_t = std::vector<unique_task_group_t>;
using thread_pool_t = PTL::ThreadPool;
using task_group_t = PTL::TaskGroup<void>;
using thread_pool_vec_t = std::vector<std::unique_ptr<thread_pool_t>>;
using task_group_vec_t = std::vector<std::unique_ptr<task_group_t>>;
void notify_pre_internal_thread_create(rocprofiler_internal_thread_library_t);
void notify_post_internal_thread_create(rocprofiler_internal_thread_library_t);
+118 -83
Просмотреть файл
@@ -40,8 +40,10 @@
#include <cstdint>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <mutex>
#include <random>
#include <stdexcept>
#include <string>
#include <string_view>
@@ -62,6 +64,21 @@ namespace registration
{
namespace
{
// invoke all rocprofiler_configure symbols
bool
invoke_client_configures();
// invoke initialize functions returned from rocprofiler_configure
bool
invoke_client_initializers();
// invoke finalize functions returned from rocprofiler_configure
bool
invoke_client_finalizers();
// explicitly invoke the finalize function of a specific client
void invoke_client_finalizer(rocprofiler_client_id_t);
auto&
get_status()
{
@@ -143,11 +160,19 @@ struct client_library
std::vector<client_library>
find_clients()
{
auto data = std::vector<client_library>{};
auto data = std::vector<client_library>{};
auto priority_offset = get_client_offset();
if(get_forced_configure())
{
data.emplace_back(client_library{"(forced)", nullptr, get_forced_configure()});
LOG(ERROR) << "adding forced configure";
uint32_t _prio = priority_offset + data.size();
data.emplace_back(client_library{"(forced)",
nullptr,
get_forced_configure(),
nullptr,
rocprofiler_client_id_t{nullptr, _prio},
rocprofiler_client_id_t{nullptr, _prio}});
}
if(!rocprofiler_configure && !get_forced_configure())
@@ -159,8 +184,16 @@ find_clients()
if(rocprofiler_configure != &rocprofiler_configure)
throw std::runtime_error("rocprofiler_configure != &rocprofiler_configure");
if(&rocprofiler_configure != get_forced_configure())
data.emplace_back(client_library{"unknown", nullptr, &rocprofiler_configure});
if(rocprofiler_configure && rocprofiler_configure != get_forced_configure())
{
uint32_t _prio = priority_offset + data.size();
data.emplace_back(client_library{"unknown",
nullptr,
rocprofiler_configure,
nullptr,
rocprofiler_client_id_t{nullptr, _prio},
rocprofiler_client_id_t{nullptr, _prio}});
}
for(const auto& itr : get_link_map())
{
@@ -195,7 +228,7 @@ find_clients()
}
else
{
uint32_t _prio = data.size();
uint32_t _prio = priority_offset + data.size();
auto& entry =
data.emplace_back(client_library{itr,
handle,
@@ -228,31 +261,6 @@ get_registration_mutex()
static auto _v = mutex_t{};
return _v;
}
} // namespace
int
get_init_status()
{
return get_status().first.load(std::memory_order_acquire);
}
int
get_fini_status()
{
return get_status().second.load(std::memory_order_acquire);
}
void
set_init_status(int v)
{
get_status().first.store(v, std::memory_order_release);
}
void
set_fini_status(int v)
{
get_status().second.store(v, std::memory_order_release);
}
bool
invoke_client_configures()
@@ -265,9 +273,16 @@ invoke_client_configures()
LOG(ERROR) << __FUNCTION__;
size_t prio = 0;
for(auto& itr : get_clients())
{
if(!itr.configure_func)
{
LOG(ERROR) << "rocprofiler::registration::invoke_client_configures() attempted to "
"invoke configure function from "
<< itr.name << " that had no configuration function";
continue;
}
if(get_invoked_configures().find(itr.configure_func) != get_invoked_configures().end())
{
LOG(ERROR) << "rocprofiler::registration::invoke_client_configures() attempted to "
@@ -286,10 +301,20 @@ invoke_client_configures()
<< ")";
}
auto* _result = itr.configure_func(
ROCPROFILER_VERSION, ROCPROFILER_VERSION_STRING, prio++, &itr.mutable_client_id);
auto* _result = itr.configure_func(ROCPROFILER_VERSION,
ROCPROFILER_VERSION_STRING,
itr.internal_client_id.handle - get_client_offset(),
&itr.mutable_client_id);
if(_result)
{
itr.configure_result = std::make_unique<rocprofiler_tool_configure_result_t>(*_result);
}
else
{
context::deactivate_client_contexts(itr.internal_client_id);
context::deregister_client_contexts(itr.internal_client_id);
}
get_invoked_configures().emplace(itr.configure_func);
}
@@ -308,7 +333,6 @@ invoke_client_initializers()
LOG(ERROR) << __FUNCTION__;
set_init_status(-1);
for(auto& itr : get_clients())
{
if(itr.configure_result && itr.configure_result->initialize)
@@ -322,22 +346,21 @@ invoke_client_initializers()
}
}
// initialization is no longer available
set_init_status(1);
return true;
}
bool
invoke_client_finalizers()
{
if(get_fini_status() > 0) return false;
// NOTE: this function is expected to only be invoked from the finalize function (which sets the
// fini status)
if(get_init_status() < 1 || get_fini_status() > 0) return false;
auto _lk = scoped_lock_t{get_registration_mutex(), std::defer_lock};
if(_lk.owns_lock()) return false;
_lk.lock();
set_fini_status(-1);
for(auto& itr : get_clients())
{
if(itr.configure_result && itr.configure_result->finalize)
@@ -348,44 +371,6 @@ invoke_client_finalizers()
}
}
set_fini_status(1);
return true;
}
// Explicitly runs the initialize callback of every registered client whose internal and
// mutable client id handles both equal client_id.handle.
//
// Returns false without doing anything when the init status is already greater than zero;
// otherwise returns true (even if no client matched). The init status is set to -1 while
// the callbacks run and is then restored to the value it had after the lock was taken.
bool
invoke_client_initializer(rocprofiler_client_id_t client_id)
{
if(get_init_status() > 0) return false;
auto _lk = scoped_lock_t{get_registration_mutex(), std::defer_lock};
// NOTE(review): _lk was just constructed with std::defer_lock, so this check looks like it
// can never be true -- confirm against the definition of scoped_lock_t
if(_lk.owns_lock()) return false;
_lk.lock();
// save the original status
auto _restore_status = get_init_status();
set_init_status(-1);
for(auto& itr : get_clients())
{
if(itr.internal_client_id.handle == client_id.handle &&
itr.mutable_client_id.handle == client_id.handle)
{
if(itr.configure_result && itr.configure_result->initialize)
{
// the client is pushed for the duration of its initialize callback and popped afterwards
context::push_client(itr.internal_client_id.handle);
itr.configure_result->initialize(&invoke_client_finalizer,
itr.configure_result->tool_data);
context::pop_client(itr.internal_client_id.handle);
// set to nullptr so initialize only gets called once
itr.configure_result->initialize = nullptr;
}
}
}
// we don't want the explicit client initialization to set the init status to 1
// we just want to restore what it previously was
set_init_status(_restore_status);
return true;
}
@@ -410,6 +395,44 @@ invoke_client_finalizer(rocprofiler_client_id_t client_id)
}
}
}
} // namespace
// Returns the client offset: a value drawn once (on the first call) from a uniform
// distribution seeded via std::random_device, and returned unchanged by every later call.
// The value lies in [UINT8_MAX, UINT32_MAX - UINT8_MAX]; presumably the margin at both ends
// leaves room for small client indices to be added to it without wrapping.
uint32_t
get_client_offset()
{
    static const uint32_t _offset = []() -> uint32_t {
        constexpr uint32_t _margin = std::numeric_limits<uint8_t>::max();
        constexpr uint32_t _upper  = std::numeric_limits<uint32_t>::max() - _margin;

        auto _engine = std::mt19937{std::random_device{}()};
        auto _dist   = std::uniform_int_distribution<uint32_t>{_margin, _upper};
        return _dist(_engine);
    }();
    return _offset;
}
// Returns the initialization status: the first element of the status pair, read with
// acquire ordering.
int
get_init_status()
{
    auto& _init = get_status().first;
    return _init.load(std::memory_order_acquire);
}
// Returns the finalization status: the second element of the status pair, read with
// acquire ordering.
int
get_fini_status()
{
    auto& _fini = get_status().second;
    return _fini.load(std::memory_order_acquire);
}
// Publishes a new initialization status (first element of the status pair) with release
// ordering.
void
set_init_status(int v)
{
    auto& _init = get_status().first;
    _init.store(v, std::memory_order_release);
}
// Publishes a new finalization status (second element of the status pair) with release
// ordering.
void
set_fini_status(int v)
{
    auto& _fini = get_status().second;
    _fini.store(v, std::memory_order_release);
}
void
initialize()
@@ -418,11 +441,15 @@ initialize()
static auto _ready = std::atomic<bool>{false};
std::call_once(_once, []() {
// initialization is in process
set_init_status(-1);
std::atexit(&finalize);
init_logging();
invoke_client_configures();
invoke_client_initializers();
internal_threading::initialize();
std::atexit(&finalize);
// initialization is no longer available
set_init_status(1);
_ready.store(true, std::memory_order_release);
});
@@ -436,11 +463,19 @@ initialize()
// Tears the library down. The whole sequence runs inside std::call_once so that repeated
// calls to finalize() perform the teardown only once; nothing may run outside of the
// once-guard, otherwise every call would shut down HSA and the internal threads again.
void
finalize()
{
    static auto _once = std::once_flag{};
    std::call_once(_once, []() {
        // finalization is in progress
        set_fini_status(-1);
        hsa_shut_down();
        // client finalizers are only invoked (and active contexts only cleared) when the
        // init status is greater than zero
        if(get_init_status() > 0)
        {
            invoke_client_finalizers();
            for(auto& itr : rocprofiler::context::get_active_contexts())
                itr.store(nullptr, std::memory_order_seq_cst);
        }
        internal_threading::finalize();
        // finalization is complete
        set_fini_status(1);
    });
}
} // namespace registration
} // namespace rocprofiler
+3 -17
Просмотреть файл
@@ -62,23 +62,9 @@ initialize();
void
finalize();
// invoke all rocprofiler_configure symbols
bool
invoke_client_configures();
// invoke initialize functions returned from rocprofiler_configure
bool
invoke_client_initializers();
// invoke finalize functions returned from rocprofiler_configure
bool
invoke_client_finalizers();
// explicitly invoke the initialize function of a specific client
bool invoke_client_initializer(rocprofiler_client_id_t);
// explicitly invoke the finalize function of a specific client
void invoke_client_finalizer(rocprofiler_client_id_t);
// get the randomly generated client offset number
uint32_t
get_client_offset();
int
get_init_status();
+55
Просмотреть файл
@@ -0,0 +1,55 @@
#
#
#
# clang-tidy is deactivated for the test targets declared in this file
rocprofiler_deactivate_clang_tidy()
include(GoogleTest)
# -------------------------------------------------------------------------------------- #
#
# Link to static rocprofiler library
#
# -------------------------------------------------------------------------------------- #
# sources of the test executable that links against the static library
set(rocprofiler_lib_sources buffer.cpp)
add_executable(rocprofiler-lib-tests)
target_sources(rocprofiler-lib-tests PRIVATE ${rocprofiler_lib_sources})
target_link_libraries(
rocprofiler-lib-tests
PRIVATE rocprofiler::rocprofiler-static-library
rocprofiler::rocprofiler-common-library GTest::gtest GTest::gtest_main)
# register the tests found in the sources with CTest; their names are stored in lib_TESTS
gtest_add_tests(
TARGET rocprofiler-lib-tests
SOURCES ${rocprofiler_lib_sources}
TEST_LIST lib_TESTS
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
# each test gets a 45 second timeout and the "unittests" label
set_tests_properties(${lib_TESTS} PROPERTIES TIMEOUT 45 LABELS "unittests")
# -------------------------------------------------------------------------------------- #
#
# Link to shared rocprofiler library
#
# -------------------------------------------------------------------------------------- #
# sources of the test executable that links against the shared library
set(rocprofiler_shared_lib_sources registration.cpp)
add_executable(rocprofiler-lib-tests-shared)
target_sources(rocprofiler-lib-tests-shared PRIVATE ${rocprofiler_shared_lib_sources})
target_link_libraries(
rocprofiler-lib-tests-shared
PRIVATE rocprofiler::rocprofiler-shared-library
rocprofiler::rocprofiler-common-library GTest::gtest GTest::gtest_main)
# register the tests found in the sources with CTest; their names are stored in
# shared_lib_TESTS
gtest_add_tests(
TARGET rocprofiler-lib-tests-shared
SOURCES ${rocprofiler_shared_lib_sources}
TEST_LIST shared_lib_TESTS
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
# same timeout and label as above; in addition HSA_TOOLS_LIB is set to the path of the
# shared rocprofiler library in the environment of each test
set_tests_properties(
${shared_lib_TESTS}
PROPERTIES TIMEOUT 45 LABELS "unittests" ENVIRONMENT
"HSA_TOOLS_LIB=$<TARGET_FILE:rocprofiler-shared-library>")
+75
Просмотреть файл
@@ -0,0 +1,75 @@
// MIT License
//
// Copyright (c) 2023 ROCm Developer Tools
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "lib/rocprofiler/buffer.hpp"
#include "lib/common/units.hpp"
#include <rocprofiler/buffer.h>
#include <rocprofiler/fwd.h>
#include <rocprofiler/registration.h>
#include <gtest/gtest.h>
#include <pthread.h>
#include <cstdint>
#include <cstdlib>
#include <random>
#include <typeinfo>
// Allocates a buffer, places one record in it, then flushes and destroys the buffer.
TEST(rocprofiler_lib, buffer)
{
    namespace buffer = ::rocprofiler::buffer;
    namespace common = ::rocprofiler::common;

    ASSERT_EQ(buffer::get_buffers().size(), 0)
        << "no buffers should have been created at this point";

    // fatal assertion: every later step dereferences buffer_id
    auto buffer_id = buffer::allocate_buffer();
    ASSERT_TRUE(buffer_id) << "failed to allocate buffer";

    ASSERT_EQ(buffer::get_buffers().size(), 1) << "incorrect number of buffers created";

    // get pointer to buffer and verify it before it is used for the first time
    auto* buffer_v = buffer::get_buffer(*buffer_id);
    ASSERT_NE(buffer_v, nullptr) << "get_buffer returned a nullptr";

    buffer_v->watermark = common::units::get_page_size();

    // no records are expected before anything has been emplaced
    {
        auto records = buffer_v->get_internal_buffer().get_record_headers();
        EXPECT_EQ(records.size(), 0);
    }

    EXPECT_TRUE(buffer_v->get_internal_buffer().allocate(sizeof(rocprofiler_buffer_id_t)));
    EXPECT_EQ(buffer_v->get_internal_buffer().capacity(), common::units::get_page_size());

    // the buffer id itself is used as the payload of the single record
    auto data = *buffer_id;
    buffer_v->emplace(1, 1, data);

    auto records = buffer_v->get_internal_buffer().get_record_headers();
    EXPECT_EQ(records.size(), 1);

    auto flush_status = buffer::flush(*buffer_id, true);
    EXPECT_EQ(flush_status, ROCPROFILER_STATUS_SUCCESS);

    auto destroy_status = rocprofiler_destroy_buffer(*buffer_id);
    EXPECT_EQ(destroy_status, ROCPROFILER_STATUS_SUCCESS);
}
+522
Просмотреть файл
@@ -0,0 +1,522 @@
// MIT License
//
// Copyright (c) 2023 ROCm Developer Tools
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <rocprofiler/registration.h>
#include <rocprofiler/rocprofiler.h>
#include "lib/common/environment.hpp"
#include "lib/common/units.hpp"
#include "lib/common/utility.hpp"
#include <gtest/gtest.h>
#include <dlfcn.h>
#include <pthread.h>
#include <cstdint>
#include <cstdlib>
#include <filesystem>
#include <iostream>
#include <random>
#include <sstream>
#include <string_view>
#include <typeinfo>
#include <vector>
// Evaluates ARG exactly once and records a non-fatal GoogleTest failure, annotated with MSG
// and the stringified call, when the result is not ROCPROFILER_STATUS_SUCCESS.
// The body is wrapped in do { ... } while(false) so that `ROCPROFILER_CALL(...);` is a single
// statement and can be used safely inside an unbraced if/else.
#define ROCPROFILER_CALL(ARG, MSG)                                                                 \
    do                                                                                             \
    {                                                                                              \
        auto _status = (ARG);                                                                      \
        EXPECT_EQ(_status, ROCPROFILER_STATUS_SUCCESS) << MSG << " :: " << #ARG;                   \
    } while(false)
namespace
{
// State shared between a test body and the tool callbacks it registers; the callbacks
// receive it as their opaque void* data pointer.
struct callback_data
{
// client id pointer handed to the configure function
rocprofiler_client_id_t* client_id = nullptr;
// finalize function handed to the tool initialize callback
rocprofiler_client_finalize_t client_fini_func = nullptr;
// context created by the tool initialize callback
rocprofiler_context_id_t client_ctx = {};
// buffer created by the tool initialize callback (buffer tracing test only)
rocprofiler_buffer_id_t client_buffer = {};
// callback thread created by the tool initialize callback (buffer tracing test only)
rocprofiler_callback_thread_t client_thread = {};
// incremented by tool init, tool fini, and the thread pre/post-create callbacks
uint64_t client_workflow_count = {};
// incremented once per tracing callback invocation or per buffered record
uint64_t client_callback_count = {};
// incremented on an ENTER phase callback and decremented on an EXIT phase callback
int64_t current_depth = 0;
// largest current_depth value observed at an EXIT phase callback
int64_t max_depth = 0;
};
// Results collected by the hsa_iterate_agents callbacks used in the tests.
struct agent_data
{
// number of times the agent callback was invoked
uint64_t agent_count = 0;
// device type of every agent for which hsa_agent_get_info succeeded
std::vector<hsa_device_type_t> agents = {};
};
// Callback-tracing callback used by the tests.
//
// Prints the operation name of every record, counts the invocation in
// callback_data::client_callback_count, tracks the ENTER/EXIT nesting depth, and checks
// that the operation arguments can be iterated.
//
// record      -- the tracing record delivered by rocprofiler
// client_data -- pointer to the callback_data instance registered with the service
void
tool_tracing_callback(rocprofiler_callback_tracing_record_t record, void* client_data)
{
    using name_map_t = std::unordered_map<rocprofiler_service_callback_tracing_kind_t,
                                          std::unordered_map<uint32_t, const char*>>;

    auto* cb_data = static_cast<callback_data*>(client_data);

    // built once, on the first invocation, from the operation names of that first record's
    // tracing kind
    static auto name_map = [&record]() {
        rocprofiler_callback_tracing_operation_name_cb_t cb =
            [](rocprofiler_service_callback_tracing_kind_t kind,
               uint32_t                                    operation,
               const char*                                 operation_name,
               void*                                       data) {
                // write straight into the caller's map; no copy of the map is made
                (*static_cast<name_map_t*>(data))[kind][operation] = operation_name;
                return 0;
            };

        auto tmp = name_map_t{};
        rocprofiler_iterate_callback_tracing_kind_operation_names(
            record.kind, cb, static_cast<void*>(&tmp));
        return tmp;
    }();

    std::cout << "[" << __FILE__ << ":" << __LINE__ << "] "
              << name_map[record.kind][record.operation] << "\n"
              << std::flush;

    cb_data->client_callback_count++;

    if(record.phase == ROCPROFILER_SERVICE_CALLBACK_PHASE_ENTER)
    {
        cb_data->current_depth++;
    }
    else if(record.phase == ROCPROFILER_SERVICE_CALLBACK_PHASE_EXIT)
    {
        // the depth is sampled before it is decremented, so it includes the exiting call
        cb_data->max_depth = std::max(cb_data->current_depth, cb_data->max_depth);
        cb_data->current_depth--;
    }
    else
    {
        GTEST_FAIL() << "unsupported callback tracing phase " << record.phase;
    }

    // accumulates a printable argument list and the number of arguments seen
    struct info_data
    {
        uint64_t          num_args = 0;
        std::stringstream arg_ss   = {};
    } info_data_v;

    auto info_data_cb = [](rocprofiler_service_callback_tracing_kind_t,
                           uint32_t,
                           uint32_t          arg_num,
                           const char*       arg_name,
                           const char*       arg_value_str,
                           const void* const arg_value_addr,
                           void*             data) -> int {
        auto& info = *static_cast<info_data*>(data);
        info.arg_ss << ((arg_num == 0) ? "(" : ", ");
        info.arg_ss << arg_num << ": " << arg_name << "=" << arg_value_str;
        EXPECT_NE(arg_name, nullptr);
        EXPECT_NE(arg_value_str, nullptr);
        EXPECT_NE(arg_value_addr, nullptr);
        // arguments are expected to arrive in order, starting at zero
        EXPECT_EQ(arg_num, info.num_args);
        info.num_args++;
        return 0;
    };

    ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_operation_args(
                         record, info_data_cb, static_cast<void*>(&info_data_v)),
                     "Failure iterating trace operation args");

    EXPECT_GT(info_data_v.num_args, 0)
        << name_map[record.kind][record.operation] << info_data_v.arg_ss.str();
}
// Buffer-tracing callback used by the tests.
//
// Checks every delivered record header (hash, category, kind), then walks the HSA API
// records in timestamp order, validating each one and counting it in
// callback_data::client_callback_count.
//
// context     -- context the buffer belongs to
// buffer_id   -- buffer being flushed
// headers     -- array of num_headers record header pointers
// buffer_data -- pointer to the callback_data instance registered with the buffer
// drop_count  -- drop count reported for the buffer (only printed)
void
tool_tracing_buffered(rocprofiler_context_id_t      context,
                      rocprofiler_buffer_id_t       buffer_id,
                      rocprofiler_record_header_t** headers,
                      size_t                        num_headers,
                      void*                         buffer_data,
                      uint64_t                      drop_count)
{
    using operation_names_t = std::unordered_map<uint32_t, const char*>;
    using name_map_t =
        std::unordered_map<rocprofiler_service_buffer_tracing_kind_t, operation_names_t>;
    using hsa_api_record_t = rocprofiler_buffer_tracing_hsa_api_record_t;

    std::cout << __FUNCTION__ << "...\n" << std::endl;

    auto* cb_data = static_cast<callback_data*>(buffer_data);

    // built once, on the first invocation: operation names of every buffer tracing kind
    static auto name_map = []() {
        // invoked for each tracing kind; collects the operation names of that kind
        static auto tracing_kind_names_cb = [](rocprofiler_service_buffer_tracing_kind_t kind,
                                               const char* /*kind_name*/,
                                               void* data) {
            auto tracing_operation_names_cb =
                [](rocprofiler_service_buffer_tracing_kind_t kindv,
                   uint32_t                                  operation,
                   const char*                               operation_name,
                   void*                                     data_v) {
                    auto& collected_names                 = *static_cast<name_map_t*>(data_v);
                    collected_names[kindv][operation]     = operation_name;
                    return 0;
                };

            rocprofiler_iterate_buffer_tracing_kind_operation_names(
                kind, tracing_operation_names_cb, data);
            return 0;
        };

        auto collected = name_map_t{};
        rocprofiler_iterate_buffer_tracing_kind_names(tracing_kind_names_cb,
                                                      static_cast<void*>(&collected));
        return collected;
    }();

    auto v_records = std::vector<hsa_api_record_t*>{};
    v_records.reserve(num_headers);

    // validate each header and gather its payload
    for(size_t idx = 0; idx < num_headers; ++idx)
    {
        auto* header = headers[idx];
        ASSERT_TRUE(header != nullptr);

        auto hash = rocprofiler_record_header_compute_hash(header->category, header->kind);
        EXPECT_EQ(header->hash, hash);
        EXPECT_TRUE(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
                    header->kind == ROCPROFILER_SERVICE_BUFFER_TRACING_HSA_API);

        v_records.emplace_back(static_cast<hsa_api_record_t*>(header->payload));
    }

    // order by start timestamp; ties are broken by end timestamp
    auto by_timestamps = [](auto lhs, auto rhs) {
        if(lhs->start_timestamp != rhs->start_timestamp)
            return lhs->start_timestamp < rhs->start_timestamp;
        return lhs->end_timestamp < rhs->end_timestamp;
    };
    std::sort(v_records.begin(), v_records.end(), by_timestamps);

    // correlation id of the previously checked record; persists across invocations
    static int64_t last_corr_id = -1;

    for(auto* record : v_records)
    {
        auto info = std::stringstream{};
        info << "tid=" << record->thread_id << ", context=" << context.handle
             << ", buffer_id=" << buffer_id.handle << ", cid=" << record->correlation_id.id
             << ", kind=" << record->kind << ", operation=" << record->operation
             << ", drop_count=" << drop_count << ", start=" << record->start_timestamp
             << ", stop=" << record->end_timestamp;

        auto corr_id = static_cast<int64_t>(record->correlation_id.id);

        std::cout << info.str() << "\n" << std::flush;

        EXPECT_GE(context.handle, 0) << info.str();
        EXPECT_GT(record->thread_id, 0) << info.str();
        EXPECT_GT(record->kind, 0) << info.str();
        EXPECT_GT(corr_id, last_corr_id) << info.str();
        EXPECT_GT(record->start_timestamp, 0) << info.str();
        EXPECT_GT(record->end_timestamp, 0) << info.str();
        EXPECT_LE(record->start_timestamp, record->end_timestamp) << info.str();

        cb_data->client_callback_count++;
        last_corr_id = corr_id;
    }
}
// Thread pre-creation notification: counts the event in the workflow counter of the
// callback_data instance passed as tool_data.
void
thread_precreate(rocprofiler_internal_thread_library_t /*lib*/, void* tool_data)
{
    auto& shared_state = *static_cast<callback_data*>(tool_data);
    shared_state.client_workflow_count += 1;
}
// Thread post-creation notification: counts the event in the workflow counter of the
// callback_data instance passed as tool_data.
void
thread_postcreate(rocprofiler_internal_thread_library_t /*lib*/, void* tool_data)
{
    auto& shared_state = *static_cast<callback_data*>(tool_data);
    shared_state.client_workflow_count += 1;
}
} // namespace
// Forces configuration with a configure function that returns no configure result and
// checks the arguments that the configure function receives.
TEST(rocprofiler_lib, registration_lambda_no_result)
{
// configure function: validates its arguments and returns nullptr (no tool callbacks)
static rocprofiler_configure_func_t rocp_init =
[](uint32_t version,
const char* runtime_version,
uint32_t prio,
rocprofiler_client_id_t* client_id) -> rocprofiler_tool_configure_result_t* {
auto expected_version = ROCPROFILER_VERSION;
EXPECT_EQ(expected_version, version);
EXPECT_EQ(std::string_view{runtime_version}, std::string_view{ROCPROFILER_VERSION_STRING});
EXPECT_EQ(prio, 0);
EXPECT_EQ(client_id->name, nullptr);
return nullptr;
};
auto ctx = rocprofiler_context_id_t{};
// creating a context from the test body is expected to fail both before and after the
// configuration is forced
EXPECT_NE(rocprofiler_create_context(&ctx), ROCPROFILER_STATUS_SUCCESS);
EXPECT_EQ(rocprofiler_force_configure(rocp_init), ROCPROFILER_STATUS_SUCCESS);
EXPECT_NE(rocprofiler_create_context(&ctx), ROCPROFILER_STATUS_SUCCESS);
}
// Forces configuration with a tool that enables HSA API callback tracing, performs a few
// HSA calls, finalizes the client, and checks the number of callbacks that were observed.
TEST(rocprofiler_lib, callback_registration_lambda_with_result)
{
using init_func_t = int (*)(rocprofiler_client_finalize_t, void*);
using fini_func_t = void (*)(void*);
using hsa_iterate_agents_cb_t = hsa_status_t (*)(hsa_agent_t, void*);
auto cmd_line = rocprofiler::common::read_command_line(getpid());
ASSERT_FALSE(cmd_line.empty());
// tool initialize callback: creates a context, configures HSA API callback tracing on it
// with tool_tracing_callback, and starts the context
static init_func_t tool_init = [](rocprofiler_client_finalize_t fini_func,
void* client_data) -> int {
auto* cb_data = static_cast<callback_data*>(client_data);
cb_data->client_workflow_count++;
// saved so the test body can finalize the client explicitly
cb_data->client_fini_func = fini_func;
ROCPROFILER_CALL(rocprofiler_create_context(&cb_data->client_ctx),
"failed to create context");
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
cb_data->client_ctx,
ROCPROFILER_SERVICE_CALLBACK_TRACING_HSA_API,
nullptr,
0,
tool_tracing_callback,
client_data),
"callback tracing service failed to configure");
int valid_ctx = 0;
ROCPROFILER_CALL(rocprofiler_context_is_valid(cb_data->client_ctx, &valid_ctx),
"failure checking context validity");
EXPECT_EQ(valid_ctx, 1);
ROCPROFILER_CALL(rocprofiler_start_context(cb_data->client_ctx),
"rocprofiler context start failed");
// no errors
return 0;
};
// tool finalize callback: stops the context and counts the workflow step
static fini_func_t tool_fini = [](void* client_data) -> void {
auto* cb_data = static_cast<callback_data*>(client_data);
ROCPROFILER_CALL(rocprofiler_stop_context(cb_data->client_ctx),
"rocprofiler context stop failed");
static_cast<callback_data*>(client_data)->client_workflow_count++;
};
// static so that the captureless lambdas above and below can refer to them
static auto cb_data = callback_data{};
static auto cfg_result =
rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t),
tool_init,
tool_fini,
static_cast<void*>(&cb_data)};
// configure function: validates its arguments, names the client after the running test,
// and returns the configure result holding the callbacks above
static rocprofiler_configure_func_t rocp_init =
[](uint32_t version,
const char* runtime_version,
uint32_t prio,
rocprofiler_client_id_t* client_id) -> rocprofiler_tool_configure_result_t* {
auto expected_version = ROCPROFILER_VERSION;
EXPECT_EQ(expected_version, version);
EXPECT_EQ(std::string_view{runtime_version}, std::string_view{ROCPROFILER_VERSION_STRING});
EXPECT_EQ(prio, 0);
EXPECT_EQ(client_id->name, nullptr);
cb_data.client_id = client_id;
cb_data.client_id->name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
return &cfg_result;
};
auto ctx = rocprofiler_context_id_t{};
// creating a context from the test body is expected to fail both before and after the
// configuration is forced
EXPECT_NE(rocprofiler_create_context(&ctx), ROCPROFILER_STATUS_SUCCESS);
EXPECT_EQ(rocprofiler_force_configure(rocp_init), ROCPROFILER_STATUS_SUCCESS);
EXPECT_NE(rocprofiler_create_context(&ctx), ROCPROFILER_STATUS_SUCCESS);
// agent callback: counts the agent and records its device type; the hsa_agent_get_info
// call is made from inside hsa_iterate_agents, i.e. one level deeper
hsa_iterate_agents_cb_t agent_cb = [](hsa_agent_t agent, void* data) {
static_cast<agent_data*>(data)->agent_count++;
auto status = HSA_STATUS_SUCCESS;
auto agent_type = hsa_device_type_t{};
if((status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agent_type)) ==
HSA_STATUS_SUCCESS)
static_cast<agent_data*>(data)->agents.emplace_back(agent_type);
return status;
};
auto _agent_data = agent_data{};
hsa_init();
hsa_status_t itr_status = hsa_iterate_agents(agent_cb, static_cast<void*>(&_agent_data));
EXPECT_EQ(itr_status, HSA_STATUS_SUCCESS);
EXPECT_GT(_agent_data.agent_count, 0);
EXPECT_EQ(_agent_data.agent_count, _agent_data.agents.size());
ASSERT_NE(cb_data.client_id, nullptr);
ASSERT_NE(cb_data.client_fini_func, nullptr);
// explicitly finalize this client before the counters are checked
cb_data.client_fini_func(*cb_data.client_id);
// expected callback count is two for hsa_iterate_agents and two callbacks for
// hsa_agent_get_info for each agent.
uint64_t expected_cb_count = 2 + (2 * _agent_data.agent_count);
// one workflow step for tool_init and one for tool_fini
EXPECT_EQ(cb_data.client_workflow_count, 2);
EXPECT_EQ(cb_data.client_callback_count, expected_cb_count);
// every ENTER must have been matched by an EXIT
EXPECT_EQ(cb_data.current_depth, 0);
// hsa_agent_get_info nested inside hsa_iterate_agents gives a depth of two
EXPECT_EQ(cb_data.max_depth, 2);
}
// Forces configuration with a tool that enables buffered HSA API tracing, performs a few
// HSA calls, flushes the buffer, finalizes the client, and checks the number of records
// and workflow steps that were observed.
TEST(rocprofiler_lib, buffer_registration_lambda_with_result)
{
using init_func_t = int (*)(rocprofiler_client_finalize_t, void*);
using fini_func_t = void (*)(void*);
using hsa_iterate_agents_cb_t = hsa_status_t (*)(hsa_agent_t, void*);
auto cmd_line = rocprofiler::common::read_command_line(getpid());
ASSERT_FALSE(cmd_line.empty());
// tool initialize callback: creates a context and a buffer, configures buffered HSA API
// tracing into that buffer, assigns a dedicated callback thread, and starts the context
static init_func_t tool_init = [](rocprofiler_client_finalize_t fini_func,
void* client_data) -> int {
auto* cb_data = static_cast<callback_data*>(client_data);
cb_data->client_workflow_count++;
// saved so the test body can finalize the client explicitly
cb_data->client_fini_func = fini_func;
ROCPROFILER_CALL(rocprofiler_create_context(&cb_data->client_ctx),
"failed to create context");
ROCPROFILER_CALL(rocprofiler_create_buffer(cb_data->client_ctx,
4096,
2048,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
tool_tracing_buffered,
client_data,
&cb_data->client_buffer),
"buffer creation failed");
ROCPROFILER_CALL(
rocprofiler_configure_buffer_tracing_service(cb_data->client_ctx,
ROCPROFILER_SERVICE_BUFFER_TRACING_HSA_API,
nullptr,
0,
cb_data->client_buffer),
"buffer tracing service failed to configure");
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&cb_data->client_thread),
"failure creating callback thread");
ROCPROFILER_CALL(
rocprofiler_assign_callback_thread(cb_data->client_buffer, cb_data->client_thread),
"failed to assign thread for buffer");
int valid_ctx = 0;
ROCPROFILER_CALL(rocprofiler_context_is_valid(cb_data->client_ctx, &valid_ctx),
"failure checking context validity");
EXPECT_EQ(valid_ctx, 1);
ROCPROFILER_CALL(rocprofiler_start_context(cb_data->client_ctx),
"rocprofiler context start failed");
// no errors
return 0;
};
// tool finalize callback: stops the context and counts the workflow step
static fini_func_t tool_fini = [](void* client_data) -> void {
auto* cb_data = static_cast<callback_data*>(client_data);
ROCPROFILER_CALL(rocprofiler_stop_context(cb_data->client_ctx),
"rocprofiler context stop failed");
static_cast<callback_data*>(client_data)->client_workflow_count++;
};
// static so that the captureless lambdas above and below can refer to them
static auto cb_data = callback_data{};
static auto cfg_result =
rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t),
tool_init,
tool_fini,
static_cast<void*>(&cb_data)};
// configure function: validates its arguments, names the client after the running test,
// registers the thread creation notifications, and returns the configure result
static rocprofiler_configure_func_t rocp_init =
[](uint32_t version,
const char* runtime_version,
uint32_t prio,
rocprofiler_client_id_t* client_id) -> rocprofiler_tool_configure_result_t* {
auto expected_version = ROCPROFILER_VERSION;
EXPECT_EQ(expected_version, version);
EXPECT_EQ(std::string_view{runtime_version}, std::string_view{ROCPROFILER_VERSION_STRING});
EXPECT_EQ(prio, 0);
EXPECT_EQ(client_id->name, nullptr);
cb_data.client_id = client_id;
cb_data.client_id->name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
ROCPROFILER_CALL(rocprofiler_at_internal_thread_create(thread_precreate,
thread_postcreate,
ROCPROFILER_LIBRARY,
static_cast<void*>(&cb_data)),
"failed to register for thread creation notifications");
return &cfg_result;
};
auto ctx = rocprofiler_context_id_t{};
// creating a context from the test body is expected to fail both before and after the
// configuration is forced
EXPECT_NE(rocprofiler_create_context(&ctx), ROCPROFILER_STATUS_SUCCESS);
EXPECT_EQ(rocprofiler_force_configure(rocp_init), ROCPROFILER_STATUS_SUCCESS);
EXPECT_NE(rocprofiler_create_context(&ctx), ROCPROFILER_STATUS_SUCCESS);
// agent callback: counts the agent and records its device type
hsa_iterate_agents_cb_t agent_cb = [](hsa_agent_t agent, void* data) {
static_cast<agent_data*>(data)->agent_count++;
auto status = HSA_STATUS_SUCCESS;
auto agent_type = hsa_device_type_t{};
if((status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agent_type)) ==
HSA_STATUS_SUCCESS)
static_cast<agent_data*>(data)->agents.emplace_back(agent_type);
return status;
};
auto _agent_data = agent_data{};
hsa_init();
hsa_status_t itr_status = hsa_iterate_agents(agent_cb, static_cast<void*>(&_agent_data));
EXPECT_EQ(itr_status, HSA_STATUS_SUCCESS);
EXPECT_GT(_agent_data.agent_count, 0);
EXPECT_EQ(_agent_data.agent_count, _agent_data.agents.size());
ASSERT_NE(cb_data.client_id, nullptr);
ASSERT_NE(cb_data.client_fini_func, nullptr);
// flush the buffer before finalizing the client and checking the counters
EXPECT_EQ(rocprofiler_flush_buffer(cb_data.client_buffer), ROCPROFILER_STATUS_SUCCESS);
cb_data.client_fini_func(*cb_data.client_id);
// expected record count is one buffered record for the hsa_iterate_agents call plus one
// record for the hsa_agent_get_info call made for each agent.
uint64_t expected_cb_count = 1 + _agent_data.agent_count;
// expect the tool init, tool fini, and two calls to thread_precreate and thread_postcreate each
// (the main thread and the assigned thread for the buffer)
uint64_t expected_workflow_count = 6;
EXPECT_EQ(cb_data.client_workflow_count, expected_workflow_count);
EXPECT_EQ(cb_data.client_callback_count, expected_cb_count);
EXPECT_GT(cb_data.client_thread.handle, 0);
// the depth counters are only updated by the callback tracing callback, which this test
// does not register
EXPECT_EQ(cb_data.current_depth, 0);
EXPECT_EQ(cb_data.max_depth, 0);
}
+4
Просмотреть файл
@@ -14,3 +14,7 @@ mutex:librocm_smi64.so
# google logging
race:google::LogMessageTime::CalcGmtOffset
race:tzset_internal
# bug in libtsan.so.0 which thinks there is a
# double mutex lock (there isn't one)
mutex:external/ptl/source/PTL/TaskGroup.hh