* Add external/cereal submodule

- used for integration testing

* Update lib/common/container/small_vector.hpp

- documentation notes

* Update tests/apps

- update transpose app (fix build)
- add reproducible-runtime app

* Update include/rocprofiler/fwd.h

- rocprofiler_service_callback_phase_t -> rocprofiler_callback_phase_t

* Update PTL submodule

- fix for task group: submitting tasks from a different thread

* Update lib/rocprofiler/hsa/queue.cpp

- CHECK_NOTNULL(_buffer)

* Update lib/rocprofiler/hsa/hsa.cpp

- use buffer::get_buffer instead of manually looking for buffer

* Update lib/rocprofiler/internal_threading.cpp

- use buffer::get_buffer instead of manually looking for buffer

* Update lib/rocprofiler/buffer.cpp

- offset the buffer id
- properly handle rocprofiler_create_buffer reusing rocprofiler_buffer_id_t on a different context

* Update tests

- kernel tracing library for integration testing

* Add cereal submodule

* Update lib/rocprofiler/registration.*

- OnUnload
- Support ROCP_TOOL_LIBRARIES for python usage
- improve finalize function
- remove calling hsa_shut_down in finalize function

* Update lib/rocprofiler/buffer.*

- allocate_buffer sets the buffer id value
- expose (internally) is_valid_buffer_id
- update test

* Update tests/kernel-tracing

- installation
- better organization of JSON groups
- improved messaging

* Update lib/rocprofiler/registration.cpp

- add workaround for hsa-runtime supporting rocprofiler-register

* Update tests/kernel-tracing/kernel-tracing.cpp

- fix memory leaks

* cereal support for minimal JSON

- update cereal submodule to rocprofiler branch
- change REPO_BRANCH in rocprofiler_checkout_git_submodule for cereal
- update tests/kernel-tracing/kernel-tracing.cpp
  - use minimal json
  - slight tweak: give each context a name by storing name + context pointer pairs in a map

* Update tests/kernel-tracing/kernel-tracing.cpp

- support runtime selection of contexts via KERNEL_TRACING_CONTEXTS environment variable

* Update tests

- tests/CMakeLists.txt
  - find_package(Python3 REQUIRED)
- tests/kernel-tracing
  - pytest validation

* Update CI workflow

- install pytest
- add checks for test labels

* Update scripts/run-ci.py

- change --coverage options
  - replace 'unittests' with 'tests'
- replace test label regex '-L unittests' with '-L tests'

* Update requirements.txt

- this is now an empty file since none of the packages are required for this repo
Этот коммит содержится в:
Jonathan R. Madsen
2023-11-16 03:21:39 -06:00
коммит произвёл GitHub
родитель 8f73db5b1c
Коммит cf5e4b4b1b
29 изменённых файлов: 2176 добавлений и 115 удалений
+44 -7
Просмотреть файл
@@ -75,6 +75,7 @@ jobs:
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 --slave /usr/bin/g++ g++ /usr/bin/g++-11
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 20 --slave /usr/bin/g++ g++ /usr/bin/g++-12
python3 -m pip install -r requirements.txt
python3 -m pip install pytest
- name: List Files
shell: bash
@@ -163,9 +164,10 @@ jobs:
- name: Install requirements
shell: bash
run: |
pip3 install -r requirements.txt
apt install -y cmake libgtest-dev
git config --global --add safe.directory '*'
apt-get install -y cmake libgtest-dev python3-pip
python3 -m pip install -r requirements.txt
python3 -m pip install pytest
- name: Configure, Build, and Test (Total Code Coverage)
timeout-minutes: 30
@@ -181,17 +183,17 @@ jobs:
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }}
-DPython3_EXECUTABLE=$(which python3)
- name: Configure, Build, and Test (Unit Tests Code Coverage)
- name: Configure, Build, and Test (Tests Code Coverage)
timeout-minutes: 30
shell: bash
run:
find build -type f | egrep '\.gcda$' | xargs rm &&
python3 ./source/scripts/run-ci.py -B build
--name ${{ github.repository }}-${{ github.ref_name }}-mi200-${{ matrix.os }}-codecov-unittests
--name ${{ github.repository }}-${{ github.ref_name }}-mi200-${{ matrix.os }}-codecov-tests
--build-jobs 8
--site mi200
--gpu-targets ${{ env.GPU_LIST }}
--coverage unittests
--coverage tests
--
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }}
-DPython3_EXECUTABLE=$(which python3)
@@ -211,6 +213,40 @@ jobs:
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }}
-DPython3_EXECUTABLE=$(which python3)
- name: Verify Test Labels
timeout-minutes: 5
shell: bash
run: |
pushd build
#
# if following fails, there is a test that does not have
# a label identifying it as sample or test (unit or integration).
# Recommended labels are:
# - samples
# - unittests
# - integration-tests
#
ctest -N -LE 'samples|tests' -O ctest.mislabeled.log
grep 'Total Tests: 0' ctest.mislabeled.log
#
# if following fails, then there is overlap between the labels.
# A test cannot both be a sample and (unit/integration) test.
#
ctest -N -O ctest.all.log
ctest -N -O ctest.samples.log -L samples
ctest -N -O ctest.tests.log -L tests
NUM_ALL=$(grep 'Total Tests:' ctest.all.log | awk '{print $NF}')
NUM_SAMPLE=$(grep 'Total Tests:' ctest.samples.log | awk '{print $NF}')
NUM_TEST=$(grep 'Total Tests:' ctest.tests.log | awk '{print $NF}')
NUM_SUM=$((${NUM_SAMPLE} + ${NUM_TEST}))
echo "Total tests: ${NUM_ALL}"
echo "Total labeled tests: ${NUM_SUM}"
if [ ${NUM_ALL} != ${NUM_SUM} ]; then
echo "Test label overlap"
exit 1
fi
popd
sanitizers:
strategy:
fail-fast: false
@@ -262,9 +298,10 @@ jobs:
- name: Install requirements
shell: bash
run: |
pip3 install -r requirements.txt
apt install -y cmake libgtest-dev libasan8 libtsan2
git config --global --add safe.directory '*'
apt-get install -y cmake libgtest-dev python3-pip libasan8 libtsan2
python3 -m pip install -r requirements.txt
python3 -m pip install pytest
- name: Configure, Build, and Test
timeout-minutes: 45
+3
Просмотреть файл
@@ -13,3 +13,6 @@
[submodule "external/ptl"]
path = external/ptl
url = https://github.com/jrmadsen/PTL
[submodule "external/cereal"]
path = external/cereal
url = https://github.com/jrmadsen/cereal.git
поставляемый
+15
Просмотреть файл
@@ -41,6 +41,21 @@ if(ROCPROFILER_BUILD_TESTS)
find_package(GTest REQUIRED)
target_link_libraries(rocprofiler-gtest INTERFACE GTest::gtest)
endif()
rocprofiler_checkout_git_submodule(
RECURSIVE
RELATIVE_PATH external/cereal
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
REPO_URL https://github.com/jrmadsen/cereal.git
REPO_BRANCH "rocprofiler")
add_library(rocprofiler-cereal INTERFACE)
add_library(rocprofiler::cereal ALIAS rocprofiler-cereal)
target_compile_definitions(rocprofiler-cereal
INTERFACE $<BUILD_INTERFACE:CEREAL_THREAD_SAFE=1>)
target_include_directories(
rocprofiler-cereal
INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/cereal/include>)
endif()
if(ROCPROFILER_BUILD_GLOG)
поставляемый Подмодуль
+1
Submodule external/cereal added at cc723c4fd6
поставляемый
+1 -1
-10
Просмотреть файл
@@ -1,10 +0,0 @@
barectf
bcrypt
CppHeaderParser
lxml
matplotlib
pandas
protobuf
pycparser
pyparsing
websockets
+2 -2
Просмотреть файл
@@ -113,7 +113,7 @@ typedef enum // NOLINT(performance-enum-size)
ROCPROFILER_CALLBACK_PHASE_UNLOAD =
ROCPROFILER_CALLBACK_PHASE_EXIT, ///< Callback invoked prior to code object unloading
ROCPROFILER_CALLBACK_PHASE_LAST,
} rocprofiler_service_callback_phase_t;
} rocprofiler_callback_phase_t;
/**
* @brief Service Callback Tracing Kind.
@@ -373,7 +373,7 @@ typedef struct rocprofiler_callback_tracing_record_t
rocprofiler_correlation_id_t correlation_id;
rocprofiler_service_callback_tracing_kind_t kind;
uint32_t operation;
rocprofiler_service_callback_phase_t phase;
rocprofiler_callback_phase_t phase;
void* payload;
} rocprofiler_callback_tracing_record_t;
+15
Просмотреть файл
@@ -22,6 +22,21 @@
#pragma once
/**
* @file small_vector.hpp
* @brief This is inspired by and largely derived from llvm/ADT/SmallVector.h. It provides an
* STL-like vector class which uses a small allocation on the stack when the number of elements is
* small.
*
* This container is ideal for vectors which are allocated frequently, will more than likely only
* contain a few elements, and are allocated in places where performance is a concern. When the
* number of elements is small, storing these elements will not require a heap allocation but it can
* also grow to accommodate larger allocation needs. In other words, it effectively has memory
* allocation like std::array<T, N> until the number of elements exceeds N. Once the number of
* elements exceeds N, it turns into std::vector<T>.
*
* Reference: https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/ADT/SmallVector.h
*/
#include "lib/common/defines.hpp"
#include <algorithm>
+57 -12
Просмотреть файл
@@ -22,6 +22,7 @@
#include "lib/rocprofiler/buffer.hpp"
#include <glog/logging.h>
#include <rocprofiler/rocprofiler.h>
#include "lib/common/container/stable_vector.hpp"
@@ -35,6 +36,7 @@
#include <atomic>
#include <exception>
#include <mutex>
#include <random>
#include <vector>
namespace rocprofiler
@@ -51,8 +53,28 @@ get_buffers_mutex()
static auto _v = std::mutex{};
return _v;
}
// Returns the process-wide offset that is added to buffer indices to form buffer ids.
// The value is drawn once, on first call, uniformly from
// [numeric_limits<uint8_t>::max(), numeric_limits<uint16_t>::max()] and is
// returned unchanged by every later call.
uint64_t
get_buffer_offset()
{
    static const auto offset_v = []() -> uint64_t {
        constexpr uint64_t min_v = std::numeric_limits<uint8_t>::max();
        constexpr uint64_t max_v = std::numeric_limits<uint16_t>::max();

        // seed a Mersenne Twister from the system random device
        auto seed   = std::random_device{}();
        auto engine = std::mt19937{seed};
        auto dist   = std::uniform_int_distribution<uint64_t>{min_v, max_v};
        return dist(engine);
    }();
    return offset_v;
}
} // namespace
// Returns true when the handle lies inside the half-open range
// [offset, offset + number of registered buffers), where offset is the value
// returned by get_buffer_offset().
bool
is_valid_buffer_id(rocprofiler_buffer_id_t id)
{
    const auto count = get_buffers().size();
    const auto first = get_buffer_offset();

    // handles below the offset can never refer to a registered buffer
    if(id.handle < first) return false;

    return id.handle < (first + count);
}
unique_buffer_vec_t&
get_buffers()
{
@@ -63,9 +85,15 @@ get_buffers()
instance*
get_buffer(rocprofiler_buffer_id_t buffer_id)
{
for(auto& itr : get_buffers())
if(is_valid_buffer_id(buffer_id))
{
if(itr && itr->buffer_id == buffer_id.handle) return itr.get();
for(auto& itr : get_buffers())
{
if(itr && itr->buffer_id == buffer_id.handle)
{
return itr.get();
}
}
}
return nullptr;
}
@@ -81,7 +109,7 @@ allocate_buffer()
auto _lk = std::unique_lock<std::mutex>{get_buffers_mutex()};
// initial context identifier number
auto _idx = get_buffers().size();
auto _idx = get_buffer_offset() + get_buffers().size();
// make space in registered
get_buffers().emplace_back(nullptr);
@@ -93,6 +121,9 @@ allocate_buffer()
if(!_cfg) return std::nullopt;
// set the buffer id value
_cfg_v->buffer_id = _idx;
return rocprofiler_buffer_id_t{_idx};
}
@@ -101,9 +132,11 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait)
{
if(registration::get_fini_status() > 0) return ROCPROFILER_STATUS_SUCCESS;
if(buffer_id.handle >= get_buffers().size()) return ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND;
auto offset = get_buffer_offset();
auto& buff = get_buffers().at(buffer_id.handle);
if(!is_valid_buffer_id(buffer_id)) return ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND;
auto* buff = get_buffer(buffer_id);
auto* task_group =
internal_threading::get_task_group(rocprofiler_callback_thread_t{buff->task_group_id});
@@ -115,8 +148,8 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait)
auto idx = buff->buffer_idx++;
auto _task = [buffer_id, idx]() {
auto& buff_v = get_buffers().at(buffer_id.handle);
auto _task = [buffer_id, idx, offset]() {
auto& buff_v = get_buffers().at(buffer_id.handle - offset);
auto& buff_internal_v = buff_v->get_internal_buffer(idx);
if(!buff_internal_v.is_empty())
@@ -179,11 +212,20 @@ rocprofiler_create_buffer(rocprofiler_context_id_t context,
if(rocprofiler::registration::get_init_status() > -1)
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
auto* existing_buff = rocprofiler::buffer::get_buffer(*buffer_id);
if(existing_buff)
{
LOG(ERROR) << "buffer (handle=" << buffer_id->handle
<< ") already allocated: handle=" << existing_buff->buffer_id;
return ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGURED;
}
auto opt_buff_id = rocprofiler::buffer::allocate_buffer();
if(!opt_buff_id) return ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND;
*buffer_id = *opt_buff_id;
buffer_id->handle = opt_buff_id->handle;
auto& buff = rocprofiler::buffer::get_buffers().at(opt_buff_id->handle);
auto& buff = rocprofiler::buffer::get_buffers().at(opt_buff_id->handle -
rocprofiler::buffer::get_buffer_offset());
// allocate the buffers. if it is lossless, we allocate a second buffer to store data while
// other buffer is being flushed
@@ -195,7 +237,8 @@ rocprofiler_create_buffer(rocprofiler_context_id_t context,
buff->callback = callback;
buff->callback_data = callback_data;
buff->context_id = context.handle;
buff->buffer_idx = buffer_id->handle;
buff->buffer_id = buffer_id->handle;
buff->buffer_idx = 0;
return ROCPROFILER_STATUS_SUCCESS;
}
@@ -209,10 +252,12 @@ rocprofiler_flush_buffer(rocprofiler_buffer_id_t buffer_id)
rocprofiler_status_t
rocprofiler_destroy_buffer(rocprofiler_buffer_id_t buffer_id)
{
if(buffer_id.handle >= rocprofiler::buffer::get_buffers().size())
if(!rocprofiler::buffer::is_valid_buffer_id(buffer_id))
return ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND;
auto& buff = rocprofiler::buffer::get_buffers().at(buffer_id.handle);
auto offset = rocprofiler::buffer::get_buffer_offset();
auto& buffers = rocprofiler::buffer::get_buffers();
auto& buff = buffers.at(buffer_id.handle - offset);
// buffer is currently being flushed or destroyed
if(buff->syncer.test_and_set()) return ROCPROFILER_STATUS_ERROR_BUFFER_BUSY;
+7 -4
Просмотреть файл
@@ -45,12 +45,12 @@ struct instance
mutable std::array<buffer_t, 2> buffers = {};
mutable std::atomic_flag syncer = ATOMIC_FLAG_INIT;
mutable std::atomic<uint32_t> buffer_idx = {};
mutable std::atomic<uint32_t> buffer_idx = {}; // array index
mutable std::atomic<uint64_t> drop_count = {};
uint64_t watermark = 0;
uint64_t context_id = 0;
uint64_t buffer_id = 0;
uint64_t task_group_id = 0;
uint64_t context_id = 0; // rocprofiler_context_id_t value
uint64_t buffer_id = 0; // rocprofiler_buffer_id_t value
uint64_t task_group_id = 0; // thread-pool assignment
rocprofiler_buffer_tracing_cb_t callback = nullptr;
void* callback_data = nullptr;
rocprofiler_buffer_policy_t policy = ROCPROFILER_BUFFER_POLICY_NONE;
@@ -65,6 +65,9 @@ struct instance
using unique_buffer_vec_t =
common::container::stable_vector<allocator::unique_static_ptr_t<instance>, 4>;
bool
is_valid_buffer_id(rocprofiler_buffer_id_t id);
std::optional<rocprofiler_buffer_id_t>
allocate_buffer();
+9 -13
Просмотреть файл
@@ -336,21 +336,17 @@ hsa_api_impl<Idx>::functor(Args&&... args)
assert(itr.ctx->buffered_tracer);
auto buffer_id =
itr.ctx->buffered_tracer->buffer_data.at(info_type::buffered_domain_idx);
for(auto& bitr : buffer::get_buffers())
auto buffer_v = buffer::get_buffer(buffer_id);
if(buffer_v && buffer_v->context_id == itr.ctx->context_idx &&
buffer_v->buffer_id == buffer_id.handle)
{
if(bitr && bitr->context_id == itr.ctx->context_idx &&
bitr->buffer_id == buffer_id.handle)
{
// make copy of record
auto record_v = buffer_record;
// update the record with the correlation
record_v.correlation_id.external = itr.external_correlation;
// make copy of record
auto record_v = buffer_record;
// update the record with the correlation
record_v.correlation_id.external = itr.external_correlation;
bitr->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING,
info_type::buffered_domain_idx,
record_v);
break;
}
buffer_v->emplace(
ROCPROFILER_BUFFER_CATEGORY_TRACING, info_type::buffered_domain_idx, record_v);
}
}
}
+6 -6
Просмотреть файл
@@ -175,9 +175,9 @@ AsyncSignalHandler(hsa_signal_value_t /*signal_v*/, void* data)
dispatch_packet.grid_size_y,
dispatch_packet.grid_size_z}};
_buffer->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING,
ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH,
record);
CHECK_NOTNULL(_buffer)->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING,
ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH,
record);
}
}
@@ -195,9 +195,9 @@ AsyncSignalHandler(hsa_signal_value_t /*signal_v*/, void* data)
_queue_id,
_kern_id};
_buffer->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING,
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
record);
CHECK_NOTNULL(_buffer)->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING,
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
record);
}
}
}
+5 -6
Просмотреть файл
@@ -292,14 +292,13 @@ rocprofiler_assign_callback_thread(rocprofiler_buffer_id_t buffer_id,
if(cb_thread_id.handle >= rocprofiler::internal_threading::get_task_groups()->size())
return ROCPROFILER_STATUS_ERROR_THREAD_NOT_FOUND;
for(auto& bitr : rocprofiler::buffer::get_buffers())
auto* buff_v = rocprofiler::buffer::get_buffer(buffer_id);
if(buff_v)
{
if(bitr && bitr->buffer_id == buffer_id.handle)
{
bitr->task_group_id = cb_thread_id.handle;
return ROCPROFILER_STATUS_SUCCESS;
}
buff_v->task_group_id = cb_thread_id.handle;
return ROCPROFILER_STATUS_SUCCESS;
}
return ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND;
}
}
+97 -11
Просмотреть файл
@@ -21,6 +21,7 @@
// SOFTWARE.
#include "lib/rocprofiler/registration.hpp"
#include "lib/common/environment.hpp"
#include "lib/rocprofiler/agent.hpp"
#include "lib/rocprofiler/allocator.hpp"
#include "lib/rocprofiler/context/context.hpp"
@@ -171,9 +172,38 @@ find_clients()
rocprofiler_client_id_t{nullptr, _prio}});
}
if(!rocprofiler_configure && !get_forced_configure())
// Reads the ROCP_TOOL_LIBRARIES environment variable and splits it on ':' into
// the individual library entries. Empty entries (leading, trailing, or doubled
// delimiters) are skipped; an unset or empty variable yields an empty vector.
auto get_env_libs = []() {
    auto       val       = common::get_env("ROCP_TOOL_LIBRARIES", std::string{});
    auto       val_arr   = std::vector<std::string>{};
    const auto delimiter = std::string_view{":"};

    size_t beg = 0;
    while(beg < val.length())
    {
        auto end = val.find(delimiter, beg);
        // no further delimiter: the remainder of the string is the final entry
        if(end == std::string::npos) end = val.length();
        if(end > beg) val_arr.emplace_back(val.substr(beg, end - beg));
        beg = end + delimiter.length();
    }

    return val_arr;
};
auto env = get_env_libs();
if(!rocprofiler_configure && !get_forced_configure() && env.empty())
{
LOG(ERROR) << "no rocprofiler_configure function found";
LOG(ERROR) << "no rocprofiler_configure function(s) found";
return data;
}
@@ -201,6 +231,13 @@ find_clients()
decltype(::rocprofiler_configure)* _sym = nullptr;
*(void**) (&_sym) = dlsym(handle, "rocprofiler_configure");
// symbol not found
if(!_sym)
{
LOG(INFO) << "|_" << itr << " did not contain rocprofiler_configure symbol";
continue;
}
// skip the configure function that was forced
if(_sym == get_forced_configure())
{
@@ -210,12 +247,6 @@ find_clients()
continue;
}
if(!_sym)
{
LOG(INFO) << "|_" << itr << " did not contain rocprofiler_configure symbol";
continue;
}
if(_sym == &rocprofiler_configure && data.size() == 1)
{
data.front().name = itr;
@@ -236,6 +267,38 @@ find_clients()
}
}
if(!env.empty())
{
for(const auto& itr : env)
{
void* handle = dlopen(itr.c_str(), RTLD_GLOBAL | RTLD_LAZY);
LOG_IF(ERROR, handle == nullptr) << "error dlopening " << itr;
for(const auto& ditr : data)
{
if(ditr.dlhandle && ditr.dlhandle == handle)
{
handle = nullptr;
break;
}
}
if(handle)
{
decltype(::rocprofiler_configure)* _sym = nullptr;
*(void**) (&_sym) = dlsym(handle, "rocprofiler_configure");
uint32_t _prio = priority_offset + data.size();
data.emplace_back(client_library{itr,
handle,
_sym,
nullptr,
rocprofiler_client_id_t{nullptr, _prio},
rocprofiler_client_id_t{nullptr, _prio}});
}
}
}
LOG(ERROR) << __FUNCTION__ << " found " << data.size() << " clients";
return data;
@@ -479,16 +542,26 @@ initialize()
void
finalize()
{
if(get_fini_status() != 0) return;
if(get_fini_status() != 0)
{
LOG(INFO) << "ignoring finalization request (value=" << get_fini_status() << ")";
return;
}
static auto _sync = std::atomic_flag{};
if(_sync.test_and_set()) return;
if(_sync.test_and_set())
{
LOG(INFO) << "ignoring finalization request [already finalized] (value="
<< get_fini_status() << ")";
return;
}
// above returns true for all invocations after the first one
LOG(INFO) << "finalizing rocprofiler (value=" << get_fini_status() << ")";
static auto _once = std::once_flag{};
std::call_once(_once, []() {
set_fini_status(-1);
::hsa_shut_down();
hsa::code_object_shutdown();
if(get_init_status() > 0)
{
@@ -560,6 +633,11 @@ rocprofiler_set_api_table(const char* name,
}
else if(std::string_view{name} == "hsa")
{
// this is a slight hack due to a hsa-runtime bug with rocprofiler-register which
// causes it to register the API table twice when HSA_TOOL_LIB is set to this
// rocprofiler library. Fixed in Gerrit review 961592.
setenv("HSA_TOOLS_ROCPROFILER_V1_TOOLS", "0", 0);
// pass to hsa init
LOG_IF(ERROR, num_tables > 1)
<< " rocprofiler expected HSA library to pass 1 API table, not " << num_tables;
@@ -623,4 +701,12 @@ OnLoad(HsaApiTable* table,
return true;
}
// Unload hook: logs, runs rocprofiler's registration::finalize(), then logs completion.
// NOTE(review): presumably invoked by the HSA runtime when it unloads this tool
// library (counterpart of OnLoad) -- confirm against the hsa-runtime tools API.
void
OnUnload()
{
LOG(INFO) << "Unloading hsa-runtime...";
::rocprofiler::registration::finalize();
LOG(INFO) << "Finalization complete.";
}
}
+3
Просмотреть файл
@@ -40,6 +40,9 @@ OnLoad(HsaApiTable* table,
uint64_t failed_tool_count,
const char* const* failed_tool_names) ROCPROFILER_PUBLIC_API;
void
OnUnload() ROCPROFILER_PUBLIC_API;
// this is the "hidden" function that rocprofiler-register invokes to pass
// the API tables to rocprofiler
int
+6 -3
Просмотреть файл
@@ -45,13 +45,16 @@ TEST(rocprofiler_lib, buffer)
auto buffer_id = buffer::allocate_buffer();
EXPECT_TRUE(buffer_id) << "failed to allocate buffer";
EXPECT_GT(buffer_id->handle, 0);
EXPECT_TRUE(buffer::is_valid_buffer_id(*buffer_id)) << "id=" << buffer_id->handle;
ASSERT_EQ(buffer::get_buffers().size(), 1) << "incorrect number of buffers created";
// get pointer to buffer
auto* buffer_v = buffer::get_buffer(*buffer_id);
buffer_v->watermark = common::units::get_page_size();
ASSERT_NE(buffer_v, nullptr) << "get_buffer returned a nullptr";
auto* buffer_v = buffer::get_buffer(*buffer_id);
ASSERT_NE(buffer_v, nullptr) << "get_buffer returned a nullptr. id=" << buffer_id->handle;
EXPECT_EQ(buffer_v->buffer_id, buffer_id->handle);
buffer_v->watermark = common::units::get_page_size();
{
auto records = buffer_v->get_internal_buffer().get_record_headers();
EXPECT_EQ(records.size(), 0);
+4 -3
Просмотреть файл
@@ -241,7 +241,7 @@ def parse_cdash_args(args):
"-c",
"--coverage",
help="Enable code coverage",
choices=("all", "unittests", "samples"),
choices=("all", "tests", "samples"),
type=str,
default=None,
)
@@ -376,8 +376,8 @@ def parse_args(args=None):
cmake_args += ["-DROCPROFILER_BUILD_CODECOV=ON"]
if cdash_args.coverage == "samples":
ctest_args += ["-L", "samples"]
elif cdash_args.coverage == "unittests":
ctest_args += ["-L", "unittests"]
elif cdash_args.coverage == "tests":
ctest_args += ["-L", "tests"]
if cdash_args.linter == "clang-tidy":
cmake_args += ["-DROCPROFILER_ENABLE_CLANG_TIDY=ON"]
@@ -438,6 +438,7 @@ if __name__ == "__main__":
dashboard_args.append(f"{args.mode}{itr}")
try:
ctest_args += ["--no-tests=error"]
if not args.quiet and len(ctest_args) == 0:
ctest_args = ["--output-on-failure", "-V"]
+45 -1
Просмотреть файл
@@ -1,3 +1,47 @@
#
# Integration tests
# Integration tests
#
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
project(rocprofiler-tests LANGUAGES C CXX)
if(COMMAND rocprofiler_deactivate_clang_tidy)
rocprofiler_deactivate_clang_tidy()
endif()
if(NOT TARGET rocprofiler::cereal)
get_filename_component(ROCPROFILER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/.."
REALPATH)
rocprofiler_checkout_git_submodule(
RECURSIVE
RELATIVE_PATH external/cereal
WORKING_DIRECTORY ${ROCPROFILER_SOURCE_DIR}
REPO_URL https://github.com/jrmadsen/cereal.git
REPO_BRANCH "rocprofiler")
add_library(rocprofiler-cereal INTERFACE)
add_library(rocprofiler::cereal ALIAS rocprofiler-cereal)
target_compile_definitions(rocprofiler-cereal
INTERFACE $<BUILD_INTERFACE:CEREAL_THREAD_SAFE=1>)
target_include_directories(
rocprofiler-cereal
INTERFACE $<BUILD_INTERFACE:${ROCPROFILER_SOURCE_DIR}/external/cereal/include>)
endif()
add_library(rocprofiler-tests-build-flags INTERFACE)
add_library(rocprofiler::tests-build-flags ALIAS rocprofiler-tests-build-flags)
target_compile_options(rocprofiler-tests-build-flags INTERFACE -W -Wall -Wextra -Wshadow)
if(ROCPROFILER_BUILD_CI OR ROCPROFILER_BUILD_WERROR)
target_compile_options(rocprofiler-tests-build-flags INTERFACE -Werror)
endif()
# needed for validation
find_package(Python3 REQUIRED)
# applications used by integration tests
add_subdirectory(apps)
# tool libraries
add_subdirectory(kernel-tracing)
+10
Просмотреть файл
@@ -0,0 +1,10 @@
#
# Integration test applications
#
# Declares the rocprofiler-test-apps project (C and CXX enabled) and adds one
# subdirectory per test application.
#
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
project(rocprofiler-test-apps LANGUAGES C CXX)
# applications used by integration tests
add_subdirectory(reproducible-runtime)
add_subdirectory(transpose)
+55
Просмотреть файл
@@ -0,0 +1,55 @@
#
# reproducible-runtime: HIP test application which launches a spin kernel from
# one or more host threads. Optionally built with MPI support.
#
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)

# if no HIP compiler was provided, try to locate amdclang++ in the ROCm installation
if(NOT CMAKE_HIP_COMPILER)
    find_program(
        amdclangpp_EXECUTABLE
        NAMES amdclang++
        HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
        PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
        PATH_SUFFIXES bin llvm/bin NO_CACHE)
    mark_as_advanced(amdclangpp_EXECUTABLE)

    if(amdclangpp_EXECUTABLE)
        set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}")
    endif()
endif()

project(rocprofiler-test-app-reproducible-runtime LANGUAGES CXX HIP)

# default the per-configuration HIP flags to the corresponding CXX flags when unset
foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO)
    if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "")
        set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}")
    endif()
endforeach()

option(REPRODUCIBLE_RUNTIME_USE_MPI "Enable MPI support in reproducible-runtime exe" OFF)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_HIP_STANDARD 17)
set(CMAKE_HIP_EXTENSIONS OFF)
set(CMAKE_HIP_STANDARD_REQUIRED ON)

# the source contains device code, so compile it as HIP rather than plain C++
set_source_files_properties(reproducible-runtime.cpp PROPERTIES LANGUAGE HIP)
add_executable(reproducible-runtime)
target_sources(reproducible-runtime PRIVATE reproducible-runtime.cpp)
target_compile_options(reproducible-runtime PRIVATE -W -Wall -Wextra -Wpedantic -Wshadow
                                                    -Werror)

find_package(Threads REQUIRED)
target_link_libraries(reproducible-runtime PRIVATE Threads::Threads)

if(REPRODUCIBLE_RUNTIME_USE_MPI)
    # this project only enables CXX and HIP; FindMPI creates imported targets
    # only for enabled languages, so MPI::MPI_C does not exist in a standalone
    # configure. Request and link the CXX component instead.
    find_package(MPI REQUIRED COMPONENTS CXX)
    target_compile_definitions(reproducible-runtime PRIVATE USE_MPI)
    target_link_libraries(reproducible-runtime PRIVATE MPI::MPI_CXX)
endif()

install(
    TARGETS reproducible-runtime
    DESTINATION bin
    COMPONENT rocprofiler-test-apps)
+191
Просмотреть файл
@@ -0,0 +1,191 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "hip/hip_runtime.h"

#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <mutex>
#include <random>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>

#if defined(USE_MPI)
# include <mpi.h>
#endif
// Evaluates a HIP runtime call exactly once. If the result is not hipSuccess,
// prints the file, line and HIP error string to stderr (while holding
// print_lock) and throws std::runtime_error("hip_api_call").
//
// Wrapped in do { ... } while(0) so the expansion is a single statement and
// `if(cond) HIP_API_CALL(x); else ...` parses as intended.
#define HIP_API_CALL(CALL)                                                                         \
    do                                                                                             \
    {                                                                                              \
        hipError_t error_ = (CALL);                                                                \
        if(error_ != hipSuccess)                                                                   \
        {                                                                                          \
            auto _hip_api_print_lk = auto_lock_t{print_lock};                                      \
            fprintf(stderr,                                                                        \
                    "%s:%d :: HIP error : %s\n",                                                   \
                    __FILE__,                                                                      \
                    __LINE__,                                                                      \
                    hipGetErrorString(error_));                                                    \
            throw std::runtime_error("hip_api_call");                                              \
        }                                                                                          \
    } while(0)
namespace
{
// lock type used to serialize console output
using auto_lock_t = std::unique_lock<std::mutex>;

// mutex guarding stdout/stderr writes
std::mutex print_lock{};

// run-time settings; main() overrides them from argv[1] .. argv[5] in this order
double nruntime{1.0};   // run() loops until the measured time reaches this value (seconds)
size_t nspin{500000};   // value passed to the reproducible_runtime kernel
size_t nthreads{2};     // number of HIP streams / host threads
size_t nitr{2};         // kernel launches per timing pass
size_t nsync{1};        // synchronize the stream every nsync launches

// throws std::runtime_error if hipGetLastError() reports a failure
void
check_hip_error();
}  // namespace
// Device kernel: loops until the clock64() difference since entry reaches the given value.
__global__ void
reproducible_runtime(int64_t nspin);
// Host driver: repeatedly launches the kernel on `stream` and prints the measured time.
// `rank` and `tid` are only used to label the output line.
void
run(int rank, int tid, hipStream_t stream);
/// Entry point of the reproducible-runtime test application.
///
/// Optional positional arguments, in order: minimum runtime in seconds, kernel
/// spin cycles, number of host threads, iterations per timing pass, and the
/// synchronization interval (in iterations). Passing "?", "-h" or "--help"
/// anywhere prints the usage to stderr and exits successfully.
///
/// Creates one HIP stream per thread, executes run() on each of them (stream 0
/// on the calling thread), destroys the streams, then synchronizes and resets
/// the device. Returns 0. HIP failures throw std::runtime_error via
/// HIP_API_CALL.
int
main(int argc, char** argv)
{
    int rank = 0;

    for(int i = 1; i < argc; ++i)
    {
        auto _arg = std::string{argv[i]};
        if(_arg == "?" || _arg == "-h" || _arg == "--help")
        {
            // the first positional argument is the minimum runtime, so it must
            // be listed first for the documented positions to be correct
            fprintf(stderr,
                    "usage: reproducible-runtime [MIN_RUNTIME_SECONDS (%.1f)] "
                    "[KERNEL SPIN CYCLES (%zu)] [NUM_THREADS (%zu)] "
                    "[NUM_ITERATION (%zu)] [SYNC_EVERY_N_ITERATIONS (%zu)]\n",
                    nruntime,
                    nspin,
                    nthreads,
                    nitr,
                    nsync);
            exit(EXIT_SUCCESS);
        }
    }

    if(argc > 1) nruntime = std::stod(argv[1]);
    if(argc > 2) nspin = std::stoll(argv[2]);
    if(argc > 3) nthreads = std::stoll(argv[3]);
    if(argc > 4) nitr = std::stoll(argv[4]);
    if(argc > 5) nsync = std::stoll(argv[5]);

    // the calling thread always executes run() on stream 0, so a thread count
    // of zero would index past the end of the (empty) stream vector below
    if(nthreads < 1) nthreads = 1;

    printf("[reproducible-runtime] Kernel spin time: %zu cycles\n", nspin);
    printf("[reproducible-runtime] Number of threads: %zu\n", nthreads);
    printf("[reproducible-runtime] Number of iterations: %zu\n", nitr);
    printf("[reproducible-runtime] Syncing every %zu iterations\n", nsync);

    // this is a temporary workaround in omnitrace when HIP + MPI is enabled
    int ndevice = 0;
    int devid   = rank;
    HIP_API_CALL(hipGetDeviceCount(&ndevice));
    printf("[reproducible-runtime] Number of devices found: %i\n", ndevice);
    if(ndevice > 0)
    {
        devid = rank % ndevice;
        HIP_API_CALL(hipSetDevice(devid));
        printf("[reproducible-runtime] Rank %i assigned to device %i\n", rank, devid);
    }

    if(rank == devid && rank < ndevice)
    {
        std::vector<std::thread> _threads{};
        std::vector<hipStream_t> _streams(nthreads);

        for(size_t i = 0; i < nthreads; ++i)
        {
            HIP_API_CALL(hipStreamCreate(&_streams.at(i)));
        }

        // streams 1..N-1 run on worker threads, stream 0 on the calling thread
        for(size_t i = 1; i < nthreads; ++i)
        {
            _threads.emplace_back(run, rank, i, _streams.at(i));
        }
        run(rank, 0, _streams.at(0));

        for(auto& itr : _threads)
        {
            itr.join();
        }

        for(size_t i = 0; i < nthreads; ++i)
        {
            HIP_API_CALL(hipStreamDestroy(_streams.at(i)));
        }
    }

    HIP_API_CALL(hipDeviceSynchronize());
    HIP_API_CALL(hipDeviceReset());

    return 0;
}
// Device kernel which keeps looping until the clock64() reading has advanced
// by at least nspin_v since kernel entry. __syncthreads() is called at the top
// of every pass, and the loop body always executes at least once.
__global__ void
reproducible_runtime(int64_t nspin_v)
{
    const int64_t begin   = clock64();
    int64_t       elapsed = 0;

    while(true)
    {
        __syncthreads();
        elapsed = (clock64() - begin);
        if(!(elapsed < nspin_v)) break;
    }
}
/// Repeatedly launches the reproducible_runtime kernel on @p stream until the
/// host-side elapsed time reaches `nruntime` seconds, then prints the elapsed
/// time.
///
/// Each pass launches `nitr` kernels and synchronizes the stream after every
/// `nsync`-th launch. A value of `nsync == 0` disables the periodic
/// synchronization; the stream is still synchronized at the end of every pass.
///
/// @param rank   Rank label used in the output line.
/// @param tid    Thread label used in the output line.
/// @param stream HIP stream on which the kernels are launched.
///
/// Throws std::runtime_error (via HIP_API_CALL / check_hip_error) on HIP failures.
void
run(int rank, int tid, hipStream_t stream)
{
    dim3   grid(4096);
    dim3   block(64);
    double time = 0.0;
    auto   t1   = std::chrono::high_resolution_clock::now();

    do
    {
        for(size_t i = 0; i < nitr; ++i)
        {
            reproducible_runtime<<<grid, block, 0, stream>>>(nspin);
            check_hip_error();

            // guard against nsync == 0: `i % 0` is undefined behavior
            if(nsync > 0 && i % nsync == (nsync - 1))
            {
                HIP_API_CALL(hipStreamSynchronize(stream));
            }
        }

        // the timestamp is taken before the end-of-pass synchronization
        auto t2 = std::chrono::high_resolution_clock::now();
        HIP_API_CALL(hipStreamSynchronize(stream));
        time = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1).count();
    } while(time < nruntime);

    {
        // serialize the report so lines from different threads do not interleave
        auto_lock_t _lk{print_lock};
        std::cout << "[" << rank << "][" << tid << "] Runtime of reproducible-runtime is " << time
                  << " sec\n"
                  << std::flush;
    }

    HIP_API_CALL(hipStreamSynchronize(stream));
}
namespace
{
void
check_hip_error(void)
{
hipError_t err = hipGetLastError();
if(err != hipSuccess)
{
auto_lock_t _lk{print_lock};
std::cerr << "Error: " << hipGetErrorString(err) << std::endl;
throw std::runtime_error("hip_api_call");
}
}
} // namespace
+27 -11
Просмотреть файл
@@ -1,24 +1,40 @@
cmake_minimum_required(VERSION 3.21 FATAL_ERROR)
#
#
#
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
find_program(
HIPCC_EXECUTABLE
NAMES hipcc
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm NO_CACHE)
mark_as_advanced(HIPCC_EXECUTABLE)
if(NOT CMAKE_HIP_COMPILER)
find_program(
amdclangpp_EXECUTABLE
NAMES amdclang++
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATH_SUFFIXES bin llvm/bin NO_CACHE)
mark_as_advanced(amdclangpp_EXECUTABLE)
if(HIPCC_EXECUTABLE)
set(CMAKE_CXX_COMPILER ${HIPCC_EXECUTABLE})
if(amdclangpp_EXECUTABLE)
set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}")
endif()
endif()
project(rocprofiler-transpose-sample LANGUAGES CXX)
project(rocprofiler-test-app-transpose LANGUAGES CXX HIP)
foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO)
if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "")
set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}")
endif()
endforeach()
option(TRANSPOSE_USE_MPI "Enable MPI support in transpose exe" OFF)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_HIP_STANDARD 17)
set(CMAKE_HIP_EXTENSIONS OFF)
set(CMAKE_HIP_STANDARD_REQUIRED ON)
set_source_files_properties(transpose.cpp PROPERTIES LANGUAGE HIP)
add_executable(transpose)
target_sources(transpose PRIVATE transpose.cpp)
target_compile_options(transpose PRIVATE -W -Wall -Wextra -Wpedantic -Wshadow -Werror)
@@ -35,4 +51,4 @@ endif()
install(
TARGETS transpose
DESTINATION bin
COMPONENT rocprofiler-samples)
COMPONENT rocprofiler-test-apps)
+25 -25
Просмотреть файл
@@ -1,24 +1,24 @@
/*
Copyright (c) 2015-2020 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "hip/hip_runtime.h"
@@ -66,7 +66,7 @@ verify(int* in, int* out, int M, int N);
} // namespace
__global__ void
transpose_a(int* in, int* out, int M, int N);
transpose(int* in, int* out, int M, int N);
void
run(int rank, int tid, hipStream_t stream, int argc, char** argv);
@@ -148,7 +148,7 @@ main(int argc, char** argv)
}
__global__ void
transpose_a(int* in, int* out, int M, int N)
transpose(int* in, int* out, int M, int N)
{
__shared__ int tile[shared_mem_tile_dim][shared_mem_tile_dim];
@@ -193,12 +193,12 @@ run(int rank, int tid, hipStream_t stream, int argc, char** argv)
HIP_API_CALL(hipStreamSynchronize(stream));
dim3 grid(M / 32, N / 32, 1);
dim3 block(32, 32, 1); // transpose_a
dim3 block(32, 32, 1); // transpose
auto t1 = std::chrono::high_resolution_clock::now();
for(size_t i = 0; i < nitr; ++i)
{
transpose_a<<<grid, block, 0, stream>>>(in, out, M, N);
transpose<<<grid, block, 0, stream>>>(in, out, M, N);
check_hip_error();
if(i % nsync == (nsync - 1)) HIP_API_CALL(hipStreamSynchronize(stream));
}
+53
Просмотреть файл
@@ -0,0 +1,53 @@
#
#
#
# Build and test definitions for the kernel-tracing integration test: a shared
# tool library that is preloaded into a test application, followed by a Python
# validation step over the JSON the tool writes.
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
project(rocprofiler-tests-kernel-tracing LANGUAGES CXX)
# Only search for a rocprofiler package when the imported target is not already
# defined (presumably so this directory can also be configured on its own).
if(NOT TARGET rocprofiler::rocprofiler)
find_package(rocprofiler REQUIRED)
endif()
# The tool is a shared library built from kernel-tracing.cpp.
add_library(kernel-tracing-test-tool SHARED)
target_sources(kernel-tracing-test-tool PRIVATE kernel-tracing.cpp)
# rocprofiler::tests-build-flags is linked only when that target exists.
target_link_libraries(
kernel-tracing-test-tool
PRIVATE rocprofiler::rocprofiler rocprofiler::cereal
$<TARGET_NAME_IF_EXISTS:rocprofiler::tests-build-flags>)
# The installed library looks for its dependencies next to itself ($ORIGIN) and
# in the directories of the libraries it was linked against.
set_target_properties(kernel-tracing-test-tool PROPERTIES INSTALL_RPATH "\$ORIGIN"
INSTALL_RPATH_USE_LINK_PATH ON)
install(
TARGETS kernel-tracing-test-tool
DESTINATION lib
COMPONENT rocprofiler-test-libs)
# Compose the preload setting: append the tool to an existing memcheck preload
# value when one is provided, otherwise preload only the tool via LD_PRELOAD.
if(ROCPROFILER_MEMCHECK_PRELOAD_ENV)
set(PRELOAD_ENV
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}:$<TARGET_FILE:kernel-tracing-test-tool>")
else()
set(PRELOAD_ENV "LD_PRELOAD=$<TARGET_FILE:kernel-tracing-test-tool>")
endif()
# Step 1: run the reproducible-runtime application with the tool preloaded and
# HSA_TOOLS_LIB pointing at the rocprofiler library. Any "threw an exception"
# text in the output fails the test.
add_test(NAME test-kernel-tracing-execute COMMAND $<TARGET_FILE:reproducible-runtime>)
set_tests_properties(
test-kernel-tracing-execute
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
"${PRELOAD_ENV};HSA_TOOLS_LIB=$<TARGET_FILE:rocprofiler::rocprofiler>"
FAIL_REGULAR_EXPRESSION "threw an exception")
# Copy the Python validation files into the build directory unmodified.
foreach(FILENAME validate.py pytest.ini conftest.py)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}
${CMAKE_CURRENT_BINARY_DIR}/${FILENAME} COPYONLY)
endforeach()
# Step 2: validate the JSON file in the build directory. Python3_EXECUTABLE is
# not set in this file, so it is assumed to be provided by an enclosing scope.
add_test(NAME test-kernel-tracing-validate
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --input
${CMAKE_CURRENT_BINARY_DIR}/kernel-tracing-test-tool.json)
# DEPENDS orders validation after the execute test.
set_tests_properties(
test-kernel-tracing-validate
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS test-kernel-tracing-execute
FAIL_REGULAR_EXPRESSION "threw an exception")
+20
Просмотреть файл
@@ -0,0 +1,20 @@
#!/usr/bin/env python3
import json
import pytest
def pytest_addoption(parser):
    """Register the ``--input`` command-line option on the given parser.

    The option stores a path to the JSON file to validate and defaults to
    ``kernel-tracing-test-tool.json``.
    """
    option_settings = {
        "action": "store",
        "default": "kernel-tracing-test-tool.json",
        "help": "Input JSON",
    }
    parser.addoption("--input", **option_settings)
@pytest.fixture
def input_data(request):
    """Fixture returning the parsed contents of the file given by ``--input``."""
    json_path = request.config.getoption("--input")
    with open(json_path, "r") as handle:
        parsed = json.load(handle)
    return parsed
+902
Просмотреть файл
@@ -0,0 +1,902 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// undefine NDEBUG so asserts are implemented
#ifdef NDEBUG
# undef NDEBUG
#endif
/**
* @file tests/kernel-tracing/kernel-tracing.cpp
*
* @brief Test rocprofiler tool
*/
#include "serialization.hpp"
#include <rocprofiler/buffer.h>
#include <rocprofiler/callback_tracing.h>
#include <rocprofiler/external_correlation.h>
#include <rocprofiler/fwd.h>
#include <rocprofiler/internal_threading.h>
#include <rocprofiler/registration.h>
#include <rocprofiler/rocprofiler.h>
#include <unistd.h>
#include <atomic>
#include <cassert>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <filesystem>
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <mutex>
#include <string>
#include <string_view>
#include <thread>
#include <variant>
#include <vector>
// Evaluate `result` (an expression yielding rocprofiler_status_t) exactly once.
// On any status other than ROCPROFILER_STATUS_SUCCESS, print a diagnostic to
// std::cerr containing the stringified expression, file, line, `msg`, and the
// status name/description, then throw std::runtime_error with a similar message.
// `msg` is streamed with operator<< in both messages, so it may be any
// streamable value rather than only a string literal.
#define ROCPROFILER_CALL(result, msg)                                                              \
    {                                                                                              \
        rocprofiler_status_t CHECKSTATUS = result;                                                 \
        if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS)                                              \
        {                                                                                          \
            std::string status_name = rocprofiler_get_status_name(CHECKSTATUS);                    \
            std::string status_msg  = rocprofiler_get_status_string(CHECKSTATUS);                  \
            std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg            \
                      << " failed with error code " << status_name << " (" << CHECKSTATUS          \
                      << "): " << status_msg << std::endl;                                         \
            std::stringstream errmsg{};                                                            \
            errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg               \
                   << " failure (" << status_name << ": " << status_msg << ")";                    \
            throw std::runtime_error(errmsg.str());                                                \
        }                                                                                          \
    }
namespace client
{
namespace
{
// One entry of the tool's recorded call stack: where an event was recorded
// (function, file, line) plus a free-form context string.
struct source_location
{
    std::string function{};
    std::string file{};
    uint32_t    line{0};
    std::string context{};

    // Write all four members to the archive as name-value pairs, in
    // declaration order.
    template <typename ArchiveT>
    void save(ArchiveT& ar) const
    {
        ar(cereal::make_nvp("function", function),
           cereal::make_nvp("file", file),
           cereal::make_nvp("line", line),
           cereal::make_nvp("context", context));
    }
};
// Sequence of recorded source locations.
using call_stack_t = std::vector<source_location>;
// Buffer tracing kind -> kind name.
using buffer_kind_names_t = std::map<rocprofiler_service_buffer_tracing_kind_t, std::string>;
// Buffer tracing kind -> (operation id -> operation name).
using buffer_kind_operation_names_t =
std::map<rocprofiler_service_buffer_tracing_kind_t, std::map<uint32_t, std::string>>;
// Callback tracing kind -> kind name.
using callback_kind_names_t = std::map<rocprofiler_service_callback_tracing_kind_t, std::string>;
// Callback tracing kind -> (operation id -> operation name).
using callback_kind_operation_names_t =
std::map<rocprofiler_service_callback_tracing_kind_t, std::map<uint32_t, std::string>>;
// Shorthand for the kernel-symbol-register payload and a map of it keyed by kernel id.
using kernel_symbol_data_t = rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t;
using kernel_symbol_map_t = std::unordered_map<rocprofiler_kernel_id_t, kernel_symbol_data_t>;
// Names collected for callback tracing: one name per kind and, per kind, one
// name per operation id.
struct callback_name_info
{
    callback_kind_names_t           kind_names{};
    callback_kind_operation_names_t operation_names{};

    // Write both maps to the archive as name-value pairs.
    template <typename ArchiveT>
    void save(ArchiveT& ar) const
    {
        ar(cereal::make_nvp("kind_names", kind_names),
           cereal::make_nvp("operation_names", operation_names));
    }
};
// Names collected for buffer tracing: one name per kind and, per kind, one
// name per operation id.
struct buffer_name_info
{
    buffer_kind_names_t           kind_names{};
    buffer_kind_operation_names_t operation_names{};

    // Write both maps to the archive as name-value pairs.
    template <typename ArchiveT>
    void save(ArchiveT& ar) const
    {
        ar(cereal::make_nvp("kind_names", kind_names),
           cereal::make_nvp("operation_names", operation_names));
    }
};
// Client identifier handed to rocprofiler_configure; stored there for later use.
rocprofiler_client_id_t* client_id = nullptr;
// Finalize function received in tool_init; invoked by the exit handlers
// registered in rocprofiler_configure.
rocprofiler_client_finalize_t client_fini_func = nullptr;
// Collect the names of every callback tracing kind and, for the HSA API kind
// only, the names of its operations.
//
// Returns a callback_name_info filled through the rocprofiler iteration
// callbacks. Any failing rocprofiler call throws std::runtime_error (via
// ROCPROFILER_CALL).
callback_name_info
get_callback_tracing_names()
{
    auto cb_name_info = callback_name_info{};

    //
    // callback for each kind operation
    //
    static auto tracing_kind_operation_cb =
        [](rocprofiler_service_callback_tracing_kind_t kindv, uint32_t operation, void* data_v) {
            auto* name_info_v = static_cast<callback_name_info*>(data_v);

            if(kindv == ROCPROFILER_CALLBACK_TRACING_HSA_API)
            {
                const char* name = nullptr;
                ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name(
                                     kindv, operation, &name, nullptr),
                                 "query callback tracing kind operation name");
                // a null name is skipped rather than stored
                if(name) name_info_v->operation_names[kindv][operation] = name;
            }
            return 0;
        };

    //
    // callback for each callback kind (i.e. domain)
    //
    static auto tracing_kind_cb = [](rocprofiler_service_callback_tracing_kind_t kind, void* data) {
        // store the callback kind name
        auto*       name_info_v = static_cast<callback_name_info*>(data);
        const char* name        = nullptr;
        ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_name(kind, &name, nullptr),
                         "query callback tracing kind name");
        if(name) name_info_v->kind_names[kind] = name;

        // operation names are only gathered for the HSA API kind
        if(kind == ROCPROFILER_CALLBACK_TRACING_HSA_API)
        {
            ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kind_operations(
                                 kind, tracing_kind_operation_cb, static_cast<void*>(data)),
                             "iterating callback tracing kind operations");
        }
        return 0;
    };

    ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kinds(tracing_kind_cb,
                                                                static_cast<void*>(&cb_name_info)),
                     "iterating callback tracing kinds");

    return cb_name_info;
}
// Collect the names of every buffer tracing kind and, for the HSA API kind
// only, the names of its operations.
//
// Returns a buffer_name_info filled through the rocprofiler iteration
// callbacks. Any failing rocprofiler call throws std::runtime_error (via
// ROCPROFILER_CALL).
buffer_name_info
get_buffer_tracing_names()
{
    auto cb_name_info = buffer_name_info{};

    //
    // callback for each kind operation
    //
    static auto tracing_kind_operation_cb =
        [](rocprofiler_service_buffer_tracing_kind_t kindv, uint32_t operation, void* data_v) {
            auto* name_info_v = static_cast<buffer_name_info*>(data_v);

            if(kindv == ROCPROFILER_BUFFER_TRACING_HSA_API)
            {
                const char* name = nullptr;
                ROCPROFILER_CALL(rocprofiler_query_buffer_tracing_kind_operation_name(
                                     kindv, operation, &name, nullptr),
                                 "query buffer tracing kind operation name");
                // a null name is skipped rather than stored
                if(name) name_info_v->operation_names[kindv][operation] = name;
            }
            return 0;
        };

    //
    // callback for each buffer kind (i.e. domain)
    //
    static auto tracing_kind_cb = [](rocprofiler_service_buffer_tracing_kind_t kind, void* data) {
        // store the buffer kind name
        auto*       name_info_v = static_cast<buffer_name_info*>(data);
        const char* name        = nullptr;
        ROCPROFILER_CALL(rocprofiler_query_buffer_tracing_kind_name(kind, &name, nullptr),
                         "query buffer tracing kind name");
        if(name) name_info_v->kind_names[kind] = name;

        // operation names are only gathered for the HSA API kind
        if(kind == ROCPROFILER_BUFFER_TRACING_HSA_API)
        {
            ROCPROFILER_CALL(rocprofiler_iterate_buffer_tracing_kind_operations(
                                 kind, tracing_kind_operation_cb, static_cast<void*>(data)),
                             "iterating buffer tracing kind operations");
        }
        return 0;
    };

    ROCPROFILER_CALL(rocprofiler_iterate_buffer_tracing_kinds(tracing_kind_cb,
                                                              static_cast<void*>(&cb_name_info)),
                     "iterating buffer tracing kinds");

    return cb_name_info;
}
// Variant over the three callback payload types handled by this tool
// (code object load, kernel symbol register, HSA API).
using callback_payload_t =
std::variant<rocprofiler_callback_tracing_code_object_load_data_t,
rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t,
rocprofiler_callback_tracing_hsa_api_data_t>;
// A code-object-load callback captured by the tool: the timestamp taken in
// the callback, the tracing record, and a copy of its payload.
struct code_object_callback_record_t
{
    uint64_t                                             timestamp{0};
    rocprofiler_callback_tracing_record_t                record{};
    rocprofiler_callback_tracing_code_object_load_data_t payload{};

    // Write the three members to the archive as name-value pairs.
    template <typename ArchiveT>
    void save(ArchiveT& ar) const
    {
        ar(cereal::make_nvp("timestamp", timestamp),
           cereal::make_nvp("record", record),
           cereal::make_nvp("payload", payload));
    }
};
// A kernel-symbol-register callback captured by the tool: the timestamp taken
// in the callback, the tracing record, and a copy of its payload.
struct kernel_symbol_callback_record_t
{
    uint64_t                                                               timestamp{0};
    rocprofiler_callback_tracing_record_t                                  record{};
    rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t payload{};

    // Write the three members to the archive as name-value pairs.
    template <typename ArchiveT>
    void save(ArchiveT& ar) const
    {
        ar(cereal::make_nvp("timestamp", timestamp),
           cereal::make_nvp("record", record),
           cereal::make_nvp("payload", payload));
    }
};
// An HSA API callback captured by the tool: the timestamp taken in the
// callback, the tracing record, and a copy of its payload.
struct hsa_api_callback_record_t
{
    uint64_t                                    timestamp{0};
    rocprofiler_callback_tracing_record_t       record{};
    rocprofiler_callback_tracing_hsa_api_data_t payload{};

    // Write the three members to the archive as name-value pairs.
    template <typename ArchiveT>
    void save(ArchiveT& ar) const
    {
        ar(cereal::make_nvp("timestamp", timestamp),
           cereal::make_nvp("record", record),
           cereal::make_nvp("payload", payload));
    }
};
// Storage for callback records; appended to by tool_tracing_callback and
// serialized in tool_fini.
auto code_object_records = std::deque<code_object_callback_record_t>{};
auto kernel_symbol_records = std::deque<kernel_symbol_callback_record_t>{};
auto hsa_api_cb_records = std::deque<hsa_api_callback_record_t>{};
// Forward declaration; defined later in this file. Pushes the calling thread's
// id as the external correlation id on each selected context and returns it.
rocprofiler_thread_id_t
push_external_correlation();
// Callback-tracing entry point: timestamps the record and stores a copy of it
// (and its payload) in the container matching its kind/operation.
//
// All invocations are serialized by a function-local mutex. Strings referenced
// by code-object payloads are duplicated with strdup so the stored copies own
// their text. Throws std::runtime_error for any kind other than CODE_OBJECT or
// HSA_API.
void
tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
                      rocprofiler_user_data_t* /*user_data*/,
                      void* /*callback_data*/)
{
    static auto record_mutex = std::mutex{};
    auto        guard        = std::unique_lock<std::mutex>{record_mutex};

    auto ts = rocprofiler_timestamp_t{};
    ROCPROFILER_CALL(rocprofiler_get_timestamp(&ts), "get timestamp");

    // account for the fact that we are not wrapping pthread_create so the
    // first external correlation id on a thread wont have updated value
    static thread_local auto first_call_on_thread = std::once_flag{};
    std::call_once(first_call_on_thread, [&record]() {
        record.correlation_id.external.value = push_external_correlation();
    });

    if(record.kind == ROCPROFILER_CALLBACK_TRACING_HSA_API)
    {
        auto* api_data = static_cast<rocprofiler_callback_tracing_hsa_api_data_t*>(record.payload);
        hsa_api_cb_records.emplace_back(hsa_api_callback_record_t{ts, record, *api_data});
        return;
    }

    if(record.kind != ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT)
    {
        throw std::runtime_error{"unsupported callback kind"};
    }

    // code object kind: only the two operations below are stored
    if(record.operation == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT_LOAD)
    {
        auto load_data =
            *static_cast<rocprofiler_callback_tracing_code_object_load_data_t*>(record.payload);
        load_data.uri = ::strdup(load_data.uri);
        code_object_records.emplace_back(code_object_callback_record_t{ts, record, load_data});
    }
    else if(record.operation ==
            ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER)
    {
        auto symbol_data        = *static_cast<kernel_symbol_data_t*>(record.payload);
        symbol_data.kernel_name = ::strdup(symbol_data.kernel_name);
        kernel_symbol_records.emplace_back(
            kernel_symbol_callback_record_t{ts, record, symbol_data});
    }
}
// Storage for buffered tracing records; appended to by tool_tracing_buffered
// and serialized in tool_fini.
auto hsa_api_bf_records = std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>{};
auto kernel_dispatch_records = std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>{};
auto memory_copy_records = std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>{};
// Buffer-flush callback: validates every delivered record header and copies
// the record it points to into the container for its tracing kind.
//
// Invocations are serialized by a function-local mutex. Asserts that user_data
// is non-null and that no records were dropped. Throws std::runtime_error on
// an empty or null header array, a null header, a hash mismatch, a non-tracing
// category, or an unrecognized tracing kind.
void
tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
                      rocprofiler_buffer_id_t /*buffer_id*/,
                      rocprofiler_record_header_t** headers,
                      size_t                        num_headers,
                      void*                         user_data,
                      uint64_t                      drop_count)
{
    static auto flush_mutex = std::mutex{};
    auto        guard       = std::unique_lock<std::mutex>{flush_mutex};

    assert(user_data != nullptr);
    assert(drop_count == 0 && "drop count should be zero for lossless policy");

    if(num_headers == 0)
    {
        throw std::runtime_error{
            "rocprofiler invoked a buffer callback with no headers. this should never happen"};
    }
    if(headers == nullptr)
    {
        throw std::runtime_error{"rocprofiler invoked a buffer callback with a null pointer to the "
                                 "array of headers. this should never happen"};
    }

    for(size_t idx = 0; idx < num_headers; ++idx)
    {
        auto* hdr = headers[idx];

        if(hdr == nullptr)
        {
            throw std::runtime_error{
                "rocprofiler provided a null pointer to header. this should never happen"};
        }
        if(hdr->hash != rocprofiler_record_header_compute_hash(hdr->category, hdr->kind))
        {
            throw std::runtime_error{"rocprofiler_record_header_t (category | kind) != hash"};
        }
        if(hdr->category != ROCPROFILER_BUFFER_CATEGORY_TRACING)
        {
            throw std::runtime_error{"unexpected rocprofiler_record_header_t category + kind"};
        }

        // tracing category: dispatch on the record kind
        if(hdr->kind == ROCPROFILER_BUFFER_TRACING_HSA_API)
        {
            hsa_api_bf_records.emplace_back(
                *static_cast<rocprofiler_buffer_tracing_hsa_api_record_t*>(hdr->payload));
        }
        else if(hdr->kind == ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH)
        {
            kernel_dispatch_records.emplace_back(
                *static_cast<rocprofiler_buffer_tracing_kernel_dispatch_record_t*>(hdr->payload));
        }
        else if(hdr->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)
        {
            memory_copy_records.emplace_back(
                *static_cast<rocprofiler_buffer_tracing_memory_copy_record_t*>(hdr->payload));
        }
        else
        {
            throw std::runtime_error{
                "unexpected rocprofiler_record_header_t tracing category kind"};
        }
    }
}
// Notification hook registered in rocprofiler_configure as the "pre-create"
// callback: appends a source_location describing the event, including the
// numeric value of `lib`, to the call stack passed as tool_data.
void
thread_precreate(rocprofiler_runtime_library_t lib, void* tool_data)
{
    auto* call_stack = static_cast<call_stack_t*>(tool_data);

    auto description = std::string{"internal thread about to be created by rocprofiler (lib="};
    description += std::to_string(lib);
    description += ")";

    call_stack->emplace_back(source_location{__FUNCTION__, __FILE__, __LINE__, description});
}
// Notification hook registered in rocprofiler_configure as the "post-create"
// callback: appends a source_location describing the event, including the
// numeric value of `lib`, to the call stack passed as tool_data.
void
thread_postcreate(rocprofiler_runtime_library_t lib, void* tool_data)
{
    auto* call_stack = static_cast<call_stack_t*>(tool_data);

    auto description = std::string{"internal thread was created by rocprofiler (lib="};
    description += std::to_string(lib);
    description += ")";

    call_stack->emplace_back(source_location{__FUNCTION__, __FILE__, __LINE__, description});
}
// Returns true only when rocprofiler_context_is_active succeeds and reports a
// positive status for `ctx`; a failed query counts as "not active".
bool
is_active(rocprofiler_context_id_t ctx)
{
    int active_flag = 0;
    if(rocprofiler_context_is_active(ctx, &active_flag) != ROCPROFILER_STATUS_SUCCESS)
    {
        return false;
    }
    return active_flag > 0;
}
// Forward declarations; definitions appear after tool_fini.
void
start();
void
stop();
void
flush();
// contexts
// One context per service configured in tool_init.
rocprofiler_context_id_t api_callback_ctx = {};
rocprofiler_context_id_t code_object_ctx = {};
rocprofiler_context_id_t api_buffered_ctx = {};
rocprofiler_context_id_t kernel_dispatch_ctx = {};
rocprofiler_context_id_t memory_copy_ctx = {};
// buffers
// One buffer per buffered tracing service configured in tool_init.
rocprofiler_buffer_id_t api_buffered_buffer = {};
rocprofiler_buffer_id_t kernel_dispatch_buffer = {};
rocprofiler_buffer_id_t memory_copy_buffer = {};
// Name -> context registry. The names are the values matched against the
// KERNEL_TRACING_CONTEXTS environment variable in tool_init, which sets the
// pointer to null for any context that was not selected.
auto contexts = std::unordered_map<std::string_view, rocprofiler_context_id_t*>{
{"API_CALLBACK", &api_callback_ctx},
{"CODE_OBJECT", &code_object_ctx},
{"API_BUFFERED", &api_buffered_ctx},
{"KERNEL_DISPATCH", &kernel_dispatch_ctx},
{"MEMORY_COPY", &memory_copy_ctx}};
// All buffers, iterated when assigning the callback thread and when flushing.
auto buffers = std::array<rocprofiler_buffer_id_t*, 3>{&api_buffered_buffer,
&kernel_dispatch_buffer,
&memory_copy_buffer};
// Copies of the agents reported by rocprofiler; filled in tool_init.
auto agents = std::vector<rocprofiler_agent_t>{};
// Tool initialization callback handed to rocprofiler via rocprofiler_configure.
//
// Steps, in order:
//   1. copy the available agents (duplicating their name strings),
//   2. record this call in the call stack passed as tool_data,
//   3. create every context in `contexts`,
//   4. configure the two callback tracing services,
//   5. create the three lossless buffers and configure their tracing services,
//   6. create one callback thread and assign it to every buffer,
//   7. verify every context is valid,
//   8. apply the optional KERNEL_TRACING_CONTEXTS selection,
//   9. start the selected contexts.
//
// Parameters:
//   fini_func - finalize function stored in client_fini_func
//   tool_data - must be non-null; treated as a call_stack_t*
//
// Returns 0 on success and -1 when any context is reported invalid.
// Throws std::runtime_error when a rocprofiler call fails (ROCPROFILER_CALL)
// or when KERNEL_TRACING_CONTEXTS contains an unrecognized entry.
int
tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
{
    assert(tool_data != nullptr);

    // copies each agent and duplicates its strings so the copies own them;
    // the duplicates are released in tool_fini
    rocprofiler_available_agents_cb_t iterate_cb =
        [](const rocprofiler_agent_t** agents_arr, size_t num_agents, void* user_data) {
            auto* agents_v = static_cast<std::vector<rocprofiler_agent_t>*>(user_data);
            for(size_t i = 0; i < num_agents; ++i)
            {
                const auto* agent = agents_arr[i];
                auto&       val   = agents_v->emplace_back(*agent);
                val.name          = ::strdup(agent->name);
                val.vendor_name   = ::strdup(agent->vendor_name);
                val.product_name  = ::strdup(agent->product_name);
                val.model_name    = ::strdup(agent->model_name);
            }
            return ROCPROFILER_STATUS_SUCCESS;
        };

    ROCPROFILER_CALL(
        rocprofiler_query_available_agents(iterate_cb,
                                           sizeof(rocprofiler_agent_t),
                                           const_cast<void*>(static_cast<const void*>(&agents))),
        "query available agents");

    auto* call_stack_v = static_cast<call_stack_t*>(tool_data);

    call_stack_v->emplace_back(source_location{__FUNCTION__, __FILE__, __LINE__, ""});

    client_fini_func = fini_func;

    for(auto itr : contexts)
    {
        ROCPROFILER_CALL(rocprofiler_create_context(itr.second), "context creation");
    }

    ROCPROFILER_CALL(
        rocprofiler_configure_callback_tracing_service(api_callback_ctx,
                                                       ROCPROFILER_CALLBACK_TRACING_HSA_API,
                                                       nullptr,
                                                       0,
                                                       tool_tracing_callback,
                                                       nullptr),
        "hsa api tracing service configure");

    ROCPROFILER_CALL(
        rocprofiler_configure_callback_tracing_service(code_object_ctx,
                                                       ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT,
                                                       nullptr,
                                                       0,
                                                       tool_tracing_callback,
                                                       nullptr),
        "code object tracing service configure");

    constexpr auto buffer_size = 8192;
    constexpr auto watermark   = 7936;

    ROCPROFILER_CALL(rocprofiler_create_buffer(api_buffered_ctx,
                                               buffer_size,
                                               watermark,
                                               ROCPROFILER_BUFFER_POLICY_LOSSLESS,
                                               tool_tracing_buffered,
                                               tool_data,
                                               &api_buffered_buffer),
                     "buffer creation");

    ROCPROFILER_CALL(rocprofiler_create_buffer(kernel_dispatch_ctx,
                                               buffer_size,
                                               watermark,
                                               ROCPROFILER_BUFFER_POLICY_LOSSLESS,
                                               tool_tracing_buffered,
                                               tool_data,
                                               &kernel_dispatch_buffer),
                     "buffer creation");

    ROCPROFILER_CALL(rocprofiler_create_buffer(memory_copy_ctx,
                                               buffer_size,
                                               watermark,
                                               ROCPROFILER_BUFFER_POLICY_LOSSLESS,
                                               tool_tracing_buffered,
                                               tool_data,
                                               &memory_copy_buffer),
                     "buffer creation");

    ROCPROFILER_CALL(
        rocprofiler_configure_buffer_tracing_service(
            api_buffered_ctx, ROCPROFILER_BUFFER_TRACING_HSA_API, nullptr, 0, api_buffered_buffer),
        "buffer tracing service configure");

    ROCPROFILER_CALL(
        rocprofiler_configure_buffer_tracing_service(kernel_dispatch_ctx,
                                                     ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH,
                                                     nullptr,
                                                     0,
                                                     kernel_dispatch_buffer),
        "buffer tracing service for kernel dispatch configure");

    ROCPROFILER_CALL(
        rocprofiler_configure_buffer_tracing_service(memory_copy_ctx,
                                                     ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
                                                     nullptr,
                                                     0,
                                                     memory_copy_buffer),
        "buffer tracing service for memory copy configure");

    // a single callback thread services all three buffers
    auto client_thread = rocprofiler_callback_thread_t{};
    ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_thread),
                     "creating callback thread");

    for(auto* itr : buffers)
    {
        ROCPROFILER_CALL(rocprofiler_assign_callback_thread(*itr, client_thread),
                         "assignment of thread for buffer");
    }

    for(auto itr : contexts)
    {
        int valid_ctx = 0;
        ROCPROFILER_CALL(rocprofiler_context_is_valid(*itr.second, &valid_ctx),
                         "context validity check");
        if(valid_ctx == 0)
        {
            // notify rocprofiler that initialization failed
            // and all the contexts, buffers, etc. created
            // should be ignored
            return -1;
        }
    }

    // environment variable to select which contexts to collect
    auto* context_settings_env = getenv("KERNEL_TRACING_CONTEXTS");
    if(context_settings_env != nullptr && !std::string_view{context_settings_env}.empty())
    {
        auto context_settings = std::string{context_settings_env};

        // ignore case. toupper has undefined behavior for negative values
        // other than EOF, so convert through unsigned char first
        for(auto& itr : context_settings)
            itr = static_cast<char>(toupper(static_cast<unsigned char>(itr)));

        // if context is not in string, set the pointer to null in the contexts array
        auto options = std::stringstream{};
        for(auto& itr : contexts)
        {
            options << "\n\t- " << itr.first;
            auto pos = context_settings.find(itr.first);
            if(pos == std::string::npos)
                itr.second = nullptr;
            else
                context_settings.erase(pos, itr.first.length());
        }

        // detect if there are any invalid entries: after removing every
        // recognized name only separators may remain
        if(context_settings.find_first_not_of(" ,;:\t\n\r") != std::string::npos)
        {
            auto filename = std::string_view{__FILE__};
            auto msg      = std::stringstream{};
            msg << "[kernel-tracing][" << filename.substr(filename.find_last_of('/') + 1) << ":"
                << __LINE__ << "] invalid specification of KERNEL_TRACING_CONTEXTS ('"
                << context_settings_env << "'). Valid choices are: " << options.str();
            throw std::runtime_error{msg.str()};
        }
    }

    start();

    // no errors
    return 0;
}
// Tool finalization callback handed to rocprofiler via rocprofiler_configure.
//
// Runs at most once (guarded by an atomic flag). Prints the record counts,
// stops the contexts, flushes the buffers, serializes everything collected to
// JSON, and finally releases the call stack and every strdup'ed string.
//
// The output destination is "kernel-tracing-test-tool.json" unless the
// ROCPROFILER_KERNEL_TRACING_OUTPUT_FILE environment variable is set; the
// values "stdout" and "stderr" select the corresponding stream. If the file
// cannot be opened, output falls back to stderr.
//
// Parameters:
//   tool_data - call_stack_t* allocated in rocprofiler_configure (may be
//               null); it is deleted here.
void
tool_fini(void* tool_data)
{
    static auto _once = std::atomic_flag{ATOMIC_FLAG_INIT};
    if(_once.test_and_set()) return;

    std::cerr << "[" << getpid() << "][" << __FUNCTION__
              << "] Finalizing... agents=" << agents.size()
              << ", code_object_callback_records=" << code_object_records.size()
              << ", kernel_symbol_callback_records=" << kernel_symbol_records.size()
              << ", hsa_api_callback_records=" << hsa_api_cb_records.size()
              << ", kernel_dispatch_records=" << kernel_dispatch_records.size()
              << ", memory_copy_records=" << memory_copy_records.size()
              << ", hsa_api_bf_records=" << hsa_api_bf_records.size() << " ...\n"
              << std::flush;

    stop();
    flush();

    auto* _call_stack = static_cast<call_stack_t*>(tool_data);
    if(_call_stack)
    {
        _call_stack->emplace_back(source_location{__FUNCTION__, __FILE__, __LINE__, ""});
    }

    auto ofname = std::string{"kernel-tracing-test-tool.json"};
    if(auto* eofname = getenv("ROCPROFILER_KERNEL_TRACING_OUTPUT_FILE")) ofname = eofname;

    std::ostream* ofs     = nullptr;
    auto          cleanup = std::function<void(std::ostream*&)>{};

    if(ofname == "stdout")
        ofs = &std::cout;
    else if(ofname == "stderr")
        ofs = &std::cerr;
    else
    {
        ofs = new std::ofstream{ofname};
        if(ofs && *ofs)
        {
            std::cerr << "[" << getpid() << "][" << __FUNCTION__
                      << "] Outputting collected data to " << ofname << "...\n"
                      << std::flush;
            // only the heap-allocated file stream needs to be deleted later
            cleanup = [](std::ostream*& _os) { delete _os; };
        }
        else
        {
            std::cerr << "Error outputting to " << ofname << ". Redirecting to stderr...\n"
                      << std::flush;
            // release the stream that failed to open before dropping the only
            // pointer to it; otherwise it is leaked
            delete ofs;
            ofname = "stderr";
            ofs    = &std::cerr;
        }
    }

    // the archive lives in its own scope so that it is destroyed before the
    // stream is flushed and (possibly) deleted below
    {
        using JSONOutputArchive = cereal::MinimalJSONOutputArchive;

        constexpr auto json_prec   = 32;
        constexpr auto json_indent = JSONOutputArchive::Options::IndentChar::space;
        auto           json_opts   = JSONOutputArchive::Options{json_prec, json_indent, 1};
        auto           json_ar     = JSONOutputArchive{*ofs, json_opts};
        auto           buffer_name_info   = get_buffer_tracing_names();
        auto           callback_name_info = get_callback_tracing_names();

        json_ar.setNextName("kernel-tracing-test-tool");
        json_ar.startNode();

        json_ar(cereal::make_nvp("agents", agents));
        if(_call_stack) json_ar(cereal::make_nvp("call_stack", *_call_stack));

        json_ar.setNextName("callback_records");
        json_ar.startNode();
        try
        {
            json_ar(cereal::make_nvp("names", callback_name_info));
            json_ar(cereal::make_nvp("code_objects", code_object_records));
            json_ar(cereal::make_nvp("kernel_symbols", kernel_symbol_records));
            json_ar(cereal::make_nvp("hsa_api_traces", hsa_api_cb_records));
        } catch(std::exception& e)
        {
            // reported rather than rethrown so the remaining sections are still written
            std::cerr << "[" << getpid() << "][" << __FUNCTION__
                      << "] threw an exception: " << e.what() << "\n"
                      << std::flush;
        }
        json_ar.finishNode();

        json_ar.setNextName("buffer_records");
        json_ar.startNode();
        try
        {
            json_ar(cereal::make_nvp("names", buffer_name_info));
            json_ar(cereal::make_nvp("kernel_dispatches", kernel_dispatch_records));
            json_ar(cereal::make_nvp("memory_copies", memory_copy_records));
            json_ar(cereal::make_nvp("hsa_api_traces", hsa_api_bf_records));
        } catch(std::exception& e)
        {
            std::cerr << "[" << getpid() << "][" << __FUNCTION__
                      << "] threw an exception: " << e.what() << "\n"
                      << std::flush;
        }
        json_ar.finishNode();

        json_ar.finishNode();
    }

    *ofs << std::flush;

    if(cleanup) cleanup(ofs);

    std::cerr << "[" << getpid() << "][" << __FUNCTION__ << "] Finalization complete.\n"
              << std::flush;

    delete _call_stack;

    auto free_cstr = [](const char*& val) {
        ::free(const_cast<char*>(val));
        val = nullptr;
    };

    // clean up our strdups to avoid triggering our leak sanitizer during CI
    for(auto& itr : code_object_records)
        free_cstr(itr.payload.uri);

    for(auto& itr : kernel_symbol_records)
        free_cstr(itr.payload.kernel_name);

    for(auto& itr : agents)
    {
        free_cstr(itr.name);
        free_cstr(itr.vendor_name);
        free_cstr(itr.product_name);
        free_cstr(itr.model_name);
    }
}
// Start every selected context that is not already active. Entries whose
// pointer was cleared (deselected contexts) are skipped.
void
start()
{
    for(const auto& itr : contexts)
    {
        if(itr.second == nullptr || is_active(*itr.second)) continue;

        ROCPROFILER_CALL(rocprofiler_start_context(*itr.second), "context start");
    }
}
// Stop every selected context that is currently active. Entries whose pointer
// was cleared (deselected contexts) are skipped.
void
stop()
{
    for(const auto& itr : contexts)
    {
        if(itr.second == nullptr || !is_active(*itr.second)) continue;

        ROCPROFILER_CALL(rocprofiler_stop_context(*itr.second), "context stop");
    }
}
// Request a flush of every buffer. A "buffer busy" status is tolerated; any
// other non-success status throws via ROCPROFILER_CALL.
void
flush()
{
    for(auto* itr : buffers)
    {
        auto status = rocprofiler_flush_buffer(*itr);
        if(status == ROCPROFILER_STATUS_ERROR_BUFFER_BUSY) continue;

        ROCPROFILER_CALL(status, "buffer flush");
    }
}
// Query the calling thread's id and push it as the external correlation id on
// every selected (non-null) context. Returns that thread id.
rocprofiler_thread_id_t
push_external_correlation()
{
    auto tid = rocprofiler_thread_id_t{};
    ROCPROFILER_CALL(rocprofiler_get_thread_id(&tid), "get thread id");

    for(const auto& itr : contexts)
    {
        if(itr.second == nullptr) continue;

        ROCPROFILER_CALL(rocprofiler_push_external_correlation_id(
                             *itr.second, tid, rocprofiler_user_data_t{.value = tid}),
                         "push external correlation");
    }

    return tid;
}
} // namespace
} // namespace client
// Entry point through which rocprofiler discovers this tool.
//
// Declines (returns nullptr) unless priority is 0. Otherwise it names the
// client, logs the decoded rocprofiler version, allocates the call stack that
// is passed around as tool data, registers the internal-thread creation hooks
// and process-exit handlers, and returns a pointer to a static configuration
// holding tool_init, tool_fini and the tool data.
extern "C" rocprofiler_tool_configure_result_t*
rocprofiler_configure(uint32_t                 version,
                      const char*              runtime_version,
                      uint32_t                 priority,
                      rocprofiler_client_id_t* id)
{
    // only activate if main tool
    if(priority > 0) return nullptr;

    // set the client name and remember the client info
    id->name          = "kernel-tracing-test-tool";
    client::client_id = id;

    // version is encoded as major * 10000 + minor * 100 + patch
    const uint32_t major = version / 10000;
    const uint32_t minor = (version / 100) % 100;
    const uint32_t patch = version % 100;

    // generate info string
    auto info = std::stringstream{};
    info << id->name << " is using rocprofiler v" << major << "." << minor << "." << patch << " ("
         << runtime_version << ")";

    std::clog << info.str() << std::endl;

    auto* client_tool_data = new std::vector<client::source_location>{};

    client_tool_data->emplace_back(
        client::source_location{__FUNCTION__, __FILE__, __LINE__, info.str()});

    ROCPROFILER_CALL(rocprofiler_at_internal_thread_create(
                         client::thread_precreate,
                         client::thread_postcreate,
                         ROCPROFILER_LIBRARY | ROCPROFILER_HSA_LIBRARY | ROCPROFILER_HIP_LIBRARY |
                             ROCPROFILER_MARKER_LIBRARY,
                         static_cast<void*>(client_tool_data)),
                     "registration for thread creation notifications");

    // the same handler runs on normal exit and on quick_exit
    auto finalize_on_exit = []() {
        if(client::client_fini_func) client::client_fini_func(*client::client_id);
    };
    std::atexit(finalize_on_exit);
    std::at_quick_exit(finalize_on_exit);

    // create configure data
    static auto cfg =
        rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t),
                                            &client::tool_init,
                                            &client::tool_fini,
                                            static_cast<void*>(client_tool_data)};

    // return pointer to configure data
    return &cfg;
}
+4
Просмотреть файл
@@ -0,0 +1,4 @@
[pytest]
addopts = --durations=20 -ras -vv
testpaths = validate.py
+452
Просмотреть файл
@@ -0,0 +1,452 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
#pragma once
#include <rocprofiler/buffer.h>
#include <rocprofiler/callback_tracing.h>
#include <rocprofiler/external_correlation.h>
#include <rocprofiler/fwd.h>
#include <rocprofiler/internal_threading.h>
#include <rocprofiler/rocprofiler.h>
#include <cereal/archives/json.hpp>
#include <cereal/cereal.hpp>
#include <cereal/types/array.hpp>
#include <cereal/types/atomic.hpp>
#include <cereal/types/bitset.hpp>
#include <cereal/types/chrono.hpp>
#include <cereal/types/common.hpp>
#include <cereal/types/complex.hpp>
#include <cereal/types/deque.hpp>
#include <cereal/types/functional.hpp>
#include <cereal/types/list.hpp>
#include <cereal/types/map.hpp>
#include <cereal/types/memory.hpp>
#include <cereal/types/optional.hpp>
#include <cereal/types/polymorphic.hpp>
#include <cereal/types/queue.hpp>
#include <cereal/types/set.hpp>
#include <cereal/types/stack.hpp>
#include <cereal/types/string.hpp>
#include <cereal/types/tuple.hpp>
#include <cereal/types/unordered_map.hpp>
#include <cereal/types/unordered_set.hpp>
#include <cereal/types/utility.hpp>
#include <cereal/types/variant.hpp>
#include <cereal/types/vector.hpp>
// Serialization helper macros. Each one expects an archive named `ar` and a value named `data`
// to be visible at the point of use.
//
// SAVE_DATA_FIELD(FIELD): write data.FIELD under the key "FIELD".
#define SAVE_DATA_FIELD(FIELD) ar(cereal::make_nvp(#FIELD, data.FIELD))
// SAVE_DATA_VALUE(NAME, VALUE): write data.VALUE under the explicitly provided key NAME.
#define SAVE_DATA_VALUE(NAME, VALUE) ar(cereal::make_nvp(NAME, data.VALUE))
// SAVE_DATA_CSTR(FIELD): build a std::string from data.FIELD and write it under the key "FIELD".
// NOTE(review): std::string cannot be constructed from a null pointer -- confirm that the
// serialized C-string fields are never null.
#define SAVE_DATA_CSTR(FIELD) ar(cereal::make_nvp(#FIELD, std::string{data.FIELD}))
// SAVE_DATA_BITFIELD(NAME, VALUE): copy data.VALUE into a local variable and write that copy
// under the key NAME (presumably because a bit-field cannot be bound by reference -- confirm).
// Wrapped in do/while(false) so that `SAVE_DATA_BITFIELD(...);` is exactly one statement and
// stays safe inside an unbraced if/else.
#define SAVE_DATA_BITFIELD(NAME, VALUE)                                                            \
    do                                                                                             \
    {                                                                                              \
        auto _val = data.VALUE;                                                                    \
        ar(cereal::make_nvp(NAME, _val));                                                          \
    } while(false)
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_context_id_t data)
{
SAVE_DATA_FIELD(handle);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_agent_id_t data)
{
SAVE_DATA_FIELD(handle);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, hsa_agent_t data)
{
SAVE_DATA_FIELD(handle);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_queue_id_t data)
{
SAVE_DATA_FIELD(handle);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_correlation_id_t data)
{
SAVE_DATA_FIELD(internal);
SAVE_DATA_VALUE("external", external.value);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_dim3_t data)
{
SAVE_DATA_FIELD(x);
SAVE_DATA_FIELD(y);
SAVE_DATA_FIELD(z);
}
// Serializes the payload of a code-object load callback.
//
// The common fields are always written. Which storage fields follow depends on
// `storage_type`: `storage_file` for FILE storage, `memory_base`/`memory_size` for MEMORY
// storage, and nothing extra for any other value.
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_callback_tracing_code_object_load_data_t data)
{
    SAVE_DATA_FIELD(size);
    SAVE_DATA_FIELD(code_object_id);
    // each agent is written exactly once: emitting it a second time would produce duplicate
    // keys within the same serialized object
    SAVE_DATA_FIELD(rocp_agent);
    SAVE_DATA_FIELD(hsa_agent);
    SAVE_DATA_CSTR(uri);
    SAVE_DATA_FIELD(load_base);
    SAVE_DATA_FIELD(load_size);
    SAVE_DATA_FIELD(load_delta);
    SAVE_DATA_FIELD(storage_type);
    if(data.storage_type == ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE)
    {
        SAVE_DATA_FIELD(storage_file);
    }
    else if(data.storage_type == ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY)
    {
        SAVE_DATA_FIELD(memory_base);
        SAVE_DATA_FIELD(memory_size);
    }
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t data)
{
SAVE_DATA_FIELD(size);
SAVE_DATA_FIELD(kernel_id);
SAVE_DATA_FIELD(code_object_id);
SAVE_DATA_CSTR(kernel_name);
SAVE_DATA_FIELD(kernel_object);
SAVE_DATA_FIELD(kernarg_segment_size);
SAVE_DATA_FIELD(kernarg_segment_alignment);
SAVE_DATA_FIELD(group_segment_size);
SAVE_DATA_FIELD(private_segment_size);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_hsa_api_retval_t data)
{
SAVE_DATA_FIELD(uint64_t_retval);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_callback_tracing_hsa_api_data_t data)
{
SAVE_DATA_FIELD(size);
// SAVE_DATA_FIELD(args);
SAVE_DATA_FIELD(retval);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_callback_tracing_record_t data)
{
SAVE_DATA_FIELD(context_id);
SAVE_DATA_FIELD(thread_id);
SAVE_DATA_FIELD(correlation_id);
SAVE_DATA_FIELD(kind);
SAVE_DATA_FIELD(operation);
SAVE_DATA_FIELD(phase);
}
template <typename ArchiveT, typename Tp>
void
save_buffer_tracing_api_record(ArchiveT& ar, Tp data)
{
SAVE_DATA_FIELD(size);
SAVE_DATA_FIELD(kind);
SAVE_DATA_FIELD(correlation_id);
SAVE_DATA_FIELD(operation);
SAVE_DATA_FIELD(start_timestamp);
SAVE_DATA_FIELD(end_timestamp);
SAVE_DATA_FIELD(thread_id);
}
// Serializes an HSA API buffer-tracing record by delegating to
// save_buffer_tracing_api_record().
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_buffer_tracing_hsa_api_record_t data)
{
save_buffer_tracing_api_record(ar, data);
}
// Serializes a HIP API buffer-tracing record by delegating to
// save_buffer_tracing_api_record().
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_buffer_tracing_hip_api_record_t data)
{
save_buffer_tracing_api_record(ar, data);
}
// Serializes a marker buffer-tracing record by delegating to
// save_buffer_tracing_api_record().
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_buffer_tracing_marker_record_t data)
{
save_buffer_tracing_api_record(ar, data);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_buffer_tracing_kernel_dispatch_record_t data)
{
SAVE_DATA_FIELD(size);
SAVE_DATA_FIELD(kind);
SAVE_DATA_FIELD(correlation_id);
SAVE_DATA_FIELD(start_timestamp);
SAVE_DATA_FIELD(end_timestamp);
SAVE_DATA_FIELD(agent_id);
SAVE_DATA_FIELD(queue_id);
SAVE_DATA_FIELD(kernel_id);
SAVE_DATA_FIELD(private_segment_size);
SAVE_DATA_FIELD(group_segment_size);
SAVE_DATA_FIELD(workgroup_size);
SAVE_DATA_FIELD(grid_size);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_buffer_tracing_memory_copy_record_t data)
{
SAVE_DATA_FIELD(size);
SAVE_DATA_FIELD(kind);
SAVE_DATA_FIELD(correlation_id);
SAVE_DATA_FIELD(start_timestamp);
SAVE_DATA_FIELD(end_timestamp);
SAVE_DATA_FIELD(agent_id);
SAVE_DATA_FIELD(queue_id);
SAVE_DATA_FIELD(kernel_id);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, HsaCacheType data)
{
SAVE_DATA_BITFIELD("Data", ui32.Data);
SAVE_DATA_BITFIELD("Instruction", ui32.Instruction);
SAVE_DATA_BITFIELD("CPU", ui32.CPU);
SAVE_DATA_BITFIELD("HSACU", ui32.HSACU);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, HSA_LINKPROPERTY data)
{
SAVE_DATA_BITFIELD("Override", ui32.Override);
SAVE_DATA_BITFIELD("NonCoherent", ui32.NonCoherent);
SAVE_DATA_BITFIELD("NoAtomics32bit", ui32.NoAtomics32bit);
SAVE_DATA_BITFIELD("NoAtomics64bit", ui32.NoAtomics64bit);
SAVE_DATA_BITFIELD("NoPeerToPeerDMA", ui32.NoPeerToPeerDMA);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, HSA_CAPABILITY data)
{
SAVE_DATA_BITFIELD("HotPluggable", ui32.HotPluggable);
SAVE_DATA_BITFIELD("HSAMMUPresent", ui32.HSAMMUPresent);
SAVE_DATA_BITFIELD("SharedWithGraphics", ui32.SharedWithGraphics);
SAVE_DATA_BITFIELD("QueueSizePowerOfTwo", ui32.QueueSizePowerOfTwo);
SAVE_DATA_BITFIELD("QueueSize32bit", ui32.QueueSize32bit);
SAVE_DATA_BITFIELD("QueueIdleEvent", ui32.QueueIdleEvent);
SAVE_DATA_BITFIELD("VALimit", ui32.VALimit);
SAVE_DATA_BITFIELD("WatchPointsSupported", ui32.WatchPointsSupported);
SAVE_DATA_BITFIELD("WatchPointsTotalBits", ui32.WatchPointsTotalBits);
SAVE_DATA_BITFIELD("DoorbellType", ui32.DoorbellType);
SAVE_DATA_BITFIELD("AQLQueueDoubleMap", ui32.AQLQueueDoubleMap);
SAVE_DATA_BITFIELD("DebugTrapSupported", ui32.DebugTrapSupported);
SAVE_DATA_BITFIELD("WaveLaunchTrapOverrideSupported", ui32.WaveLaunchTrapOverrideSupported);
SAVE_DATA_BITFIELD("WaveLaunchModeSupported", ui32.WaveLaunchModeSupported);
SAVE_DATA_BITFIELD("PreciseMemoryOperationsSupported", ui32.PreciseMemoryOperationsSupported);
SAVE_DATA_BITFIELD("DEPRECATED_SRAM_EDCSupport", ui32.DEPRECATED_SRAM_EDCSupport);
SAVE_DATA_BITFIELD("Mem_EDCSupport", ui32.Mem_EDCSupport);
SAVE_DATA_BITFIELD("RASEventNotify", ui32.RASEventNotify);
SAVE_DATA_BITFIELD("ASICRevision", ui32.ASICRevision);
SAVE_DATA_BITFIELD("SRAM_EDCSupport", ui32.SRAM_EDCSupport);
SAVE_DATA_BITFIELD("SVMAPISupported", ui32.SVMAPISupported);
SAVE_DATA_BITFIELD("CoherentHostAccess", ui32.CoherentHostAccess);
SAVE_DATA_BITFIELD("DebugSupportedFirmware", ui32.DebugSupportedFirmware);
SAVE_DATA_BITFIELD("Reserved", ui32.Reserved);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, HSA_MEMORYPROPERTY data)
{
SAVE_DATA_BITFIELD("HotPluggable", ui32.HotPluggable);
SAVE_DATA_BITFIELD("NonVolatile", ui32.NonVolatile);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, HSA_ENGINE_VERSION data)
{
SAVE_DATA_BITFIELD("uCodeSDMA", uCodeSDMA);
SAVE_DATA_BITFIELD("uCodeRes", uCodeRes);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, HSA_ENGINE_ID data)
{
SAVE_DATA_BITFIELD("uCode", ui32.uCode);
SAVE_DATA_BITFIELD("Major", ui32.Major);
SAVE_DATA_BITFIELD("Minor", ui32.Minor);
SAVE_DATA_BITFIELD("Stepping", ui32.Stepping);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_agent_cache_t data)
{
SAVE_DATA_FIELD(processor_id_low);
SAVE_DATA_FIELD(size);
SAVE_DATA_FIELD(level);
SAVE_DATA_FIELD(cache_line_size);
SAVE_DATA_FIELD(cache_lines_per_tag);
SAVE_DATA_FIELD(association);
SAVE_DATA_FIELD(latency);
SAVE_DATA_FIELD(type);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_agent_io_link_t data)
{
SAVE_DATA_FIELD(type);
SAVE_DATA_FIELD(version_major);
SAVE_DATA_FIELD(version_minor);
SAVE_DATA_FIELD(node_from);
SAVE_DATA_FIELD(node_to);
SAVE_DATA_FIELD(weight);
SAVE_DATA_FIELD(min_latency);
SAVE_DATA_FIELD(max_latency);
SAVE_DATA_FIELD(min_bandwidth);
SAVE_DATA_FIELD(max_bandwidth);
SAVE_DATA_FIELD(recommended_transfer_size);
SAVE_DATA_FIELD(flags);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_agent_mem_bank_t data)
{
SAVE_DATA_FIELD(heap_type);
SAVE_DATA_FIELD(flags);
SAVE_DATA_FIELD(width);
SAVE_DATA_FIELD(mem_clk_max);
SAVE_DATA_FIELD(size_in_bytes);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_pc_sampling_configuration_t data)
{
SAVE_DATA_FIELD(method);
SAVE_DATA_FIELD(unit);
SAVE_DATA_FIELD(min_interval);
SAVE_DATA_FIELD(max_interval);
SAVE_DATA_FIELD(flags);
}
// Serializes a rocprofiler_agent_t.
//
// The scalar members are written first, the four C-string members are copied into std::string
// before being written, and finally the `mem_banks`, `caches` and `io_links` arrays are each
// copied into a std::vector (using the matching `*_count` member as the length) and written as
// a sequence.
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_agent_t& data)
{
    // identification and element counts
    SAVE_DATA_FIELD(size);
    SAVE_DATA_FIELD(id);
    SAVE_DATA_FIELD(type);
    SAVE_DATA_FIELD(cpu_cores_count);
    SAVE_DATA_FIELD(simd_count);
    SAVE_DATA_FIELD(mem_banks_count);
    SAVE_DATA_FIELD(caches_count);
    SAVE_DATA_FIELD(io_links_count);

    // remaining scalar properties
    SAVE_DATA_FIELD(cpu_core_id_base);
    SAVE_DATA_FIELD(simd_id_base);
    SAVE_DATA_FIELD(max_waves_per_simd);
    SAVE_DATA_FIELD(lds_size_in_kb);
    SAVE_DATA_FIELD(gds_size_in_kb);
    SAVE_DATA_FIELD(num_gws);
    SAVE_DATA_FIELD(wave_front_size);
    SAVE_DATA_FIELD(num_xcc);
    SAVE_DATA_FIELD(cu_count);
    SAVE_DATA_FIELD(array_count);
    SAVE_DATA_FIELD(num_shader_banks);
    SAVE_DATA_FIELD(simd_arrays_per_engine);
    SAVE_DATA_FIELD(cu_per_simd_array);
    SAVE_DATA_FIELD(simd_per_cu);
    SAVE_DATA_FIELD(max_slots_scratch_cu);
    SAVE_DATA_FIELD(gfx_target_version);
    SAVE_DATA_FIELD(vendor_id);
    SAVE_DATA_FIELD(device_id);
    SAVE_DATA_FIELD(location_id);
    SAVE_DATA_FIELD(domain);
    SAVE_DATA_FIELD(drm_render_minor);
    SAVE_DATA_FIELD(num_sdma_engines);
    SAVE_DATA_FIELD(num_sdma_xgmi_engines);
    SAVE_DATA_FIELD(num_sdma_queues_per_engine);
    SAVE_DATA_FIELD(num_cp_queues);
    SAVE_DATA_FIELD(max_engine_clk_ccompute);
    SAVE_DATA_FIELD(max_engine_clk_fcompute);
    SAVE_DATA_FIELD(sdma_fw_version);
    SAVE_DATA_FIELD(fw_version);
    SAVE_DATA_FIELD(capability);
    SAVE_DATA_FIELD(cu_per_engine);
    SAVE_DATA_FIELD(max_waves_per_cu);
    SAVE_DATA_FIELD(family_id);
    SAVE_DATA_FIELD(workgroup_max_size);
    SAVE_DATA_FIELD(grid_max_size);
    SAVE_DATA_FIELD(local_mem_size);
    SAVE_DATA_FIELD(hive_id);
    SAVE_DATA_FIELD(gpu_id);
    SAVE_DATA_FIELD(workgroup_max_dim);
    SAVE_DATA_FIELD(grid_max_dim);

    // C strings are written as std::string copies
    SAVE_DATA_CSTR(name);
    SAVE_DATA_CSTR(vendor_name);
    SAVE_DATA_CSTR(product_name);
    SAVE_DATA_CSTR(model_name);

    SAVE_DATA_FIELD(num_pc_sampling_configs);
    SAVE_DATA_FIELD(node_id);

    // copies `count` elements starting at `first` into a vector and writes it under `label`
    auto save_array = [&ar](const char* label, const auto* first, uint64_t count) {
        using element_type = std::decay_t<decltype(*first)>;
        auto items         = std::vector<element_type>(first, first + count);
        ar(cereal::make_nvp(label, items));
    };

    save_array("mem_banks", data.mem_banks, data.mem_banks_count);
    save_array("caches", data.caches, data.caches_count);
    save_array("io_links", data.io_links, data.io_links_count);
}
// Do not leak any of the serialization helper macros into files that include this header.
#undef SAVE_DATA_FIELD
#undef SAVE_DATA_VALUE
#undef SAVE_DATA_CSTR
#undef SAVE_DATA_BITFIELD
+117
Просмотреть файл
@@ -0,0 +1,117 @@
#!/usr/bin/env python3
import sys
import pytest
# helper function
def node_exists(name, data, min_len=1):
    """Assert that ``data[name]`` is present, is not None and has at least ``min_len`` items."""
    assert name in data
    node = data[name]
    assert node is not None
    assert len(node) >= min_len
def test_data_structure(input_data):
    """verify minimum amount of expected data is present"""
    tool_key = "kernel-tracing-test-tool"

    # top-level node and its direct children
    node_exists(tool_key, input_data)
    tool = input_data[tool_key]
    for section in ("agents", "call_stack", "callback_records", "buffer_records"):
        node_exists(section, tool)

    # groups recorded through the callback tracing service
    callback_records = tool["callback_records"]
    for group in ("names", "code_objects", "kernel_symbols", "hsa_api_traces"):
        node_exists(group, callback_records)

    # groups recorded through the buffer tracing service; "memory_copies" only has to be
    # present, it is allowed to be empty
    buffer_records = tool["buffer_records"]
    node_exists("names", buffer_records)
    node_exists("kernel_dispatches", buffer_records)
    node_exists("memory_copies", buffer_records, 0)
    node_exists("hsa_api_traces", buffer_records)
def test_timestamps(input_data):
data = input_data
cb_start = {}
cb_end = {}
for itr in data["kernel-tracing-test-tool"]["callback_records"]["hsa_api_traces"]:
cid = itr["record"]["correlation_id"]["internal"]
phase = itr["record"]["phase"]
if phase == 1:
cb_start[cid] = itr["timestamp"]
elif phase == 2:
cb_end[cid] = itr["timestamp"]
assert cb_start[cid] <= itr["timestamp"]
else:
assert phase == 1 or phase == 2
for itr in data["kernel-tracing-test-tool"]["buffer_records"]["hsa_api_traces"]:
assert itr["start_timestamp"] <= itr["end_timestamp"]
for itr in data["kernel-tracing-test-tool"]["buffer_records"]["kernel_dispatches"]:
assert itr["start_timestamp"] < itr["end_timestamp"]
assert itr["correlation_id"]["internal"] > 0
assert itr["correlation_id"]["external"] > 0
api_start = cb_start[itr["correlation_id"]["internal"]]
api_end = cb_end[itr["correlation_id"]["internal"]]
assert api_start < itr["start_timestamp"]
assert api_end <= itr["end_timestamp"]
def test_internal_correlation_ids(input_data):
data = input_data
api_corr_ids = []
for itr in data["kernel-tracing-test-tool"]["callback_records"]["hsa_api_traces"]:
api_corr_ids.append(itr["record"]["correlation_id"]["internal"])
for itr in data["kernel-tracing-test-tool"]["buffer_records"]["hsa_api_traces"]:
api_corr_ids.append(itr["correlation_id"]["internal"])
api_corr_ids_sorted = sorted(api_corr_ids)
api_corr_ids_unique = list(set(api_corr_ids))
len_corr_id_unq = len(api_corr_ids_unique)
assert len(api_corr_ids) != len_corr_id_unq
assert max(api_corr_ids_sorted) == len_corr_id_unq
def test_external_correlation_ids(input_data):
data = input_data
for itr in data["kernel-tracing-test-tool"]["callback_records"]["hsa_api_traces"]:
assert itr["record"]["thread_id"] == itr["record"]["correlation_id"]["external"]
for itr in data["kernel-tracing-test-tool"]["buffer_records"]["hsa_api_traces"]:
assert itr["thread_id"] == itr["correlation_id"]["external"]
def test_kernel_ids(input_data):
data = input_data
symbol_info = {}
for itr in data["kernel-tracing-test-tool"]["callback_records"]["kernel_symbols"]:
phase = itr["record"]["phase"]
payload = itr["payload"]
kern_id = payload["kernel_id"]
assert phase == 1 or phase == 2
assert kern_id > 0
if phase == 1:
assert len(payload["kernel_name"]) > 0
symbol_info[kern_id] = payload
elif phase == 2:
assert payload["kernel_id"] in symbol_info.keys()
assert payload["kernel_name"] == symbol_info[kern_id]["kernel_name"]
for itr in data["kernel-tracing-test-tool"]["buffer_records"]["kernel_dispatches"]:
assert itr["kernel_id"] in symbol_info.keys()
if __name__ == "__main__":
    # run pytest on this file, stopping at the first failure ("-x"); any extra command-line
    # arguments are forwarded to pytest and its result becomes the process exit code
    sys.exit(pytest.main(["-x", __file__, *sys.argv[1:]]))