Buffering: initial implementation and tests (#20)
* Update source/lib/common
- CMakeLists.txt
- less verbose
- rocprofiler-common-library uses rocprofiler-headers target
- mpl.hpp
- metaprogramming header with type_list, size_of, index_of, and is_one_of
- record_header_buffer.{hpp,cpp}
- wrapper class around atomic_ring_buffer and vector of rocprofiler_record_header_t
- atomic_ring_buffer.{hpp,cpp}
- request function accepts wrap param when overwritting is not desirable
- can_clear member function
- clear member function for rewinding write pointer to start of buffer
- containers/CMakeLists.txt
- include record_header_buffer.{hpp,cpp} in build target
* Update source/lib/tests: Buffering tests
- Added buffering tests. See comments in code for description
* atomic_ring_buffer -> ring_buffer
- remove ring_buffer implementation
- rename atomic_ring_buffer to ring_buffer
* atomic_ring_buffer -> ring_buffer
- remove ring_buffer implementation
- rename atomic_ring_buffer to ring_buffer
* Update record_header_buffer
- lock, unlock, is_locked, clear, save, and load member functions
* Buffering tests
- add buffer test for save/load capability
* Update rocprofiler_memcheck.cmake
- fix erroneous spaces causing incorrect string evaluation
* Update ring_buffer
- fix exception message
* undef HIP_PROF_API
- make sure HIP_PROF_API is undefined before including hip_runtime.h
- avoid directly including hip/hip_runtime.h
* Update rocprofiler_config_interfaces
- remove stale preprocessor defines that are from old rocprofiler/roctracer
- HIP_PROF_HIP_API_STRING=1
- PROF_API_IMPL=1
* Update run-ci.py
- fix paths to suppression files
- improve printing logs to console in github actions
* Update buffering implementation
- remove support for using malloc instead of mmap in ring_buffer
- provide some info functions in record_header_buffer
- improve the testing of the save-load buffer test
* Update run-ci.py
- fix CTEST_CUSTOM_COVERAGE_EXCLUDE
* Update hip/api_args.h
- remove undef HIP_PROF_API
* Update buffering-save-load.cpp
- updated comments
* Update record_header_buffer
- default ctor
- allocate member function
- is_allocated member function
* Update buffering-save-load.cpp
- tweaked usage of record_header_buffer to delay allocation
Tento commit je obsažen v:
@@ -15,10 +15,8 @@ target_include_directories(
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
|
||||
|
||||
target_compile_definitions(
|
||||
rocprofiler-headers
|
||||
INTERFACE $<BUILD_INTERFACE:AMD_INTERNAL_BUILD=1> $<BUILD_INTERFACE:PROF_API_IMPL=1>
|
||||
$<BUILD_INTERFACE:HIP_PROF_HIP_API_STRING=1>
|
||||
$<BUILD_INTERFACE:__HIP_PLATFORM_AMD__=1>)
|
||||
rocprofiler-headers INTERFACE $<BUILD_INTERFACE:AMD_INTERNAL_BUILD=1>
|
||||
$<BUILD_INTERFACE:__HIP_PLATFORM_AMD__=1>)
|
||||
|
||||
# ensure the env overrides the appending /opt/rocm later
|
||||
string(REPLACE ":" ";" CMAKE_PREFIX_PATH "$ENV{CMAKE_PREFIX_PATH};${CMAKE_PREFIX_PATH}")
|
||||
|
||||
@@ -42,26 +42,28 @@ function(rocprofiler_set_memcheck_env _TYPE _LIB_BASE)
|
||||
|
||||
if(${_TYPE}_LIBRARY)
|
||||
set(ROCPROFILER_MEMCHECK_PRELOAD_ENV
|
||||
" LD_PRELOAD=${${_TYPE}_LIBRARY} "
|
||||
CACHE INTERNAL " LD_PRELOAD env variable for tests " FORCE)
|
||||
"LD_PRELOAD=${${_TYPE}_LIBRARY}"
|
||||
CACHE INTERNAL "LD_PRELOAD env variable for tests " FORCE)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# always unset so that it doesn't preload if memcheck disabled
|
||||
unset(ROCPROFILER_MEMCHECK_PRELOAD_ENV CACHE)
|
||||
|
||||
if(ROCPROFILER_MEMCHECK STREQUAL " AddressSanitizer ")
|
||||
rocprofiler_add_memcheck_flags(" address ")
|
||||
rocprofiler_set_memcheck_env(" ${ROCPROFILER_MEMCHECK}" "asan ")
|
||||
elseif(ROCPROFILER_MEMCHECK STREQUAL " LeakSanitizer ")
|
||||
rocprofiler_add_memcheck_flags(" leak ")
|
||||
rocprofiler_set_memcheck_env(" ${ROCPROFILER_MEMCHECK}" "lsan ")
|
||||
elseif(ROCPROFILER_MEMCHECK STREQUAL " MemorySanitizer ")
|
||||
rocprofiler_add_memcheck_flags(" memory ")
|
||||
elseif(ROCPROFILER_MEMCHECK STREQUAL " ThreadSanitizer ")
|
||||
rocprofiler_add_memcheck_flags(" thread ")
|
||||
rocprofiler_set_memcheck_env(" ${ROCPROFILER_MEMCHECK}" "tsan ")
|
||||
elseif(ROCPROFILER_MEMCHECK STREQUAL " UndefinedBehaviorSanitizer ")
|
||||
rocprofiler_add_memcheck_flags(" undefined ")
|
||||
rocprofiler_set_memcheck_env(" ${ROCPROFILER_MEMCHECK}" "ubsan ")
|
||||
if(ROCPROFILER_MEMCHECK STREQUAL "AddressSanitizer")
|
||||
rocprofiler_add_memcheck_flags("address")
|
||||
rocprofiler_set_memcheck_env("${ROCPROFILER_MEMCHECK}" "asan")
|
||||
elseif(ROCPROFILER_MEMCHECK STREQUAL "LeakSanitizer")
|
||||
rocprofiler_add_memcheck_flags("leak")
|
||||
rocprofiler_set_memcheck_env("${ROCPROFILER_MEMCHECK}" "lsan")
|
||||
elseif(ROCPROFILER_MEMCHECK STREQUAL "MemorySanitizer")
|
||||
rocprofiler_add_memcheck_flags("memory")
|
||||
elseif(ROCPROFILER_MEMCHECK STREQUAL "ThreadSanitizer")
|
||||
rocprofiler_add_memcheck_flags("thread")
|
||||
rocprofiler_set_memcheck_env("${ROCPROFILER_MEMCHECK}" "tsan")
|
||||
elseif(ROCPROFILER_MEMCHECK STREQUAL "UndefinedBehaviorSanitizer")
|
||||
rocprofiler_add_memcheck_flags("undefined")
|
||||
rocprofiler_set_memcheck_env("${ROCPROFILER_MEMCHECK}" "ubsan")
|
||||
elseif(NOT ROCPROFILER_MEMCHECK STREQUAL "")
|
||||
message(FATAL_ERROR "Unsupported ROCPROFILER_MEMCHECK type: ${ROCPROFILER_MEMCHECK}")
|
||||
endif()
|
||||
|
||||
@@ -22,7 +22,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
set(common_sources ${CMAKE_CURRENT_LIST_DIR}/config.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/helper.cpp)
|
||||
|
||||
set(common_headers
|
||||
${CMAKE_CURRENT_LIST_DIR}/config.hpp ${CMAKE_CURRENT_LIST_DIR}/defines.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/environment.hpp ${CMAKE_CURRENT_LIST_DIR}/join.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/log.hpp ${CMAKE_CURRENT_LIST_DIR}/helper.hpp)
|
||||
#
|
||||
# Builds common utilities into a static library
|
||||
#
|
||||
set(common_sources config.cpp helper.cpp)
|
||||
set(common_headers config.hpp defines.hpp environment.hpp join.hpp log.hpp helper.hpp
|
||||
mpl.hpp)
|
||||
|
||||
add_library(rocprofiler-common-library STATIC)
|
||||
add_library(rocprofiler::rocprofiler-common-library ALIAS rocprofiler-common-library)
|
||||
@@ -17,7 +16,8 @@ target_include_directories(rocprofiler-common-library
|
||||
|
||||
target_link_libraries(
|
||||
rocprofiler-common-library
|
||||
PUBLIC $<BUILD_INTERFACE:rocprofiler::rocprofiler-build-flags>
|
||||
PUBLIC $<BUILD_INTERFACE:rocprofiler::rocprofiler-headers>
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-build-flags>
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-threading>
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-memcheck>
|
||||
$<BUILD_INTERFACE:rocprofiler::rocprofiler-stdcxxfs>
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
#
|
||||
set(containers_sources)
|
||||
|
||||
set(containers_headers atomic_ring_buffer.hpp c_array.hpp operators.hpp ring_buffer.hpp
|
||||
stable_vector.hpp static_vector.hpp)
|
||||
|
||||
set(containers_sources atomic_ring_buffer.cpp ring_buffer.cpp)
|
||||
# add container sources and headers to common library target
|
||||
#
|
||||
set(containers_headers ring_buffer.hpp c_array.hpp operators.hpp record_header_buffer.hpp
|
||||
ring_buffer.hpp stable_vector.hpp static_vector.hpp)
|
||||
set(containers_sources ring_buffer.cpp record_header_buffer.cpp ring_buffer.cpp)
|
||||
|
||||
target_sources(rocprofiler-common-library PRIVATE ${containers_sources}
|
||||
${containers_headers})
|
||||
|
||||
@@ -1,295 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 ROCm Developer Tools
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "atomic_ring_buffer.hpp"
|
||||
#include "lib/common/environment.hpp"
|
||||
#include "lib/common/units.hpp"
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <cerrno>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
namespace container
|
||||
{
|
||||
namespace base
|
||||
{
|
||||
atomic_ring_buffer::atomic_ring_buffer(size_t _size, bool _use_mmap)
|
||||
{
|
||||
set_use_mmap(_use_mmap);
|
||||
init(_size);
|
||||
}
|
||||
|
||||
atomic_ring_buffer::~atomic_ring_buffer() { destroy(); }
|
||||
|
||||
atomic_ring_buffer::atomic_ring_buffer(const atomic_ring_buffer& rhs)
|
||||
: m_use_mmap{rhs.m_use_mmap}
|
||||
, m_use_mmap_explicit{rhs.m_use_mmap_explicit}
|
||||
{
|
||||
init(rhs.m_size);
|
||||
}
|
||||
|
||||
atomic_ring_buffer::atomic_ring_buffer(atomic_ring_buffer&& rhs) noexcept
|
||||
: m_init{rhs.m_init}
|
||||
, m_use_mmap{rhs.m_use_mmap}
|
||||
, m_use_mmap_explicit{rhs.m_use_mmap_explicit}
|
||||
, m_ptr{rhs.m_ptr}
|
||||
, m_size{rhs.m_size}
|
||||
, m_read_count{rhs.m_read_count.load()}
|
||||
, m_write_count{rhs.m_write_count.load()}
|
||||
{
|
||||
rhs.reset();
|
||||
}
|
||||
|
||||
atomic_ring_buffer&
|
||||
atomic_ring_buffer::operator=(const atomic_ring_buffer& rhs)
|
||||
{
|
||||
if(this == &rhs) return *this;
|
||||
destroy();
|
||||
m_use_mmap = rhs.m_use_mmap;
|
||||
m_use_mmap_explicit = rhs.m_use_mmap_explicit;
|
||||
init(rhs.m_size);
|
||||
return *this;
|
||||
}
|
||||
|
||||
atomic_ring_buffer&
|
||||
atomic_ring_buffer::operator=(atomic_ring_buffer&& rhs) noexcept
|
||||
{
|
||||
if(this == &rhs) return *this;
|
||||
destroy();
|
||||
m_init = rhs.m_init;
|
||||
m_use_mmap = rhs.m_use_mmap;
|
||||
m_use_mmap_explicit = rhs.m_use_mmap_explicit;
|
||||
m_ptr = rhs.m_ptr;
|
||||
m_size = rhs.m_size;
|
||||
m_read_count = rhs.m_read_count.load();
|
||||
m_write_count = rhs.m_write_count.load();
|
||||
rhs.reset();
|
||||
return *this;
|
||||
}
|
||||
|
||||
void
|
||||
atomic_ring_buffer::init(size_t _size)
|
||||
{
|
||||
if(m_init)
|
||||
throw std::runtime_error(
|
||||
"tim::base::atomic_ring_buffer::init(size_t) :: already initialized");
|
||||
|
||||
m_init = true;
|
||||
|
||||
// Round up to multiple of page size.
|
||||
_size += units::get_page_size() - ((_size % units::get_page_size() > 0)
|
||||
? (_size % units::get_page_size())
|
||||
: units::get_page_size());
|
||||
|
||||
if((_size % units::get_page_size()) > 0)
|
||||
{
|
||||
std::ostringstream _oss{};
|
||||
_oss << "Error! size is not a multiple of page size: " << _size << " % "
|
||||
<< units::get_page_size() << " = " << (_size % units::get_page_size());
|
||||
throw std::runtime_error(_oss.str());
|
||||
}
|
||||
|
||||
m_size = _size;
|
||||
m_read_count = 0;
|
||||
m_write_count = 0;
|
||||
|
||||
if(!m_use_mmap_explicit) m_use_mmap = get_env("ROCPROFILER_USE_MMAP", m_use_mmap);
|
||||
|
||||
if(!m_use_mmap)
|
||||
{
|
||||
m_ptr = malloc(m_size * sizeof(char));
|
||||
return;
|
||||
}
|
||||
|
||||
// Map twice the buffer size.
|
||||
if((m_ptr =
|
||||
mmap(nullptr, m_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)) ==
|
||||
MAP_FAILED)
|
||||
{
|
||||
destroy();
|
||||
auto _err = errno;
|
||||
// TIMEMORY_PRINTF_FATAL(stderr, "Error using mmap: %s\n", strerror(_err));
|
||||
throw std::runtime_error(strerror(_err));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
atomic_ring_buffer::destroy()
|
||||
{
|
||||
if(m_ptr && m_init)
|
||||
{
|
||||
if(!m_use_mmap)
|
||||
{
|
||||
::free(m_ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Unmap the mapped virtual memmory.
|
||||
auto ret = munmap(m_ptr, m_size);
|
||||
if(ret != 0) perror("munmap");
|
||||
}
|
||||
}
|
||||
m_init = false;
|
||||
m_size = 0;
|
||||
m_read_count = 0;
|
||||
m_write_count = 0;
|
||||
m_ptr = nullptr;
|
||||
}
|
||||
|
||||
void
|
||||
atomic_ring_buffer::set_use_mmap(bool _v)
|
||||
{
|
||||
if(m_init)
|
||||
throw std::runtime_error("tim::base::atomic_ring_buffer::set_use_mmap(bool) cannot be "
|
||||
"called after initialization");
|
||||
m_use_mmap = _v;
|
||||
m_use_mmap_explicit = true;
|
||||
}
|
||||
|
||||
std::string
|
||||
atomic_ring_buffer::as_string() const
|
||||
{
|
||||
std::ostringstream ss{};
|
||||
ss << std::boolalpha << "is_initialized: " << is_initialized() << ", capacity: " << capacity()
|
||||
<< ", count: " << count() << ", free: " << free() << ", is_empty: " << is_empty()
|
||||
<< ", is_full: " << is_full() << ", pointer: " << m_ptr << ", read count: " << m_read_count
|
||||
<< ", write count: " << m_write_count;
|
||||
return ss.str();
|
||||
}
|
||||
//
|
||||
|
||||
void*
|
||||
atomic_ring_buffer::request(size_t _length)
|
||||
{
|
||||
if(m_ptr == nullptr || m_size == 0) return nullptr;
|
||||
|
||||
if(is_full()) return retrieve(_length);
|
||||
|
||||
// if write count is at the tail of buffer, bump to the end of buffer
|
||||
size_t _write_count = 0;
|
||||
size_t _offset = 0;
|
||||
do
|
||||
{
|
||||
// Make sure we don't put in more than there's room for, by writing no
|
||||
// more than there is free.
|
||||
if(_length > free()) return nullptr;
|
||||
|
||||
_offset = 0;
|
||||
_write_count = m_write_count.load();
|
||||
auto _modulo = m_size - (_write_count % m_size);
|
||||
if(_modulo < _length) _offset = _modulo;
|
||||
} while(!m_write_count.compare_exchange_strong(
|
||||
_write_count, _write_count + _length + _offset, std::memory_order_seq_cst));
|
||||
|
||||
// pointer in buffer
|
||||
void* _out = write_ptr(_write_count);
|
||||
|
||||
return _out;
|
||||
}
|
||||
//
|
||||
|
||||
void*
|
||||
atomic_ring_buffer::retrieve(size_t _length) const
|
||||
{
|
||||
if(m_ptr == nullptr || m_size == 0) return nullptr;
|
||||
|
||||
// Make sure we don't put in more than there's room for, by writing no
|
||||
// more than there is free.
|
||||
|
||||
// if read count is at the tail of buffer, bump to the end of buffer
|
||||
size_t _read_count = 0;
|
||||
size_t _offset = 0;
|
||||
do
|
||||
{
|
||||
if(_length > count()) return nullptr;
|
||||
_offset = 0;
|
||||
_read_count = m_read_count.load();
|
||||
auto _modulo = m_size - (_read_count % m_size);
|
||||
if(_modulo < _length) _offset = _modulo;
|
||||
} while(!m_read_count.compare_exchange_strong(
|
||||
_read_count, _read_count + _length + _offset, std::memory_order_seq_cst));
|
||||
|
||||
// pointer in buffer
|
||||
void* _out = read_ptr(_read_count);
|
||||
|
||||
return _out;
|
||||
}
|
||||
//
|
||||
|
||||
void
|
||||
atomic_ring_buffer::reset()
|
||||
{
|
||||
m_init = false;
|
||||
m_size = 0;
|
||||
m_ptr = nullptr;
|
||||
m_read_count.store(0);
|
||||
m_write_count.store(0);
|
||||
}
|
||||
//
|
||||
|
||||
void
|
||||
atomic_ring_buffer::save(std::fstream& _fs)
|
||||
{
|
||||
auto _read_count = m_read_count.load();
|
||||
auto _write_count = m_write_count.load();
|
||||
_fs.write(reinterpret_cast<char*>(&m_use_mmap), sizeof(m_use_mmap));
|
||||
_fs.write(reinterpret_cast<char*>(&m_use_mmap_explicit), sizeof(m_use_mmap_explicit));
|
||||
_fs.write(reinterpret_cast<char*>(&m_size), sizeof(m_size));
|
||||
_fs.write(reinterpret_cast<char*>(&_read_count), sizeof(_read_count));
|
||||
_fs.write(reinterpret_cast<char*>(&_write_count), sizeof(_write_count));
|
||||
_fs.write(reinterpret_cast<char*>(m_ptr), m_size * sizeof(char));
|
||||
}
|
||||
//
|
||||
|
||||
void
|
||||
atomic_ring_buffer::load(std::fstream& _fs)
|
||||
{
|
||||
destroy();
|
||||
size_t _read_count = 0;
|
||||
size_t _write_count = 0;
|
||||
|
||||
_fs.read(reinterpret_cast<char*>(&m_use_mmap), sizeof(m_use_mmap));
|
||||
_fs.read(reinterpret_cast<char*>(&m_use_mmap_explicit), sizeof(m_use_mmap_explicit));
|
||||
_fs.read(reinterpret_cast<char*>(&m_size), sizeof(m_size));
|
||||
|
||||
init(m_size);
|
||||
if(!m_ptr) m_ptr = malloc(m_size);
|
||||
|
||||
_fs.read(reinterpret_cast<char*>(&_read_count), sizeof(_read_count));
|
||||
_fs.read(reinterpret_cast<char*>(&_write_count), sizeof(_write_count));
|
||||
_fs.read(reinterpret_cast<char*>(m_ptr), m_size * sizeof(char));
|
||||
|
||||
m_read_count.store(_read_count);
|
||||
m_write_count.store(_write_count);
|
||||
}
|
||||
} // namespace base
|
||||
} // namespace container
|
||||
} // namespace common
|
||||
} // namespace rocprofiler
|
||||
@@ -1,424 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 ROCm Developer Tools
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/environment.hpp"
|
||||
#include "lib/common/units.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
namespace container
|
||||
{
|
||||
template <typename Tp>
|
||||
struct atomic_ring_buffer;
|
||||
//
|
||||
namespace base
|
||||
{
|
||||
/// \struct tim::base::atomic_ring_buffer
|
||||
/// \brief Ring buffer implementation, with support for mmap as backend (Linux only).
|
||||
struct atomic_ring_buffer
|
||||
{
|
||||
template <typename Tp>
|
||||
friend struct container::atomic_ring_buffer;
|
||||
|
||||
atomic_ring_buffer() = default;
|
||||
explicit atomic_ring_buffer(bool _use_mmap) { set_use_mmap(_use_mmap); }
|
||||
explicit atomic_ring_buffer(size_t _size) { init(_size); }
|
||||
atomic_ring_buffer(size_t _size, bool _use_mmap);
|
||||
|
||||
~atomic_ring_buffer();
|
||||
|
||||
atomic_ring_buffer(const atomic_ring_buffer&);
|
||||
atomic_ring_buffer& operator=(const atomic_ring_buffer&);
|
||||
|
||||
atomic_ring_buffer(atomic_ring_buffer&&) noexcept;
|
||||
atomic_ring_buffer& operator=(atomic_ring_buffer&&) noexcept;
|
||||
|
||||
/// Returns whether the buffer has been allocated
|
||||
bool is_initialized() const { return m_init; }
|
||||
|
||||
/// Get the total number of bytes supported
|
||||
size_t capacity() const { return m_size; }
|
||||
|
||||
/// Creates new ring buffer.
|
||||
void init(size_t size);
|
||||
|
||||
/// Destroy ring buffer.
|
||||
void destroy();
|
||||
|
||||
/// Request a pointer for writing at least \param n bytes.
|
||||
void* request(size_t n);
|
||||
|
||||
/// Retrieve a pointer for reading at least \param n bytes.
|
||||
void* retrieve(size_t n) const;
|
||||
|
||||
/// Write class-type data to buffer (uses placement new).
|
||||
template <typename Tp>
|
||||
std::pair<size_t, Tp*> write(Tp* in, std::enable_if_t<std::is_class<Tp>::value, int> = 0);
|
||||
|
||||
/// Write non-class-type data to buffer (uses memcpy).
|
||||
template <typename Tp>
|
||||
std::pair<size_t, Tp*> write(Tp* in, std::enable_if_t<!std::is_class<Tp>::value, int> = 0);
|
||||
|
||||
/// Request a pointer to an allocation. This is similar to a "write" except the
|
||||
/// memory is uninitialized. Typically used by allocators. If Tp is a class type,
|
||||
/// be sure to use a placement new instead of a memcpy.
|
||||
template <typename Tp>
|
||||
Tp* request();
|
||||
|
||||
/// Read class-type data from buffer (uses placement new).
|
||||
template <typename Tp>
|
||||
std::pair<size_t, Tp*> read(Tp* _dest,
|
||||
std::enable_if_t<std::is_class<Tp>::value, int> = 0) const;
|
||||
|
||||
/// Read non-class-type data from buffer (uses memcpy).
|
||||
template <typename Tp>
|
||||
std::pair<size_t, Tp*> read(Tp* _dest,
|
||||
std::enable_if_t<!std::is_class<Tp>::value, int> = 0) const;
|
||||
|
||||
/// Retrieve a pointer to the head allocation (read).
|
||||
template <typename Tp>
|
||||
Tp* retrieve() const;
|
||||
|
||||
/// Returns number of bytes currently held by the buffer.
|
||||
size_t count() const { return (m_write_count - m_read_count); }
|
||||
|
||||
/// Returns how many bytes are availiable in the buffer.
|
||||
size_t free() const { return (m_size - count()); }
|
||||
|
||||
/// Returns if the buffer is empty.
|
||||
bool is_empty() const { return (count() == 0); }
|
||||
|
||||
/// Returns if the buffer is full.
|
||||
bool is_full() const { return (count() == m_size); }
|
||||
|
||||
/// explicitly configure to use mmap if avail
|
||||
void set_use_mmap(bool);
|
||||
|
||||
/// query whether using mmap
|
||||
bool get_use_mmap() const { return m_use_mmap; }
|
||||
|
||||
std::string as_string() const;
|
||||
|
||||
void save(std::fstream& _fs);
|
||||
void load(std::fstream& _fs);
|
||||
|
||||
private:
|
||||
/// Returns the current write pointer.
|
||||
void* write_ptr(size_t _write_count) const
|
||||
{
|
||||
return static_cast<char*>(m_ptr) + (_write_count % m_size);
|
||||
}
|
||||
|
||||
/// Returns the current read pointer.
|
||||
void* read_ptr(size_t _read_count) const
|
||||
{
|
||||
return static_cast<char*>(m_ptr) + (_read_count % m_size);
|
||||
}
|
||||
|
||||
void reset();
|
||||
|
||||
private:
|
||||
bool m_init = false;
|
||||
bool m_use_mmap = true;
|
||||
bool m_use_mmap_explicit = false;
|
||||
void* m_ptr = nullptr;
|
||||
size_t m_size = 0;
|
||||
mutable std::atomic<size_t> m_read_count = 0;
|
||||
std::atomic<size_t> m_write_count = 0;
|
||||
};
|
||||
//
|
||||
template <typename Tp>
|
||||
std::pair<size_t, Tp*>
|
||||
atomic_ring_buffer::write(Tp* in, std::enable_if_t<std::is_class<Tp>::value, int>)
|
||||
{
|
||||
if(in == nullptr || m_ptr == nullptr) return {0, nullptr};
|
||||
|
||||
auto _length = sizeof(Tp);
|
||||
void* _out_p = request(_length);
|
||||
|
||||
if(_out_p == nullptr) return {0, nullptr};
|
||||
|
||||
// Copy in.
|
||||
new(_out_p) Tp{std::move(*in)};
|
||||
|
||||
// pointer in buffer
|
||||
Tp* _out = reinterpret_cast<Tp*>(_out_p);
|
||||
|
||||
return {_length, _out};
|
||||
}
|
||||
//
|
||||
template <typename Tp>
|
||||
std::pair<size_t, Tp*>
|
||||
atomic_ring_buffer::write(Tp* in, std::enable_if_t<!std::is_class<Tp>::value, int>)
|
||||
{
|
||||
if(in == nullptr || m_ptr == nullptr) return {0, nullptr};
|
||||
|
||||
auto _length = sizeof(Tp);
|
||||
void* _out_p = request(_length);
|
||||
|
||||
if(_out_p == nullptr) return {0, nullptr};
|
||||
|
||||
// Copy in.
|
||||
memcpy(_out_p, in, _length);
|
||||
|
||||
// pointer in buffer
|
||||
Tp* _out = reinterpret_cast<Tp*>(_out_p);
|
||||
|
||||
return {_length, _out};
|
||||
}
|
||||
//
|
||||
template <typename Tp>
|
||||
Tp*
|
||||
atomic_ring_buffer::request()
|
||||
{
|
||||
if(m_ptr == nullptr) return nullptr;
|
||||
|
||||
return request(sizeof(Tp));
|
||||
}
|
||||
//
|
||||
template <typename Tp>
|
||||
std::pair<size_t, Tp*>
|
||||
atomic_ring_buffer::read(Tp* _dest, std::enable_if_t<std::is_class<Tp>::value, int>) const
|
||||
{
|
||||
if(is_empty() || _dest == nullptr) return {0, nullptr};
|
||||
|
||||
auto _length = sizeof(Tp);
|
||||
void* _out_p = retrieve(_length);
|
||||
|
||||
if(_out_p == nullptr) return {0, nullptr};
|
||||
|
||||
// pointer in buffer
|
||||
Tp* in = reinterpret_cast<Tp*>(_out_p);
|
||||
|
||||
// Copy out for BYTE, nothing magic here.
|
||||
*_dest = *in;
|
||||
|
||||
return {_length, in};
|
||||
}
|
||||
//
|
||||
template <typename Tp>
|
||||
std::pair<size_t, Tp*>
|
||||
atomic_ring_buffer::read(Tp* _dest, std::enable_if_t<!std::is_class<Tp>::value, int>) const
|
||||
{
|
||||
if(is_empty() || _dest == nullptr) return {0, nullptr};
|
||||
|
||||
auto _length = sizeof(Tp);
|
||||
void* _out_p = retrieve(_length);
|
||||
|
||||
if(_out_p == nullptr) return {0, nullptr};
|
||||
|
||||
// pointer in buffer
|
||||
Tp* in = reinterpret_cast<Tp*>(_out_p);
|
||||
|
||||
using Up = typename std::remove_const<Tp>::type;
|
||||
|
||||
// Copy out for BYTE, nothing magic here.
|
||||
Up* _out = const_cast<Up*>(_dest);
|
||||
memcpy(_out, in, _length);
|
||||
|
||||
return {_length, in};
|
||||
}
|
||||
//
|
||||
template <typename Tp>
|
||||
Tp*
|
||||
atomic_ring_buffer::retrieve() const
|
||||
{
|
||||
if(m_ptr == nullptr) return nullptr;
|
||||
|
||||
return retrieve(sizeof(Tp));
|
||||
}
|
||||
//
|
||||
} // namespace base
|
||||
//
|
||||
/// \struct tim::data_storage::atomic_ring_buffer
|
||||
/// \brief Ring buffer wrapper around \ref tim::base::atomic_ring_buffer for data of type
|
||||
/// Tp. If the data object size is larger than the page size (typically 4KB), behavior is
|
||||
/// undefined. During initialization, one requests a minimum number of objects and the
|
||||
/// buffer will support that number of object + the remainder of the page, e.g. if a page
|
||||
/// is 1000 bytes, the object is 1 byte, and the buffer is requested to support 1500
|
||||
/// objects, then an allocation supporting 2000 objects (i.e. 2 pages) will be created.
|
||||
template <typename Tp>
|
||||
struct atomic_ring_buffer : private base::atomic_ring_buffer
|
||||
{
|
||||
using base_type = base::atomic_ring_buffer;
|
||||
|
||||
static size_t get_items_per_page();
|
||||
|
||||
atomic_ring_buffer() = default;
|
||||
~atomic_ring_buffer() = default;
|
||||
|
||||
explicit atomic_ring_buffer(bool _use_mmap)
|
||||
: base_type{_use_mmap}
|
||||
{}
|
||||
|
||||
explicit atomic_ring_buffer(size_t _size)
|
||||
: base_type{_size * sizeof(Tp)}
|
||||
{}
|
||||
|
||||
atomic_ring_buffer(size_t _size, bool _use_mmap)
|
||||
: base_type{_size * sizeof(Tp), _use_mmap}
|
||||
{}
|
||||
|
||||
atomic_ring_buffer(const atomic_ring_buffer&);
|
||||
atomic_ring_buffer(atomic_ring_buffer&&) noexcept = default;
|
||||
|
||||
atomic_ring_buffer& operator=(const atomic_ring_buffer&);
|
||||
atomic_ring_buffer& operator=(atomic_ring_buffer&&) noexcept = default;
|
||||
|
||||
/// Returns whether the buffer has been allocated
|
||||
bool is_initialized() const { return base_type::is_initialized(); }
|
||||
|
||||
/// Get the total number of Tp instances supported
|
||||
size_t capacity() const { return (base_type::capacity()) / sizeof(Tp); }
|
||||
|
||||
/// Creates new ring buffer.
|
||||
void init(size_t _size) { base_type::init(_size * sizeof(Tp)); }
|
||||
|
||||
/// Destroy ring buffer.
|
||||
void destroy() { base_type::destroy(); }
|
||||
|
||||
/// Write data to buffer.
|
||||
size_t data_size() const { return sizeof(Tp); }
|
||||
|
||||
/// Write data to buffer. Return pointer to location of write
|
||||
Tp* write(Tp* in) { return base_type::write<Tp>(in).second; }
|
||||
|
||||
/// Read data from buffer. Return pointer to location of read
|
||||
Tp* read(Tp* _dest) const { return base_type::read<Tp>(_dest).second; }
|
||||
|
||||
/// Get an uninitialized address at tail of buffer.
|
||||
Tp* request() { return base_type::request<Tp>(); }
|
||||
|
||||
/// Read data from head of buffer.
|
||||
Tp* retrieve() { return base_type::retrieve<Tp>(); }
|
||||
|
||||
/// Returns number of Tp instances currently held by the buffer.
|
||||
size_t count() const { return (base_type::count()) / sizeof(Tp); }
|
||||
|
||||
/// Returns how many Tp instances are availiable in the buffer.
|
||||
size_t free() const { return (base_type::free()) / sizeof(Tp); }
|
||||
|
||||
/// Returns if the buffer is empty.
|
||||
bool is_empty() const { return base_type::is_empty(); }
|
||||
|
||||
/// Returns if the buffer is full.
|
||||
bool is_full() const { return (base_type::free() < sizeof(Tp)); }
|
||||
|
||||
template <typename... Args>
|
||||
auto emplace(Args&&... args)
|
||||
{
|
||||
Tp _obj{std::forward<Args>(args)...};
|
||||
return write(&_obj);
|
||||
}
|
||||
|
||||
using base_type::get_use_mmap;
|
||||
using base_type::load;
|
||||
using base_type::save;
|
||||
using base_type::set_use_mmap;
|
||||
|
||||
std::string as_string() const
|
||||
{
|
||||
std::ostringstream ss{};
|
||||
size_t _w = std::log10(base_type::capacity()) + 1;
|
||||
ss << std::boolalpha << std::right << "data size: " << std::setw(_w) << data_size()
|
||||
<< " B, is_initialized: " << std::setw(5) << is_initialized()
|
||||
<< ", is_empty: " << std::setw(5) << is_empty() << ", is_full: " << std::setw(5)
|
||||
<< is_full() << ", capacity: " << std::setw(_w) << capacity()
|
||||
<< ", count: " << std::setw(_w) << count() << ", free: " << std::setw(_w) << free()
|
||||
<< ", raw capacity: " << std::setw(_w) << base_type::capacity()
|
||||
<< " B, raw count: " << std::setw(_w) << base_type::count()
|
||||
<< " B, raw free: " << std::setw(_w) << base_type::free()
|
||||
<< " B, pointer: " << std::setw(15) << base_type::m_ptr
|
||||
<< ", raw read count: " << std::setw(_w) << base_type::m_read_count
|
||||
<< ", raw write count: " << std::setw(_w) << base_type::m_write_count;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const atomic_ring_buffer& obj)
|
||||
{
|
||||
return os << obj.as_string();
|
||||
}
|
||||
};
|
||||
//
|
||||
template <typename Tp>
|
||||
size_t
|
||||
atomic_ring_buffer<Tp>::get_items_per_page()
|
||||
{
|
||||
return std::max<size_t>(units::get_page_size() / sizeof(Tp), 1);
|
||||
}
|
||||
//
|
||||
template <typename Tp>
|
||||
atomic_ring_buffer<Tp>::atomic_ring_buffer(const atomic_ring_buffer<Tp>& rhs)
|
||||
: base_type{rhs}
|
||||
{
|
||||
size_t _n = rhs.count();
|
||||
char* _end = static_cast<char*>(rhs.m_ptr) + rhs.m_size;
|
||||
for(size_t i = 0; i < _n; ++i)
|
||||
{
|
||||
char* _addr = static_cast<char*>(rhs.read_ptr(m_read_count)) + (i * sizeof(Tp));
|
||||
if((_addr + sizeof(Tp)) > _end) _addr = static_cast<char*>(rhs.m_ptr);
|
||||
Tp* _in = static_cast<Tp*>(static_cast<void*>(_addr));
|
||||
write(_in);
|
||||
}
|
||||
}
|
||||
//
|
||||
template <typename Tp>
|
||||
atomic_ring_buffer<Tp>&
|
||||
atomic_ring_buffer<Tp>::operator=(const atomic_ring_buffer<Tp>& rhs)
|
||||
{
|
||||
if(this == &rhs) return *this;
|
||||
|
||||
base_type::operator=(rhs);
|
||||
size_t _n = rhs.count();
|
||||
char* _end = static_cast<char*>(rhs.m_ptr) + rhs.m_size;
|
||||
for(size_t i = 0; i < _n; ++i)
|
||||
{
|
||||
char* _addr = static_cast<char*>(rhs.read_ptr(m_read_count)) + (i * sizeof(Tp));
|
||||
if((_addr + sizeof(Tp)) > _end) _addr = static_cast<char*>(rhs.m_ptr);
|
||||
Tp* _in = static_cast<Tp*>(static_cast<void*>(_addr));
|
||||
write(_in);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
//
|
||||
} // namespace container
|
||||
} // namespace common
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,165 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 ROCm Developer Tools
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "lib/common/container/record_header_buffer.hpp"
|
||||
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <new>
|
||||
|
||||
namespace rocprofiler::common::container
|
||||
{
|
||||
namespace
|
||||
{
|
||||
// record_header_buffer RAII locker
|
||||
struct rhb_raii_lock
|
||||
{
|
||||
explicit rhb_raii_lock(record_header_buffer& _rhb)
|
||||
: m_rhb{_rhb}
|
||||
{
|
||||
m_rhb.lock();
|
||||
}
|
||||
|
||||
~rhb_raii_lock() { m_rhb.unlock(); }
|
||||
|
||||
record_header_buffer& m_rhb;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
record_header_buffer::record_header_buffer(size_t num_bytes) { allocate(num_bytes); }
|
||||
|
||||
record_header_buffer::record_header_buffer(record_header_buffer&& _rhs) noexcept
|
||||
{
|
||||
this->operator=(std::move(_rhs));
|
||||
}
|
||||
|
||||
record_header_buffer&
|
||||
record_header_buffer::operator=(record_header_buffer&& _rhs) noexcept
|
||||
{
|
||||
if(this != &_rhs)
|
||||
{
|
||||
auto _lk = rhb_raii_lock{_rhs};
|
||||
m_index = _rhs.m_index.load(std::memory_order_relaxed);
|
||||
m_buffer = std::move(_rhs.m_buffer);
|
||||
m_headers = std::move(_rhs.m_headers);
|
||||
_rhs.reset();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool
|
||||
record_header_buffer::allocate(size_t num_bytes)
|
||||
{
|
||||
if(m_buffer.is_initialized()) return false;
|
||||
|
||||
auto _lk = rhb_raii_lock{*this};
|
||||
m_buffer.init(num_bytes);
|
||||
m_headers.resize(m_buffer.capacity(), rocprofiler_record_header_t{0, nullptr});
|
||||
return true;
|
||||
}
|
||||
|
||||
record_header_buffer::record_ptr_vec_t
|
||||
record_header_buffer::get_record_headers(size_t _n)
|
||||
{
|
||||
auto _lk = rhb_raii_lock{*this};
|
||||
|
||||
auto _sz = m_index.load(std::memory_order_relaxed);
|
||||
if(_n > _sz) _n = _sz;
|
||||
auto _ret = record_ptr_vec_t{};
|
||||
_ret.reserve(_n);
|
||||
for(size_t i = 0; i < _n; ++i)
|
||||
{
|
||||
if(auto& itr = m_headers.at(i); itr.kind > 0 && itr.payload != nullptr)
|
||||
_ret.emplace_back(&itr);
|
||||
}
|
||||
return _ret;
|
||||
}
|
||||
|
||||
size_t
|
||||
record_header_buffer::clear()
|
||||
{
|
||||
auto _lk = rhb_raii_lock{*this};
|
||||
|
||||
auto _n = m_index.load(std::memory_order_acquire);
|
||||
{
|
||||
auto _sz = m_buffer.capacity();
|
||||
if(!m_buffer.clear(std::nothrow_t{})) return 0;
|
||||
std::for_each(m_headers.begin(), m_headers.end(), [](auto& itr) {
|
||||
itr = rocprofiler_record_header_t{0, nullptr};
|
||||
});
|
||||
m_headers.resize(_sz, rocprofiler_record_header_t{0, nullptr});
|
||||
m_index.store(0, std::memory_order_release);
|
||||
}
|
||||
|
||||
return _n;
|
||||
}
|
||||
|
||||
size_t
|
||||
record_header_buffer::reset()
|
||||
{
|
||||
auto _lk = rhb_raii_lock{*this};
|
||||
|
||||
auto _n = m_index.load(std::memory_order_acquire);
|
||||
m_buffer.destroy();
|
||||
m_buffer.clear();
|
||||
m_headers.clear();
|
||||
m_index.store(0, std::memory_order_release);
|
||||
|
||||
return _n;
|
||||
}
|
||||
|
||||
void
|
||||
record_header_buffer::save(std::fstream& _fs)
|
||||
{
|
||||
auto _lk = rhb_raii_lock{*this};
|
||||
|
||||
auto _idx = m_index.load(std::memory_order_acquire);
|
||||
auto _sz = m_headers.size();
|
||||
_fs.write(reinterpret_cast<char*>(&_idx), sizeof(_idx));
|
||||
_fs.write(reinterpret_cast<char*>(&_sz), sizeof(_sz));
|
||||
_fs.write(reinterpret_cast<char*>(m_headers.data()), sizeof(rocprofiler_record_header_t) * _sz);
|
||||
m_buffer.save(_fs);
|
||||
}
|
||||
|
||||
void
|
||||
record_header_buffer::load(std::fstream& _fs)
|
||||
{
|
||||
auto _lk = rhb_raii_lock{*this};
|
||||
|
||||
{
|
||||
auto _idx = size_t{0};
|
||||
_fs.read(reinterpret_cast<char*>(&_idx), sizeof(_idx));
|
||||
m_index.store(_idx, std::memory_order_release);
|
||||
}
|
||||
|
||||
{
|
||||
auto _sz = size_t{0};
|
||||
_fs.read(reinterpret_cast<char*>(&_sz), sizeof(_sz));
|
||||
m_headers.resize(_sz);
|
||||
_fs.read(reinterpret_cast<char*>(m_headers.data()),
|
||||
sizeof(rocprofiler_record_header_t) * _sz);
|
||||
}
|
||||
|
||||
m_buffer.load(_fs);
|
||||
}
|
||||
} // namespace rocprofiler::common::container
|
||||
@@ -0,0 +1,220 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 ROCm Developer Tools
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
|
||||
#include "lib/common/container/ring_buffer.hpp"
|
||||
|
||||
#include <atomic>
|
||||
#include <limits>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
namespace container
|
||||
{
|
||||
/// @brief this struct stores all the record information in an ring_buffer.
|
||||
/// It is thread-safe to have multiple threads emplace records into the buffer.
|
||||
struct record_header_buffer
|
||||
{
|
||||
using base_buffer_t = base::ring_buffer;
|
||||
using record_vec_t = std::vector<rocprofiler_record_header_t>;
|
||||
using record_ptr_vec_t = std::vector<rocprofiler_record_header_t*>;
|
||||
|
||||
record_header_buffer() = default;
|
||||
explicit record_header_buffer(size_t nbytes);
|
||||
~record_header_buffer() = default;
|
||||
|
||||
record_header_buffer(const record_header_buffer&) = delete;
|
||||
record_header_buffer(record_header_buffer&&) noexcept;
|
||||
|
||||
record_header_buffer& operator=(const record_header_buffer&) = delete;
|
||||
record_header_buffer& operator =(record_header_buffer&&) noexcept;
|
||||
|
||||
// allocate the buffer if it is not already allocated. Will return false if buffer is already
|
||||
// allocated
|
||||
bool allocate(size_t nbytes);
|
||||
|
||||
// return whether the buffer has been allocated
|
||||
bool is_allocated() const;
|
||||
|
||||
/// place an object in the buffer using its typeid hash code
|
||||
template <typename Tp>
|
||||
bool emplace(Tp&);
|
||||
|
||||
/// place an object in the buffer using the specified numerical identifier
|
||||
template <typename Tp>
|
||||
bool emplace(uint64_t, Tp&);
|
||||
|
||||
/// this function will return a vector of pointers to the record headers
|
||||
/// at the time of invocation.
|
||||
record_ptr_vec_t get_record_headers(size_t _n = std::numeric_limits<size_t>::max());
|
||||
|
||||
/// prevent emplace
|
||||
void lock();
|
||||
|
||||
/// try to re-enable emplace
|
||||
void unlock();
|
||||
|
||||
/// check if emplace is available
|
||||
bool is_locked() const;
|
||||
|
||||
/// restores to original empty state
|
||||
size_t clear();
|
||||
|
||||
/// binary save to file
|
||||
void save(std::fstream& _fs);
|
||||
|
||||
/// binary load from file
|
||||
void load(std::fstream& _fs);
|
||||
|
||||
/// full deallocation
|
||||
size_t reset();
|
||||
|
||||
/// the number of header entries
|
||||
auto size() const;
|
||||
|
||||
/// the number of bytes in the buffer
|
||||
auto capacity() const;
|
||||
|
||||
/// the number of used bytes in the buffer
|
||||
auto count() const;
|
||||
|
||||
/// the number of free bytes in the buffer
|
||||
auto free() const;
|
||||
|
||||
/// true if no bytes are used in the buffer
|
||||
auto is_empty() const;
|
||||
|
||||
/// true if all the bytes are used in the buffer or there is no buffer allocation
|
||||
auto is_full() const;
|
||||
|
||||
private:
|
||||
std::atomic<int32_t> m_locked = {0};
|
||||
std::atomic<size_t> m_index = {};
|
||||
base_buffer_t m_buffer = {};
|
||||
record_vec_t m_headers = {};
|
||||
};
|
||||
|
||||
inline bool
|
||||
record_header_buffer::is_locked() const
|
||||
{
|
||||
return m_locked.load(std::memory_order_acquire) > 0;
|
||||
}
|
||||
|
||||
inline void
|
||||
record_header_buffer::lock()
|
||||
{
|
||||
m_locked.fetch_add(1, std::memory_order_release);
|
||||
}
|
||||
|
||||
inline void
|
||||
record_header_buffer::unlock()
|
||||
{
|
||||
m_locked.fetch_add(-1, std::memory_order_release);
|
||||
}
|
||||
|
||||
inline bool
|
||||
record_header_buffer::is_allocated() const
|
||||
{
|
||||
return m_buffer.is_initialized();
|
||||
}
|
||||
|
||||
inline auto
|
||||
record_header_buffer::size() const
|
||||
{
|
||||
return m_index.load(std::memory_order_acquire);
|
||||
}
|
||||
|
||||
inline auto
|
||||
record_header_buffer::capacity() const
|
||||
{
|
||||
return std::min<size_t>(m_headers.size(), m_buffer.capacity());
|
||||
}
|
||||
|
||||
inline auto
|
||||
record_header_buffer::count() const
|
||||
{
|
||||
return m_buffer.count();
|
||||
}
|
||||
|
||||
inline auto
|
||||
record_header_buffer::free() const
|
||||
{
|
||||
return m_buffer.free();
|
||||
}
|
||||
|
||||
inline auto
|
||||
record_header_buffer::is_empty() const
|
||||
{
|
||||
return m_buffer.is_empty() || m_headers.empty();
|
||||
}
|
||||
|
||||
inline auto
|
||||
record_header_buffer::is_full() const
|
||||
{
|
||||
return m_buffer.is_full() || size() == m_headers.size();
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
bool
|
||||
record_header_buffer::emplace(uint64_t _kind, Tp& _v)
|
||||
{
|
||||
if(is_locked() || m_headers.empty()) return false;
|
||||
|
||||
// request N bytes in the buffer (where N=sizeof(Tp)) and if
|
||||
// available, copy _v into the buffer region
|
||||
auto _create_record = [](auto& _buf, auto& _data) {
|
||||
constexpr auto buffer_sz = sizeof(Tp);
|
||||
void* _ptr = _buf.request(buffer_sz, false);
|
||||
if(_ptr) new(_ptr) Tp{_data};
|
||||
return _ptr;
|
||||
};
|
||||
|
||||
auto _addr = _create_record(m_buffer, _v);
|
||||
if(_addr)
|
||||
{
|
||||
// if there is space in the buffer, atomically get an index
|
||||
// for where the header record should be placed.
|
||||
// NOTE: m_headers was resized to be large enough to accomodate
|
||||
// sizeof(Tp) == 1 for every entry in buffer
|
||||
auto _idx = m_index++;
|
||||
m_headers.at(_idx) = rocprofiler_record_header_t{_kind, _addr};
|
||||
}
|
||||
return (_addr != nullptr);
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
bool
|
||||
record_header_buffer::emplace(Tp& _v)
|
||||
{
|
||||
// if enumerations are not used, use the typeid hash code
|
||||
return emplace(typeid(Tp).hash_code(), _v);
|
||||
}
|
||||
} // namespace container
|
||||
} // namespace common
|
||||
} // namespace rocprofiler
|
||||
@@ -21,12 +21,18 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#include "ring_buffer.hpp"
|
||||
#include "lib/common/environment.hpp"
|
||||
#include "lib/common/units.hpp"
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <atomic>
|
||||
#include <cerrno>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <new>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
@@ -36,56 +42,28 @@ namespace container
|
||||
{
|
||||
namespace base
|
||||
{
|
||||
ring_buffer::ring_buffer(size_t _size, bool _use_mmap)
|
||||
{
|
||||
set_use_mmap(_use_mmap);
|
||||
init(_size);
|
||||
}
|
||||
|
||||
ring_buffer::~ring_buffer() { destroy(); }
|
||||
|
||||
ring_buffer::ring_buffer(const ring_buffer& rhs)
|
||||
: m_use_mmap{rhs.m_use_mmap}
|
||||
, m_use_mmap_explicit{rhs.m_use_mmap_explicit}
|
||||
{
|
||||
init(rhs.m_size);
|
||||
}
|
||||
|
||||
ring_buffer::ring_buffer(ring_buffer&& rhs) noexcept
|
||||
: m_init{rhs.m_init}
|
||||
, m_use_mmap{rhs.m_use_mmap}
|
||||
, m_use_mmap_explicit{rhs.m_use_mmap_explicit}
|
||||
, m_ptr{rhs.m_ptr}
|
||||
, m_size{rhs.m_size}
|
||||
, m_read_count{rhs.m_read_count}
|
||||
, m_write_count{rhs.m_write_count}
|
||||
, m_read_count{rhs.m_read_count.load()}
|
||||
, m_write_count{rhs.m_write_count.load()}
|
||||
{
|
||||
rhs.reset();
|
||||
}
|
||||
|
||||
ring_buffer&
|
||||
ring_buffer::operator=(const ring_buffer& rhs)
|
||||
{
|
||||
if(this == &rhs) return *this;
|
||||
destroy();
|
||||
m_use_mmap = rhs.m_use_mmap;
|
||||
m_use_mmap_explicit = rhs.m_use_mmap_explicit;
|
||||
init(rhs.m_size);
|
||||
return *this;
|
||||
}
|
||||
|
||||
ring_buffer&
|
||||
ring_buffer::operator=(ring_buffer&& rhs) noexcept
|
||||
{
|
||||
if(this == &rhs) return *this;
|
||||
destroy();
|
||||
m_init = rhs.m_init;
|
||||
m_use_mmap = rhs.m_use_mmap;
|
||||
m_use_mmap_explicit = rhs.m_use_mmap_explicit;
|
||||
m_ptr = rhs.m_ptr;
|
||||
m_size = rhs.m_size;
|
||||
m_read_count = rhs.m_read_count;
|
||||
m_write_count = rhs.m_write_count;
|
||||
m_init = rhs.m_init;
|
||||
m_ptr = rhs.m_ptr;
|
||||
m_size = rhs.m_size;
|
||||
m_read_count = rhs.m_read_count.load();
|
||||
m_write_count = rhs.m_write_count.load();
|
||||
rhs.reset();
|
||||
return *this;
|
||||
}
|
||||
@@ -94,7 +72,8 @@ void
|
||||
ring_buffer::init(size_t _size)
|
||||
{
|
||||
if(m_init)
|
||||
throw std::runtime_error("tim::base::ring_buffer::init(size_t) :: already initialized");
|
||||
throw std::runtime_error("rocprofiler::common::container::base::ring_buffer::init(size_t) "
|
||||
":: already initialized");
|
||||
|
||||
m_init = true;
|
||||
|
||||
@@ -115,14 +94,6 @@ ring_buffer::init(size_t _size)
|
||||
m_read_count = 0;
|
||||
m_write_count = 0;
|
||||
|
||||
if(!m_use_mmap_explicit) m_use_mmap = get_env("ROCPROFILER_USE_MMAP", m_use_mmap);
|
||||
|
||||
if(!m_use_mmap)
|
||||
{
|
||||
m_ptr = malloc(m_size * sizeof(char));
|
||||
return;
|
||||
}
|
||||
|
||||
// Map twice the buffer size.
|
||||
if((m_ptr =
|
||||
mmap(nullptr, m_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)) ==
|
||||
@@ -130,7 +101,6 @@ ring_buffer::init(size_t _size)
|
||||
{
|
||||
destroy();
|
||||
auto _err = errno;
|
||||
// TIMEMORY_PRINTF_FATAL(stderr, "Error using mmap: %s\n", strerror(_err));
|
||||
throw std::runtime_error(strerror(_err));
|
||||
}
|
||||
}
|
||||
@@ -140,16 +110,9 @@ ring_buffer::destroy()
|
||||
{
|
||||
if(m_ptr && m_init)
|
||||
{
|
||||
if(!m_use_mmap)
|
||||
{
|
||||
::free(m_ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Unmap the mapped virtual memmory.
|
||||
auto ret = munmap(m_ptr, m_size);
|
||||
if(ret != 0) perror("munmap");
|
||||
}
|
||||
// Unmap the mapped virtual memmory.
|
||||
auto ret = munmap(m_ptr, m_size);
|
||||
if(ret != 0) perror("ring_buffer: munmap failed");
|
||||
}
|
||||
m_init = false;
|
||||
m_size = 0;
|
||||
@@ -158,21 +121,6 @@ ring_buffer::destroy()
|
||||
m_ptr = nullptr;
|
||||
}
|
||||
|
||||
void
|
||||
ring_buffer::set_use_mmap(bool _v)
|
||||
{
|
||||
if(!m_init)
|
||||
{
|
||||
m_use_mmap = _v;
|
||||
m_use_mmap_explicit = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error("tim::base::ring_buffer::set_use_mmap(bool) cannot be "
|
||||
"called after initialization");
|
||||
}
|
||||
}
|
||||
|
||||
std::string
|
||||
ring_buffer::as_string() const
|
||||
{
|
||||
@@ -186,81 +134,82 @@ ring_buffer::as_string() const
|
||||
//
|
||||
|
||||
void*
|
||||
ring_buffer::request(size_t _length)
|
||||
ring_buffer::request(size_t _length, bool _wrap)
|
||||
{
|
||||
if(m_ptr == nullptr) return nullptr;
|
||||
if(m_ptr == nullptr || m_size == 0) return nullptr;
|
||||
|
||||
// Make sure we don't put in more than there's room for, by writing no
|
||||
// more than there is free.
|
||||
if(_length > free())
|
||||
throw std::runtime_error("heap-buffer-overflow :: ring buffer is full. read data "
|
||||
"to avoid data corruption");
|
||||
if(is_full()) return (_wrap) ? retrieve(_length) : nullptr;
|
||||
|
||||
// if write count is at the tail of buffer, bump to the end of buffer
|
||||
auto _modulo = m_size - (m_write_count % m_size);
|
||||
if(_modulo < _length) m_write_count += _modulo;
|
||||
size_t _write_count = 0;
|
||||
size_t _offset = 0;
|
||||
do
|
||||
{
|
||||
// Make sure we don't put in more than there's room for, by writing no
|
||||
// more than there is free.
|
||||
if(_length > free()) return nullptr;
|
||||
|
||||
_offset = 0;
|
||||
_write_count = m_write_count.load(std::memory_order_acquire);
|
||||
auto _modulo = m_size - (_write_count % m_size);
|
||||
if(_modulo < _length) _offset = _modulo;
|
||||
} while(!m_write_count.compare_exchange_strong(
|
||||
_write_count, _write_count + _length + _offset, std::memory_order_seq_cst));
|
||||
|
||||
// pointer in buffer
|
||||
void* _out = write_ptr();
|
||||
|
||||
// Update write count
|
||||
m_write_count += _length;
|
||||
void* _out = write_ptr(_write_count);
|
||||
|
||||
return _out;
|
||||
}
|
||||
//
|
||||
|
||||
void*
|
||||
ring_buffer::retrieve(size_t _length)
|
||||
ring_buffer::retrieve(size_t _length) const
|
||||
{
|
||||
if(m_ptr == nullptr) return nullptr;
|
||||
if(m_ptr == nullptr || m_size == 0) return nullptr;
|
||||
|
||||
// Make sure we don't put in more than there's room for, by writing no
|
||||
// more than there is free.
|
||||
if(_length > count()) throw std::runtime_error("ring buffer is empty");
|
||||
|
||||
// if read count is at the tail of buffer, bump to the end of buffer
|
||||
auto _modulo = m_size - (m_read_count % m_size);
|
||||
if(_modulo < _length) m_read_count += _modulo;
|
||||
size_t _read_count = 0;
|
||||
size_t _offset = 0;
|
||||
do
|
||||
{
|
||||
if(_length > count()) return nullptr;
|
||||
_offset = 0;
|
||||
_read_count = m_read_count.load(std::memory_order_acquire);
|
||||
auto _modulo = m_size - (_read_count % m_size);
|
||||
if(_modulo < _length) _offset = _modulo;
|
||||
} while(!m_read_count.compare_exchange_strong(
|
||||
_read_count, _read_count + _length + _offset, std::memory_order_seq_cst));
|
||||
|
||||
// pointer in buffer
|
||||
void* _out = read_ptr();
|
||||
|
||||
// Update write count
|
||||
m_read_count += _length;
|
||||
void* _out = read_ptr(_read_count);
|
||||
|
||||
return _out;
|
||||
}
|
||||
//
|
||||
|
||||
size_t
|
||||
ring_buffer::rewind(size_t n) const
|
||||
{
|
||||
if(n > m_read_count) n = m_read_count;
|
||||
m_read_count -= n;
|
||||
return n;
|
||||
}
|
||||
//
|
||||
|
||||
void
|
||||
ring_buffer::reset()
|
||||
{
|
||||
m_init = false;
|
||||
m_ptr = nullptr;
|
||||
m_size = 0;
|
||||
m_read_count = 0;
|
||||
m_write_count = 0;
|
||||
m_init = false;
|
||||
m_size = 0;
|
||||
m_ptr = nullptr;
|
||||
m_read_count.store(0);
|
||||
m_write_count.store(0);
|
||||
}
|
||||
//
|
||||
|
||||
void
|
||||
ring_buffer::save(std::fstream& _fs)
|
||||
{
|
||||
_fs.write(reinterpret_cast<char*>(&m_use_mmap), sizeof(m_use_mmap));
|
||||
_fs.write(reinterpret_cast<char*>(&m_use_mmap_explicit), sizeof(m_use_mmap_explicit));
|
||||
auto _read_count = m_read_count.load();
|
||||
auto _write_count = m_write_count.load();
|
||||
_fs.write(reinterpret_cast<char*>(&m_size), sizeof(m_size));
|
||||
_fs.write(reinterpret_cast<char*>(&m_read_count), sizeof(m_read_count));
|
||||
_fs.write(reinterpret_cast<char*>(&m_write_count), sizeof(m_write_count));
|
||||
_fs.write(reinterpret_cast<char*>(&_read_count), sizeof(_read_count));
|
||||
_fs.write(reinterpret_cast<char*>(&_write_count), sizeof(_write_count));
|
||||
_fs.write(reinterpret_cast<char*>(m_ptr), m_size * sizeof(char));
|
||||
}
|
||||
//
|
||||
@@ -270,16 +219,49 @@ ring_buffer::load(std::fstream& _fs)
|
||||
{
|
||||
destroy();
|
||||
|
||||
_fs.read(reinterpret_cast<char*>(&m_use_mmap), sizeof(m_use_mmap));
|
||||
_fs.read(reinterpret_cast<char*>(&m_use_mmap_explicit), sizeof(m_use_mmap_explicit));
|
||||
_fs.read(reinterpret_cast<char*>(&m_size), sizeof(m_size));
|
||||
size_t _read_count = 0;
|
||||
size_t _write_count = 0;
|
||||
size_t _size = 0;
|
||||
|
||||
init(m_size);
|
||||
if(!m_ptr) m_ptr = malloc(m_size);
|
||||
_fs.read(reinterpret_cast<char*>(&_size), sizeof(_size));
|
||||
|
||||
_fs.read(reinterpret_cast<char*>(&m_read_count), sizeof(m_read_count));
|
||||
_fs.read(reinterpret_cast<char*>(&m_write_count), sizeof(m_write_count));
|
||||
init(_size);
|
||||
|
||||
if(!m_ptr) throw std::bad_alloc{};
|
||||
|
||||
_fs.read(reinterpret_cast<char*>(&_read_count), sizeof(_read_count));
|
||||
_fs.read(reinterpret_cast<char*>(&_write_count), sizeof(_write_count));
|
||||
_fs.read(reinterpret_cast<char*>(m_ptr), m_size * sizeof(char));
|
||||
|
||||
m_read_count.store(_read_count, std::memory_order_release);
|
||||
m_write_count.store(_write_count, std::memory_order_release);
|
||||
}
|
||||
|
||||
bool
|
||||
ring_buffer::can_clear() const
|
||||
{
|
||||
auto _read_count = m_read_count.load(std::memory_order_acquire);
|
||||
return (_read_count == 0);
|
||||
}
|
||||
|
||||
bool
|
||||
ring_buffer::clear()
|
||||
{
|
||||
if(!can_clear())
|
||||
throw std::runtime_error(
|
||||
"ring_buffer does not permit invoking clear() member function when the read "
|
||||
"pointer is non-zero because this introduces thread-safety issues");
|
||||
|
||||
m_write_count.store(0, std::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ring_buffer::clear(std::nothrow_t)
|
||||
{
|
||||
if(!can_clear()) return false;
|
||||
|
||||
m_write_count.store(0, std::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
} // namespace base
|
||||
} // namespace container
|
||||
|
||||
@@ -22,19 +22,19 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/environment.hpp"
|
||||
#include "lib/common/units.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <new>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
@@ -47,7 +47,7 @@ struct ring_buffer;
|
||||
//
|
||||
namespace base
|
||||
{
|
||||
/// \struct tim::base::ring_buffer
|
||||
/// \struct rocprofiler::common::container::base::ring_buffer
|
||||
/// \brief Ring buffer implementation, with support for mmap as backend (Linux only).
|
||||
struct ring_buffer
|
||||
{
|
||||
@@ -55,15 +55,10 @@ struct ring_buffer
|
||||
friend struct container::ring_buffer;
|
||||
|
||||
ring_buffer() = default;
|
||||
explicit ring_buffer(bool _use_mmap) { set_use_mmap(_use_mmap); }
|
||||
explicit ring_buffer(size_t _size) { init(_size); }
|
||||
ring_buffer(size_t _size, bool _use_mmap);
|
||||
|
||||
~ring_buffer();
|
||||
|
||||
ring_buffer(const ring_buffer&);
|
||||
ring_buffer& operator=(const ring_buffer&);
|
||||
|
||||
ring_buffer(ring_buffer&&) noexcept;
|
||||
ring_buffer& operator=(ring_buffer&&) noexcept;
|
||||
|
||||
@@ -79,6 +74,12 @@ struct ring_buffer
|
||||
/// Destroy ring buffer.
|
||||
void destroy();
|
||||
|
||||
/// Request a pointer for writing at least \param n bytes.
|
||||
void* request(size_t n, bool wrap = true);
|
||||
|
||||
/// Retrieve a pointer for reading at least \param n bytes.
|
||||
void* retrieve(size_t n) const;
|
||||
|
||||
/// Write class-type data to buffer (uses placement new).
|
||||
template <typename Tp>
|
||||
std::pair<size_t, Tp*> write(Tp* in, std::enable_if_t<std::is_class<Tp>::value, int> = 0);
|
||||
@@ -93,24 +94,19 @@ struct ring_buffer
|
||||
template <typename Tp>
|
||||
Tp* request();
|
||||
|
||||
/// Request a pointer to an allocation for at least \param n bytes.
|
||||
void* request(size_t n);
|
||||
|
||||
/// Read class-type data from buffer (uses placement new).
|
||||
template <typename Tp>
|
||||
std::pair<size_t, Tp*> read(Tp* out, std::enable_if_t<std::is_class<Tp>::value, int> = 0) const;
|
||||
std::pair<size_t, Tp*> read(Tp* _dest,
|
||||
std::enable_if_t<std::is_class<Tp>::value, int> = 0) const;
|
||||
|
||||
/// Read non-class-type data from buffer (uses memcpy).
|
||||
template <typename Tp>
|
||||
std::pair<size_t, Tp*> read(Tp* out,
|
||||
std::pair<size_t, Tp*> read(Tp* _dest,
|
||||
std::enable_if_t<!std::is_class<Tp>::value, int> = 0) const;
|
||||
|
||||
/// Retrieve a pointer to the head allocation (read).
|
||||
template <typename Tp>
|
||||
Tp* retrieve();
|
||||
|
||||
/// Retrieve a pointer to the head allocation of at least \param n bytes (read).
|
||||
void* retrieve(size_t n);
|
||||
Tp* retrieve() const;
|
||||
|
||||
/// Returns number of bytes currently held by the buffer.
|
||||
size_t count() const { return (m_write_count - m_read_count); }
|
||||
@@ -124,42 +120,52 @@ struct ring_buffer
|
||||
/// Returns if the buffer is full.
|
||||
bool is_full() const { return (count() == m_size); }
|
||||
|
||||
/// Rewind the read position n bytes
|
||||
size_t rewind(size_t n) const;
|
||||
|
||||
/// explicitly configure to use mmap if avail
|
||||
void set_use_mmap(bool);
|
||||
|
||||
/// query whether using mmap
|
||||
bool get_use_mmap() const { return m_use_mmap; }
|
||||
|
||||
/// Display info about buffer
|
||||
std::string as_string() const;
|
||||
|
||||
/// save the entire buffer to a filestream
|
||||
void save(std::fstream& _fs);
|
||||
|
||||
/// load the entire buffer from a filestream
|
||||
void load(std::fstream& _fs);
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const ring_buffer& obj)
|
||||
{
|
||||
return os << obj.as_string();
|
||||
}
|
||||
/// query whether the read pointer is zero and thus clearing is supported
|
||||
bool can_clear() const;
|
||||
|
||||
/// reset the read and write pointer to their initial values.
|
||||
/// effectively, wiping and existing memory. Please note,
|
||||
/// this should be used with care in a double buffer system
|
||||
/// where you are not actually using the read pointer.
|
||||
/// If the read pointer is non-zero, this will throw an exception
|
||||
bool clear();
|
||||
|
||||
/// reset the read and write pointer to their initial values.
|
||||
/// effectively, wiping and existing memory. Please note,
|
||||
/// this should be used with care in a double buffer system
|
||||
/// where you are not actually using the read pointer.
|
||||
bool clear(std::nothrow_t);
|
||||
|
||||
private:
|
||||
/// Returns the current write pointer.
|
||||
void* write_ptr() const { return static_cast<char*>(m_ptr) + (m_write_count % m_size); }
|
||||
void* write_ptr(size_t _write_count) const
|
||||
{
|
||||
return static_cast<char*>(m_ptr) + (_write_count % m_size);
|
||||
}
|
||||
|
||||
/// Returns the current read pointer.
|
||||
void* read_ptr() const { return static_cast<char*>(m_ptr) + (m_read_count % m_size); }
|
||||
void* read_ptr(size_t _read_count) const
|
||||
{
|
||||
return static_cast<char*>(m_ptr) + (_read_count % m_size);
|
||||
}
|
||||
|
||||
void reset();
|
||||
|
||||
private:
|
||||
bool m_init = false;
|
||||
bool m_use_mmap = true;
|
||||
bool m_use_mmap_explicit = false;
|
||||
void* m_ptr = nullptr;
|
||||
size_t m_size = 0;
|
||||
mutable size_t m_read_count = 0;
|
||||
size_t m_write_count = 0;
|
||||
bool m_init = false;
|
||||
void* m_ptr = nullptr;
|
||||
size_t m_size = 0;
|
||||
mutable std::atomic<size_t> m_read_count = 0;
|
||||
std::atomic<size_t> m_write_count = 0;
|
||||
};
|
||||
//
|
||||
template <typename Tp>
|
||||
@@ -168,28 +174,18 @@ ring_buffer::write(Tp* in, std::enable_if_t<std::is_class<Tp>::value, int>)
|
||||
{
|
||||
if(in == nullptr || m_ptr == nullptr) return {0, nullptr};
|
||||
|
||||
auto _length = sizeof(Tp);
|
||||
auto _length = sizeof(Tp);
|
||||
void* _out_p = request(_length);
|
||||
|
||||
// Make sure we don't put in more than there's room for, by writing no
|
||||
// more than there is free.
|
||||
if(_length > free())
|
||||
throw std::runtime_error("heap-buffer-overflow :: ring buffer is full. read data "
|
||||
"to avoid data corruption");
|
||||
|
||||
// if write count is at the tail of buffer, bump to the end of buffer
|
||||
auto _modulo = m_size - (m_write_count % m_size);
|
||||
if(_modulo < _length) m_write_count += _modulo;
|
||||
|
||||
// pointer in buffer
|
||||
Tp* out = reinterpret_cast<Tp*>(write_ptr());
|
||||
if(_out_p == nullptr) return {0, nullptr};
|
||||
|
||||
// Copy in.
|
||||
new((void*) out) Tp{std::move(*in)};
|
||||
new(_out_p) Tp{std::move(*in)};
|
||||
|
||||
// Update write count
|
||||
m_write_count += _length;
|
||||
// pointer in buffer
|
||||
Tp* _out = reinterpret_cast<Tp*>(_out_p);
|
||||
|
||||
return {_length, out};
|
||||
return {_length, _out};
|
||||
}
|
||||
//
|
||||
template <typename Tp>
|
||||
@@ -198,28 +194,18 @@ ring_buffer::write(Tp* in, std::enable_if_t<!std::is_class<Tp>::value, int>)
|
||||
{
|
||||
if(in == nullptr || m_ptr == nullptr) return {0, nullptr};
|
||||
|
||||
auto _length = sizeof(Tp);
|
||||
auto _length = sizeof(Tp);
|
||||
void* _out_p = request(_length);
|
||||
|
||||
// Make sure we don't put in more than there's room for, by writing no
|
||||
// more than there is free.
|
||||
if(_length > free())
|
||||
throw std::runtime_error("heap-buffer-overflow :: ring buffer is full. read data "
|
||||
"to avoid data corruption");
|
||||
|
||||
// if write count is at the tail of buffer, bump to the end of buffer
|
||||
auto _modulo = m_size - (m_write_count % m_size);
|
||||
if(_modulo < _length) m_write_count += _modulo;
|
||||
|
||||
// pointer in buffer
|
||||
Tp* out = reinterpret_cast<Tp*>(write_ptr());
|
||||
if(_out_p == nullptr) return {0, nullptr};
|
||||
|
||||
// Copy in.
|
||||
memcpy((void*) out, in, _length);
|
||||
memcpy(_out_p, in, _length);
|
||||
|
||||
// Update write count
|
||||
m_write_count += _length;
|
||||
// pointer in buffer
|
||||
Tp* _out = reinterpret_cast<Tp*>(_out_p);
|
||||
|
||||
return {_length, out};
|
||||
return {_length, _out};
|
||||
}
|
||||
//
|
||||
template <typename Tp>
|
||||
@@ -228,118 +214,70 @@ ring_buffer::request()
|
||||
{
|
||||
if(m_ptr == nullptr) return nullptr;
|
||||
|
||||
auto _length = sizeof(Tp);
|
||||
|
||||
// Make sure we don't put in more than there's room for, by writing no
|
||||
// more than there is free.
|
||||
if(_length > free())
|
||||
throw std::runtime_error("heap-buffer-overflow :: ring buffer is full. read data "
|
||||
"to avoid data corruption");
|
||||
|
||||
// if write count is at the tail of buffer, bump to the end of buffer
|
||||
auto _modulo = m_size - (m_write_count % m_size);
|
||||
if(_modulo < _length) m_write_count += _modulo;
|
||||
|
||||
// pointer in buffer
|
||||
Tp* _out = reinterpret_cast<Tp*>(write_ptr());
|
||||
|
||||
// Update write count
|
||||
m_write_count += _length;
|
||||
|
||||
return _out;
|
||||
return request(sizeof(Tp));
|
||||
}
|
||||
//
|
||||
template <typename Tp>
|
||||
std::pair<size_t, Tp*>
|
||||
ring_buffer::read(Tp* out, std::enable_if_t<std::is_class<Tp>::value, int>) const
|
||||
ring_buffer::read(Tp* _dest, std::enable_if_t<std::is_class<Tp>::value, int>) const
|
||||
{
|
||||
if(is_empty() || out == nullptr) return {0, nullptr};
|
||||
if(is_empty() || _dest == nullptr) return {0, nullptr};
|
||||
|
||||
auto _length = sizeof(Tp);
|
||||
auto _length = sizeof(Tp);
|
||||
void* _out_p = retrieve(_length);
|
||||
|
||||
// Make sure we do not read out more than there is actually in the buffer.
|
||||
if(_length > count()) throw std::runtime_error("ring buffer is empty");
|
||||
|
||||
// if read count is at the tail of buffer, bump to the end of buffer
|
||||
auto _modulo = m_size - (m_read_count % m_size);
|
||||
if(_modulo < _length) m_read_count += _modulo;
|
||||
if(_out_p == nullptr) return {0, nullptr};
|
||||
|
||||
// pointer in buffer
|
||||
Tp* in = reinterpret_cast<Tp*>(read_ptr());
|
||||
Tp* in = reinterpret_cast<Tp*>(_out_p);
|
||||
|
||||
// Copy out for BYTE, nothing magic here.
|
||||
*out = *in;
|
||||
|
||||
// Update read count.
|
||||
m_read_count += _length;
|
||||
*_dest = *in;
|
||||
|
||||
return {_length, in};
|
||||
}
|
||||
//
|
||||
template <typename Tp>
|
||||
std::pair<size_t, Tp*>
|
||||
ring_buffer::read(Tp* out, std::enable_if_t<!std::is_class<Tp>::value, int>) const
|
||||
ring_buffer::read(Tp* _dest, std::enable_if_t<!std::is_class<Tp>::value, int>) const
|
||||
{
|
||||
if(is_empty() || out == nullptr) return {0, nullptr};
|
||||
if(is_empty() || _dest == nullptr) return {0, nullptr};
|
||||
|
||||
auto _length = sizeof(Tp);
|
||||
auto _length = sizeof(Tp);
|
||||
void* _out_p = retrieve(_length);
|
||||
|
||||
if(_out_p == nullptr) return {0, nullptr};
|
||||
|
||||
// pointer in buffer
|
||||
Tp* in = reinterpret_cast<Tp*>(_out_p);
|
||||
|
||||
using Up = typename std::remove_const<Tp>::type;
|
||||
|
||||
// Make sure we do not read out more than there is actually in the buffer.
|
||||
if(_length > count()) throw std::runtime_error("ring buffer is empty");
|
||||
|
||||
// if read count is at the tail of buffer, bump to the end of buffer
|
||||
auto _modulo = m_size - (m_read_count % m_size);
|
||||
if(_modulo < _length) m_read_count += _modulo;
|
||||
|
||||
// pointer in buffer
|
||||
Tp* in = reinterpret_cast<Tp*>(read_ptr());
|
||||
|
||||
// Copy out for BYTE, nothing magic here.
|
||||
Up* _out = const_cast<Up*>(out);
|
||||
Up* _out = const_cast<Up*>(_dest);
|
||||
memcpy(_out, in, _length);
|
||||
|
||||
// Update read count.
|
||||
m_read_count += _length;
|
||||
|
||||
return {_length, in};
|
||||
}
|
||||
//
|
||||
template <typename Tp>
|
||||
Tp*
|
||||
ring_buffer::retrieve()
|
||||
ring_buffer::retrieve() const
|
||||
{
|
||||
if(m_ptr == nullptr) return nullptr;
|
||||
|
||||
auto _length = sizeof(Tp);
|
||||
|
||||
// Make sure we don't put in more than there's room for, by writing no
|
||||
// more than there is free.
|
||||
if(_length > count()) throw std::runtime_error("ring buffer is empty");
|
||||
|
||||
// if read count is at the tail of buffer, bump to the end of buffer
|
||||
auto _modulo = m_size - (m_read_count % m_size);
|
||||
if(_modulo < _length) m_read_count += _modulo;
|
||||
|
||||
// pointer in buffer
|
||||
Tp* _out = reinterpret_cast<Tp*>(read_ptr());
|
||||
|
||||
// Update write count
|
||||
m_read_count += _length;
|
||||
|
||||
return _out;
|
||||
return retrieve(sizeof(Tp));
|
||||
}
|
||||
//
|
||||
} // namespace base
|
||||
///
|
||||
/// \struct rocprofiler::container::ring_buffer
|
||||
/// \brief Ring buffer wrapper around \ref tim::base::ring_buffer for data of type Tp. If
|
||||
/// the data object size is larger than the page size (typically 4KB), behavior is
|
||||
/// undefined. During initialization, one requests a minimum number of objects and the
|
||||
/// buffer will support that number of object + the remainder of the page, e.g. if a page
|
||||
/// is 1000 bytes, the object is 1 byte, and the buffer is requested to support 1500
|
||||
/// objects, then an allocation supporting 2000 objects (i.e. 2 pages) will be created.
|
||||
//
|
||||
/// \struct rocprofiler::common::container::ring_buffer
|
||||
/// \brief Ring buffer wrapper around \ref rocprofiler::common::container::base::ring_buffer for
|
||||
/// data of type Tp. If the data object size is larger than the page size (typically 4KB), behavior
|
||||
/// is undefined. During initialization, one requests a minimum number of objects and the buffer
|
||||
/// will support that number of object + the remainder of the page, e.g. if a page is 1000 bytes,
|
||||
/// the object is 1 byte, and the buffer is requested to support 1500 objects, then an allocation
|
||||
/// supporting 2000 objects (i.e. 2 pages) will be created.
|
||||
template <typename Tp>
|
||||
struct ring_buffer : private base::ring_buffer
|
||||
{
|
||||
@@ -350,18 +288,10 @@ struct ring_buffer : private base::ring_buffer
|
||||
ring_buffer() = default;
|
||||
~ring_buffer() = default;
|
||||
|
||||
explicit ring_buffer(bool _use_mmap)
|
||||
: base_type{_use_mmap}
|
||||
{}
|
||||
|
||||
explicit ring_buffer(size_t _size)
|
||||
: base_type{_size * sizeof(Tp)}
|
||||
{}
|
||||
|
||||
ring_buffer(size_t _size, bool _use_mmap)
|
||||
: base_type{_size * sizeof(Tp), _use_mmap}
|
||||
{}
|
||||
|
||||
ring_buffer(const ring_buffer&);
|
||||
ring_buffer(ring_buffer&&) noexcept = default;
|
||||
|
||||
@@ -387,7 +317,7 @@ struct ring_buffer : private base::ring_buffer
|
||||
Tp* write(Tp* in) { return base_type::write<Tp>(in).second; }
|
||||
|
||||
/// Read data from buffer. Return pointer to location of read
|
||||
Tp* read(Tp* out) const { return base_type::read<Tp>(out).second; }
|
||||
Tp* read(Tp* _dest) const { return base_type::read<Tp>(_dest).second; }
|
||||
|
||||
/// Get an uninitialized address at tail of buffer.
|
||||
Tp* request() { return base_type::request<Tp>(); }
|
||||
@@ -407,9 +337,6 @@ struct ring_buffer : private base::ring_buffer
|
||||
/// Returns if the buffer is full.
|
||||
bool is_full() const { return (base_type::free() < sizeof(Tp)); }
|
||||
|
||||
/// Rewinds the read pointer
|
||||
size_t rewind(size_t n) const { return base_type::rewind(n); }
|
||||
|
||||
template <typename... Args>
|
||||
auto emplace(Args&&... args)
|
||||
{
|
||||
@@ -417,10 +344,8 @@ struct ring_buffer : private base::ring_buffer
|
||||
return write(&_obj);
|
||||
}
|
||||
|
||||
using base_type::get_use_mmap;
|
||||
using base_type::load;
|
||||
using base_type::save;
|
||||
using base_type::set_use_mmap;
|
||||
|
||||
std::string as_string() const
|
||||
{
|
||||
@@ -461,7 +386,7 @@ ring_buffer<Tp>::ring_buffer(const ring_buffer<Tp>& rhs)
|
||||
char* _end = static_cast<char*>(rhs.m_ptr) + rhs.m_size;
|
||||
for(size_t i = 0; i < _n; ++i)
|
||||
{
|
||||
char* _addr = static_cast<char*>(rhs.read_ptr()) + (i * sizeof(Tp));
|
||||
char* _addr = static_cast<char*>(rhs.read_ptr(m_read_count)) + (i * sizeof(Tp));
|
||||
if((_addr + sizeof(Tp)) > _end) _addr = static_cast<char*>(rhs.m_ptr);
|
||||
Tp* _in = static_cast<Tp*>(static_cast<void*>(_addr));
|
||||
write(_in);
|
||||
@@ -479,7 +404,7 @@ ring_buffer<Tp>::operator=(const ring_buffer<Tp>& rhs)
|
||||
char* _end = static_cast<char*>(rhs.m_ptr) + rhs.m_size;
|
||||
for(size_t i = 0; i < _n; ++i)
|
||||
{
|
||||
char* _addr = static_cast<char*>(rhs.read_ptr()) + (i * sizeof(Tp));
|
||||
char* _addr = static_cast<char*>(rhs.read_ptr(m_read_count)) + (i * sizeof(Tp));
|
||||
if((_addr + sizeof(Tp)) > _end) _addr = static_cast<char*>(rhs.m_ptr);
|
||||
Tp* _in = static_cast<Tp*>(static_cast<void*>(_addr));
|
||||
write(_in);
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 ROCm Developer Tools
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
namespace mpl
|
||||
{
|
||||
// dummy tuple with low instantiation cost
|
||||
template <typename... Tp>
|
||||
struct type_list
|
||||
{
|
||||
static constexpr auto size() { return sizeof...(Tp); }
|
||||
};
|
||||
|
||||
/// get the index of a type in expansion
|
||||
template <typename Tp, typename Type>
|
||||
struct index_of;
|
||||
|
||||
template <typename Tp, template <typename...> class Tuple, typename... Types>
|
||||
struct index_of<Tp, Tuple<Tp, Types...>>
|
||||
{
|
||||
static constexpr size_t value = 0;
|
||||
};
|
||||
|
||||
template <typename Tp, typename Head, template <typename...> class Tuple, typename... Tail>
|
||||
struct index_of<Tp, Tuple<Head, Tail...>>
|
||||
{
|
||||
static constexpr size_t value = 1 + index_of<Tp, Tuple<Tail...>>::value;
|
||||
};
|
||||
|
||||
/// get the index of a type in expansion
|
||||
template <typename Tp>
|
||||
struct size_of;
|
||||
|
||||
template <typename... Tp>
|
||||
struct size_of<type_list<Tp...>>
|
||||
{
|
||||
static constexpr size_t value = sizeof...(Tp);
|
||||
};
|
||||
|
||||
template <typename... Tp>
|
||||
struct size_of<std::tuple<Tp...>>
|
||||
{
|
||||
static constexpr size_t value = sizeof...(Tp);
|
||||
};
|
||||
|
||||
// check if type is in expansion
|
||||
//
|
||||
template <typename... Tp>
|
||||
struct is_one_of
|
||||
{
|
||||
static constexpr bool value = false;
|
||||
};
|
||||
|
||||
template <typename F, typename S, template <typename...> class Tuple, typename... T>
|
||||
struct is_one_of<F, S, Tuple<T...>>
|
||||
{
|
||||
static constexpr bool value = std::is_same<F, S>::value || is_one_of<F, Tuple<T...>>::value;
|
||||
};
|
||||
|
||||
template <typename F, typename S, template <typename...> class Tuple, typename... T>
|
||||
struct is_one_of<F, Tuple<S, T...>>
|
||||
{
|
||||
static constexpr bool value = is_one_of<F, S, Tuple<T...>>::value;
|
||||
};
|
||||
} // namespace mpl
|
||||
} // namespace common
|
||||
} // namespace rocprofiler
|
||||
@@ -20,9 +20,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
@@ -1 +1,4 @@
|
||||
|
||||
#
|
||||
#
|
||||
#
|
||||
add_subdirectory(buffering)
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
project(rocprofiler-tests-buffering LANGUAGES C CXX)
|
||||
|
||||
include(GoogleTest)
|
||||
|
||||
set(buffering_sources buffering-serial.cpp buffering-parallel.cpp buffering-save-load.cpp)
|
||||
|
||||
add_executable(buffering-test)
|
||||
target_sources(buffering-test PRIVATE ${buffering_sources})
|
||||
target_link_libraries(
|
||||
buffering-test
|
||||
PRIVATE rocprofiler::rocprofiler-headers rocprofiler::rocprofiler-common-library
|
||||
GTest::gtest GTest::gtest_main)
|
||||
|
||||
gtest_add_tests(
|
||||
TARGET buffering-test
|
||||
SOURCES ${buffering_sources}
|
||||
TEST_LIST buffering-test_TESTS
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
set_tests_properties(${buffering-tests_TESTS} PROPERTIES TIMEOUT 45 LABELS "unittests")
|
||||
@@ -0,0 +1,217 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 ROCm Developer Tools
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "buffering.hpp"
|
||||
#include "lib/common/container/record_header_buffer.hpp"
|
||||
#include "lib/common/mpl.hpp"
|
||||
#include "lib/common/units.hpp"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <limits>
|
||||
#include <typeinfo>
|
||||
#include <utility>
|
||||
|
||||
namespace
|
||||
{
|
||||
namespace test = ::rocprofiler::test;
|
||||
namespace units = ::rocprofiler::common::units;
|
||||
namespace mpl = ::rocprofiler::common::mpl;
|
||||
|
||||
using record_header_buffer_t = rocprofiler::common::container::record_header_buffer;
|
||||
|
||||
// this function returns a random array of values specific to template instantiation
|
||||
template <typename Tp, size_t N>
|
||||
auto&
|
||||
get_generated_array()
|
||||
{
|
||||
static auto _value = []() {
|
||||
auto _v = test::raw_array<Tp, N>{};
|
||||
test::generate(_v, Tp{0}, std::numeric_limits<Tp>::max());
|
||||
return _v;
|
||||
}();
|
||||
return _value;
|
||||
}
|
||||
|
||||
// these are the array size variants. we use the units to scale up
|
||||
// but technically the data size of the raw_array will be multiplied
|
||||
// by sizeof(Tp)
|
||||
constexpr auto test_data_sizes = std::index_sequence<1 * units::byte,
|
||||
2 * units::byte,
|
||||
3 * units::byte,
|
||||
4 * units::byte,
|
||||
8 * units::byte,
|
||||
16 * units::kilobyte,
|
||||
20 * units::kilobyte,
|
||||
24 * units::kilobyte,
|
||||
32 * units::kilobyte,
|
||||
56 * units::kilobyte,
|
||||
64 * units::kilobyte,
|
||||
91 * units::kilobyte,
|
||||
128 * units::kilobyte,
|
||||
387 * units::kilobyte,
|
||||
693 * units::kilobyte,
|
||||
2 * units::megabyte>{};
|
||||
|
||||
// this is the list of array data types we will generate. Effectively, there
|
||||
// will be one raw array for each combination of these types and the test data sizes
|
||||
// (i.e. there will be unique 160 arrays of different types and sizes)
|
||||
using test_data_types = mpl::type_list<int8_t,
|
||||
uint8_t,
|
||||
int16_t,
|
||||
uint16_t,
|
||||
int32_t,
|
||||
uint32_t,
|
||||
int64_t,
|
||||
uint64_t,
|
||||
float,
|
||||
double>;
|
||||
|
||||
// this function creates a thread for each data size for a given type.
|
||||
// all threads are detached and will wait at the first barrier until all
|
||||
// threads have reached it, race to emplace their data in the shared
|
||||
// buffer and then wait at the second barrier until all the threads have
|
||||
// emplacing the data and the main thread has also reached the second
|
||||
// barrier.
|
||||
template <typename Tp, size_t... Idx>
|
||||
void
|
||||
launch_threads(record_header_buffer_t& _buf,
|
||||
pthread_barrier_t& _race_barrier,
|
||||
pthread_barrier_t& _done_barrier,
|
||||
std::index_sequence<Idx...>)
|
||||
{
|
||||
auto _launch =
|
||||
[](record_header_buffer_t* _buf_v, auto* _race_barrier_v, auto* _done_barrier_v, auto* _v) {
|
||||
pthread_barrier_wait(_race_barrier_v);
|
||||
EXPECT_TRUE(_buf_v->emplace(*_v));
|
||||
pthread_barrier_wait(_done_barrier_v);
|
||||
};
|
||||
(std::thread{_launch, &_buf, &_race_barrier, &_done_barrier, &get_generated_array<Tp, Idx>()}
|
||||
.detach(),
|
||||
...);
|
||||
}
|
||||
|
||||
// expansion for each type
|
||||
template <typename... Tp, size_t... Idx>
|
||||
void
|
||||
launch_threads(record_header_buffer_t& _buf,
|
||||
pthread_barrier_t& _race_barrier,
|
||||
pthread_barrier_t& _done_barrier,
|
||||
mpl::type_list<Tp...>,
|
||||
std::index_sequence<Idx...> _seq)
|
||||
{
|
||||
(launch_threads<Tp>(_buf, _race_barrier, _done_barrier, _seq), ...);
|
||||
}
|
||||
|
||||
// computes the size of every raw_array size for a given type
|
||||
template <typename Tp, size_t... Idx>
|
||||
constexpr size_t get_data_size(std::index_sequence<Idx...>)
|
||||
{
|
||||
size_t _v = 0;
|
||||
((_v += sizeof(get_generated_array<Tp, Idx>())), ...);
|
||||
return _v;
|
||||
}
|
||||
|
||||
// expansion for each type
|
||||
template <typename... Tp, size_t... Idx>
|
||||
constexpr size_t
|
||||
get_data_size(mpl::type_list<Tp...>, std::index_sequence<Idx...> _seq)
|
||||
{
|
||||
size_t _v = 0;
|
||||
((_v += get_data_size<Tp>(_seq)), ...);
|
||||
return _v;
|
||||
}
|
||||
|
||||
// validates that the raw array extracted out of the buffer is equal
|
||||
// to the raw array that was placed in the buffer
|
||||
template <typename Tp, size_t N>
|
||||
void
|
||||
validate(const std::vector<rocprofiler_record_header_t*>& _headers)
|
||||
{
|
||||
using data_type = test::raw_array<Tp, N>;
|
||||
auto& _ref_data = get_generated_array<Tp, N>();
|
||||
for(auto* itr : _headers)
|
||||
{
|
||||
if(itr->kind == typeid(data_type).hash_code())
|
||||
{
|
||||
auto* _data = static_cast<data_type*>(itr->payload);
|
||||
EXPECT_EQ(_ref_data, *_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// expansion for every raw array size for a given data type
|
||||
template <typename Tp, size_t... Idx>
|
||||
void
|
||||
validate(const std::vector<rocprofiler_record_header_t*>& _headers, std::index_sequence<Idx...>)
|
||||
{
|
||||
(validate<Tp, Idx>(_headers), ...);
|
||||
}
|
||||
|
||||
// expansion for each raw array type
|
||||
template <typename... Tp, size_t... Idx>
|
||||
void
|
||||
validate(const std::vector<rocprofiler_record_header_t*>& _headers,
|
||||
mpl::type_list<Tp...>,
|
||||
std::index_sequence<Idx...> _seq)
|
||||
{
|
||||
(validate<Tp>(_headers, _seq), ...);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST(buffering, parallel)
|
||||
{
|
||||
// this test launches 160 threads, each with a randomly generated array of data
|
||||
// and has them contend for emplacing their data in the same buffer. The purpose
|
||||
// of this test is to validate that multiple threads can write to the same
|
||||
// (lock-free) buffer without any data corruption or loss.
|
||||
|
||||
constexpr auto num_variants = test_data_types::size() * test_data_sizes.size();
|
||||
constexpr auto data_size = get_data_size(test_data_types{}, test_data_sizes);
|
||||
|
||||
EXPECT_EQ(num_variants, 160);
|
||||
|
||||
// make a buffer large enough to hold all the data we generate
|
||||
auto _buffer = record_header_buffer_t{data_size};
|
||||
|
||||
// create a barrier that all child threads will wait and then race to enqueue their data in the
|
||||
// buffer i.e., we want to maximize contention on inserting into buffer
|
||||
auto _data_race_barrier = pthread_barrier_t{};
|
||||
pthread_barrier_init(&_data_race_barrier, nullptr, num_variants);
|
||||
|
||||
// a barrier to signal that all threads have completed placing their data in the buffer
|
||||
auto _emplaced_barrier = pthread_barrier_t{};
|
||||
pthread_barrier_init(&_emplaced_barrier, nullptr, num_variants + 1);
|
||||
|
||||
// launch 160 threads
|
||||
launch_threads(
|
||||
_buffer, _data_race_barrier, _emplaced_barrier, test_data_types{}, test_data_sizes);
|
||||
|
||||
// wait for all the threads to complete
|
||||
pthread_barrier_wait(&_emplaced_barrier);
|
||||
|
||||
// verify the data pulled out the buffer matches the data put in by the threads
|
||||
validate(_buffer.get_record_headers(), test_data_types{}, test_data_sizes);
|
||||
}
|
||||
@@ -0,0 +1,273 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 ROCm Developer Tools
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "buffering.hpp"
|
||||
#include "lib/common/container/record_header_buffer.hpp"
|
||||
#include "lib/common/mpl.hpp"
|
||||
#include "lib/common/units.hpp"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <limits>
|
||||
#include <typeinfo>
|
||||
#include <utility>
|
||||
|
||||
namespace
|
||||
{
|
||||
namespace test = ::rocprofiler::test;
|
||||
namespace units = ::rocprofiler::common::units;
|
||||
namespace mpl = ::rocprofiler::common::mpl;
|
||||
|
||||
using record_header_buffer_t = rocprofiler::common::container::record_header_buffer;
|
||||
|
||||
// this function returns a random array of values specific to template instantiation
|
||||
template <typename Tp, size_t N>
|
||||
auto&
|
||||
get_generated_array()
|
||||
{
|
||||
static auto _value = []() {
|
||||
auto _v = test::raw_array<Tp, N>{};
|
||||
test::generate(_v, Tp{0}, std::numeric_limits<Tp>::max());
|
||||
return _v;
|
||||
}();
|
||||
return _value;
|
||||
}
|
||||
|
||||
// these are the array size variants. we use the units to scale up
|
||||
// but technically the data size of the raw_array will be multiplied
|
||||
// by sizeof(Tp)
|
||||
constexpr auto test_data_sizes = std::index_sequence<1 * units::byte,
|
||||
2 * units::byte,
|
||||
3 * units::byte,
|
||||
4 * units::byte,
|
||||
8 * units::byte,
|
||||
16 * units::kilobyte,
|
||||
20 * units::kilobyte,
|
||||
24 * units::kilobyte,
|
||||
32 * units::kilobyte,
|
||||
56 * units::kilobyte,
|
||||
64 * units::kilobyte,
|
||||
91 * units::kilobyte>{};
|
||||
|
||||
// this is the list of array data types we will generate. Effectively, there
|
||||
// will be one raw array for each combination of these types and the test data sizes
|
||||
// (i.e. there will be unique 160 arrays of different types and sizes)
|
||||
using test_data_types = mpl::type_list<int8_t,
|
||||
uint8_t,
|
||||
int16_t,
|
||||
uint16_t,
|
||||
int32_t,
|
||||
uint32_t,
|
||||
int64_t,
|
||||
uint64_t,
|
||||
float,
|
||||
double>;
|
||||
|
||||
// this function creates a thread for each data size for a given type.
|
||||
// all threads are detached and will wait at the first barrier until all
|
||||
// threads have reached it, race to emplace their data in the shared
|
||||
// buffer and then wait at the second barrier until all the threads have
|
||||
// emplacing the data and the main thread has also reached the second
|
||||
// barrier.
|
||||
template <typename Tp, size_t... Idx>
|
||||
void
|
||||
launch(record_header_buffer_t* _buf, pthread_barrier_t* _done_barrier, std::index_sequence<Idx...>)
|
||||
{
|
||||
auto _launch = [](record_header_buffer_t* _buf_v, auto* _v) {
|
||||
EXPECT_TRUE(_buf_v->emplace(*_v));
|
||||
};
|
||||
(_launch(_buf, &get_generated_array<Tp, Idx>()), ...);
|
||||
pthread_barrier_wait(_done_barrier);
|
||||
}
|
||||
|
||||
// expansion for each type
|
||||
template <typename... Tp, size_t... Idx>
|
||||
void
|
||||
launch_threads(record_header_buffer_t& _buf,
|
||||
pthread_barrier_t& _done_barrier,
|
||||
mpl::type_list<Tp...>,
|
||||
std::index_sequence<Idx...> _seq)
|
||||
{
|
||||
((std::thread{[_seq](auto* _buf_v, auto* _barrier_v) { launch<Tp>(_buf_v, _barrier_v, _seq); },
|
||||
&_buf,
|
||||
&_done_barrier}
|
||||
.detach()),
|
||||
...);
|
||||
}
|
||||
|
||||
// computes the size of every raw_array size for a given type
|
||||
template <typename Tp, size_t... Idx>
|
||||
constexpr size_t get_data_size(std::index_sequence<Idx...>)
|
||||
{
|
||||
size_t _v = 0;
|
||||
((_v += sizeof(get_generated_array<Tp, Idx>())), ...);
|
||||
return _v;
|
||||
}
|
||||
|
||||
// expansion for each type
|
||||
template <typename... Tp, size_t... Idx>
|
||||
constexpr size_t
|
||||
get_data_size(mpl::type_list<Tp...>, std::index_sequence<Idx...> _seq)
|
||||
{
|
||||
size_t _v = 0;
|
||||
((_v += get_data_size<Tp>(_seq)), ...);
|
||||
return _v;
|
||||
}
|
||||
|
||||
// validates that the raw array extracted out of the buffer is equal
|
||||
// to the raw array that was placed in the buffer
|
||||
template <typename Tp, size_t N>
|
||||
void
|
||||
validate(const std::vector<rocprofiler_record_header_t*>& _headers)
|
||||
{
|
||||
using data_type = test::raw_array<Tp, N>;
|
||||
auto& _ref_data = get_generated_array<Tp, N>();
|
||||
for(auto* itr : _headers)
|
||||
{
|
||||
if(itr->kind == typeid(data_type).hash_code())
|
||||
{
|
||||
auto* _data = static_cast<data_type*>(itr->payload);
|
||||
ASSERT_TRUE(_data != nullptr);
|
||||
EXPECT_EQ(_ref_data, *_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// expansion for every raw array size for a given data type
|
||||
template <typename Tp, size_t... Idx>
|
||||
void
|
||||
validate(const std::vector<rocprofiler_record_header_t*>& _headers, std::index_sequence<Idx...>)
|
||||
{
|
||||
(validate<Tp, Idx>(_headers), ...);
|
||||
}
|
||||
|
||||
// expansion for each raw array type
|
||||
template <typename... Tp, size_t... Idx>
|
||||
void
|
||||
validate(const std::vector<rocprofiler_record_header_t*>& _headers,
|
||||
mpl::type_list<Tp...>,
|
||||
std::index_sequence<Idx...> _seq)
|
||||
{
|
||||
(validate<Tp>(_headers, _seq), ...);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST(buffering, save_load)
|
||||
{
|
||||
// this test launches 10 threads for each of the data types in test_data_types. Each thread
|
||||
// randomly generates 12 array of data of differing sizes and contends with the other threads
|
||||
// for emplacing the data in the same buffer. The purpose of this test is test the thread-safety
|
||||
// in a slightly different way, save it to a file backing, clear it, restore it from the file,
|
||||
// and move it to another object and ensure that the data after the save + load + move matches
|
||||
// the original data placed into the buffer without any data corruption or loss
|
||||
|
||||
constexpr auto num_variants = test_data_types::size() * test_data_sizes.size();
|
||||
constexpr auto data_size = get_data_size(test_data_types{}, test_data_sizes);
|
||||
|
||||
EXPECT_EQ(num_variants, 120);
|
||||
|
||||
// make a buffer large enough to hold all the data we generate
|
||||
auto _buffer = record_header_buffer_t{};
|
||||
|
||||
EXPECT_FALSE(_buffer.is_allocated());
|
||||
EXPECT_EQ(_buffer.size(), 0);
|
||||
EXPECT_EQ(_buffer.count(), 0);
|
||||
EXPECT_EQ(_buffer.free(), 0);
|
||||
EXPECT_EQ(_buffer.capacity(), 0);
|
||||
EXPECT_TRUE(_buffer.is_empty());
|
||||
EXPECT_TRUE(_buffer.is_full());
|
||||
|
||||
// allocate the buffer
|
||||
ASSERT_TRUE(_buffer.allocate(data_size)) << "buffer failed to allocate";
|
||||
|
||||
EXPECT_EQ(_buffer.size(), 0);
|
||||
EXPECT_EQ(_buffer.count(), 0);
|
||||
EXPECT_GE(_buffer.free(), data_size);
|
||||
EXPECT_GE(_buffer.capacity(), data_size);
|
||||
EXPECT_TRUE(_buffer.is_empty());
|
||||
EXPECT_FALSE(_buffer.is_full());
|
||||
|
||||
// a barrier to signal that all threads have completed placing their data in the buffer
|
||||
auto _emplaced_barrier = pthread_barrier_t{};
|
||||
pthread_barrier_init(&_emplaced_barrier, nullptr, test_data_types::size() + 1);
|
||||
|
||||
// launch 160 threads
|
||||
launch_threads(_buffer, _emplaced_barrier, test_data_types{}, test_data_sizes);
|
||||
|
||||
// wait for all the threads to complete
|
||||
pthread_barrier_wait(&_emplaced_barrier);
|
||||
|
||||
// verify the data, at a high-level is correct
|
||||
EXPECT_EQ(_buffer.size(), num_variants);
|
||||
EXPECT_EQ(_buffer.count(), data_size);
|
||||
EXPECT_GE(_buffer.free(), 0);
|
||||
EXPECT_GE(_buffer.capacity(), data_size);
|
||||
EXPECT_FALSE(_buffer.is_empty());
|
||||
|
||||
// verify the data pulled out the buffer matches the data put in
|
||||
validate(_buffer.get_record_headers(), test_data_types{}, test_data_sizes);
|
||||
|
||||
// save the data to a binary file and clear the buffer so it can "receive" new data (in theory)
|
||||
{
|
||||
auto _ofs = std::fstream{};
|
||||
_ofs.open("buffer-save-load.dat", std::ios::out);
|
||||
_buffer.save(_ofs);
|
||||
EXPECT_EQ(_buffer.clear(), num_variants);
|
||||
}
|
||||
|
||||
// verify that the buffer is empty
|
||||
EXPECT_EQ(_buffer.get_record_headers().size(), 0) << "buffer was not cleared properly";
|
||||
|
||||
// load the data back from the binary file
|
||||
{
|
||||
auto _ifs = std::fstream{};
|
||||
_ifs.open("buffer-save-load.dat", std::ios::in);
|
||||
_buffer.load(_ifs);
|
||||
}
|
||||
|
||||
// verify that, at a high level, all the data was preserved
|
||||
ASSERT_EQ(_buffer.get_record_headers().size(), num_variants)
|
||||
<< "buffer was not saved/loaded properly";
|
||||
|
||||
// verify the data is entirely correct
|
||||
validate(_buffer.get_record_headers(), test_data_types{}, test_data_sizes);
|
||||
|
||||
// move the data into another instance of record_header_buffer_t
|
||||
auto _buffer_v = record_header_buffer_t{std::move(_buffer)};
|
||||
|
||||
// make sure the move emptied out the old object and populated the new object
|
||||
ASSERT_EQ(_buffer.get_record_headers().size(), 0) << "buffer was not moved properly";
|
||||
ASSERT_EQ(_buffer_v.get_record_headers().size(), num_variants)
|
||||
<< "buffer was not moved properly";
|
||||
|
||||
// validate the data in the new object
|
||||
// verify the data pulled out the buffer matches the data put in by the threads
|
||||
validate(_buffer_v.get_record_headers(), test_data_types{}, test_data_sizes);
|
||||
|
||||
// make sure reset works when empty and when full
|
||||
EXPECT_EQ(_buffer.reset(), 0) << "buffer should be empty after move";
|
||||
EXPECT_EQ(_buffer_v.reset(), num_variants);
|
||||
}
|
||||
@@ -0,0 +1,172 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 ROCm Developer Tools
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "buffering.hpp"
|
||||
#include "lib/common/container/record_header_buffer.hpp"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <random>
|
||||
#include <typeinfo>
|
||||
|
||||
namespace
|
||||
{
|
||||
namespace test = ::rocprofiler::test;
|
||||
|
||||
using uint_raw_array_t = test::raw_array<uint64_t, 32>;
|
||||
using flt_raw_array_t = test::raw_array<double, 64>;
|
||||
using record_header_buffer_t = rocprofiler::common::container::record_header_buffer;
|
||||
|
||||
// generates an array with random data
|
||||
template <typename Tp, size_t N>
|
||||
auto
|
||||
generate_array(Tp _low = 0UL, Tp _high = 1000UL)
|
||||
{
|
||||
auto _v = test::raw_array<Tp, N>{};
|
||||
test::generate(_v, _low, _high);
|
||||
return _v;
|
||||
}
|
||||
|
||||
// pulls out a raw array of the given type and puts back into a vector
|
||||
template <typename Tp>
|
||||
void
|
||||
extract_header(std::vector<Tp>& _arr, rocprofiler_record_header_t* _hdr)
|
||||
{
|
||||
if(_hdr->kind == typeid(Tp).hash_code())
|
||||
{
|
||||
auto* _v = reinterpret_cast<Tp*>(_hdr->payload);
|
||||
_arr.emplace_back(*_v);
|
||||
}
|
||||
else
|
||||
{
|
||||
GTEST_FAIL() << __PRETTY_FUNCTION__ << " failed";
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST(buffering, serial)
|
||||
{
|
||||
// this test verifies that the buffering system is ordered properly
|
||||
// and does not suffer from data loss or data corruption. We generate
|
||||
// 240 raw arrays of data where 120 of them are twice as large as the
|
||||
// the other 120 raw array and these two arrays contain data of different
|
||||
// types. For each iteration, we randomize whether the uint64_t array with
|
||||
// 32 elements or whether the double array with 64 elements gets inserted
|
||||
// first. We then pull all the data back out of the buffer and verify
|
||||
// that no arrays were lost and that none of the data was corrupted.
|
||||
|
||||
uint64_t n = 120;
|
||||
|
||||
// storage of the original data put into the buffer
|
||||
auto _ui_history = std::vector<uint_raw_array_t>{};
|
||||
auto _ui_result = std::vector<uint_raw_array_t>{};
|
||||
|
||||
// storage of the data extracted from the buffer
|
||||
auto _fp_history = std::vector<flt_raw_array_t>{};
|
||||
auto _fp_result = std::vector<flt_raw_array_t>{};
|
||||
|
||||
// a buffer to hold all the data
|
||||
auto _buffer = record_header_buffer_t{n * (sizeof(uint_raw_array_t) + sizeof(flt_raw_array_t))};
|
||||
|
||||
// RNG use to make the ordering of the different sized records inconsistent
|
||||
auto _gen = std::mt19937_64{std::random_device{}()};
|
||||
auto _rng = std::uniform_int_distribution<short>{0, 1};
|
||||
for(uint64_t i = 0; i < n; ++i)
|
||||
{
|
||||
// generate a 32*8 byte array
|
||||
auto _u = generate_array<uint64_t, 32>();
|
||||
// generate a 64*8 byte array
|
||||
auto _f = generate_array<double, 64>();
|
||||
|
||||
// store the original data
|
||||
_ui_history.emplace_back(_u);
|
||||
_fp_history.emplace_back(_f);
|
||||
|
||||
EXPECT_EQ(_u, _ui_history.back()) << "uint not equal after emplace_back";
|
||||
EXPECT_EQ(_f, _fp_history.back()) << "float not equal after emplace_back";
|
||||
|
||||
// randomize sequence of insertion into buffer
|
||||
if(_rng(_gen) % 2 == 0)
|
||||
{
|
||||
_buffer.emplace(_u);
|
||||
_buffer.emplace(_f);
|
||||
}
|
||||
else
|
||||
{
|
||||
_buffer.emplace(_f);
|
||||
_buffer.emplace(_u);
|
||||
}
|
||||
|
||||
EXPECT_EQ(_u, _ui_history.back()) << "uint not equal after emplace_back";
|
||||
EXPECT_EQ(_f, _fp_history.back()) << "float not equal after emplace_back";
|
||||
}
|
||||
|
||||
// get the records out of the buffer
|
||||
auto _headers = _buffer.get_record_headers();
|
||||
for(auto* itr : _headers)
|
||||
{
|
||||
ASSERT_TRUE(itr->payload) << "nullptr to payload not expected";
|
||||
|
||||
if(itr->kind == typeid(uint_raw_array_t).hash_code())
|
||||
{
|
||||
extract_header(_ui_result, itr);
|
||||
}
|
||||
else if(itr->kind == typeid(flt_raw_array_t).hash_code())
|
||||
{
|
||||
extract_header(_fp_result, itr);
|
||||
}
|
||||
else
|
||||
{
|
||||
GTEST_FAIL() << "unknown type id hash code: " << std::to_string(itr->kind);
|
||||
}
|
||||
}
|
||||
|
||||
// validate that we got the same number of records out that we put in
|
||||
ASSERT_EQ(_ui_history.size(), _ui_result.size())
|
||||
<< "UINT: " << _ui_history.size() << " vs. " << _ui_result.size();
|
||||
ASSERT_EQ(_fp_history.size(), _fp_result.size())
|
||||
<< "FLOAT: " << _fp_history.size() << " vs. " << _fp_result.size();
|
||||
|
||||
// validate there was no data corruption or data loss from storage in the buffer
|
||||
for(size_t i = 0; i < n; ++i)
|
||||
{
|
||||
auto& _ui_lhs = _ui_history.at(i);
|
||||
auto& _ui_rhs = _ui_result.at(i);
|
||||
auto& _fp_lhs = _fp_history.at(i);
|
||||
auto& _fp_rhs = _fp_result.at(i);
|
||||
|
||||
EXPECT_EQ(_ui_lhs, _ui_rhs) << "\n"
|
||||
<< "UINT LHS:\n"
|
||||
<< _ui_lhs.to_string() << "\n"
|
||||
<< "UINT RHS:\n"
|
||||
<< _ui_rhs.to_string() << "\n";
|
||||
|
||||
EXPECT_EQ(_fp_lhs, _fp_rhs) << "\n"
|
||||
<< "FLOAT LHS:\n"
|
||||
<< _fp_lhs.to_string() << "\n"
|
||||
<< "FLOAT RHS:\n"
|
||||
<< _fp_rhs.to_string() << "\n";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,105 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 ROCm Developer Tools
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/container/record_header_buffer.hpp"
|
||||
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace test
|
||||
{
|
||||
template <typename Tp, size_t N>
|
||||
struct raw_array
|
||||
{
|
||||
raw_array() = default;
|
||||
~raw_array() = default;
|
||||
|
||||
raw_array(const raw_array&) = default;
|
||||
raw_array(raw_array&&) noexcept = default;
|
||||
|
||||
raw_array& operator=(const raw_array&) = default;
|
||||
raw_array& operator=(raw_array&&) noexcept = default;
|
||||
|
||||
bool operator==(const raw_array<Tp, N>& rhs) const;
|
||||
bool operator!=(const raw_array<Tp, N>& rhs) const { return !(*this == rhs); }
|
||||
|
||||
Tp& operator[](size_t n) { return data[n]; }
|
||||
Tp operator[](size_t n) const { return data[n]; }
|
||||
|
||||
std::string to_string() const;
|
||||
|
||||
Tp data[N];
|
||||
};
|
||||
|
||||
template <typename Tp, size_t N>
|
||||
bool
|
||||
raw_array<Tp, N>::operator==(const raw_array<Tp, N>& rhs) const
|
||||
{
|
||||
for(size_t i = 0; i < N; ++i)
|
||||
{
|
||||
if constexpr(std::is_integral_v<Tp>)
|
||||
{
|
||||
if((*this)[i] != rhs[i]) return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto _diff = (*this)[i] - rhs[i];
|
||||
if(_diff < Tp{0.0}) _diff *= Tp{-1.0};
|
||||
if(_diff > std::numeric_limits<Tp>::round_error()) return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename Tp, size_t N>
|
||||
std::string
|
||||
raw_array<Tp, N>::to_string() const
|
||||
{
|
||||
auto _ss = std::stringstream{};
|
||||
for(size_t i = 0; i < N; ++i)
|
||||
{
|
||||
_ss << " " << std::setw(8) << std::fixed << std::setprecision(3) << (*this)[i];
|
||||
if(i % 16 == 15) _ss << "\n";
|
||||
}
|
||||
return _ss.str();
|
||||
}
|
||||
|
||||
template <typename Tp, size_t N>
|
||||
auto
|
||||
generate(raw_array<Tp, N>& _v, Tp _min, Tp _max)
|
||||
{
|
||||
using rng_t = std::conditional_t<std::is_integral<Tp>::value,
|
||||
std::uniform_int_distribution<Tp>,
|
||||
std::uniform_real_distribution<Tp>>;
|
||||
auto _rd = std::random_device{};
|
||||
auto _gen = std::mt19937_64{_rd()};
|
||||
auto _rng = rng_t{_min, _max};
|
||||
for(size_t i = 0; i < N; ++i)
|
||||
_v[i] = _rng(_gen);
|
||||
}
|
||||
} // namespace test
|
||||
} // namespace rocprofiler
|
||||
@@ -62,9 +62,13 @@ def generate_custom(args, cmake_args, ctest_args):
|
||||
|
||||
if MEMCHECK_TYPE == "AddressSanitizer":
|
||||
MEMCHECK_SANITIZER_OPTIONS = "detect_leaks=0 use_sigaltstack=0"
|
||||
MEMCHECK_SUPPRESSION_FILE = f"{SOURCE_DIR}/script/address-sanitizer-suppr.txt"
|
||||
MEMCHECK_SUPPRESSION_FILE = (
|
||||
f"{SOURCE_DIR}/source/scripts/address-sanitizer-suppr.txt"
|
||||
)
|
||||
elif MEMCHECK_TYPE == "LeakSanitizer":
|
||||
MEMCHECK_SUPPRESSION_FILE = f"{SOURCE_DIR}/script/leak-sanitizer-suppr.txt"
|
||||
MEMCHECK_SUPPRESSION_FILE = (
|
||||
f"{SOURCE_DIR}/source/scripts/leak-sanitizer-suppr.txt"
|
||||
)
|
||||
elif MEMCHECK_TYPE == "ThreadSanitizer":
|
||||
external_symbolizer_path = ""
|
||||
for version in range(8, 20):
|
||||
@@ -75,7 +79,7 @@ def generate_custom(args, cmake_args, ctest_args):
|
||||
[
|
||||
"history_size=5",
|
||||
"second_deadlock_stack=1",
|
||||
f"suppressions={SOURCE_DIR}/script/thread-sanitizer-suppr.txt",
|
||||
f"suppressions={SOURCE_DIR}/source/scripts/thread-sanitizer-suppr.txt",
|
||||
external_symbolizer_path,
|
||||
os.environ.get("TSAN_OPTIONS", ""),
|
||||
]
|
||||
@@ -101,7 +105,7 @@ def generate_custom(args, cmake_args, ctest_args):
|
||||
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "100")
|
||||
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "100")
|
||||
set(CTEST_CUSTOM_MAXIMUM_PASSED_TEST_OUTPUT_SIZE "51200")
|
||||
set(CTEST_CUSTOM_COVERAGE_EXCLUDE "/usr/.*;/opt/.*;.*external/.*;.*samples/.*;.*test/.*;.*tests-v2/.*;.*perfetto/perfetto_sdk/.*;.*ctf/barectf.*")
|
||||
set(CTEST_CUSTOM_COVERAGE_EXCLUDE "/usr/.*;/opt/.*;.*external/.*;.*samples/.*;.*tests/.*")
|
||||
|
||||
set(CTEST_MEMORYCHECK_TYPE "{MEMCHECK_TYPE}")
|
||||
set(CTEST_MEMORYCHECK_SUPPRESSIONS_FILE "{MEMCHECK_SUPPRESSION_FILE}")
|
||||
@@ -426,29 +430,31 @@ if __name__ == "__main__":
|
||||
)
|
||||
finally:
|
||||
if "-VV" not in ctest_args and not args.quiet:
|
||||
tag = None
|
||||
tagfpath = os.path.join(args.binary_dir, "Testing/TAG")
|
||||
with open(tagfpath, "r") as f:
|
||||
tag = f.readline().strip()
|
||||
|
||||
for file in glob.glob(
|
||||
os.path.join(args.binary_dir, "Testing/Temporary/**"),
|
||||
os.path.join(args.binary_dir, "Testing", tag, "**"),
|
||||
recursive=True,
|
||||
):
|
||||
if not os.path.isfile(file):
|
||||
continue
|
||||
if (
|
||||
re.match(
|
||||
r"Last(Start|Update|Configure|Build|Test).*\.log$",
|
||||
os.path.basename(file),
|
||||
)
|
||||
is None
|
||||
):
|
||||
if "CoverageLog-" in os.path.basename(file):
|
||||
continue
|
||||
print(f"\n\n###### Reading {file}... ######\n\n")
|
||||
with open(file, "r") as inpf:
|
||||
fdata = inpf.read()
|
||||
print(fdata)
|
||||
# print out memory checker files
|
||||
for file in glob.glob(
|
||||
os.path.join(args.binary_dir, "Testing/Temporary/MemoryChecker.*"),
|
||||
recursive=True,
|
||||
):
|
||||
if not os.path.isfile(file):
|
||||
continue
|
||||
|
||||
print(f"\n\n\n###### Reading {file}... ######\n\n\n")
|
||||
with open(file, "r") as inpf:
|
||||
fdata = inpf.read()
|
||||
if "LastTest" not in file and "Coverage" not in file:
|
||||
print(fdata)
|
||||
oname = os.path.basename(file)
|
||||
if oname.endswith(".log"):
|
||||
oname += ".log"
|
||||
with open(os.path.join(args.binary_dir, oname), "w") as outf:
|
||||
print(f"\n\n###### Writing {oname}... ######\n\n")
|
||||
outf.write(fdata)
|
||||
print(fdata)
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele