diff --git a/projects/rocprofiler-sdk/CHANGELOG.md b/projects/rocprofiler-sdk/CHANGELOG.md index 72a0d510b3..847b868192 100644 --- a/projects/rocprofiler-sdk/CHANGELOG.md +++ b/projects/rocprofiler-sdk/CHANGELOG.md @@ -4,7 +4,7 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/ ## ROCprofiler-SDK for AFAR I -## Additions +### Additions - HSA API Tracing - Kernel Dispatch Tracing @@ -14,7 +14,7 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/ ## ROCprofiler-SDK for AFAR II -## Additions +### Additions - HIP API Tracing - ROCTx Tracing @@ -25,7 +25,7 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/ ## ROCprofiler-SDK for AFAR III -## Additions +### Additions - Kernel Dispatch Counter Collection – (includes serialization and multidimensional instances) - Kernel serialization @@ -43,7 +43,7 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/ ## ROCprofiler-SDK for AFAR IV -## Additions +### Additions - Page Migration Reporting (API) - Scratch Memory Reporting (API) @@ -55,7 +55,7 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/ ## ROCprofiler-SDK for AFAR V -## Additions +### Additions - Agent/Device Counter Collection (API) - Single JSON output format support (tool) @@ -67,17 +67,17 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/ - ROCProf V3 Multi-GPU Support: - Multi-process (multiple files) -## Fixes +### Fixes - SQ_ACCUM_PREV and SQ_ACCUM_PREV_HIRE overwriting issue -## Changes +### Changes - rocprofv3 tool now needs `--` in front of application. For detailed uses, please [Click Here](source/docs/rocprofv3.md) - + ## ROCprofiler-SDK for AFAR VI -## Additions +### Additions - OTF2 Tool Support - Kernel and Range Filtering @@ -90,6 +90,16 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/ - Dispatch_Id - Added CSV column for counter_collection -## Fixes +### Fixes - Miscellaneous bug fixes + +## ROCprofiler-SDK for AFAR VII + +### Additions + +### Fixes + +### Changes + +- Fix misaligned stores (undefined behavior) for buffer records diff --git a/projects/rocprofiler-sdk/source/lib/common/container/record_header_buffer.hpp b/projects/rocprofiler-sdk/source/lib/common/container/record_header_buffer.hpp index 84265da289..7454df2008 100644 --- a/projects/rocprofiler-sdk/source/lib/common/container/record_header_buffer.hpp +++ b/projects/rocprofiler-sdk/source/lib/common/container/record_header_buffer.hpp @@ -235,6 +235,7 @@ record_header_buffer::emplace(uint64_t _hash, Tp& _v) if(m_headers.empty()) return false; constexpr auto request_size = sizeof(Tp); + constexpr auto align_size = alignof(Tp); // notify there was a request m_requested.fetch_add(1); @@ -242,7 +243,7 @@ record_header_buffer::emplace(uint64_t _hash, Tp& _v) // in theory, we shouldn't need to lock here but the thread sanitizer says there is a race. // the lock will be short-lived so hopefully, it will scale fine write_lock(); - auto* _addr = m_buffer.request(request_size, false); + auto* _addr = m_buffer.request(request_size, align_size, false); write_unlock(); read_lock(); @@ -277,6 +278,7 @@ record_header_buffer::emplace(uint32_t _category, uint32_t _kind, Tp& _v) if(m_headers.empty()) return false; constexpr auto request_size = sizeof(Tp); + constexpr auto align_size = alignof(Tp); // notify there was a request m_requested.fetch_add(1); @@ -284,7 +286,7 @@ record_header_buffer::emplace(uint32_t _category, uint32_t _kind, Tp& _v) // in theory, we shouldn't need to lock here but the thread sanitizer says there is a race. // the lock will be short-lived so hopefully, it will scale fine write_lock(); - auto* _addr = m_buffer.request(request_size, false); + auto* _addr = m_buffer.request(request_size, align_size, false); write_unlock(); read_lock(); diff --git a/projects/rocprofiler-sdk/source/lib/common/container/ring_buffer.cpp b/projects/rocprofiler-sdk/source/lib/common/container/ring_buffer.cpp index 14f6ce5987..0f166e445d 100644 --- a/projects/rocprofiler-sdk/source/lib/common/container/ring_buffer.cpp +++ b/projects/rocprofiler-sdk/source/lib/common/container/ring_buffer.cpp @@ -134,30 +134,35 @@ ring_buffer::as_string() const // void* -ring_buffer::request(size_t _length, bool _wrap) +ring_buffer::request(size_t _length, size_t _align, bool _wrap) { if(m_ptr == nullptr || m_size == 0) return nullptr; if(is_full()) return (_wrap) ? retrieve(_length) : nullptr; + LOG_IF(FATAL, _align == 0) << "alignment must be non-zero"; + // if write count is at the tail of buffer, bump to the end of buffer size_t _write_count = 0; size_t _offset = 0; + size_t _write_pos = 0; do { // Make sure we don't put in more than there's room for, by writing no // more than there is free. if(_length > free()) return nullptr; - _offset = 0; _write_count = m_write_count.load(std::memory_order_acquire); auto _modulo = m_size - (_write_count % m_size); if(_modulo < _length) _offset = _modulo; + auto _align_modulo = (_write_count % _align); + auto _align_offset = (_align_modulo > 0) ? (_align - _align_modulo) : 0; + _write_pos = _write_count + _align_offset; } while(!m_write_count.compare_exchange_strong( - _write_count, _write_count + _length + _offset, std::memory_order_seq_cst)); + _write_count, _write_pos + _length + _offset, std::memory_order_seq_cst)); // pointer in buffer - void* _out = write_ptr(_write_count); + void* _out = write_ptr(_write_pos); return _out; } diff --git a/projects/rocprofiler-sdk/source/lib/common/container/ring_buffer.hpp b/projects/rocprofiler-sdk/source/lib/common/container/ring_buffer.hpp index 36e6bef833..4501c8364e 100644 --- a/projects/rocprofiler-sdk/source/lib/common/container/ring_buffer.hpp +++ b/projects/rocprofiler-sdk/source/lib/common/container/ring_buffer.hpp @@ -75,7 +75,7 @@ struct ring_buffer void destroy(); /// Request a pointer for writing at least \param n bytes. - void* request(size_t n, bool wrap = true); + void* request(size_t n, size_t align, bool wrap = true); /// Retrieve a pointer for reading at least \param n bytes. void* retrieve(size_t n) const; @@ -174,8 +174,9 @@ ring_buffer::write(Tp* in, std::enable_if_t::value, int>) { if(in == nullptr || m_ptr == nullptr) return {0, nullptr}; - auto _length = sizeof(Tp); - void* _out_p = request(_length); + constexpr auto _length = sizeof(Tp); + constexpr auto _align = alignof(Tp); + void* _out_p = request(_length, _align, true); if(_out_p == nullptr) return {0, nullptr}; @@ -194,8 +195,9 @@ ring_buffer::write(Tp* in, std::enable_if_t::value, int>) { if(in == nullptr || m_ptr == nullptr) return {0, nullptr}; - auto _length = sizeof(Tp); - void* _out_p = request(_length); + constexpr auto _length = sizeof(Tp); + constexpr auto _align = alignof(Tp); + void* _out_p = request(_length, _align, true); if(_out_p == nullptr) return {0, nullptr}; @@ -214,7 +216,7 @@ ring_buffer::request(bool wrap) { if(m_ptr == nullptr) return nullptr; - return reinterpret_cast(request(sizeof(Tp), wrap)); + return reinterpret_cast(request(sizeof(Tp), alignof(Tp), wrap)); } // template diff --git a/projects/rocprofiler-sdk/source/lib/tests/buffering/buffering-parallel.cpp b/projects/rocprofiler-sdk/source/lib/tests/buffering/buffering-parallel.cpp index 21d0782247..3a0c603c85 100644 --- a/projects/rocprofiler-sdk/source/lib/tests/buffering/buffering-parallel.cpp +++ b/projects/rocprofiler-sdk/source/lib/tests/buffering/buffering-parallel.cpp @@ -156,7 +156,8 @@ validate(const std::vector& _headers) if(itr->hash == typeid(data_type).hash_code()) { auto* _data = static_cast(itr->payload); - EXPECT_EQ(_ref_data, *_data); + EXPECT_EQ(_ref_data, *_data) + << "\nREF: " << _ref_data.to_string() << "\nINP: " << _data->to_string(); } } } diff --git a/projects/rocprofiler-sdk/source/lib/tests/buffering/buffering-save-load.cpp b/projects/rocprofiler-sdk/source/lib/tests/buffering/buffering-save-load.cpp index 3aef8f7ef9..9c3d5815e2 100644 --- a/projects/rocprofiler-sdk/source/lib/tests/buffering/buffering-save-load.cpp +++ b/projects/rocprofiler-sdk/source/lib/tests/buffering/buffering-save-load.cpp @@ -218,7 +218,7 @@ TEST(buffering, save_load) // verify the data, at a high-level is correct EXPECT_EQ(_buffer.size(), num_variants); - EXPECT_EQ(_buffer.count(), data_size); + EXPECT_GE(_buffer.count(), data_size); EXPECT_GE(_buffer.free(), 0); EXPECT_GE(_buffer.capacity(), data_size); EXPECT_FALSE(_buffer.is_empty()); diff --git a/projects/rocprofiler-sdk/source/scripts/run-ci.py b/projects/rocprofiler-sdk/source/scripts/run-ci.py index f719d2bdaf..f13817ce65 100755 --- a/projects/rocprofiler-sdk/source/scripts/run-ci.py +++ b/projects/rocprofiler-sdk/source/scripts/run-ci.py @@ -117,6 +117,17 @@ def generate_custom(args, cmake_args, ctest_args): os.environ.get("TSAN_OPTIONS", ""), ] ) + elif MEMCHECK_TYPE == "UndefinedBehaviorSanitizer": + MEMCHECK_SUPPRESSION_FILE = ( + f"{SOURCE_DIR}/source/scripts/undef-behavior-sanitizer-suppr.txt" + ) + os.environ["UBSAN_OPTIONS"] = " ".join( + [ + "print_stacktrace=1", + f"suppressions={SOURCE_DIR}/source/scripts/undef-behavior-sanitizer-suppr.txt", + os.environ.get("UBSAN_OPTIONS", ""), + ] + ) codecov_exclude = [ "/usr/.*", @@ -235,12 +246,12 @@ def generate_dashboard_script(args): RETRY_DELAY 10 CAPTURE_CMAKE_ERROR _cdash_submit_err) - if("{STRICT_SUBMIT}" GREATER 0) - if(NOT _cdash_submit_err EQUAL 0) + if(NOT _cdash_submit_err EQUAL 0) + if("{STRICT_SUBMIT}" GREATER 0) message(FATAL_ERROR "CDash submission failed: {SUBMIT_ERR}") + else() + message(AUTHOR_WARNING "CDash submission failure ignored due to absence of --require-cdash-submission") endif() - else() - message(AUTHOR_WARNING "CDash submission failure ignored due to absence of --require-cdash-submission") endif() endif() endmacro() diff --git a/projects/rocprofiler-sdk/source/scripts/undef-behavior-sanitizer-suppr.txt b/projects/rocprofiler-sdk/source/scripts/undef-behavior-sanitizer-suppr.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/summary/validate.py b/projects/rocprofiler-sdk/tests/rocprofv3/summary/validate.py index 526012630f..be5c0a3d13 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/summary/validate.py +++ b/projects/rocprofiler-sdk/tests/rocprofv3/summary/validate.py @@ -185,7 +185,7 @@ def test_summary_data(json_data): ) assert oitr.value.count == 2 elif itr.domain == "HIP_API": - assert itr.stats.count == 2135 + assert itr.stats.count >= 2130 and itr.stats.count <= 2140 elif itr.domain == "MEMORY_COPY": assert itr.stats.count == 12 elif itr.domain == "MARKER_API":