Merge commit '5f422c1993e4d920b388cc4fc9c52983d507c763' into develop

Bu işleme şunda yer alıyor:
systems-assistant[bot]
2025-08-05 17:05:53 +00:00
işleme aad122c939
4 değiştirilmiş dosya ile 17 ekleme ve 4 silme
+2 -2
Dosyayı Görüntüle
@@ -99,7 +99,7 @@ Please report issues on GitHub OR send an email to <dl.ROCm-Profiler.support@amd
- For high PC-sampling frequencies (sampling intervals shorter than 65k cycles), a large number of error samples might be delivered. We're working on optimizing this to support shorter sampling intervals.
- gfx10, gfx11 and gfx12 requires a stable power state for counter collection. This includes Radeon 7000 GPUs.
- gfx11 and gfx12 architectures require a stable power state for counter collection. This includes AMD Radeon RX 7000 series GPUs and newer.
```bash
# For device <N>. Use 'rocm-smi' or 'amd-smi monitor' to see device number.
sudo amd-smi set -g <N> -l stable_std
@@ -130,4 +130,4 @@ Please report issues on GitHub OR send an email to <dl.ROCm-Profiler.support@amd
> [!WARNING]
> The latest mainline version of AQLprofile can be found at [https://repo.radeon.com/rocm/misc/aqlprofile/](https://repo.radeon.com/rocm/misc/aqlprofile/). However, it's important to note that updates to the public AQLProfile may not occur as frequently as updates to the rocprofiler-sdk. This discrepancy could lead to a potential mismatch between the AQLprofile binary and the rocprofiler-sdk source.
> To use ROCprofiler-SDK, obtain the latest mainline version of AQLprofile from the [ROCm/aqlprofile GitHub repository](https://github.com/ROCm/aqlprofile).
+1 -1
Dosyayı Görüntüle
@@ -168,7 +168,7 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait)
while(buff->syncer.test_and_set())
{
std::this_thread::yield();
std::this_thread::sleep_for(std::chrono::milliseconds{10});
std::this_thread::sleep_for(std::chrono::microseconds{10});
}
}
+13 -1
Dosyayı Görüntüle
@@ -43,7 +43,7 @@ struct instance
using buffer_t = common::container::record_header_buffer;
mutable std::array<buffer_t, 2> buffers = {};
mutable std::atomic_flag syncer = ATOMIC_FLAG_INIT;
mutable std::atomic_flag syncer = ATOMIC_FLAG_INIT; // writer and reader lock.
mutable std::atomic<uint32_t> buffer_idx = {}; // array index
mutable std::atomic<uint64_t> drop_count = {};
uint64_t watermark = 0;
@@ -118,8 +118,14 @@ rocprofiler::buffer::instance::emplace(uint32_t category, uint32_t kind, Tp& val
// get the index of the current buffer
auto get_idx = [this]() { return buffer_idx.load(std::memory_order_acquire) % buffers.size(); };
while(syncer.test_and_set())
{
std::this_thread::yield();
std::this_thread::sleep_for(std::chrono::microseconds{10});
}
auto idx = get_idx();
auto success = buffers.at(idx).emplace(category, kind, value);
syncer.clear();
if(!success)
{
if(buffers.at(idx).capacity() < sizeof(value))
@@ -138,8 +144,14 @@ rocprofiler::buffer::instance::emplace(uint32_t category, uint32_t kind, Tp& val
do
{
buffer::flush(buffer_id, true);
while(syncer.test_and_set())
{
std::this_thread::yield();
std::this_thread::sleep_for(std::chrono::microseconds{10});
}
idx = get_idx();
success = buffers.at(idx).emplace(category, kind, value);
syncer.clear();
} while(!success);
}
else
+1
Dosyayı Görüntüle
@@ -28,6 +28,7 @@ THE SOFTWARE.
#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <cstring>
#include <fstream>
#include <functional>
#include <iomanip>