Merge commit '5f422c1993e4d920b388cc4fc9c52983d507c763' into develop
Bu işleme şunda yer alıyor:
@@ -99,7 +99,7 @@ Please report issues on GitHub OR send an email to <dl.ROCm-Profiler.support@amd
|
||||
|
||||
- For PC sampling with short intervals (< 65k cycles), a lot of error samples might be delivered. We're working on optimizing this to support such short sampling intervals.
|
||||
|
||||
- gfx10, gfx11 and gfx12 require a stable power state for counter collection. This includes Radeon 7000 GPUs.
|
||||
- gfx11 and gfx12 architectures require a stable power state for counter collection. This includes AMD Radeon RX 7000 series GPUs and newer.
|
||||
```bash
|
||||
# For device <N>. Use 'rocm-smi' or 'amd-smi monitor' to see device number.
|
||||
sudo amd-smi set -g <N> -l stable_std
|
||||
@@ -130,4 +130,4 @@ Please report issues on GitHub OR send an email to <dl.ROCm-Profiler.support@amd
|
||||
|
||||
|
||||
> [!WARNING]
|
||||
> The latest mainline version of AQLprofile can be found at [https://repo.radeon.com/rocm/misc/aqlprofile/](https://repo.radeon.com/rocm/misc/aqlprofile/). However, updates to the public AQLprofile may not occur as frequently as updates to rocprofiler-sdk, which could lead to a mismatch between the AQLprofile binary and the rocprofiler-sdk source.
|
||||
> To use ROCprofiler-SDK, obtain the latest mainline version of AQLprofile from the [AQLprofile GitHub repository](https://github.com/ROCm/aqlprofile).
|
||||
|
||||
@@ -168,7 +168,7 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait)
|
||||
while(buff->syncer.test_and_set())
|
||||
{
|
||||
std::this_thread::yield();
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds{10});
|
||||
std::this_thread::sleep_for(std::chrono::microseconds{10});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ struct instance
|
||||
using buffer_t = common::container::record_header_buffer;
|
||||
|
||||
mutable std::array<buffer_t, 2> buffers = {};
|
||||
mutable std::atomic_flag syncer = ATOMIC_FLAG_INIT;
|
||||
mutable std::atomic_flag syncer = ATOMIC_FLAG_INIT; // writer and reader lock.
|
||||
mutable std::atomic<uint32_t> buffer_idx = {}; // array index
|
||||
mutable std::atomic<uint64_t> drop_count = {};
|
||||
uint64_t watermark = 0;
|
||||
@@ -118,8 +118,14 @@ rocprofiler::buffer::instance::emplace(uint32_t category, uint32_t kind, Tp& val
|
||||
// get the index of the current buffer
|
||||
auto get_idx = [this]() { return buffer_idx.load(std::memory_order_acquire) % buffers.size(); };
|
||||
|
||||
while(syncer.test_and_set())
|
||||
{
|
||||
std::this_thread::yield();
|
||||
std::this_thread::sleep_for(std::chrono::microseconds{10});
|
||||
}
|
||||
auto idx = get_idx();
|
||||
auto success = buffers.at(idx).emplace(category, kind, value);
|
||||
syncer.clear();
|
||||
if(!success)
|
||||
{
|
||||
if(buffers.at(idx).capacity() < sizeof(value))
|
||||
@@ -138,8 +144,14 @@ rocprofiler::buffer::instance::emplace(uint32_t category, uint32_t kind, Tp& val
|
||||
do
|
||||
{
|
||||
buffer::flush(buffer_id, true);
|
||||
while(syncer.test_and_set())
|
||||
{
|
||||
std::this_thread::yield();
|
||||
std::this_thread::sleep_for(std::chrono::microseconds{10});
|
||||
}
|
||||
idx = get_idx();
|
||||
success = buffers.at(idx).emplace(category, kind, value);
|
||||
syncer.clear();
|
||||
} while(!success);
|
||||
}
|
||||
else
|
||||
|
||||
@@ -28,6 +28,7 @@ THE SOFTWARE.
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <condition_variable>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <iomanip>
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle