SWDEV-489158: Optimizing counter collection performance (#1150)

* SWDEV-489158: Optimizing counter collection performance

* Static initializer fix

* adding sched_yield+sleep
Этот коммит содержится в:
Giovanni Lenzi Baraldi
2024-10-24 16:58:07 -03:00
коммит произвёл GitHub
родитель 42765c35a2
Коммит aef18896dd
+17
Просмотреть файл
@@ -67,6 +67,14 @@ namespace hsa
{
namespace
{
static std::atomic<int64_t>&
get_balanced_signal_slots()
{
constexpr int64_t NUM_SIGNALS = 16;
static auto*& atomic = common::static_object<std::atomic<int64_t>>::construct(NUM_SIGNALS);
return *atomic;
}
template <typename DomainT, typename... Args>
inline bool
context_filter(const context::context* ctx, DomainT domain, Args... args)
@@ -106,6 +114,8 @@ AsyncSignalHandler(hsa_signal_value_t /*signal_v*/, void* data)
return false;
}
get_balanced_signal_slots().fetch_add(1);
auto& queue_info_session = *static_cast<Queue::queue_info_session_t*>(data);
auto dispatch_time = kernel_dispatch::get_dispatch_time(queue_info_session);
@@ -342,6 +352,13 @@ WriteInterceptor(const void* packets,
thr_id,
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH);
// If there is a lot of contention for HSA signals, then schedule out the thread
if(get_balanced_signal_slots().fetch_sub(1) <= 0)
{
sched_yield();
std::this_thread::sleep_for(std::chrono::microseconds(1));
}
// Stores the instrumentation pkt (i.e. AQL packets for counter collection)
// along with an ID of the client we got the packet from (this will be returned via
// completed_cb_t)