SWDEV-474179: Fixing hang issue for perfetto and missing records for trace period

Change-Id: I8926565720873d7dd730c5518f60ac6521e3bbf5
This commit is contained in:
Giovanni LB
2024-07-17 14:54:40 -03:00
کامیت شده توسط Gopesh Bhardwaj
والد 637abbf8d7
کامیت 95ed584e6d
8 فایلهای تغییر یافته به همراه 25 افزوده شده و 25 حذف شده
+9 -16
مشاهده پرونده
@@ -195,10 +195,9 @@ class perfetto_plugin_t {
// Shuts down the perfetto tracing session and marks this plugin as invalid.
// Acts only when the plugin is currently valid, a tracing session exists, and
// internal_buffer_finished reads true (acquire load); otherwise it is a no-op.
// NOTE(review): rocprofiler_plugin_finalize appears to hold a
// std::lock_guard<std::mutex> on writing_lock while calling this method. If the
// explicit lock()/unlock() pair below is still present in the final file,
// re-locking a non-recursive std::mutex from the owning thread would hang
// (undefined behavior) -- confirm which side is meant to own the lock.
void delete_perfetto_plugin() {
if (is_valid_ && tracing_session_ && internal_buffer_finished.load(std::memory_order_acquire)) {
writing_lock.lock();
tracing_session_->StopBlocking();
// After this point IsValid()-style checks on is_valid_ see the plugin as unusable.
is_valid_ = false;
// close(file_descriptor_);
writing_lock.unlock();
}
}
@@ -232,8 +231,6 @@ class perfetto_plugin_t {
// ToDO: rename this variable?
if (!tracing_session_) rocprofiler::warning("Tracing session is deleted!\n");
writing_lock.lock();
const uint64_t device_id = profiler_record.gpu_id.handle;
const uint64_t queue_id = profiler_record.queue_id.handle;
const uint64_t correlation_id = profiler_record.correlation_id.value;
@@ -321,14 +318,12 @@ class perfetto_plugin_t {
TRACE_COUNTER("COUNTERS", counters_track, profiler_record.timestamps.end.value, 0);
}
writing_lock.unlock();
return 0;
}
int FlushTracerRecord(rocprofiler_record_tracer_t tracer_record,
rocprofiler_session_id_t session_id) {
if (!tracing_session_) rocprofiler::warning("Tracing session is deleted!\n");
writing_lock.lock();
uint64_t device_id = tracer_record.agent_id.handle;
const char* operation_name_c = nullptr;
// ROCTX domain Operation ID doesn't have a name
@@ -556,7 +551,6 @@ class perfetto_plugin_t {
break;
}
}
writing_lock.unlock();
return 0;
}
@@ -650,34 +644,32 @@ int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
rocprofiler_minor_version > ROCPROFILER_VERSION_MINOR)
return -1;
// std::lock_guard<std::mutex> lock(writing_lock);
writing_lock.lock();
std::lock_guard<std::mutex> lock(writing_lock);
if (perfetto_plugin != nullptr) return -1;
perfetto_plugin = new perfetto_plugin_t();
if (perfetto_plugin->IsValid()) {
writing_lock.unlock();
if (perfetto_plugin->IsValid())
return 0;
}
// delete perfetto_plugin;
// perfetto_plugin = nullptr;
writing_lock.unlock();
return -1;
}
void rocprofiler_plugin_finalize() {
void rocprofiler_plugin_finalize()
{
std::lock_guard<std::mutex> lock(writing_lock);
if (!perfetto_plugin) return;
perfetto_plugin->delete_perfetto_plugin();
// delete perfetto_plugin;
// perfetto_plugin = nullptr;
//perfetto_plugin = nullptr;
}
// Exported plugin entry point: hands the records delimited by `begin` and `end`
// to the perfetto plugin instance for the given session and buffer.
// Returns -1 when no plugin instance exists or the instance reports itself
// invalid; otherwise returns the result of WriteBufferRecords.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
// writing_lock is held for the validity check and the whole write call.
std::lock_guard<std::mutex> lock(writing_lock);
if (!perfetto_plugin || !perfetto_plugin->IsValid()) return -1;
return perfetto_plugin->WriteBufferRecords(begin, end, session_id, buffer_id);
}
@@ -685,6 +677,7 @@ ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
// Exported plugin entry point: forwards a single tracer record to the perfetto
// plugin instance via FlushTracerRecord, using a session id of 0.
// Returns 0 without doing anything when the record's header id handle is 0,
// -1 when no plugin instance exists or the instance reports itself invalid,
// and otherwise the result of FlushTracerRecord.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record) {
// Records with a zero id handle are skipped before the lock is taken.
if (record.header.id.handle == 0) return 0;
// writing_lock is held for the validity check and the flush call.
std::lock_guard<std::mutex> lock(writing_lock);
if (!perfetto_plugin || !perfetto_plugin->IsValid()) return -1;
return perfetto_plugin->FlushTracerRecord(record, rocprofiler_session_id_t{0});
}
@@ -109,7 +109,7 @@ class ROCProfiler_Singleton {
const Agent::DeviceInfo& GetDeviceInfo(uint64_t gpu_id);
rocprofiler_timestamp_t timestamp_ns();
private:
rocprofiler_session_id_t current_session_id_{0};
std::atomic<rocprofiler_session_id_t> current_session_id_{rocprofiler_session_id_t{0}};
std::mutex session_map_lock_;
std::map<uint64_t, Session*> sessions_;
std::atomic<uint64_t> records_counter_{0};
@@ -69,9 +69,6 @@ GenericBuffer::GenericBuffer(rocprofiler_session_id_t session_id, rocprofiler_bu
GenericBuffer::~GenericBuffer() {
if (is_valid_.load(std::memory_order_acquire)) {
std::lock_guard lock(buffer_lock_);
//rocprofiler::ROCProfiler_Singleton& instance = rocprofiler::ROCProfiler_Singleton::GetInstance();
//if (instance.GetSession(session_id_))
// instance.GetSession(session_id_)->DisableTools(id_);
Flush();
@@ -97,7 +94,7 @@ bool GenericBuffer::Flush() {
{
// Wait for the current operation to complete.
std::unique_lock consumer_lock(consumer_mutex_);
consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid; });
consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid || !consumerRunning; });
}
return true;
}
@@ -120,6 +117,7 @@ void GenericBuffer::ConsumerThreadLoop(std::promise<void> ready) {
std::unique_lock consumer_lock(consumer_mutex_);
// This consumer is now ready to accept work.
consumerRunning.store(true);
ready.set_value();
while (true) {
@@ -138,6 +136,7 @@ void GenericBuffer::ConsumerThreadLoop(std::promise<void> ready) {
consumer_arg_.valid = false;
consumer_cond_.notify_all();
}
consumerRunning.store(false);
}
void GenericBuffer::NotifyConsumerThread(const std::byte* data_begin, const std::byte* data_end) {
@@ -150,7 +149,7 @@ void GenericBuffer::NotifyConsumerThread(const std::byte* data_begin, const std:
// would be lost if multiple producers could enter this critical section
// (sequentially) before the consumer thread could re-acquire the
// consumer_mutex_ lock.
consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid; });
consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid || !consumerRunning; });
consumer_arg_.begin = data_begin;
consumer_arg_.end = data_end;
@@ -116,6 +116,7 @@ class GenericBuffer {
std::mutex& GetBufferLock();
private:
std::atomic<bool> consumerRunning{false};
void SwitchBuffers();
void ConsumerThreadLoop(std::promise<void> ready);
void NotifyConsumerThread(const std::byte* data_begin, const std::byte* data_end);
+4 -1
مشاهده پرونده
@@ -71,7 +71,10 @@ std::vector<att_pending_signal_t> AttTracer::MovePendingSignals(uint32_t writer_
std::lock_guard<std::mutex> lock(sessions_pending_signals_lock_);
auto it = sessions_pending_signals_.find(writer_id);
if (it == sessions_pending_signals_.end())
rocprofiler::fatal("writer_id is not found in the pending_signals");
{
rocprofiler::warning("writer_id is not found in the pending_signals");
return {};
}
auto move_pending = std::move(it->second);
sessions_pending_signals_.erase(writer_id);
@@ -145,7 +145,10 @@ std::vector<pending_signal_ptr_t> Profiler::MovePendingSignals(uint32_t writer_i
std::lock_guard<std::mutex> lock(sessions_pending_signals_lock_);
auto it = sessions_pending_signals_.find(writer_id);
if (it == sessions_pending_signals_.end())
rocprofiler::fatal("writer_id is not found in the pending_signals");
{
rocprofiler::warning("writer_id is not found in the pending_signals");
return {};
}
auto move_pending = std::move(it->second);
sessions_pending_signals_.erase(writer_id);
@@ -93,6 +93,7 @@ class Profiler {
void StartReplayPass(rocprofiler_session_id_t session_id);
void EndReplayPass();
bool HasActivePass();
std::atomic<bool> bIsSessionDestroying{false};
private:
std::mutex counter_names_lock_;
@@ -105,7 +106,6 @@ class Profiler {
std::mutex sessions_pending_signals_lock_;
std::map<uint32_t, std::vector<pending_signal_ptr_t>> sessions_pending_signals_{};
std::condition_variable has_session_pending_cv;
std::atomic<bool> bIsSessionDestroying{false};
};
} // namespace profiler
@@ -203,6 +203,7 @@ void Session::Start()
counters_sampler_->Start();
}
if (profiler_) profiler_->bIsSessionDestroying.store(false);
is_active_ = true;
}