SWDEV-474179: Fixing hang issue for perfetto and missing records for trace period
Change-Id: I8926565720873d7dd730c5518f60ac6521e3bbf5
This commit is contained in:
کامیت شده توسط
Gopesh Bhardwaj
والد
637abbf8d7
کامیت
95ed584e6d
@@ -195,10 +195,9 @@ class perfetto_plugin_t {
|
||||
|
||||
// Stops the Perfetto tracing session and marks this plugin instance as no longer valid.
// The body only runs when is_valid_ is set, tracing_session_ is non-null and
// internal_buffer_finished (read with acquire ordering) is true; otherwise it is a no-op.
// NOTE(review): this is a rendered diff hunk (-195,10 +195,9) without +/- markers, so
// removed and added lines appear side by side; confirm against the real file which survive.
void delete_perfetto_plugin() {
|
||||
if (is_valid_ && tracing_session_ && internal_buffer_finished.load(std::memory_order_acquire)) {
|
||||
// NOTE(review): rocprofiler_plugin_finalize() in this same diff takes a
// std::lock_guard<std::mutex> on writing_lock before calling this method. If this is
// that same mutex and this lock() is still present in the post-image, the call would
// relock a non-recursive std::mutex on the owning thread (undefined behaviour,
// typically a hang). Presumably this lock()/unlock() pair is what the commit removed
// — TODO confirm.
writing_lock.lock();
|
||||
// Synchronous stop of the session (going by the call name) — TODO confirm against Perfetto docs.
tracing_session_->StopBlocking();
|
||||
// From here on IsValid()-style checks on this instance see it as unusable.
is_valid_ = false;
|
||||
// close(file_descriptor_);
|
||||
writing_lock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -232,8 +231,6 @@ class perfetto_plugin_t {
|
||||
// TODO: rename this variable?
|
||||
if (!tracing_session_) rocprofiler::warning("Tracing session is deleted!\n");
|
||||
|
||||
writing_lock.lock();
|
||||
|
||||
const uint64_t device_id = profiler_record.gpu_id.handle;
|
||||
const uint64_t queue_id = profiler_record.queue_id.handle;
|
||||
const uint64_t correlation_id = profiler_record.correlation_id.value;
|
||||
@@ -321,14 +318,12 @@ class perfetto_plugin_t {
|
||||
TRACE_COUNTER("COUNTERS", counters_track, profiler_record.timestamps.end.value, 0);
|
||||
}
|
||||
|
||||
writing_lock.unlock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int FlushTracerRecord(rocprofiler_record_tracer_t tracer_record,
|
||||
rocprofiler_session_id_t session_id) {
|
||||
if (!tracing_session_) rocprofiler::warning("Tracing session is deleted!\n");
|
||||
writing_lock.lock();
|
||||
uint64_t device_id = tracer_record.agent_id.handle;
|
||||
const char* operation_name_c = nullptr;
|
||||
// ROCTX domain Operation ID doesn't have a name
|
||||
@@ -556,7 +551,6 @@ class perfetto_plugin_t {
|
||||
break;
|
||||
}
|
||||
}
|
||||
writing_lock.unlock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -650,34 +644,32 @@ int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
|
||||
rocprofiler_minor_version > ROCPROFILER_VERSION_MINOR)
|
||||
return -1;
|
||||
|
||||
// std::lock_guard<std::mutex> lock(writing_lock);
|
||||
writing_lock.lock();
|
||||
std::lock_guard<std::mutex> lock(writing_lock);
|
||||
if (perfetto_plugin != nullptr) return -1;
|
||||
|
||||
perfetto_plugin = new perfetto_plugin_t();
|
||||
if (perfetto_plugin->IsValid()) {
|
||||
writing_lock.unlock();
|
||||
if (perfetto_plugin->IsValid())
|
||||
return 0;
|
||||
}
|
||||
|
||||
// delete perfetto_plugin;
|
||||
// perfetto_plugin = nullptr;
|
||||
writing_lock.unlock();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Plugin finalize entry point: while holding writing_lock, returns immediately if no
// plugin instance exists, otherwise asks the instance to shut down via
// delete_perfetto_plugin(). The perfetto_plugin object itself is not deleted or reset
// here (those statements are commented out below).
// NOTE(review): the signature appears twice because this rendered diff shows the old
// one-line form and the new two-line form next to each other without +/- markers.
void rocprofiler_plugin_finalize() {
|
||||
|
||||
void rocprofiler_plugin_finalize()
|
||||
{
|
||||
// Held for the whole function, including the delete_perfetto_plugin() call.
std::lock_guard<std::mutex> lock(writing_lock);
|
||||
if (!perfetto_plugin) return;
|
||||
perfetto_plugin->delete_perfetto_plugin();
|
||||
// delete perfetto_plugin;
|
||||
// perfetto_plugin = nullptr;
|
||||
//perfetto_plugin = nullptr;
|
||||
}
|
||||
|
||||
// Exported plugin entry point that forwards a range of buffer records to the plugin
// instance.
// Parameters begin, end, session_id and buffer_id are passed through unchanged to
// perfetto_plugin_t::WriteBufferRecords.
// Returns -1 when no plugin instance exists or it reports !IsValid(); otherwise returns
// whatever WriteBufferRecords returns.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
|
||||
const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end,
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
|
||||
|
||||
// Held across both the validity check and the write call.
std::lock_guard<std::mutex> lock(writing_lock);
|
||||
if (!perfetto_plugin || !perfetto_plugin->IsValid()) return -1;
|
||||
return perfetto_plugin->WriteBufferRecords(begin, end, session_id, buffer_id);
|
||||
}
|
||||
@@ -685,6 +677,7 @@ ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
|
||||
// Exported plugin entry point that forwards a single tracer record to the plugin
// instance.
// Returns 0 without taking the lock when record.header.id.handle is 0, -1 when no
// plugin instance exists or it reports !IsValid(), and otherwise whatever
// FlushTracerRecord returns. The session id passed on is always
// rocprofiler_session_id_t{0}.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record) {
|
||||
// Records with a zero header handle are skipped and reported as success.
if (record.header.id.handle == 0) return 0;
|
||||
|
||||
// NOTE(review): FlushTracerRecord in this same diff also shows writing_lock.lock();
// with this guard held that line must be gone in the post-image or the call would
// relock the same std::mutex — presumably removed by this commit; TODO confirm.
std::lock_guard<std::mutex> lock(writing_lock);
|
||||
if (!perfetto_plugin || !perfetto_plugin->IsValid()) return -1;
|
||||
return perfetto_plugin->FlushTracerRecord(record, rocprofiler_session_id_t{0});
|
||||
}
|
||||
|
||||
@@ -109,7 +109,7 @@ class ROCProfiler_Singleton {
|
||||
const Agent::DeviceInfo& GetDeviceInfo(uint64_t gpu_id);
|
||||
rocprofiler_timestamp_t timestamp_ns();
|
||||
private:
|
||||
rocprofiler_session_id_t current_session_id_{0};
|
||||
std::atomic<rocprofiler_session_id_t> current_session_id_{rocprofiler_session_id_t{0}};
|
||||
std::mutex session_map_lock_;
|
||||
std::map<uint64_t, Session*> sessions_;
|
||||
std::atomic<uint64_t> records_counter_{0};
|
||||
|
||||
@@ -69,9 +69,6 @@ GenericBuffer::GenericBuffer(rocprofiler_session_id_t session_id, rocprofiler_bu
|
||||
GenericBuffer::~GenericBuffer() {
|
||||
if (is_valid_.load(std::memory_order_acquire)) {
|
||||
std::lock_guard lock(buffer_lock_);
|
||||
//rocprofiler::ROCProfiler_Singleton& instance = rocprofiler::ROCProfiler_Singleton::GetInstance();
|
||||
//if (instance.GetSession(session_id_))
|
||||
// instance.GetSession(session_id_)->DisableTools(id_);
|
||||
|
||||
Flush();
|
||||
|
||||
@@ -97,7 +94,7 @@ bool GenericBuffer::Flush() {
|
||||
{
|
||||
// Wait for the current operation to complete.
|
||||
std::unique_lock consumer_lock(consumer_mutex_);
|
||||
consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid; });
|
||||
consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid || !consumerRunning; });
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -120,6 +117,7 @@ void GenericBuffer::ConsumerThreadLoop(std::promise<void> ready) {
|
||||
std::unique_lock consumer_lock(consumer_mutex_);
|
||||
|
||||
// This consumer is now ready to accept work.
|
||||
consumerRunning.store(true);
|
||||
ready.set_value();
|
||||
|
||||
while (true) {
|
||||
@@ -138,6 +136,7 @@ void GenericBuffer::ConsumerThreadLoop(std::promise<void> ready) {
|
||||
consumer_arg_.valid = false;
|
||||
consumer_cond_.notify_all();
|
||||
}
|
||||
consumerRunning.store(false);
|
||||
}
|
||||
|
||||
void GenericBuffer::NotifyConsumerThread(const std::byte* data_begin, const std::byte* data_end) {
|
||||
@@ -150,7 +149,7 @@ void GenericBuffer::NotifyConsumerThread(const std::byte* data_begin, const std:
|
||||
// would be lost if multiple producers could enter this critical section
|
||||
// (sequentially) before the consumer thread could re-acquire the
|
||||
// consumer_mutex_ lock.
|
||||
consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid; });
|
||||
consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid || !consumerRunning; });
|
||||
|
||||
consumer_arg_.begin = data_begin;
|
||||
consumer_arg_.end = data_end;
|
||||
|
||||
@@ -116,6 +116,7 @@ class GenericBuffer {
|
||||
std::mutex& GetBufferLock();
|
||||
|
||||
private:
|
||||
std::atomic<bool> consumerRunning{false};
|
||||
void SwitchBuffers();
|
||||
void ConsumerThreadLoop(std::promise<void> ready);
|
||||
void NotifyConsumerThread(const std::byte* data_begin, const std::byte* data_end);
|
||||
|
||||
@@ -71,7 +71,10 @@ std::vector<att_pending_signal_t> AttTracer::MovePendingSignals(uint32_t writer_
|
||||
std::lock_guard<std::mutex> lock(sessions_pending_signals_lock_);
|
||||
auto it = sessions_pending_signals_.find(writer_id);
|
||||
if (it == sessions_pending_signals_.end())
|
||||
rocprofiler::fatal("writer_id is not found in the pending_signals");
|
||||
{
|
||||
rocprofiler::warning("writer_id is not found in the pending_signals");
|
||||
return {};
|
||||
}
|
||||
|
||||
auto move_pending = std::move(it->second);
|
||||
sessions_pending_signals_.erase(writer_id);
|
||||
|
||||
@@ -145,7 +145,10 @@ std::vector<pending_signal_ptr_t> Profiler::MovePendingSignals(uint32_t writer_i
|
||||
std::lock_guard<std::mutex> lock(sessions_pending_signals_lock_);
|
||||
auto it = sessions_pending_signals_.find(writer_id);
|
||||
if (it == sessions_pending_signals_.end())
|
||||
rocprofiler::fatal("writer_id is not found in the pending_signals");
|
||||
{
|
||||
rocprofiler::warning("writer_id is not found in the pending_signals");
|
||||
return {};
|
||||
}
|
||||
|
||||
auto move_pending = std::move(it->second);
|
||||
sessions_pending_signals_.erase(writer_id);
|
||||
|
||||
@@ -93,6 +93,7 @@ class Profiler {
|
||||
void StartReplayPass(rocprofiler_session_id_t session_id);
|
||||
void EndReplayPass();
|
||||
bool HasActivePass();
|
||||
std::atomic<bool> bIsSessionDestroying{false};
|
||||
|
||||
private:
|
||||
std::mutex counter_names_lock_;
|
||||
@@ -105,7 +106,6 @@ class Profiler {
|
||||
std::mutex sessions_pending_signals_lock_;
|
||||
std::map<uint32_t, std::vector<pending_signal_ptr_t>> sessions_pending_signals_{};
|
||||
std::condition_variable has_session_pending_cv;
|
||||
std::atomic<bool> bIsSessionDestroying{false};
|
||||
};
|
||||
|
||||
} // namespace profiler
|
||||
|
||||
@@ -203,6 +203,7 @@ void Session::Start()
|
||||
counters_sampler_->Start();
|
||||
}
|
||||
|
||||
if (profiler_) profiler_->bIsSessionDestroying.store(false);
|
||||
is_active_ = true;
|
||||
}
|
||||
|
||||
|
||||
مرجع در شماره جدید
Block a user