SWDEV-559166 - Fix data races in GetSubmissionBatch, CaptureAndSet and SetQueueStatus (#1441)

Этот коммит содержится в:
Ioannis Assiouras
2025-10-23 12:18:31 +01:00
коммит произвёл GitHub
родитель e99bd0c783
Коммит 6d6b136374
9 изменённых файлов: 15 добавлений и 16 удалений
+1 -1
Просмотреть файл
@@ -265,9 +265,9 @@ void Device::destroyAllStreams() {
void Device::SyncAllStreams(bool cpu_wait, bool wait_blocking_streams_only) {
// Make a local copy to avoid stalls for GPU finish with multiple threads
std::vector<hip::Stream*> streams;
streams.reserve(streamSet.size());
{
std::shared_lock lock(streamSetLock);
streams.reserve(streamSet.size());
if (wait_blocking_streams_only) {
auto null_stream = GetNullStream();
for (auto it : streamSet) {
+1 -1
Просмотреть файл
@@ -500,7 +500,7 @@ public:
std::list<int> userEnabledPeers;
/// True if this device is active
bool isActive_;
std::atomic<bool> isActive_;
MemoryPool* default_mem_pool_; //!< Default memory pool for this device
+1 -1
Просмотреть файл
@@ -621,7 +621,7 @@ class VirtualGPU : public device::VirtualDevice {
//!< but ROC profiler expects D2H or H2D detection
int fence_state_; //!< Fence scope
//!< kUnknown/kFlushedToDevice/kFlushedToSystem
bool fence_dirty_; //!< Fence modified flag
std::atomic<bool> fence_dirty_; //!< Fence modified flag
std::atomic<uint> lastUsedSdmaEngineMask_; //!< Last Used SDMA Engine mask
uint64_t last_write_index_ = 0; //!< The last HW queue write index for any packet
+2 -3
Просмотреть файл
@@ -100,6 +100,7 @@ uint64_t Event::recordProfilingInfo(int32_t status, uint64_t timeStamp) {
// Global epoch time since the first processed command
uint64_t epoch = 0;
std::once_flag epoch_init;
// ================================================================================================
bool Event::setStatus(int32_t status, uint64_t timeStamp) {
assert(status <= CL_QUEUED && "invalid status");
@@ -112,9 +113,7 @@ bool Event::setStatus(int32_t status, uint64_t timeStamp) {
if (profilingInfo().enabled_) {
timeStamp = recordProfilingInfo(status, timeStamp);
if (epoch == 0) {
epoch = profilingInfo().queued_;
}
std::call_once(epoch_init, [&]{ epoch = profilingInfo().queued_;});
}
if (amd::IS_HIP) {
+2 -2
Просмотреть файл
@@ -83,7 +83,7 @@ class Event : public RuntimeObject {
private:
Monitor lock_;
Monitor notify_lock_; //!< Lock used for notification with direct dispatch only
mutable Monitor notify_lock_; //!< Lock used for notification with direct dispatch only
std::atomic<CallBackEntry*> callbacks_; //!< linked list of callback entries.
std::atomic<int32_t> status_; //!< current execution status.
@@ -219,7 +219,7 @@ class Event : public RuntimeObject {
void* HwEvent() const { return hw_event_; }
//! Returns notify even associated with the current command
Event* NotifyEvent() const { return notify_event_; }
Event* NotifyEvent() const {ScopedLock l(notify_lock_); return notify_event_; }
//! Get entry scope of the event
int32_t getCommandEntryScope() const {
+2 -2
Просмотреть файл
@@ -72,7 +72,7 @@ bool HostQueue::terminate() {
Command* lastCommand = getLastQueuedCommand(true);
if (lastCommand != nullptr) {
// Check if CPU batch wasn't flushed for completion with the last command
if (GetSubmissionBatch() != nullptr) {
if (GetSubmissionBatchSize() != 0) {
auto command = new Marker(*this, false);
if (command != nullptr) {
ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Marker queued to ensure finish");
@@ -187,7 +187,7 @@ void HostQueue::finish(bool cpu_wait) {
batchSize, cpu_wait, vdev()->isFenceDirty());
// Force marker if the batch wasn't sent for CPU update or fence is dirty
if (nullptr == command || (GetSubmissionBatch() != nullptr) || vdev()->isFenceDirty()) {
if (nullptr == command || (batchSize != 0)|| vdev()->isFenceDirty()) {
if (nullptr != command) {
command->release();
}
+5 -2
Просмотреть файл
@@ -252,7 +252,10 @@ class HostQueue : public CommandQueue {
Command* GetSubmissionBatch() const { return head_; }
//! Get the current batch size
size_t GetSubmissionBatchSize() const { return size_; }
size_t GetSubmissionBatchSize() const {
ScopedLock sl(vdev()->execution());
return size_;
}
//! Insert a command into the linked list of submitted commands
void FormSubmissionBatch(Command* command) {
@@ -319,7 +322,7 @@ class HostQueue : public CommandQueue {
size_t size_ = 0; //!< The current batch size
//! True if this command queue is active
bool isActive_;
std::atomic<bool> isActive_;
bool forceDestroy_ = false; //!< Destroy the queue in the current state
amd::SyncPolicy sync_policy_; //!< Used for controlling stream synchronization
-4
Просмотреть файл
@@ -124,7 +124,6 @@ bool KernelParameters::captureAndSet(void** kernelParams, address kernArgs, size
if (memArg != nullptr) {
memArg->retain();
}
desc.info_.rawPointer_ = true;
} else if (desc.type_ == T_SAMPLER) {
LogError("Cannot handle Sampler now");
return false;
@@ -161,10 +160,7 @@ bool KernelParameters::captureAndSet(void** kernelParams, address kernArgs, size
::memcpy(param, value, desc.size_);
break;
}
desc.info_.defined_ = true;
}
execInfoOffset_ = totalSize_;
return true;
}
+1
Просмотреть файл
@@ -169,6 +169,7 @@ class KernelParameters : protected HeapObject {
samplerObjects_ = reinterpret_cast<amd::Sampler**>(values_ + samplerObjOffset_);
queueObjOffset_ = samplerObjOffset_ + signature_.numSamplers() * sizeof(amd::Sampler*);
queueObjects_ = reinterpret_cast<amd::DeviceQueue**>(values_ + queueObjOffset_);
execInfoOffset_ = totalSize_;
address limit = reinterpret_cast<address>(&queueObjects_[signature_.numQueues()]);
::memset(values_, '\0', limit - values_);
}