SWDEV-559166 - Fix data races in GetSubmissionBatch, CaptureAndSet and SetQueueStatus (#1441)
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
e99bd0c783
Коммит
6d6b136374
@@ -265,9 +265,9 @@ void Device::destroyAllStreams() {
|
||||
void Device::SyncAllStreams(bool cpu_wait, bool wait_blocking_streams_only) {
|
||||
// Make a local copy to avoid stalls for GPU finish with multiple threads
|
||||
std::vector<hip::Stream*> streams;
|
||||
streams.reserve(streamSet.size());
|
||||
{
|
||||
std::shared_lock lock(streamSetLock);
|
||||
streams.reserve(streamSet.size());
|
||||
if (wait_blocking_streams_only) {
|
||||
auto null_stream = GetNullStream();
|
||||
for (auto it : streamSet) {
|
||||
|
||||
@@ -500,7 +500,7 @@ public:
|
||||
std::list<int> userEnabledPeers;
|
||||
|
||||
/// True if this device is active
|
||||
bool isActive_;
|
||||
std::atomic<bool> isActive_;
|
||||
|
||||
|
||||
MemoryPool* default_mem_pool_; //!< Default memory pool for this device
|
||||
|
||||
@@ -621,7 +621,7 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
//!< but ROC profiler expects D2H or H2D detection
|
||||
int fence_state_; //!< Fence scope
|
||||
//!< kUnknown/kFlushedToDevice/kFlushedToSystem
|
||||
bool fence_dirty_; //!< Fence modified flag
|
||||
std::atomic<bool> fence_dirty_; //!< Fence modified flag
|
||||
|
||||
std::atomic<uint> lastUsedSdmaEngineMask_; //!< Last Used SDMA Engine mask
|
||||
uint64_t last_write_index_ = 0; //!< The last HW queue write index for any packet
|
||||
|
||||
@@ -100,6 +100,7 @@ uint64_t Event::recordProfilingInfo(int32_t status, uint64_t timeStamp) {
|
||||
|
||||
// Global epoch time since the first processed command
|
||||
uint64_t epoch = 0;
|
||||
std::once_flag epoch_init;
|
||||
// ================================================================================================
|
||||
bool Event::setStatus(int32_t status, uint64_t timeStamp) {
|
||||
assert(status <= CL_QUEUED && "invalid status");
|
||||
@@ -112,9 +113,7 @@ bool Event::setStatus(int32_t status, uint64_t timeStamp) {
|
||||
|
||||
if (profilingInfo().enabled_) {
|
||||
timeStamp = recordProfilingInfo(status, timeStamp);
|
||||
if (epoch == 0) {
|
||||
epoch = profilingInfo().queued_;
|
||||
}
|
||||
std::call_once(epoch_init, [&]{ epoch = profilingInfo().queued_;});
|
||||
}
|
||||
|
||||
if (amd::IS_HIP) {
|
||||
|
||||
@@ -83,7 +83,7 @@ class Event : public RuntimeObject {
|
||||
|
||||
private:
|
||||
Monitor lock_;
|
||||
Monitor notify_lock_; //!< Lock used for notification with direct dispatch only
|
||||
mutable Monitor notify_lock_; //!< Lock used for notification with direct dispatch only
|
||||
|
||||
std::atomic<CallBackEntry*> callbacks_; //!< linked list of callback entries.
|
||||
std::atomic<int32_t> status_; //!< current execution status.
|
||||
@@ -219,7 +219,7 @@ class Event : public RuntimeObject {
|
||||
void* HwEvent() const { return hw_event_; }
|
||||
|
||||
//! Returns notify event associated with the current command
|
||||
Event* NotifyEvent() const { return notify_event_; }
|
||||
Event* NotifyEvent() const {ScopedLock l(notify_lock_); return notify_event_; }
|
||||
|
||||
//! Get entry scope of the event
|
||||
int32_t getCommandEntryScope() const {
|
||||
|
||||
@@ -72,7 +72,7 @@ bool HostQueue::terminate() {
|
||||
Command* lastCommand = getLastQueuedCommand(true);
|
||||
if (lastCommand != nullptr) {
|
||||
// Check if CPU batch wasn't flushed for completion with the last command
|
||||
if (GetSubmissionBatch() != nullptr) {
|
||||
if (GetSubmissionBatchSize() != 0) {
|
||||
auto command = new Marker(*this, false);
|
||||
if (command != nullptr) {
|
||||
ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Marker queued to ensure finish");
|
||||
@@ -187,7 +187,7 @@ void HostQueue::finish(bool cpu_wait) {
|
||||
batchSize, cpu_wait, vdev()->isFenceDirty());
|
||||
|
||||
// Force marker if the batch wasn't sent for CPU update or fence is dirty
|
||||
if (nullptr == command || (GetSubmissionBatch() != nullptr) || vdev()->isFenceDirty()) {
|
||||
if (nullptr == command || (batchSize != 0)|| vdev()->isFenceDirty()) {
|
||||
if (nullptr != command) {
|
||||
command->release();
|
||||
}
|
||||
|
||||
@@ -252,7 +252,10 @@ class HostQueue : public CommandQueue {
|
||||
Command* GetSubmissionBatch() const { return head_; }
|
||||
|
||||
//! Get the current batch size
|
||||
size_t GetSubmissionBatchSize() const { return size_; }
|
||||
size_t GetSubmissionBatchSize() const {
|
||||
ScopedLock sl(vdev()->execution());
|
||||
return size_;
|
||||
}
|
||||
|
||||
//! Insert a command into the linked list of submitted commands
|
||||
void FormSubmissionBatch(Command* command) {
|
||||
@@ -319,7 +322,7 @@ class HostQueue : public CommandQueue {
|
||||
size_t size_ = 0; //!< The current batch size
|
||||
|
||||
//! True if this command queue is active
|
||||
bool isActive_;
|
||||
std::atomic<bool> isActive_;
|
||||
bool forceDestroy_ = false; //!< Destroy the queue in the current state
|
||||
|
||||
amd::SyncPolicy sync_policy_; //!< Used for controlling stream synchronization
|
||||
|
||||
@@ -124,7 +124,6 @@ bool KernelParameters::captureAndSet(void** kernelParams, address kernArgs, size
|
||||
if (memArg != nullptr) {
|
||||
memArg->retain();
|
||||
}
|
||||
desc.info_.rawPointer_ = true;
|
||||
} else if (desc.type_ == T_SAMPLER) {
|
||||
LogError("Cannot handle Sampler now");
|
||||
return false;
|
||||
@@ -161,10 +160,7 @@ bool KernelParameters::captureAndSet(void** kernelParams, address kernArgs, size
|
||||
::memcpy(param, value, desc.size_);
|
||||
break;
|
||||
}
|
||||
desc.info_.defined_ = true;
|
||||
}
|
||||
|
||||
execInfoOffset_ = totalSize_;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -169,6 +169,7 @@ class KernelParameters : protected HeapObject {
|
||||
samplerObjects_ = reinterpret_cast<amd::Sampler**>(values_ + samplerObjOffset_);
|
||||
queueObjOffset_ = samplerObjOffset_ + signature_.numSamplers() * sizeof(amd::Sampler*);
|
||||
queueObjects_ = reinterpret_cast<amd::DeviceQueue**>(values_ + queueObjOffset_);
|
||||
execInfoOffset_ = totalSize_;
|
||||
address limit = reinterpret_cast<address>(&queueObjects_[signature_.numQueues()]);
|
||||
::memset(values_, '\0', limit - values_);
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user