Insert explicit memory fence before submitting doorbell

Ensure that the write index and ring buffer contents are visible
to the HW before sending the doorbell. The latter is a write-combined
MMIO store and must be ordered with prior cacehable non-MMIO stores.

Also be more explicit about memory semantics for doorbell stores.

Change-Id: Ie4d96a7ee2a507237a8dbe7705fdf234d62ce9ba


[ROCm/ROCR-Runtime commit: d5b4078072]
Этот коммит содержится в:
Jay Cornwall
2016-09-06 16:27:06 -05:00
родитель 5467ef0a59
Коммит 1b76d435b4
+10 -7
Просмотреть файл
@@ -405,11 +405,12 @@ void AqlQueue::StoreRelaxed(hsa_signal_value_t value) {
if (legacy_dispatch_id > amd_queue_.max_legacy_doorbell_dispatch_id_plus_1) {
// Record the most recent packet index used in a doorbell submission.
// This field will be interpreted as a write index upon HW queue connect.
// Must be visible to the HW before sending the doorbell to avoid a race.
// Make ring buffer visible to HW before updating write index.
atomic::Store(&amd_queue_.max_legacy_doorbell_dispatch_id_plus_1,
legacy_dispatch_id, std::memory_order_relaxed);
legacy_dispatch_id, std::memory_order_release);
// Write the dispatch id to the hardware MMIO doorbell.
// Make write index visible to HW before sending doorbell.
if (doorbell_type_ == 0) {
// The legacy GFXIP 7 hardware doorbell expects:
// 1. Packet index wrapped to a point within the ring buffer
@@ -417,18 +418,20 @@ void AqlQueue::StoreRelaxed(hsa_signal_value_t value) {
uint64_t queue_size_mask =
((1 + queue_full_workaround_) * amd_queue_.hsa_queue.size) - 1;
*(volatile uint32_t*)signal_.legacy_hardware_doorbell_ptr =
uint32_t((legacy_dispatch_id & queue_size_mask) *
(sizeof(core::AqlPacket) / sizeof(uint32_t)));
atomic::Store(signal_.legacy_hardware_doorbell_ptr,
uint32_t((legacy_dispatch_id & queue_size_mask) *
(sizeof(core::AqlPacket) / sizeof(uint32_t))),
std::memory_order_release);
} else if (doorbell_type_ == 1) {
*(volatile uint32_t*)signal_.legacy_hardware_doorbell_ptr =
uint32_t(legacy_dispatch_id);
atomic::Store(signal_.legacy_hardware_doorbell_ptr,
uint32_t(legacy_dispatch_id), std::memory_order_release);
} else {
assert(false && "Agent has unsupported doorbell semantics");
}
}
// Release spinlock protecting the legacy doorbell.
// Also ensures timely delivery of (write-combined) doorbell to HW.
atomic::Store(&amd_queue_.legacy_doorbell_lock, 0U,
std::memory_order_release);
}