From 1b76d435b4bc1004b556a8977322eb4c99e2bf34 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Tue, 6 Sep 2016 16:27:06 -0500 Subject: [PATCH] Insert explicit memory fence before submitting doorbell Ensure that the write index and ring buffer contents are visible to the HW before sending the doorbell. The latter is a write-combined MMIO store and must be ordered with prior cacehable non-MMIO stores. Also be more explicit about memory semantics for doorbell stores. Change-Id: Ie4d96a7ee2a507237a8dbe7705fdf234d62ce9ba [ROCm/ROCR-Runtime commit: d5b4078072619316f473847cc20d664a7ea8fa4e] --- .../hsa-runtime/core/runtime/amd_aql_queue.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp index 551dbed5df..c069c393dd 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp @@ -405,11 +405,12 @@ void AqlQueue::StoreRelaxed(hsa_signal_value_t value) { if (legacy_dispatch_id > amd_queue_.max_legacy_doorbell_dispatch_id_plus_1) { // Record the most recent packet index used in a doorbell submission. // This field will be interpreted as a write index upon HW queue connect. - // Must be visible to the HW before sending the doorbell to avoid a race. + // Make ring buffer visible to HW before updating write index. atomic::Store(&amd_queue_.max_legacy_doorbell_dispatch_id_plus_1, - legacy_dispatch_id, std::memory_order_relaxed); + legacy_dispatch_id, std::memory_order_release); // Write the dispatch id to the hardware MMIO doorbell. + // Make write index visible to HW before sending doorbell. if (doorbell_type_ == 0) { // The legacy GFXIP 7 hardware doorbell expects: // 1. Packet index wrapped to a point within the ring buffer @@ -417,18 +418,20 @@ void AqlQueue::StoreRelaxed(hsa_signal_value_t value) { uint64_t queue_size_mask = ((1 + queue_full_workaround_) * amd_queue_.hsa_queue.size) - 1; - *(volatile uint32_t*)signal_.legacy_hardware_doorbell_ptr = - uint32_t((legacy_dispatch_id & queue_size_mask) * - (sizeof(core::AqlPacket) / sizeof(uint32_t))); + atomic::Store(signal_.legacy_hardware_doorbell_ptr, + uint32_t((legacy_dispatch_id & queue_size_mask) * + (sizeof(core::AqlPacket) / sizeof(uint32_t))), + std::memory_order_release); } else if (doorbell_type_ == 1) { - *(volatile uint32_t*)signal_.legacy_hardware_doorbell_ptr = - uint32_t(legacy_dispatch_id); + atomic::Store(signal_.legacy_hardware_doorbell_ptr, + uint32_t(legacy_dispatch_id), std::memory_order_release); } else { assert(false && "Agent has unsupported doorbell semantics"); } } // Release spinlock protecting the legacy doorbell. + // Also ensures timely delivery of (write-combined) doorbell to HW. atomic::Store(&amd_queue_.legacy_doorbell_lock, 0U, std::memory_order_release); }