From be6b8bb055019f9b2036a609da5975ceff31e45d Mon Sep 17 00:00:00 2001 From: Tony Tye Date: Tue, 10 Oct 2023 20:12:13 +0000 Subject: [PATCH] Correct intercept queue handling of the overflow queue The intercept queue was processing all the packets on the proxy queue. This could result in the rewrite of more than one packet being put on the overflow queue. If there are a lot of packets on the intercept queue this could result in the overflow queue having more packets than the size of the hardware queue. The code to submit the overflow queue fails if it is unable to put all the packets of the overflow queue on the hardware queue. This resulted in an infinite loop. It also resulted in an assert being reported that packets are being added to the overflow queue when it is not empty. Correct this by checking if the overflow queue is non-empty after rewriting each packet. If it is non-empty then stop processing additional packets. The additional packets will be processed when the barrier packet added to the hardware queue is executed due to its async handler. This barrier packet is added to the hardware queue whenever packets are saved on the overflow queue. Change-Id: I2537911d3c3ba1aac61a0a35f1ab97426a66b5a2 --- .../hsa-runtime/core/runtime/intercept_queue.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/runtime/hsa-runtime/core/runtime/intercept_queue.cpp b/runtime/hsa-runtime/core/runtime/intercept_queue.cpp index 766bd15843..3a00325db0 100644 --- a/runtime/hsa-runtime/core/runtime/intercept_queue.cpp +++ b/runtime/hsa-runtime/core/runtime/intercept_queue.cpp @@ -226,8 +226,8 @@ void InterceptQueue::StoreRelaxed(hsa_signal_value_t value) { // Loop over valid packets and process. uint64_t end = LoadWriteIndexAcquire(); - uint64_t i; - for (i = next_packet_; i < end; i++) { + uint64_t i = next_packet_; + while (i < end) { if (!ring[i & mask].IsValid()) break; // Process callbacks. 
@@ -238,6 +238,17 @@ void InterceptQueue::StoreRelaxed(hsa_signal_value_t value) { // Invalidate consumed packet atomic::Store(&ring[i & mask].dispatch.header, kInvalidHeader, std::memory_order_release); + + // Packet has now been processed so advance the read index. + ++i; + + // Only allow the rewrite of one packet to be on the overflow queue. When + // packets are put on the overflow queue a barrier packet will also be + // added which has an async handler that will ring the doorbell. That + // doorbell ring will ensure this function is re-invoked to put the + // overflow packets on the hardware queue and continue rewriting packets on + // the intercept queue. + if (!overflow_.empty()) break; } next_packet_ = i;