From 6b00964f32ee19fc7d5aef931f39aaf24353c488 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Tue, 20 Jan 2026 15:26:53 -0500 Subject: [PATCH] gda ionic: ccqe cleanup and error check (#389) Delete unreachable ccqe polling path, ionic_poll_wave_ccqe(). Move cqe error check to ionic_quiet_internal_ccqe(). Signed-off-by: Allen Hubbe --- src/gda/ionic/queue_pair_ionic.cpp | 55 ++++++++++++------------------ src/gda/queue_pair.hpp | 5 --- 2 files changed, 22 insertions(+), 38 deletions(-) diff --git a/src/gda/ionic/queue_pair_ionic.cpp b/src/gda/ionic/queue_pair_ionic.cpp index 164219c410..fd8a960380 100644 --- a/src/gda/ionic/queue_pair_ionic.cpp +++ b/src/gda/ionic/queue_pair_ionic.cpp @@ -68,40 +68,7 @@ __device__ uint32_t QueuePair::commit_sq(uint64_t activemask, uint32_t my_sq_pro return dbprod; } -__device__ void QueuePair::ionic_poll_wave_ccqe(uint64_t activemask) { - if (!is_first_active_lane(activemask)) { - return; - } - - struct ionic_v1_cqe *cqe = &ionic_cq_buf[0]; - uint32_t qtf_be = *(volatile uint32_t *)(&cqe->qid_type_flags); - uint32_t msn = byteswap(cqe->send.msg_msn); - - if (!!(qtf_be & byteswap(IONIC_V1_CQE_ERROR))) { -#if defined(DEBUG) - uint32_t qtf = byteswap(qtf_be); - uint32_t qid = qtf >> IONIC_V1_CQE_QID_SHIFT; - uint32_t type = (qtf >> IONIC_V1_CQE_TYPE_SHIFT) & IONIC_V1_CQE_TYPE_MASK; - uint32_t flag = qtf & 0xf; - uint32_t status = byteswap(cqe->status_length); - uint64_t npg = cqe->send.npg_wqe_idx_timestamp & IONIC_V1_CQE_WQE_IDX_MASK; - - printf("QUIET ERROR (CCQE): %s qid %u type %u flag %#x status %u msn %u npg %lu\n", - dev_name, qid, type, flag, status, msn, npg); -#endif - /* No other way to signal an error, so just crash. */ - abort(); - } - - sq_msn = msn; -} - __device__ void QueuePair::poll_wave_cqes(uint64_t activemask) { - if (!cq_mask) { - ionic_poll_wave_ccqe(activemask); - return; - } - uint32_t my_logical_lane_id = get_active_lane_num(activemask); uint32_t my_cq_pos = cq_pos + my_logical_lane_id; @@ -169,10 +136,32 @@ __device__ void QueuePair::ionic_quiet_internal_ccqe(uint64_t activemask, uint32 } volatile struct ionic_v1_cqe *cqe = &ionic_cq_buf[0]; + uint32_t qtf_be = cqe->qid_type_flags; uint32_t msn = byteswap(cqe->send.msg_msn); while ((msn - cons) & 0x800000) { + if (!!(qtf_be & byteswap(IONIC_V1_CQE_ERROR))) { + break; + } + + qtf_be = cqe->qid_type_flags; msn = byteswap(cqe->send.msg_msn); } + + if (!!(qtf_be & byteswap(IONIC_V1_CQE_ERROR))) { +#if defined(DEBUG) + uint32_t qtf = byteswap(qtf_be); + uint32_t qid = qtf >> IONIC_V1_CQE_QID_SHIFT; + uint32_t type = (qtf >> IONIC_V1_CQE_TYPE_SHIFT) & IONIC_V1_CQE_TYPE_MASK; + uint32_t flag = qtf & 0xf; + uint32_t status = byteswap(cqe->status_length); + uint64_t npg = cqe->send.npg_wqe_idx_timestamp & IONIC_V1_CQE_WQE_IDX_MASK; + + printf("QUIET ERROR (CCQE): %s qid %u type %u flag %#x status %u msn %u npg %lu\n", + dev_name, qid, type, flag, status, msn, npg); +#endif + /* No other way to signal an error, so just crash. */ + abort(); + } } __device__ void QueuePair::ionic_quiet_internal(uint64_t activemask, uint32_t cons) { diff --git a/src/gda/queue_pair.hpp b/src/gda/queue_pair.hpp index d6ddb2ae57..d360597067 100644 --- a/src/gda/queue_pair.hpp +++ b/src/gda/queue_pair.hpp @@ -364,11 +364,6 @@ class QueuePair { */ __device__ uint32_t commit_sq(uint64_t activemask, uint32_t my_sq_prod, uint32_t my_sq_pos, uint32_t num_wqes); - /** - * @brief Helper method to poll the ccqe. - */ - __device__ __attribute__((noinline)) void ionic_poll_wave_ccqe(uint64_t active_lane_mask); - /** * @brief Helper method to poll the next completion queue entry. */