gda ionic: collapsed cqe (#345)

* util: dlsym optional helper

Like DLSYM_HELPER, but does not return if the symbol is not found.

Signed-off-by: Allen Hubbe <allen.hubbe@amd.com>

* gda ionic: sync dv and fw headers

Sync dv and fw headers to match out-of-tree libionic and firmware.

Signed-off-by: Allen Hubbe <allen.hubbe@amd.com>

* gda ionic: collapsed cqe

Detect and enable collapsed cqe if supported by drivers and firmware.
Fall back to regular completion queue.

Signed-off-by: Allen Hubbe <allen.hubbe@amd.com>

---------

Signed-off-by: Allen Hubbe <allen.hubbe@amd.com>
This commit is contained in:
Allen Hubbe
2026-01-06 20:42:15 -05:00
zatwierdzone przez GitHub
rodzic 82d91433c9
commit 1494c24f9a
9 zmienionych plików z 216 dodań i 51 usunięć
+6 -4
Wyświetl plik
@@ -1161,6 +1161,9 @@ void GDABackend::create_queues() {
if (gda_provider == GDAProvider::BNXT) {
bnxt_create_cqs(ncqes);
bnxt_create_qps(envvar::sq_size);
} else if (gda_provider == GDAProvider::IONIC) {
ionic_create_cqs(ncqes);
create_qps(envvar::sq_size);
} else {
create_cqs(ncqes);
create_qps(envvar::sq_size);
@@ -1266,6 +1269,9 @@ void GDABackend::create_cqs(int cqe) {
struct ibv_cq_init_attr_ex cq_attr;
struct ibv_cq_ex *cq_ex;
assert(gda_provider != GDAProvider::BNXT);
assert(gda_provider != GDAProvider::IONIC);
memset(&cq_attr, 0, sizeof(struct ibv_cq_init_attr_ex));
cq_attr.cqe = cqe;
cq_attr.cq_context = nullptr;
@@ -1276,10 +1282,6 @@ void GDABackend::create_cqs(int cqe) {
cq_attr.parent_domain = pd_parent;
for (int i = 0; i < qps.size(); i++) {
if (gda_provider == GDAProvider::IONIC) {
cq_attr.parent_domain = pd_uxdma[i & 1];
}
cq_ex = ibv.create_cq_ex(context, &cq_attr);
CHECK_NNULL(cq_ex, "ibv_create_cq_ex");
+1
Wyświetl plik
@@ -382,6 +382,7 @@ class GDABackend : public Backend {
*/
void create_cqs(int ncqes);
void bnxt_create_cqs(int ncqes);
void ionic_create_cqs(int ncqes);
/**
* @brief Create all QPs with a SQ of length sq_length
+41
Wyświetl plik
@@ -27,6 +27,46 @@
namespace rocshmem {
void GDABackend::ionic_create_cqs(int ncqes) {
struct ibv_cq_init_attr_ex cq_attr;
struct ionic_cq_init_attr_ex ionic_cq_attr;
memset(&cq_attr, 0, sizeof(cq_attr));
cq_attr.cqe = ncqes;
cq_attr.cq_context = nullptr;
cq_attr.channel = nullptr;
cq_attr.comp_vector = 0;
cq_attr.flags = 0;
cq_attr.comp_mask = IBV_CQ_INIT_ATTR_MASK_PD;
cq_attr.parent_domain = pd_parent;
memset(&ionic_cq_attr, 0, sizeof(ionic_cq_attr));
if (ionic_dv.create_cq_ex) {
ionic_cq_attr.comp_mask = IONIC_CQ_INIT_ATTR_MASK_FLAGS;
ionic_cq_attr.flags = IONIC_CQ_INIT_ATTR_CCQE;
}
for (int i = 0; i < qps.size(); i++) {
struct ibv_cq_ex *cq_ex = nullptr;
cq_attr.parent_domain = pd_uxdma[i & 1];
if (ionic_dv.create_cq_ex) {
cq_ex = ionic_dv.create_cq_ex(context, &cq_attr, &ionic_cq_attr);
// If cq_ex is nullptr, fallback to ibv_create_cq_ex below.
//CHECK_NNULL(cq_ex, "ionic_dv_create_cq_ex");
}
if (!cq_ex) {
cq_ex = ibv_create_cq_ex(context, &cq_attr);
CHECK_NNULL(cq_ex, "ibv_create_cq_ex");
}
cqs[i] = ibv.cq_ex_to_cq(cq_ex);
CHECK_NNULL(cqs[i], "ibv_cq_ex_to_cq");
}
}
void GDABackend::ionic_initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) {
ionic_dv_ctx dvctx;
ionic_dv.get_ctx(&dvctx, context);
@@ -115,6 +155,7 @@ int GDABackend::ionic_dv_dl_init() {
DLSYM_HELPER(ionic_dv, ionic_dv_, ionicdv_handle_, pd_set_sqcmb);
DLSYM_HELPER(ionic_dv, ionic_dv_, ionicdv_handle_, pd_set_rqcmb);
DLSYM_HELPER(ionic_dv, ionic_dv_, ionicdv_handle_, pd_set_udma_mask);
DLSYM_OPT_HELPER(ionic_dv, ionic_dv_, ionicdv_handle_, create_cq_ex);
return ROCSHMEM_SUCCESS;
}
+35
Wyświetl plik
@@ -155,6 +155,15 @@ int ionic_dv_pd_set_sqcmb(struct ibv_pd *ibpd, bool enable, bool expdb, bool req
*/
int ionic_dv_pd_set_rqcmb(struct ibv_pd *ibpd, bool enable, bool expdb, bool require);
/**
* ionic_dv_pd_set_expdb_mask - Specify expdb mask.
*
* Queues associated with this pd will attempt to have expdb on for WQE sizes
* other than default (and supported by the NIC).
*
* @mask - IONIC_EXPDB_* bitmap
*/
int ionic_dv_pd_set_expdb_mask(struct ibv_pd *ibpd, uint8_t mask);
/**
* ionic_dv_qp_set_gda - Enable or disable GPU-Direct Async (GDA) mode.
@@ -221,6 +230,32 @@ int ionic_dv_qp_get_send_dbell_data(struct ibv_qp *ibqp, uint64_t *dbdata);
int ionic_dv_qp_get_recv_dbell_data(struct ibv_qp *ibqp, uint64_t *dbdata);
enum ionic_cq_init_attr_mask {
IONIC_CQ_INIT_ATTR_MASK_FLAGS = 1 << 0,
};
enum ionic_cq_init_attr_flags {
IONIC_CQ_INIT_ATTR_CCQE = 1 << 0,
};
struct ionic_cq_init_attr_ex {
/* One or more flags of enum ionic_cq_init_attr_mask */
uint32_t comp_mask;
/* One or more flags of enum ionic_cq_init_attr_flags */
uint32_t flags;
};
/**
* ionic_dv_create_cq_ex - Create an IBV CQ with vendor-specific attributes.
*
* @ibctx - Context CQ will be attached to.
* @ex - IBV attributes to create the CQ with.
* @ionic_ex - Vendor-specific attributes to create the CQ with.
*/
struct ibv_cq_ex *ionic_dv_create_cq_ex(struct ibv_context *ibctx,
struct ibv_cq_init_attr_ex *ex,
struct ionic_cq_init_attr_ex *ionic_ex);
/**
* ionic_dv_get_ctx - Extract context information for gpu-initiated rdma.
*/
+64 -47
Wyświetl plik
@@ -138,22 +138,34 @@ union ionic_v1_pld {
__u8 data[32];
};
struct ionic_v1_cqe_send {
__u8 rsvd[4];
__be32 msg_msn;
__u8 rsvd2[8];
__le64 npg_wqe_idx_timestamp;
};
struct ionic_v1_cqe_recv {
__le64 wqe_idx_timestamp;
__be32 src_qpn_op;
__u8 src_mac[6];
__be16 vlan_tag;
__be32 imm_data_rkey;
};
struct ionic_v1_cqe_rcqe {
__be64 wqe_idx_timestamp;
__u8 rsvd[8];
__be32 seq_op_flags;
__be32 imm_data_rkey;
};
/* completion queue v1 cqe */
struct ionic_v1_cqe {
union {
struct {
__le64 wqe_idx_timestamp;
__be32 src_qpn_op;
__u8 src_mac[6];
__be16 vlan_tag;
__be32 imm_data_rkey;
} recv;
struct {
__u8 rsvd[4];
__be32 msg_msn;
__u8 rsvd2[8];
__le64 npg_wqe_idx_timestamp;
} send;
struct ionic_v1_cqe_send send;
struct ionic_v1_cqe_recv recv;
struct ionic_v1_cqe_rcqe rcqe;
};
__be32 status_length;
__be32 qid_type_flags;
@@ -165,6 +177,34 @@ enum ionic_v1_cqe_wqe_idx_timestamp_bits {
IONIC_V1_CQE_TIMESTAMP_SHIFT = 16,
};
/* bits for rcqe seq_op_flags */
enum ionic_v1_cqe_rcqe_op_flag_bits {
IONIC_V1_CQE_RCQE_SEQ_MASK = 0xffffff,
IONIC_V1_CQE_RCQE_FLAG_V = BIT(24),
IONIC_V1_CQE_RCQE_FLAG_I = BIT(25),
IONIC_V1_CQE_RCQE_OP_SHIFT = 28,
};
static inline uint32_t ionic_v1_rcqe_seq(uint32_t seq_opf)
{
return seq_opf & IONIC_V1_CQE_RCQE_SEQ_MASK;
}
static inline uint8_t ionic_v1_rcqe_op(uint32_t seq_opf)
{
return seq_opf >> IONIC_V1_CQE_RCQE_OP_SHIFT;
}
static inline bool ionic_v1_rcqe_valid(uint32_t seq_opf)
{
return seq_opf & IONIC_V1_CQE_RCQE_FLAG_V;
}
static inline bool ionic_v1_rcqe_ready(uint32_t seq_opf)
{
return seq_opf & IONIC_V1_CQE_RCQE_FLAG_I;
}
/* bits for cqe recv */
enum ionic_v1_cqe_src_qpn_bits {
IONIC_V1_CQE_RECV_QPN_MASK = 0xffffff,
@@ -194,7 +234,7 @@ enum ionic_v1_cqe_qtf_bits {
IONIC_V1_CQE_TYPE_RECV = 1,
IONIC_V1_CQE_TYPE_SEND_MSN = 2,
IONIC_V1_CQE_TYPE_SEND_NPG = 3,
IONIC_V1_CQE_TYPE_RECV_INDIR = 4,
IONIC_V1_CQE_TYPE_RECV_RCQE = 4,
};
#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_HCC__)
@@ -483,50 +523,27 @@ static inline int ionic_v1_use_spec_sge(int min_sge, int spec)
}
#define IONIC_RCQ_SIZE 4096
#define IONIC_RCQ_DEPTH 128
#define IONIC_RCQ_DEPTH_LOG2 7
#define IONIC_RCQ_STRIDE_LOG2 4
struct ionic_rcq_hdr {
uint8_t pad[60];
uint32_t seq_pad;
};
struct ionic_rcqe {
uint32_t status_length;
uint32_t imm_data;
uint32_t seq_flags;
uint32_t rsvd;
};
enum ionic_rcqe_flag {
IONIC_RCQE_C = BIT(7),
IONIC_RCQE_I = BIT(6),
__be32 seq;
__be32 ack;
};
struct ionic_rcq {
struct ionic_rcq_hdr hdr;
struct ionic_rcqe ring[IONIC_RCQ_DEPTH];
union {
uint8_t bytes[IONIC_RCQ_SIZE];
struct ionic_rcq_hdr hdr;
};
};
static inline uint32_t ionic_rcq_hdr_seq(struct ionic_rcq_hdr *hdr)
static inline uint32_t ionic_rcq_seq(struct ionic_rcq *rcq)
{
return be32toh(hdr->seq_pad) >> 8;
return be32toh(rcq->hdr.seq) & IONIC_V1_CQE_RCQE_SEQ_MASK;
}
static inline uint32_t ionic_rcqe_seq(struct ionic_rcqe *rcqe)
static inline void ionic_rcq_ack(struct ionic_rcq *rcq, uint32_t ack)
{
return be32toh(rcqe->seq_flags) >> 8;
}
static inline bool ionic_rcqe_color(struct ionic_rcqe *rcqe)
{
return !!(rcqe->seq_flags & htobe32(IONIC_RCQE_C));
}
static inline bool ionic_rcqe_imm(struct ionic_rcqe *rcqe)
{
return !!(rcqe->seq_flags & htobe32(IONIC_RCQE_I));
rcq->hdr.ack = htobe32(ack);
}
#endif // !defined(__cplusplus)
@@ -38,6 +38,9 @@ struct ionicdv_funcs_t {
int (*pd_set_sqcmb)(struct ibv_pd *ibpd, bool enable, bool expdb, bool require);
int (*pd_set_rqcmb)(struct ibv_pd *ibpd, bool enable, bool expdb, bool require);
int (*pd_set_udma_mask)(struct ibv_pd *ibpd, uint8_t udma_mask);
struct ibv_cq_ex *(*create_cq_ex)(struct ibv_context *ibctx,
struct ibv_cq_init_attr_ex *ex,
struct ionic_cq_init_attr_ex *ionic_ex);
};
#endif //LIBRARY_SRC_GDA_IONIC_GDA_PROVIDER_HPP_
+50
Wyświetl plik
@@ -68,7 +68,40 @@ __device__ uint32_t QueuePair::commit_sq(uint64_t activemask, uint32_t my_sq_pro
return dbprod;
}
__device__ void QueuePair::ionic_poll_wave_ccqe(uint64_t activemask) {
if (!is_first_active_lane(activemask)) {
return;
}
struct ionic_v1_cqe *cqe = &ionic_cq_buf[0];
uint32_t qtf_be = *(volatile uint32_t *)(&cqe->qid_type_flags);
uint32_t msn = swap_endian_val<uint32_t>(cqe->send.msg_msn);
if (!!(qtf_be & swap_endian_val<uint32_t>(IONIC_V1_CQE_ERROR))) {
#if defined(DEBUG)
uint32_t qtf = swap_endian_val<uint32_t>(qtf_be);
uint32_t qid = qtf >> IONIC_V1_CQE_QID_SHIFT;
uint32_t type = (qtf >> IONIC_V1_CQE_TYPE_SHIFT) & IONIC_V1_CQE_TYPE_MASK;
uint32_t flag = qtf & 0xf;
uint32_t status = swap_endian_val<uint32_t>(cqe->status_length);
uint64_t npg = cqe->send.npg_wqe_idx_timestamp & IONIC_V1_CQE_WQE_IDX_MASK;
printf("QUIET ERROR (CCQE): %s qid %u type %u flag %#x status %u msn %u npg %lu\n",
dev_name, qid, type, flag, status, msn, npg);
#endif
/* No other way to signal an error, so just crash. */
abort();
}
sq_msn = msn;
}
__device__ void QueuePair::poll_wave_cqes(uint64_t activemask) {
if (!cq_mask) {
ionic_poll_wave_ccqe(activemask);
return;
}
uint32_t my_logical_lane_id = get_active_lane_num(activemask);
uint32_t my_cq_pos = cq_pos + my_logical_lane_id;
@@ -130,9 +163,26 @@ __device__ void QueuePair::poll_wave_cqes(uint64_t activemask) {
sq_msn = msn;
}
__device__ void QueuePair::ionic_quiet_internal_ccqe(uint64_t activemask, uint32_t cons) {
if (!is_first_active_lane(activemask)) {
return;
}
volatile struct ionic_v1_cqe *cqe = &ionic_cq_buf[0];
uint32_t msn = swap_endian_val<uint32_t>(cqe->send.msg_msn);
while ((msn - cons) & 0x800000) {
msn = swap_endian_val<uint32_t>(cqe->send.msg_msn);
}
}
__device__ void QueuePair::ionic_quiet_internal(uint64_t activemask, uint32_t cons) {
uint32_t greed = 10;
if (!cq_mask) {
ionic_quiet_internal_ccqe(activemask, cons);
return;
}
/* wait for sq_msn to catch up or pass cons. */
/* 0x800000 - sign bit for 24-bit fields */
while ((sq_msn - cons) & 0x800000) {
+11
Wyświetl plik
@@ -364,11 +364,22 @@ class QueuePair {
*/
__device__ uint32_t commit_sq(uint64_t activemask, uint32_t my_sq_prod, uint32_t my_sq_pos, uint32_t num_wqes);
/**
* @brief Helper method to poll the ccqe.
*/
__device__ __attribute__((noinline)) void ionic_poll_wave_ccqe(uint64_t active_lane_mask);
/**
* @brief Helper method to poll the next completion queue entry.
*/
__device__ __attribute__((noinline)) void poll_wave_cqes(uint64_t active_lane_mask);
/**
* @brief Helper method to drain completion queue entries.
* @param cons wait for sq_msn to catch up to this position.
*/
__device__ __attribute__((noinline)) void ionic_quiet_internal_ccqe(uint64_t active_lane_mask, uint32_t cons);
/**
* @brief Helper method to drain completion queue entries.
* @param cons wait for sq_msn to catch up to this position.
+5
Wyświetl plik
@@ -124,6 +124,11 @@ namespace rocshmem {
#define STRINGIFY_NX(name) #name
#define STRINGIFY(name) STRINGIFY_NX(name)
#define DLSYM_OPT_HELPER(func_struct, prefix, handle, func_name) \
do { \
*(void **) (&func_struct.func_name) = dlsym(handle, STRINGIFY(PPCAT(prefix, func_name))); \
} while (0)
#define DLSYM_HELPER(func_struct, prefix, handle, func_name) \
do { \
*(void **) (&func_struct.func_name) = dlsym(handle, STRINGIFY(PPCAT(prefix, func_name))); \