gda ionic: collapsed cqe (#345)
* util: dlsym optional helper Like DLSYM_HELPER, but does not return if the symbol is not found. Signed-off-by: Allen Hubbe <allen.hubbe@amd.com> * gda ionic: sync dv and fw headers Sync dv and fw headers to match out-of-tree libionic and firmware. Signed-off-by: Allen Hubbe <allen.hubbe@amd.com> * gda ionic: collapsed cqe Detect and enable collapsed cqe if supported by drivers and firmware. Fall back to regular completion queue. Signed-off-by: Allen Hubbe <allen.hubbe@amd.com> --------- Signed-off-by: Allen Hubbe <allen.hubbe@amd.com>
Этот коммит содержится в:
@@ -1161,6 +1161,9 @@ void GDABackend::create_queues() {
|
||||
if (gda_provider == GDAProvider::BNXT) {
|
||||
bnxt_create_cqs(ncqes);
|
||||
bnxt_create_qps(envvar::sq_size);
|
||||
} else if (gda_provider == GDAProvider::IONIC) {
|
||||
ionic_create_cqs(ncqes);
|
||||
create_qps(envvar::sq_size);
|
||||
} else {
|
||||
create_cqs(ncqes);
|
||||
create_qps(envvar::sq_size);
|
||||
@@ -1266,6 +1269,9 @@ void GDABackend::create_cqs(int cqe) {
|
||||
struct ibv_cq_init_attr_ex cq_attr;
|
||||
struct ibv_cq_ex *cq_ex;
|
||||
|
||||
assert(gda_provider != GDAProvider::BNXT);
|
||||
assert(gda_provider != GDAProvider::IONIC);
|
||||
|
||||
memset(&cq_attr, 0, sizeof(struct ibv_cq_init_attr_ex));
|
||||
cq_attr.cqe = cqe;
|
||||
cq_attr.cq_context = nullptr;
|
||||
@@ -1276,10 +1282,6 @@ void GDABackend::create_cqs(int cqe) {
|
||||
cq_attr.parent_domain = pd_parent;
|
||||
|
||||
for (int i = 0; i < qps.size(); i++) {
|
||||
if (gda_provider == GDAProvider::IONIC) {
|
||||
cq_attr.parent_domain = pd_uxdma[i & 1];
|
||||
}
|
||||
|
||||
cq_ex = ibv.create_cq_ex(context, &cq_attr);
|
||||
CHECK_NNULL(cq_ex, "ibv_create_cq_ex");
|
||||
|
||||
|
||||
@@ -382,6 +382,7 @@ class GDABackend : public Backend {
|
||||
*/
|
||||
void create_cqs(int ncqes);
|
||||
void bnxt_create_cqs(int ncqes);
|
||||
void ionic_create_cqs(int ncqes);
|
||||
|
||||
/**
|
||||
* @brief Create all QPs with a SQ of length sq_length
|
||||
|
||||
@@ -27,6 +27,46 @@
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
void GDABackend::ionic_create_cqs(int ncqes) {
|
||||
struct ibv_cq_init_attr_ex cq_attr;
|
||||
struct ionic_cq_init_attr_ex ionic_cq_attr;
|
||||
|
||||
memset(&cq_attr, 0, sizeof(cq_attr));
|
||||
cq_attr.cqe = ncqes;
|
||||
cq_attr.cq_context = nullptr;
|
||||
cq_attr.channel = nullptr;
|
||||
cq_attr.comp_vector = 0;
|
||||
cq_attr.flags = 0;
|
||||
cq_attr.comp_mask = IBV_CQ_INIT_ATTR_MASK_PD;
|
||||
cq_attr.parent_domain = pd_parent;
|
||||
|
||||
memset(&ionic_cq_attr, 0, sizeof(ionic_cq_attr));
|
||||
if (ionic_dv.create_cq_ex) {
|
||||
ionic_cq_attr.comp_mask = IONIC_CQ_INIT_ATTR_MASK_FLAGS;
|
||||
ionic_cq_attr.flags = IONIC_CQ_INIT_ATTR_CCQE;
|
||||
}
|
||||
|
||||
for (int i = 0; i < qps.size(); i++) {
|
||||
struct ibv_cq_ex *cq_ex = nullptr;
|
||||
|
||||
cq_attr.parent_domain = pd_uxdma[i & 1];
|
||||
|
||||
if (ionic_dv.create_cq_ex) {
|
||||
cq_ex = ionic_dv.create_cq_ex(context, &cq_attr, &ionic_cq_attr);
|
||||
// If cq_ex is nullptr, fallback to ibv_create_cq_ex below.
|
||||
//CHECK_NNULL(cq_ex, "ionic_dv_create_cq_ex");
|
||||
}
|
||||
|
||||
if (!cq_ex) {
|
||||
cq_ex = ibv_create_cq_ex(context, &cq_attr);
|
||||
CHECK_NNULL(cq_ex, "ibv_create_cq_ex");
|
||||
}
|
||||
|
||||
cqs[i] = ibv.cq_ex_to_cq(cq_ex);
|
||||
CHECK_NNULL(cqs[i], "ibv_cq_ex_to_cq");
|
||||
}
|
||||
}
|
||||
|
||||
void GDABackend::ionic_initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) {
|
||||
ionic_dv_ctx dvctx;
|
||||
ionic_dv.get_ctx(&dvctx, context);
|
||||
@@ -115,6 +155,7 @@ int GDABackend::ionic_dv_dl_init() {
|
||||
DLSYM_HELPER(ionic_dv, ionic_dv_, ionicdv_handle_, pd_set_sqcmb);
|
||||
DLSYM_HELPER(ionic_dv, ionic_dv_, ionicdv_handle_, pd_set_rqcmb);
|
||||
DLSYM_HELPER(ionic_dv, ionic_dv_, ionicdv_handle_, pd_set_udma_mask);
|
||||
DLSYM_OPT_HELPER(ionic_dv, ionic_dv_, ionicdv_handle_, create_cq_ex);
|
||||
|
||||
return ROCSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -155,6 +155,15 @@ int ionic_dv_pd_set_sqcmb(struct ibv_pd *ibpd, bool enable, bool expdb, bool req
|
||||
*/
|
||||
int ionic_dv_pd_set_rqcmb(struct ibv_pd *ibpd, bool enable, bool expdb, bool require);
|
||||
|
||||
/**
|
||||
* ionic_dv_pd_set_expdb_mask - Specify expdb mask.
|
||||
*
|
||||
* Queues associated with this pd will attempt to have expdb on for WQE sizes
|
||||
* other than default (and supported by the NIC).
|
||||
*
|
||||
* @mask - IONIC_EXPDB_* bitmap
|
||||
*/
|
||||
int ionic_dv_pd_set_expdb_mask(struct ibv_pd *ibpd, uint8_t mask);
|
||||
|
||||
/**
|
||||
* ionic_dv_qp_set_gda - Enable or disable GPU-Direct Async (GDA) mode.
|
||||
@@ -221,6 +230,32 @@ int ionic_dv_qp_get_send_dbell_data(struct ibv_qp *ibqp, uint64_t *dbdata);
|
||||
int ionic_dv_qp_get_recv_dbell_data(struct ibv_qp *ibqp, uint64_t *dbdata);
|
||||
|
||||
|
||||
enum ionic_cq_init_attr_mask {
|
||||
IONIC_CQ_INIT_ATTR_MASK_FLAGS = 1 << 0,
|
||||
};
|
||||
|
||||
enum ionic_cq_init_attr_flags {
|
||||
IONIC_CQ_INIT_ATTR_CCQE = 1 << 0,
|
||||
};
|
||||
|
||||
struct ionic_cq_init_attr_ex {
|
||||
/* One or more flags of enum ionic_cq_init_attr_mask */
|
||||
uint32_t comp_mask;
|
||||
/* One or more flags of enum ionic_cq_init_attr_flags */
|
||||
uint32_t flags;
|
||||
};
|
||||
|
||||
/**
|
||||
* ionic_dv_create_cq_ex - Create an IBV CQ with vendor-specific attributes.
|
||||
*
|
||||
* @ibctx - Context CQ will be attached to.
|
||||
* @ex - IBV attributes to create the CQ with.
|
||||
* @ionic_ex - Vendor-specific attributes to create the CQ with.
|
||||
*/
|
||||
struct ibv_cq_ex *ionic_dv_create_cq_ex(struct ibv_context *ibctx,
|
||||
struct ibv_cq_init_attr_ex *ex,
|
||||
struct ionic_cq_init_attr_ex *ionic_ex);
|
||||
|
||||
/**
|
||||
* ionic_dv_get_ctx - Extract context information for gpu-initiated rdma.
|
||||
*/
|
||||
|
||||
@@ -138,22 +138,34 @@ union ionic_v1_pld {
|
||||
__u8 data[32];
|
||||
};
|
||||
|
||||
struct ionic_v1_cqe_send {
|
||||
__u8 rsvd[4];
|
||||
__be32 msg_msn;
|
||||
__u8 rsvd2[8];
|
||||
__le64 npg_wqe_idx_timestamp;
|
||||
};
|
||||
|
||||
struct ionic_v1_cqe_recv {
|
||||
__le64 wqe_idx_timestamp;
|
||||
__be32 src_qpn_op;
|
||||
__u8 src_mac[6];
|
||||
__be16 vlan_tag;
|
||||
__be32 imm_data_rkey;
|
||||
};
|
||||
|
||||
struct ionic_v1_cqe_rcqe {
|
||||
__be64 wqe_idx_timestamp;
|
||||
__u8 rsvd[8];
|
||||
__be32 seq_op_flags;
|
||||
__be32 imm_data_rkey;
|
||||
};
|
||||
|
||||
/* completion queue v1 cqe */
|
||||
struct ionic_v1_cqe {
|
||||
union {
|
||||
struct {
|
||||
__le64 wqe_idx_timestamp;
|
||||
__be32 src_qpn_op;
|
||||
__u8 src_mac[6];
|
||||
__be16 vlan_tag;
|
||||
__be32 imm_data_rkey;
|
||||
} recv;
|
||||
struct {
|
||||
__u8 rsvd[4];
|
||||
__be32 msg_msn;
|
||||
__u8 rsvd2[8];
|
||||
__le64 npg_wqe_idx_timestamp;
|
||||
} send;
|
||||
struct ionic_v1_cqe_send send;
|
||||
struct ionic_v1_cqe_recv recv;
|
||||
struct ionic_v1_cqe_rcqe rcqe;
|
||||
};
|
||||
__be32 status_length;
|
||||
__be32 qid_type_flags;
|
||||
@@ -165,6 +177,34 @@ enum ionic_v1_cqe_wqe_idx_timestamp_bits {
|
||||
IONIC_V1_CQE_TIMESTAMP_SHIFT = 16,
|
||||
};
|
||||
|
||||
/* bits for rcqe seq_op_flags */
|
||||
enum ionic_v1_cqe_rcqe_op_flag_bits {
|
||||
IONIC_V1_CQE_RCQE_SEQ_MASK = 0xffffff,
|
||||
IONIC_V1_CQE_RCQE_FLAG_V = BIT(24),
|
||||
IONIC_V1_CQE_RCQE_FLAG_I = BIT(25),
|
||||
IONIC_V1_CQE_RCQE_OP_SHIFT = 28,
|
||||
};
|
||||
|
||||
static inline uint32_t ionic_v1_rcqe_seq(uint32_t seq_opf)
|
||||
{
|
||||
return seq_opf & IONIC_V1_CQE_RCQE_SEQ_MASK;
|
||||
}
|
||||
|
||||
static inline uint8_t ionic_v1_rcqe_op(uint32_t seq_opf)
|
||||
{
|
||||
return seq_opf >> IONIC_V1_CQE_RCQE_OP_SHIFT;
|
||||
}
|
||||
|
||||
static inline bool ionic_v1_rcqe_valid(uint32_t seq_opf)
|
||||
{
|
||||
return seq_opf & IONIC_V1_CQE_RCQE_FLAG_V;
|
||||
}
|
||||
|
||||
static inline bool ionic_v1_rcqe_ready(uint32_t seq_opf)
|
||||
{
|
||||
return seq_opf & IONIC_V1_CQE_RCQE_FLAG_I;
|
||||
}
|
||||
|
||||
/* bits for cqe recv */
|
||||
enum ionic_v1_cqe_src_qpn_bits {
|
||||
IONIC_V1_CQE_RECV_QPN_MASK = 0xffffff,
|
||||
@@ -194,7 +234,7 @@ enum ionic_v1_cqe_qtf_bits {
|
||||
IONIC_V1_CQE_TYPE_RECV = 1,
|
||||
IONIC_V1_CQE_TYPE_SEND_MSN = 2,
|
||||
IONIC_V1_CQE_TYPE_SEND_NPG = 3,
|
||||
IONIC_V1_CQE_TYPE_RECV_INDIR = 4,
|
||||
IONIC_V1_CQE_TYPE_RECV_RCQE = 4,
|
||||
};
|
||||
|
||||
#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_HCC__)
|
||||
@@ -483,50 +523,27 @@ static inline int ionic_v1_use_spec_sge(int min_sge, int spec)
|
||||
}
|
||||
|
||||
#define IONIC_RCQ_SIZE 4096
|
||||
#define IONIC_RCQ_DEPTH 128
|
||||
#define IONIC_RCQ_DEPTH_LOG2 7
|
||||
#define IONIC_RCQ_STRIDE_LOG2 4
|
||||
|
||||
struct ionic_rcq_hdr {
|
||||
uint8_t pad[60];
|
||||
uint32_t seq_pad;
|
||||
};
|
||||
|
||||
struct ionic_rcqe {
|
||||
uint32_t status_length;
|
||||
uint32_t imm_data;
|
||||
uint32_t seq_flags;
|
||||
uint32_t rsvd;
|
||||
};
|
||||
|
||||
enum ionic_rcqe_flag {
|
||||
IONIC_RCQE_C = BIT(7),
|
||||
IONIC_RCQE_I = BIT(6),
|
||||
__be32 seq;
|
||||
__be32 ack;
|
||||
};
|
||||
|
||||
struct ionic_rcq {
|
||||
struct ionic_rcq_hdr hdr;
|
||||
struct ionic_rcqe ring[IONIC_RCQ_DEPTH];
|
||||
union {
|
||||
uint8_t bytes[IONIC_RCQ_SIZE];
|
||||
struct ionic_rcq_hdr hdr;
|
||||
};
|
||||
};
|
||||
|
||||
static inline uint32_t ionic_rcq_hdr_seq(struct ionic_rcq_hdr *hdr)
|
||||
static inline uint32_t ionic_rcq_seq(struct ionic_rcq *rcq)
|
||||
{
|
||||
return be32toh(hdr->seq_pad) >> 8;
|
||||
return be32toh(rcq->hdr.seq) & IONIC_V1_CQE_RCQE_SEQ_MASK;
|
||||
}
|
||||
|
||||
static inline uint32_t ionic_rcqe_seq(struct ionic_rcqe *rcqe)
|
||||
static inline void ionic_rcq_ack(struct ionic_rcq *rcq, uint32_t ack)
|
||||
{
|
||||
return be32toh(rcqe->seq_flags) >> 8;
|
||||
}
|
||||
|
||||
static inline bool ionic_rcqe_color(struct ionic_rcqe *rcqe)
|
||||
{
|
||||
return !!(rcqe->seq_flags & htobe32(IONIC_RCQE_C));
|
||||
}
|
||||
|
||||
static inline bool ionic_rcqe_imm(struct ionic_rcqe *rcqe)
|
||||
{
|
||||
return !!(rcqe->seq_flags & htobe32(IONIC_RCQE_I));
|
||||
rcq->hdr.ack = htobe32(ack);
|
||||
}
|
||||
|
||||
#endif // !defined(__cplusplus)
|
||||
|
||||
@@ -38,6 +38,9 @@ struct ionicdv_funcs_t {
|
||||
int (*pd_set_sqcmb)(struct ibv_pd *ibpd, bool enable, bool expdb, bool require);
|
||||
int (*pd_set_rqcmb)(struct ibv_pd *ibpd, bool enable, bool expdb, bool require);
|
||||
int (*pd_set_udma_mask)(struct ibv_pd *ibpd, uint8_t udma_mask);
|
||||
struct ibv_cq_ex *(*create_cq_ex)(struct ibv_context *ibctx,
|
||||
struct ibv_cq_init_attr_ex *ex,
|
||||
struct ionic_cq_init_attr_ex *ionic_ex);
|
||||
};
|
||||
|
||||
#endif //LIBRARY_SRC_GDA_IONIC_GDA_PROVIDER_HPP_
|
||||
|
||||
@@ -68,7 +68,40 @@ __device__ uint32_t QueuePair::commit_sq(uint64_t activemask, uint32_t my_sq_pro
|
||||
return dbprod;
|
||||
}
|
||||
|
||||
__device__ void QueuePair::ionic_poll_wave_ccqe(uint64_t activemask) {
|
||||
if (!is_first_active_lane(activemask)) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct ionic_v1_cqe *cqe = &ionic_cq_buf[0];
|
||||
uint32_t qtf_be = *(volatile uint32_t *)(&cqe->qid_type_flags);
|
||||
uint32_t msn = swap_endian_val<uint32_t>(cqe->send.msg_msn);
|
||||
|
||||
if (!!(qtf_be & swap_endian_val<uint32_t>(IONIC_V1_CQE_ERROR))) {
|
||||
#if defined(DEBUG)
|
||||
uint32_t qtf = swap_endian_val<uint32_t>(qtf_be);
|
||||
uint32_t qid = qtf >> IONIC_V1_CQE_QID_SHIFT;
|
||||
uint32_t type = (qtf >> IONIC_V1_CQE_TYPE_SHIFT) & IONIC_V1_CQE_TYPE_MASK;
|
||||
uint32_t flag = qtf & 0xf;
|
||||
uint32_t status = swap_endian_val<uint32_t>(cqe->status_length);
|
||||
uint64_t npg = cqe->send.npg_wqe_idx_timestamp & IONIC_V1_CQE_WQE_IDX_MASK;
|
||||
|
||||
printf("QUIET ERROR (CCQE): %s qid %u type %u flag %#x status %u msn %u npg %lu\n",
|
||||
dev_name, qid, type, flag, status, msn, npg);
|
||||
#endif
|
||||
/* No other way to signal an error, so just crash. */
|
||||
abort();
|
||||
}
|
||||
|
||||
sq_msn = msn;
|
||||
}
|
||||
|
||||
__device__ void QueuePair::poll_wave_cqes(uint64_t activemask) {
|
||||
if (!cq_mask) {
|
||||
ionic_poll_wave_ccqe(activemask);
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t my_logical_lane_id = get_active_lane_num(activemask);
|
||||
uint32_t my_cq_pos = cq_pos + my_logical_lane_id;
|
||||
|
||||
@@ -130,9 +163,26 @@ __device__ void QueuePair::poll_wave_cqes(uint64_t activemask) {
|
||||
sq_msn = msn;
|
||||
}
|
||||
|
||||
__device__ void QueuePair::ionic_quiet_internal_ccqe(uint64_t activemask, uint32_t cons) {
|
||||
if (!is_first_active_lane(activemask)) {
|
||||
return;
|
||||
}
|
||||
|
||||
volatile struct ionic_v1_cqe *cqe = &ionic_cq_buf[0];
|
||||
uint32_t msn = swap_endian_val<uint32_t>(cqe->send.msg_msn);
|
||||
while ((msn - cons) & 0x800000) {
|
||||
msn = swap_endian_val<uint32_t>(cqe->send.msg_msn);
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void QueuePair::ionic_quiet_internal(uint64_t activemask, uint32_t cons) {
|
||||
uint32_t greed = 10;
|
||||
|
||||
if (!cq_mask) {
|
||||
ionic_quiet_internal_ccqe(activemask, cons);
|
||||
return;
|
||||
}
|
||||
|
||||
/* wait for sq_msn to catch up or pass cons. */
|
||||
/* 0x800000 - sign bit for 24-bit fields */
|
||||
while ((sq_msn - cons) & 0x800000) {
|
||||
|
||||
@@ -364,11 +364,22 @@ class QueuePair {
|
||||
*/
|
||||
__device__ uint32_t commit_sq(uint64_t activemask, uint32_t my_sq_prod, uint32_t my_sq_pos, uint32_t num_wqes);
|
||||
|
||||
/**
|
||||
* @brief Helper method to poll the ccqe.
|
||||
*/
|
||||
__device__ __attribute__((noinline)) void ionic_poll_wave_ccqe(uint64_t active_lane_mask);
|
||||
|
||||
/**
|
||||
* @brief Helper method to poll the next completion queue entry.
|
||||
*/
|
||||
__device__ __attribute__((noinline)) void poll_wave_cqes(uint64_t active_lane_mask);
|
||||
|
||||
/**
|
||||
* @brief Helper method to drain completion queue entries.
|
||||
* @param cons wait for sq_msn to catch up to this position.
|
||||
*/
|
||||
__device__ __attribute__((noinline)) void ionic_quiet_internal_ccqe(uint64_t active_lane_mask, uint32_t cons);
|
||||
|
||||
/**
|
||||
* @brief Helper method to drain completion queue entries.
|
||||
* @param cons wait for sq_msn to catch up to this position.
|
||||
|
||||
@@ -124,6 +124,11 @@ namespace rocshmem {
|
||||
#define STRINGIFY_NX(name) #name
|
||||
#define STRINGIFY(name) STRINGIFY_NX(name)
|
||||
|
||||
#define DLSYM_OPT_HELPER(func_struct, prefix, handle, func_name) \
|
||||
do { \
|
||||
*(void **) (&func_struct.func_name) = dlsym(handle, STRINGIFY(PPCAT(prefix, func_name))); \
|
||||
} while (0)
|
||||
|
||||
#define DLSYM_HELPER(func_struct, prefix, handle, func_name) \
|
||||
do { \
|
||||
*(void **) (&func_struct.func_name) = dlsym(handle, STRINGIFY(PPCAT(prefix, func_name))); \
|
||||
|
||||
Ссылка в новой задаче
Block a user