[GDA/BNXT] Remove doorbell arbitration (#363)
[ROCm/rocshmem commit: fe1a28e409]
Tento commit je obsažen v:
@@ -715,12 +715,15 @@ void GDABackend::cleanup_ibv() {
|
||||
int err;
|
||||
|
||||
if (gda_provider == GDAProvider::BNXT) {
|
||||
CHECK_HIP(hipHostUnregister(db_region_attr.dbr));
|
||||
|
||||
for (int i = 0; i < qps.size(); i++) {
|
||||
err = bnxt_re_dv.destroy_qp(qps[i]);
|
||||
CHECK_ZERO(err, "bnxt_re_dv_destroy_qp");
|
||||
|
||||
CHECK_HIP(hipHostUnregister(bnxt_qps[i].db_region_attr->dbr));
|
||||
|
||||
err = bnxt_re_dv.free_db_region(context, bnxt_qps[i].db_region_attr);
|
||||
CHECK_ZERO(err, "bnxt_re_dv_free_db_region");
|
||||
|
||||
err = bnxt_re_dv.umem_dereg(bnxt_qps[i].attr.rq_umem_handle);
|
||||
CHECK_ZERO(err, "bnxt_re_dv_umem_dereg (RQ)");
|
||||
|
||||
|
||||
@@ -96,8 +96,6 @@ class GDABackend : public Backend {
|
||||
std::vector<struct bnxt_host_qp> bnxt_qps;
|
||||
std::vector<struct bnxt_host_cq> bnxt_scqs;
|
||||
std::vector<struct bnxt_host_cq> bnxt_rcqs;
|
||||
|
||||
struct bnxt_re_dv_db_region_attr db_region_attr;
|
||||
/* GDA_BNXT END */
|
||||
|
||||
/* GDA_IONIC & GDA_MLX5 START */
|
||||
|
||||
@@ -75,11 +75,8 @@ void GDABackend::bnxt_initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) {
|
||||
gpu_qp->sq.mtu = ibv_mtu_to_int(portinfo.active_mtu);
|
||||
|
||||
/* Export DB */
|
||||
err = bnxt_re_dv.get_default_db_region(context, &db_region_attr);
|
||||
CHECK_ZERO(err, "bnxt_re_dv_init_obj(QP)");
|
||||
|
||||
CHECK_HIP(hipHostRegister(db_region_attr.dbr, getpagesize(), hipHostRegisterDefault));
|
||||
CHECK_HIP(hipHostGetDevicePointer((void**) &gpu_qp->dbr, db_region_attr.dbr, 0));
|
||||
CHECK_HIP(hipHostRegister(bnxt_qps[conn_num].db_region_attr->dbr, getpagesize(), hipHostRegisterDefault));
|
||||
CHECK_HIP(hipHostGetDevicePointer((void**) &gpu_qp->dbr, bnxt_qps[conn_num].db_region_attr->dbr, 0));
|
||||
|
||||
/* Export Memory Keys */
|
||||
gpu_qp->lkey = heap_mr->lkey;
|
||||
@@ -229,6 +226,10 @@ void GDABackend::bnxt_create_qps(int sq_length) {
|
||||
rq_umem_handle = bnxt_re_dv.umem_reg(context, &umem_attr);
|
||||
CHECK_NNULL(rq_umem_handle, "bnxt_re_dv_umem_reg(rq)");
|
||||
|
||||
/* Alloc DPI */
|
||||
bnxt_qps[i].db_region_attr = bnxt_re_dv.alloc_db_region(context);
|
||||
CHECK_NNULL(bnxt_qps[i].db_region_attr, "bnxt_re_dv_alloc_db_region");
|
||||
|
||||
/* IB DV QP Init Attr */
|
||||
memset(&bnxt_qps[i].attr, 0, sizeof(struct bnxt_re_dv_qp_init_attr));
|
||||
bnxt_qps[i].attr.send_cq = ib_qp_attr.send_cq;
|
||||
@@ -240,19 +241,20 @@ void GDABackend::bnxt_create_qps(int sq_length) {
|
||||
bnxt_qps[i].attr.max_inline_data = ib_qp_attr.cap.max_inline_data;
|
||||
bnxt_qps[i].attr.qp_type = ib_qp_attr.qp_type;
|
||||
|
||||
bnxt_qps[i].attr.qp_handle = bnxt_qps[i].mem_info.qp_handle;
|
||||
bnxt_qps[i].attr.qp_handle = bnxt_qps[i].mem_info.qp_handle;
|
||||
bnxt_qps[i].attr.dbr_handle = bnxt_qps[i].db_region_attr;
|
||||
bnxt_qps[i].attr.sq_umem_handle = sq_umem_handle;
|
||||
bnxt_qps[i].attr.sq_len = bnxt_qps[i].mem_info.sq_len;
|
||||
bnxt_qps[i].attr.sq_slots = bnxt_qps[i].mem_info.sq_slots;
|
||||
bnxt_qps[i].attr.sq_wqe_sz = bnxt_qps[i].mem_info.sq_wqe_sz;
|
||||
bnxt_qps[i].attr.sq_psn_sz = bnxt_qps[i].mem_info.sq_psn_sz;
|
||||
bnxt_qps[i].attr.sq_npsn = bnxt_qps[i].mem_info.sq_npsn;
|
||||
bnxt_qps[i].attr.sq_len = bnxt_qps[i].mem_info.sq_len;
|
||||
bnxt_qps[i].attr.sq_slots = bnxt_qps[i].mem_info.sq_slots;
|
||||
bnxt_qps[i].attr.sq_wqe_sz = bnxt_qps[i].mem_info.sq_wqe_sz;
|
||||
bnxt_qps[i].attr.sq_psn_sz = bnxt_qps[i].mem_info.sq_psn_sz;
|
||||
bnxt_qps[i].attr.sq_npsn = bnxt_qps[i].mem_info.sq_npsn;
|
||||
|
||||
bnxt_qps[i].attr.rq_umem_handle = rq_umem_handle;
|
||||
bnxt_qps[i].attr.rq_len = bnxt_qps[i].mem_info.rq_len;
|
||||
bnxt_qps[i].attr.rq_slots = bnxt_qps[i].mem_info.rq_slots;
|
||||
bnxt_qps[i].attr.rq_wqe_sz = bnxt_qps[i].mem_info.rq_wqe_sz;
|
||||
bnxt_qps[i].attr.comp_mask = bnxt_qps[i].mem_info.comp_mask;
|
||||
bnxt_qps[i].attr.rq_len = bnxt_qps[i].mem_info.rq_len;
|
||||
bnxt_qps[i].attr.rq_slots = bnxt_qps[i].mem_info.rq_slots;
|
||||
bnxt_qps[i].attr.rq_wqe_sz = bnxt_qps[i].mem_info.rq_wqe_sz;
|
||||
bnxt_qps[i].attr.comp_mask = bnxt_qps[i].mem_info.comp_mask;
|
||||
|
||||
/* Alloc QP */
|
||||
qps[i] = bnxt_re_dv.create_qp(pd_orig, &bnxt_qps[i].attr);
|
||||
@@ -288,7 +290,8 @@ int GDABackend::bnxt_dv_dl_init() {
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, cq_mem_alloc);
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, umem_reg);
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, umem_dereg);
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, get_default_db_region);
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, alloc_db_region);
|
||||
DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, free_db_region);
|
||||
|
||||
return ROCSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -70,6 +70,7 @@ struct bnxt_host_cq {
|
||||
struct bnxt_host_qp {
|
||||
struct bnxt_re_dv_qp_mem_info mem_info;
|
||||
struct bnxt_re_dv_qp_init_attr attr;
|
||||
struct bnxt_re_dv_db_region_attr *db_region_attr;
|
||||
void *sq_buf;
|
||||
void *rq_buf;
|
||||
void *msntbl;
|
||||
@@ -96,8 +97,8 @@ struct bnxtdv_funcs_t {
|
||||
void* (*umem_reg)(struct ibv_context *ibvctx,
|
||||
struct bnxt_re_dv_umem_reg_attr *in);
|
||||
int (*umem_dereg)(void *umem_handle);
|
||||
int (*get_default_db_region)(struct ibv_context *ibvctx,
|
||||
struct bnxt_re_dv_db_region_attr *out);
|
||||
struct bnxt_re_dv_db_region_attr * (*alloc_db_region)(struct ibv_context *ctx);
|
||||
int (*free_db_region)(struct ibv_context *ctx, struct bnxt_re_dv_db_region_attr *attr);
|
||||
};
|
||||
|
||||
#endif //LIBRARY_SRC_GDA_BNXT_GDA_PROVIDER_HPP_
|
||||
|
||||
@@ -320,21 +320,7 @@ __device__ void QueuePair::bnxt_post_wqe_rma_single(int32_t length, uintptr_t *l
|
||||
bnxt_write_rma_wqe(raddr, laddr, length, opcode);
|
||||
|
||||
if (ring_db) {
|
||||
uint64_t active_lane_mask;
|
||||
uint8_t active_lane_count;
|
||||
uint8_t active_lane_id;
|
||||
|
||||
active_lane_mask = get_active_lane_mask();
|
||||
active_lane_count = get_active_lane_count(active_lane_mask);
|
||||
active_lane_id = get_active_lane_num(active_lane_mask);
|
||||
|
||||
/* Ring Doorbell
|
||||
* Doorbell ring must be serialized as we cannot have all threads write to the same address */
|
||||
for (int i = 0; i < active_lane_count; i++) {
|
||||
if (i == active_lane_id) {
|
||||
bnxt_ring_doorbell(sq.tail);
|
||||
}
|
||||
}
|
||||
bnxt_ring_doorbell(sq.tail);
|
||||
}
|
||||
|
||||
release_lock(&sq.lock);
|
||||
@@ -443,27 +429,14 @@ __device__ uint64_t QueuePair::bnxt_post_wqe_amo(uintptr_t *raddr, uint8_t opcod
|
||||
__device__ uint64_t QueuePair::bnxt_post_wqe_amo_single(uintptr_t *raddr, uint8_t opcode,
|
||||
int64_t atomic_data, int64_t atomic_cmp,
|
||||
bool fetching) {
|
||||
uint64_t active_lane_mask;
|
||||
uint8_t active_lane_count;
|
||||
uint8_t active_lane_id;
|
||||
uint32_t atomic_idx = 0;
|
||||
|
||||
active_lane_mask = get_active_lane_mask();
|
||||
active_lane_count = get_active_lane_count(active_lane_mask);
|
||||
active_lane_id = get_active_lane_num(active_lane_mask);
|
||||
|
||||
aquire_lock(&sq.lock);
|
||||
|
||||
/* Write WQE to SQ */
|
||||
atomic_idx = bnxt_write_amo_wqe(raddr, opcode, atomic_data, atomic_cmp, fetching);
|
||||
|
||||
/* Ring Doorbell
|
||||
* Doorbell ring must be serialized as we cannot have all threads write to the same address */
|
||||
for (int i = 0; i < active_lane_count; i++) {
|
||||
if (i == active_lane_id) {
|
||||
bnxt_ring_doorbell(sq.tail);
|
||||
}
|
||||
}
|
||||
bnxt_ring_doorbell(sq.tail);
|
||||
|
||||
if (fetching) {
|
||||
quiet();
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele