From 5e2ba952f38c17417d511872d6b67dbb85b2f878 Mon Sep 17 00:00:00 2001 From: Yiltan Date: Mon, 15 Dec 2025 09:23:01 -0500 Subject: [PATCH] [GDA/BNXT] Remove doorbell arbitration (#363) [ROCm/rocshmem commit: fe1a28e4092997e34e2a0693be259805112f1e72] --- projects/rocshmem/src/gda/backend_gda.cpp | 7 ++-- projects/rocshmem/src/gda/backend_gda.hpp | 2 -- .../src/gda/bnxt/backend_gda_bnxt.cpp | 35 ++++++++++--------- .../src/gda/bnxt/provider_gda_bnxt.hpp | 5 +-- .../rocshmem/src/gda/bnxt/queue_pair_bnxt.cpp | 31 ++-------------- 5 files changed, 29 insertions(+), 51 deletions(-) diff --git a/projects/rocshmem/src/gda/backend_gda.cpp b/projects/rocshmem/src/gda/backend_gda.cpp index 019d0b513a..86f0fc1a99 100644 --- a/projects/rocshmem/src/gda/backend_gda.cpp +++ b/projects/rocshmem/src/gda/backend_gda.cpp @@ -715,12 +715,15 @@ void GDABackend::cleanup_ibv() { int err; if (gda_provider == GDAProvider::BNXT) { - CHECK_HIP(hipHostUnregister(db_region_attr.dbr)); - for (int i = 0; i < qps.size(); i++) { err = bnxt_re_dv.destroy_qp(qps[i]); CHECK_ZERO(err, "bnxt_re_dv_destroy_qp"); + CHECK_HIP(hipHostUnregister(bnxt_qps[i].db_region_attr->dbr)); + + err = bnxt_re_dv.free_db_region(context, bnxt_qps[i].db_region_attr); + CHECK_ZERO(err, "bnxt_re_dv_free_db_region"); + err = bnxt_re_dv.umem_dereg(bnxt_qps[i].attr.rq_umem_handle); CHECK_ZERO(err, "bnxt_re_dv_umem_dereg (RQ)"); diff --git a/projects/rocshmem/src/gda/backend_gda.hpp b/projects/rocshmem/src/gda/backend_gda.hpp index a29d5b1a63..6f3c100483 100644 --- a/projects/rocshmem/src/gda/backend_gda.hpp +++ b/projects/rocshmem/src/gda/backend_gda.hpp @@ -96,8 +96,6 @@ class GDABackend : public Backend { std::vector bnxt_qps; std::vector bnxt_scqs; std::vector bnxt_rcqs; - - struct bnxt_re_dv_db_region_attr db_region_attr; /* GDA_BNXT END */ /* GDA_IONIC & GDA_MLX5 START */ diff --git a/projects/rocshmem/src/gda/bnxt/backend_gda_bnxt.cpp b/projects/rocshmem/src/gda/bnxt/backend_gda_bnxt.cpp index 1148247af1..0b0bbc3f34 100644 --- a/projects/rocshmem/src/gda/bnxt/backend_gda_bnxt.cpp +++ b/projects/rocshmem/src/gda/bnxt/backend_gda_bnxt.cpp @@ -75,11 +75,8 @@ void GDABackend::bnxt_initialize_gpu_qp(QueuePair* gpu_qp, int conn_num) { gpu_qp->sq.mtu = ibv_mtu_to_int(portinfo.active_mtu); /* Export DB */ - err = bnxt_re_dv.get_default_db_region(context, &db_region_attr); - CHECK_ZERO(err, "bnxt_re_dv_init_obj(QP)"); - - CHECK_HIP(hipHostRegister(db_region_attr.dbr, getpagesize(), hipHostRegisterDefault)); - CHECK_HIP(hipHostGetDevicePointer((void**) &gpu_qp->dbr, db_region_attr.dbr, 0)); + CHECK_HIP(hipHostRegister(bnxt_qps[conn_num].db_region_attr->dbr, getpagesize(), hipHostRegisterDefault)); + CHECK_HIP(hipHostGetDevicePointer((void**) &gpu_qp->dbr, bnxt_qps[conn_num].db_region_attr->dbr, 0)); /* Export Memory Keys */ gpu_qp->lkey = heap_mr->lkey; @@ -229,6 +226,10 @@ void GDABackend::bnxt_create_qps(int sq_length) { rq_umem_handle = bnxt_re_dv.umem_reg(context, &umem_attr); CHECK_NNULL(rq_umem_handle, "bnxt_re_dv_umem_reg(rq)"); + /* Alloc DPI */ + bnxt_qps[i].db_region_attr = bnxt_re_dv.alloc_db_region(context); + CHECK_NNULL(bnxt_qps[i].db_region_attr, "bnxt_re_dv_alloc_db_region"); + /* IB DV QP Init Attr */ memset(&bnxt_qps[i].attr, 0, sizeof(struct bnxt_re_dv_qp_init_attr)); bnxt_qps[i].attr.send_cq = ib_qp_attr.send_cq; @@ -240,19 +241,20 @@ void GDABackend::bnxt_create_qps(int sq_length) { bnxt_qps[i].attr.max_inline_data = ib_qp_attr.cap.max_inline_data; bnxt_qps[i].attr.qp_type = ib_qp_attr.qp_type; - bnxt_qps[i].attr.qp_handle = bnxt_qps[i].mem_info.qp_handle; + bnxt_qps[i].attr.qp_handle = bnxt_qps[i].mem_info.qp_handle; + bnxt_qps[i].attr.dbr_handle = bnxt_qps[i].db_region_attr; bnxt_qps[i].attr.sq_umem_handle = sq_umem_handle; - bnxt_qps[i].attr.sq_len = bnxt_qps[i].mem_info.sq_len; - bnxt_qps[i].attr.sq_slots = bnxt_qps[i].mem_info.sq_slots; - bnxt_qps[i].attr.sq_wqe_sz = bnxt_qps[i].mem_info.sq_wqe_sz; - bnxt_qps[i].attr.sq_psn_sz = bnxt_qps[i].mem_info.sq_psn_sz; - bnxt_qps[i].attr.sq_npsn = bnxt_qps[i].mem_info.sq_npsn; + bnxt_qps[i].attr.sq_len = bnxt_qps[i].mem_info.sq_len; + bnxt_qps[i].attr.sq_slots = bnxt_qps[i].mem_info.sq_slots; + bnxt_qps[i].attr.sq_wqe_sz = bnxt_qps[i].mem_info.sq_wqe_sz; + bnxt_qps[i].attr.sq_psn_sz = bnxt_qps[i].mem_info.sq_psn_sz; + bnxt_qps[i].attr.sq_npsn = bnxt_qps[i].mem_info.sq_npsn; bnxt_qps[i].attr.rq_umem_handle = rq_umem_handle; - bnxt_qps[i].attr.rq_len = bnxt_qps[i].mem_info.rq_len; - bnxt_qps[i].attr.rq_slots = bnxt_qps[i].mem_info.rq_slots; - bnxt_qps[i].attr.rq_wqe_sz = bnxt_qps[i].mem_info.rq_wqe_sz; - bnxt_qps[i].attr.comp_mask = bnxt_qps[i].mem_info.comp_mask; + bnxt_qps[i].attr.rq_len = bnxt_qps[i].mem_info.rq_len; + bnxt_qps[i].attr.rq_slots = bnxt_qps[i].mem_info.rq_slots; + bnxt_qps[i].attr.rq_wqe_sz = bnxt_qps[i].mem_info.rq_wqe_sz; + bnxt_qps[i].attr.comp_mask = bnxt_qps[i].mem_info.comp_mask; /* Alloc QP */ qps[i] = bnxt_re_dv.create_qp(pd_orig, &bnxt_qps[i].attr); @@ -288,7 +290,8 @@ int GDABackend::bnxt_dv_dl_init() { DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, cq_mem_alloc); DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, umem_reg); DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, umem_dereg); - DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, get_default_db_region); + DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, alloc_db_region); + DLSYM_HELPER(bnxt_re_dv, bnxt_re_dv_, bnxtdv_handle_, free_db_region); return ROCSHMEM_SUCCESS; } diff --git a/projects/rocshmem/src/gda/bnxt/provider_gda_bnxt.hpp b/projects/rocshmem/src/gda/bnxt/provider_gda_bnxt.hpp index b50d868291..ebc079635e 100644 --- a/projects/rocshmem/src/gda/bnxt/provider_gda_bnxt.hpp +++ b/projects/rocshmem/src/gda/bnxt/provider_gda_bnxt.hpp @@ -70,6 +70,7 @@ struct bnxt_host_cq { struct bnxt_host_qp { struct bnxt_re_dv_qp_mem_info mem_info; struct bnxt_re_dv_qp_init_attr attr; + struct bnxt_re_dv_db_region_attr *db_region_attr; void *sq_buf; void *rq_buf; void *msntbl; @@ -96,8 +97,8 @@ struct bnxtdv_funcs_t { void* (*umem_reg)(struct ibv_context *ibvctx, struct bnxt_re_dv_umem_reg_attr *in); int (*umem_dereg)(void *umem_handle); - int (*get_default_db_region)(struct ibv_context *ibvctx, - struct bnxt_re_dv_db_region_attr *out); + struct bnxt_re_dv_db_region_attr * (*alloc_db_region)(struct ibv_context *ctx); + int (*free_db_region)(struct ibv_context *ctx, struct bnxt_re_dv_db_region_attr *attr); }; #endif //LIBRARY_SRC_GDA_BNXT_GDA_PROVIDER_HPP_ diff --git a/projects/rocshmem/src/gda/bnxt/queue_pair_bnxt.cpp b/projects/rocshmem/src/gda/bnxt/queue_pair_bnxt.cpp index 54f77b9e3c..72244d95ab 100644 --- a/projects/rocshmem/src/gda/bnxt/queue_pair_bnxt.cpp +++ b/projects/rocshmem/src/gda/bnxt/queue_pair_bnxt.cpp @@ -320,21 +320,7 @@ __device__ void QueuePair::bnxt_post_wqe_rma_single(int32_t length, uintptr_t *l bnxt_write_rma_wqe(raddr, laddr, length, opcode); if (ring_db) { - uint64_t active_lane_mask; - uint8_t active_lane_count; - uint8_t active_lane_id; - - active_lane_mask = get_active_lane_mask(); - active_lane_count = get_active_lane_count(active_lane_mask); - active_lane_id = get_active_lane_num(active_lane_mask); - - /* Ring Doorbell - * Doorbell ring must be serialized as we cannot have all threads write to the same address */ - for (int i = 0; i < active_lane_count; i++) { - if (i == active_lane_id) { - bnxt_ring_doorbell(sq.tail); - } - } + bnxt_ring_doorbell(sq.tail); } release_lock(&sq.lock); @@ -443,27 +429,14 @@ __device__ uint64_t QueuePair::bnxt_post_wqe_amo(uintptr_t *raddr, uint8_t opcod __device__ uint64_t QueuePair::bnxt_post_wqe_amo_single(uintptr_t *raddr, uint8_t opcode, int64_t atomic_data, int64_t atomic_cmp, bool fetching) { - uint64_t active_lane_mask; - uint8_t active_lane_count; - uint8_t active_lane_id; uint32_t atomic_idx = 0; - active_lane_mask = get_active_lane_mask(); - active_lane_count = get_active_lane_count(active_lane_mask); - active_lane_id = get_active_lane_num(active_lane_mask); - aquire_lock(&sq.lock); /* Write WQE to SQ */ atomic_idx = bnxt_write_amo_wqe(raddr, opcode, atomic_data, atomic_cmp, fetching); - /* Ring Doorbell - * Doorbell ring must be serialized as we cannot have all threads write to the same address */ - for (int i = 0; i < active_lane_count; i++) { - if (i == active_lane_id) { - bnxt_ring_doorbell(sq.tail); - } - } + bnxt_ring_doorbell(sq.tail); if (fetching) { quiet();