Do not hardcode wf_size==64 in ionic provider (#367)

* Do not hardcode wf_size==64 in ionic provider

* Simpler same_qp_mask in ionic


[ROCm/rocshmem commit: 0c496d83d6]
This commit is contained in:
Aurelien Bouteiller
2026-01-05 18:36:58 -05:00
committed by GitHub
parent 56bfb13644
commit abb1e0684a
@@ -30,19 +30,11 @@
namespace rocshmem {
__device__ uint64_t QueuePair::get_same_qp_lane_mask() {
uint64_t lane_mask = get_active_lane_mask();
uintptr_t this_val = reinterpret_cast<uintptr_t>(this);
// exclude threads operating on a different qp from this thread lane mask
#pragma unroll
for (int i = 0; i < 64; ++i) {
uint64_t bit_i = 1ull << i;
if ((lane_mask & bit_i) && __shfl(this_val, i) != this_val) {
lane_mask &= ~bit_i;
}
}
return lane_mask;
uint64_t active = get_active_lane_mask();
uintptr_t this_qp = reinterpret_cast<uintptr_t>(this);
// Bitmask of lanes in this warp whose value == this_qp
uint64_t same_qp_mask = __match_any_sync(active, this_qp);
return same_qp_mask;
}
__device__ uint32_t QueuePair::reserve_sq(uint64_t activemask, uint32_t num_wqes) {
@@ -173,7 +165,7 @@ __device__ void QueuePair::ionic_quiet_internal(uint64_t activemask, uint32_t co
__device__ void QueuePair::ionic_ring_doorbell(uint32_t pos) {
// When threads write at once to the same address, not all writes reach the bus.
// Take turns and insert a thread fence between writes to the same address.
for (int i = 0; i < 64; ++i) {
for (int i = 0; i < WF_SIZE; ++i) {
if (__lane_id() == i) {
__threadfence();
__atomic_store_n(sq_dbreg, sq_dbval | (sq_mask & pos), __ATOMIC_SEQ_CST);