[GDA] Implement fetching atomics for BNXT (#253)

* Indent driver script
* Implemented fetching atomics BNXT
Dieser Commit ist enthalten in:
Yiltan
2025-09-18 09:50:42 -04:00
committet von GitHub
Ursprung 758c4a43f6
Commit f5aefd15f3
3 geänderte Dateien mit 50 neuen und 38 gelöschten Zeilen
+34 -34
Datei anzeigen
@@ -461,25 +461,25 @@ TestGDA() {
ExecTest "teamctxput" 2 4 128 1024
ExecTest "teamctxput" 2 16 256 1024
ExecTest "get" 2 1 1 1048576
ExecTest "get" 2 1 1024 512
ExecTest "get" 2 8 1 1048576
ExecTest "get" 2 16 128 8
ExecTest "get" 2 32 256 512
ExecTest "get" 2 64 1024 8
ExecTest "get" 2 1 1 1048576
ExecTest "get" 2 1 1024 512
ExecTest "get" 2 8 1 1048576
ExecTest "get" 2 16 128 8
ExecTest "get" 2 32 256 512
ExecTest "get" 2 64 1024 8
ExecTest "wgget" 2 1 64 1048576
ExecTest "wgget" 2 2 64 1048576
ExecTest "wgget" 2 16 64 8
ExecTest "wgget" 2 1 64 1048576
ExecTest "wgget" 2 2 64 1048576
ExecTest "wgget" 2 16 64 8
ExecTest "waveget" 2 1 64 1048576
ExecTest "waveget" 2 2 64 1048576
ExecTest "waveget" 2 2 128 1048576
ExecTest "waveget" 2 16 128 8
ExecTest "waveget" 2 1 64 1048576
ExecTest "waveget" 2 2 64 1048576
ExecTest "waveget" 2 2 128 1048576
ExecTest "waveget" 2 16 128 8
ExecTest "defaultctxget" 2 4 128 1024
ExecTest "teamctxget" 2 4 128 1024
ExecTest "teamctxget" 2 16 256 1024
ExecTest "defaultctxget" 2 4 128 1024
ExecTest "teamctxget" 2 4 128 1024
ExecTest "teamctxget" 2 16 256 1024
# ExecTest "g" 2 1 1 128
# ExecTest "g" 2 1 1024 2
@@ -513,25 +513,25 @@ TestGDA() {
ExecTest "teamctxputnbi" 2 4 128 1024
ExecTest "teamctxputnbi" 2 16 256 1024
ExecTest "getnbi" 2 1 1 1048576
ExecTest "getnbi" 2 1 1024 512
ExecTest "getnbi" 2 8 1 1048576
ExecTest "getnbi" 2 16 128 8
ExecTest "getnbi" 2 32 256 512
ExecTest "getnbi" 2 64 1024 8
ExecTest "getnbi" 2 1 1 1048576
ExecTest "getnbi" 2 1 1024 512
ExecTest "getnbi" 2 8 1 1048576
ExecTest "getnbi" 2 16 128 8
ExecTest "getnbi" 2 32 256 512
ExecTest "getnbi" 2 64 1024 8
ExecTest "wggetnbi" 2 1 64 1048576
ExecTest "wggetnbi" 2 2 64 1048576
ExecTest "wggetnbi" 2 16 64 8
ExecTest "wggetnbi" 2 1 64 1048576
ExecTest "wggetnbi" 2 2 64 1048576
ExecTest "wggetnbi" 2 16 64 8
ExecTest "wavegetnbi" 2 1 64 1048576
ExecTest "wavegetnbi" 2 2 64 1048576
ExecTest "wavegetnbi" 2 2 128 1048576
ExecTest "wavegetnbi" 2 16 128 8
ExecTest "wavegetnbi" 2 1 64 1048576
ExecTest "wavegetnbi" 2 2 64 1048576
ExecTest "wavegetnbi" 2 2 128 1048576
ExecTest "wavegetnbi" 2 16 128 8
ExecTest "defaultctxgetnbi" 2 4 128 1024
ExecTest "teamctxgetnbi" 2 4 128 1024
ExecTest "teamctxgetnbi" 2 16 256 1024
ExecTest "defaultctxgetnbi" 2 4 128 1024
ExecTest "teamctxgetnbi" 2 4 128 1024
ExecTest "teamctxgetnbi" 2 16 256 1024
#TestAMO() {
##############################################################################
@@ -550,7 +550,7 @@ TestGDA() {
# ExecTest "amo_fcswap" 2 32 1
# ExecTest "amo_fcswap" 2 8 1
#Works on CX7, not implemented on BNXT
#This works but tester requires 32bit amos to be implemented
# ExecTest "amo_finc" 2 1 1
# ExecTest "amo_finc" 2 1 1024
# ExecTest "amo_finc" 2 8 1
@@ -562,7 +562,7 @@ TestGDA() {
# ExecTest "amo_inc" 2 8 1
# ExecTest "amo_inc" 2 32 128
#Works on CX7, not implemented on BNXT
#This works but tester requires 32bit amos to be implemented
# ExecTest "amo_fadd" 2 1 1
# ExecTest "amo_fadd" 2 1 1024
# ExecTest "amo_fadd" 2 8 1
+13 -2
Datei anzeigen
@@ -336,6 +336,7 @@ __device__ uint64_t QueuePair::post_wqe_amo(int pe, int32_t length, uintptr_t *r
uint64_t active_lane_mask;
uint8_t active_lane_count;
uint8_t active_lane_id;
uint32_t atomic_idx = 0;
active_lane_mask = get_active_lane_mask();
active_lane_count = get_active_lane_count(active_lane_mask);
@@ -378,8 +379,14 @@ __device__ uint64_t QueuePair::post_wqe_amo(int pe, int32_t length, uintptr_t *r
amo.swp_dt = atomic_data;
/* Populate SG Segment - (Return address of atomic) */
sge.pa = (uint64_t) nonfetching_atomic;
sge.lkey = nonfetching_atomic_lkey;
if (fetching) {
atomic_idx = fetching_atomic_idx++ % FETCHING_ATOMIC_CNT;
sge.pa = (uint64_t) &fetching_atomic[atomic_idx];
sge.lkey = fetching_atomic_lkey;
} else {
sge.pa = (uint64_t) nonfetching_atomic;
sge.lkey = nonfetching_atomic_lkey;
}
sge.length = length;
/* Write WQE to SQ */
@@ -406,6 +413,10 @@ __device__ uint64_t QueuePair::post_wqe_amo(int pe, int32_t length, uintptr_t *r
release_lock(&sq.lock);
}
if (fetching) {
return fetching_atomic[atomic_idx];
}
return 0;
}
+3 -2
Datei anzeigen
@@ -311,9 +311,12 @@ class QueuePair {
uint64_t* nonfetching_atomic{nullptr};
uint32_t nonfetching_atomic_lkey{0};
struct ibv_mr *mr_nonfetching_atomic;
uint64_t* fetching_atomic{nullptr};
uint32_t fetching_atomic_lkey{0};
uint32_t fetching_atomic_idx{0};
struct ibv_mr *mr_fetching_atomic;
static const uint32_t FETCHING_ATOMIC_CNT{1024};
static_assert(FETCHING_ATOMIC_CNT % WF_SIZE == 0);
@@ -322,8 +325,6 @@ class QueuePair {
HIPAllocator allocator{};
struct ibv_mr *mr_nonfetching_atomic;
struct ibv_mr *mr_fetching_atomic;
};
} // namespace rocshmem