[GDA] Implement fetching atomics for BNXT (#253)
* Indent driver script * Implemented fetching atomics BNXT
Dieser Commit ist enthalten in:
@@ -461,25 +461,25 @@ TestGDA() {
|
||||
ExecTest "teamctxput" 2 4 128 1024
|
||||
ExecTest "teamctxput" 2 16 256 1024
|
||||
|
||||
ExecTest "get" 2 1 1 1048576
|
||||
ExecTest "get" 2 1 1024 512
|
||||
ExecTest "get" 2 8 1 1048576
|
||||
ExecTest "get" 2 16 128 8
|
||||
ExecTest "get" 2 32 256 512
|
||||
ExecTest "get" 2 64 1024 8
|
||||
ExecTest "get" 2 1 1 1048576
|
||||
ExecTest "get" 2 1 1024 512
|
||||
ExecTest "get" 2 8 1 1048576
|
||||
ExecTest "get" 2 16 128 8
|
||||
ExecTest "get" 2 32 256 512
|
||||
ExecTest "get" 2 64 1024 8
|
||||
|
||||
ExecTest "wgget" 2 1 64 1048576
|
||||
ExecTest "wgget" 2 2 64 1048576
|
||||
ExecTest "wgget" 2 16 64 8
|
||||
ExecTest "wgget" 2 1 64 1048576
|
||||
ExecTest "wgget" 2 2 64 1048576
|
||||
ExecTest "wgget" 2 16 64 8
|
||||
|
||||
ExecTest "waveget" 2 1 64 1048576
|
||||
ExecTest "waveget" 2 2 64 1048576
|
||||
ExecTest "waveget" 2 2 128 1048576
|
||||
ExecTest "waveget" 2 16 128 8
|
||||
ExecTest "waveget" 2 1 64 1048576
|
||||
ExecTest "waveget" 2 2 64 1048576
|
||||
ExecTest "waveget" 2 2 128 1048576
|
||||
ExecTest "waveget" 2 16 128 8
|
||||
|
||||
ExecTest "defaultctxget" 2 4 128 1024
|
||||
ExecTest "teamctxget" 2 4 128 1024
|
||||
ExecTest "teamctxget" 2 16 256 1024
|
||||
ExecTest "defaultctxget" 2 4 128 1024
|
||||
ExecTest "teamctxget" 2 4 128 1024
|
||||
ExecTest "teamctxget" 2 16 256 1024
|
||||
|
||||
# ExecTest "g" 2 1 1 128
|
||||
# ExecTest "g" 2 1 1024 2
|
||||
@@ -513,25 +513,25 @@ TestGDA() {
|
||||
ExecTest "teamctxputnbi" 2 4 128 1024
|
||||
ExecTest "teamctxputnbi" 2 16 256 1024
|
||||
|
||||
ExecTest "getnbi" 2 1 1 1048576
|
||||
ExecTest "getnbi" 2 1 1024 512
|
||||
ExecTest "getnbi" 2 8 1 1048576
|
||||
ExecTest "getnbi" 2 16 128 8
|
||||
ExecTest "getnbi" 2 32 256 512
|
||||
ExecTest "getnbi" 2 64 1024 8
|
||||
ExecTest "getnbi" 2 1 1 1048576
|
||||
ExecTest "getnbi" 2 1 1024 512
|
||||
ExecTest "getnbi" 2 8 1 1048576
|
||||
ExecTest "getnbi" 2 16 128 8
|
||||
ExecTest "getnbi" 2 32 256 512
|
||||
ExecTest "getnbi" 2 64 1024 8
|
||||
|
||||
ExecTest "wggetnbi" 2 1 64 1048576
|
||||
ExecTest "wggetnbi" 2 2 64 1048576
|
||||
ExecTest "wggetnbi" 2 16 64 8
|
||||
ExecTest "wggetnbi" 2 1 64 1048576
|
||||
ExecTest "wggetnbi" 2 2 64 1048576
|
||||
ExecTest "wggetnbi" 2 16 64 8
|
||||
|
||||
ExecTest "wavegetnbi" 2 1 64 1048576
|
||||
ExecTest "wavegetnbi" 2 2 64 1048576
|
||||
ExecTest "wavegetnbi" 2 2 128 1048576
|
||||
ExecTest "wavegetnbi" 2 16 128 8
|
||||
ExecTest "wavegetnbi" 2 1 64 1048576
|
||||
ExecTest "wavegetnbi" 2 2 64 1048576
|
||||
ExecTest "wavegetnbi" 2 2 128 1048576
|
||||
ExecTest "wavegetnbi" 2 16 128 8
|
||||
|
||||
ExecTest "defaultctxgetnbi" 2 4 128 1024
|
||||
ExecTest "teamctxgetnbi" 2 4 128 1024
|
||||
ExecTest "teamctxgetnbi" 2 16 256 1024
|
||||
ExecTest "defaultctxgetnbi" 2 4 128 1024
|
||||
ExecTest "teamctxgetnbi" 2 4 128 1024
|
||||
ExecTest "teamctxgetnbi" 2 16 256 1024
|
||||
|
||||
#TestAMO() {
|
||||
##############################################################################
|
||||
@@ -550,7 +550,7 @@ TestGDA() {
|
||||
# ExecTest "amo_fcswap" 2 32 1
|
||||
# ExecTest "amo_fcswap" 2 8 1
|
||||
|
||||
#Works on CX7, not implemented on BNXT
|
||||
#This works but tester requires 32bit amos to be implemented
|
||||
# ExecTest "amo_finc" 2 1 1
|
||||
# ExecTest "amo_finc" 2 1 1024
|
||||
# ExecTest "amo_finc" 2 8 1
|
||||
@@ -562,7 +562,7 @@ TestGDA() {
|
||||
# ExecTest "amo_inc" 2 8 1
|
||||
# ExecTest "amo_inc" 2 32 128
|
||||
|
||||
#Works on CX7, not implemented on BNXT
|
||||
#This works but tester requires 32bit amos to be implemented
|
||||
# ExecTest "amo_fadd" 2 1 1
|
||||
# ExecTest "amo_fadd" 2 1 1024
|
||||
# ExecTest "amo_fadd" 2 8 1
|
||||
|
||||
@@ -336,6 +336,7 @@ __device__ uint64_t QueuePair::post_wqe_amo(int pe, int32_t length, uintptr_t *r
|
||||
uint64_t active_lane_mask;
|
||||
uint8_t active_lane_count;
|
||||
uint8_t active_lane_id;
|
||||
uint32_t atomic_idx = 0;
|
||||
|
||||
active_lane_mask = get_active_lane_mask();
|
||||
active_lane_count = get_active_lane_count(active_lane_mask);
|
||||
@@ -378,8 +379,14 @@ __device__ uint64_t QueuePair::post_wqe_amo(int pe, int32_t length, uintptr_t *r
|
||||
amo.swp_dt = atomic_data;
|
||||
|
||||
/* Populate SG Segment - (Return address of atomic) */
|
||||
sge.pa = (uint64_t) nonfetching_atomic;
|
||||
sge.lkey = nonfetching_atomic_lkey;
|
||||
if (fetching) {
|
||||
atomic_idx = fetching_atomic_idx++ % FETCHING_ATOMIC_CNT;
|
||||
sge.pa = (uint64_t) &fetching_atomic[atomic_idx];
|
||||
sge.lkey = fetching_atomic_lkey;
|
||||
} else {
|
||||
sge.pa = (uint64_t) nonfetching_atomic;
|
||||
sge.lkey = nonfetching_atomic_lkey;
|
||||
}
|
||||
sge.length = length;
|
||||
|
||||
/* Write WQE to SQ */
|
||||
@@ -406,6 +413,10 @@ __device__ uint64_t QueuePair::post_wqe_amo(int pe, int32_t length, uintptr_t *r
|
||||
release_lock(&sq.lock);
|
||||
}
|
||||
|
||||
if (fetching) {
|
||||
return fetching_atomic[atomic_idx];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -311,9 +311,12 @@ class QueuePair {
|
||||
|
||||
uint64_t* nonfetching_atomic{nullptr};
|
||||
uint32_t nonfetching_atomic_lkey{0};
|
||||
struct ibv_mr *mr_nonfetching_atomic;
|
||||
|
||||
uint64_t* fetching_atomic{nullptr};
|
||||
uint32_t fetching_atomic_lkey{0};
|
||||
uint32_t fetching_atomic_idx{0};
|
||||
struct ibv_mr *mr_fetching_atomic;
|
||||
|
||||
static const uint32_t FETCHING_ATOMIC_CNT{1024};
|
||||
static_assert(FETCHING_ATOMIC_CNT % WF_SIZE == 0);
|
||||
@@ -322,8 +325,6 @@ class QueuePair {
|
||||
|
||||
HIPAllocator allocator{};
|
||||
|
||||
struct ibv_mr *mr_nonfetching_atomic;
|
||||
struct ibv_mr *mr_fetching_atomic;
|
||||
};
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren