[SWDEV-438556][SWDEV-451646] - Fix for Warp vote catch tests

Change-Id: Icf9e0d488c4ba0267ce988b85d7b38478db9d1b8
Этот коммит содержится в:
Rahul Manocha
2024-03-15 18:08:32 +00:00
коммит произвёл Rakesh Roy
родитель 8bd7575356
Коммит e2ca8a03ff
5 изменённых файлов: 21 добавлений и 19 удалений
+4 -6
Просмотреть файл
@@ -1447,9 +1447,6 @@
"Unit_Coalesced_Group_Sync_Positive_Basic - uint8_t",
"Unit_Coalesced_Group_Sync_Positive_Basic - uint16_t",
"Unit_Coalesced_Group_Sync_Positive_Basic - uint32_t",
"Unit_Warp_Ballot_Positive_Basic",
"Unit_Warp_Vote_Any_Positive_Basic",
"Unit_Warp_Vote_All_Positive_Basic",
"=== SWDEV-443630 - Below tests failed in stress test on 19/01/23 ===",
"Unit_hipGetSetDevice_MultiThreaded",
#endif
@@ -1536,9 +1533,6 @@
"Unit_Coalesced_Group_Sync_Positive_Basic - uint8_t",
"Unit_Coalesced_Group_Sync_Positive_Basic - uint16_t",
"Unit_Coalesced_Group_Sync_Positive_Basic - uint32_t",
"Unit_Warp_Ballot_Positive_Basic",
"Unit_Warp_Vote_Any_Positive_Basic",
"Unit_Warp_Vote_All_Positive_Basic",
"=== SWDEV-439298: Below test failing in CQE staging ===",
"Unit_hipCGMultiGridGroupType_Barrier",
"=== SWDEV-443630 : Below test failed in stress test on 19/01/24 ===",
@@ -1580,6 +1574,10 @@
"Unit_safeAtomicMin_Positive_SameAddress - float",
"=== SWDEV-454220 : Below test hanged in stress test on 22/03/24 ===",
"Unit_hipExtLaunchKernel_Positive_Basic",
"=== Below tests fail in stress test 03/29/24 ===",
"Unit_Warp_Ballot_Positive_Basic",
"Unit_Warp_Vote_Any_Positive_Basic",
"Unit_Warp_Vote_All_Positive_Basic",
#endif
#if defined NAVI21
"=== SWDEV-445961: These tests hang in PSDB stress test on 09/02/2024 ===",
+6 -5
Просмотреть файл
@@ -41,7 +41,7 @@ static bool check_if_all(uint64_t predicate_mask, uint64_t active_mask, size_t p
return true;
}
__global__ void kernel_all(int* const out, const uint64_t* const active_masks, uint64_t predicate) {
__global__ void kernel_all(uint64_t* const out, const uint64_t* const active_masks, uint64_t predicate) {
if (deactivate_thread(active_masks)) {
return;
}
@@ -49,12 +49,13 @@ __global__ void kernel_all(int* const out, const uint64_t* const active_masks, u
const auto grid = cg::this_grid();
const auto warp = cg::tiled_partition(cg::this_thread_block(), warpSize);
out[grid.thread_rank()] = __all((predicate & (static_cast<uint64_t>(1) << warp.thread_rank())));
int pred = MASK_SHIFT(predicate, warp.thread_rank());
out[grid.thread_rank()] = __all(pred);
}
class WarpAll : public WarpVoteTest<WarpAll, int> {
class WarpAll : public WarpVoteTest<WarpAll, uint64_t> {
public:
void launch_kernel(int* const arr_dev, const uint64_t* const active_masks) {
void launch_kernel(uint64_t* const arr_dev, const uint64_t* const active_masks) {
auto test_case = GENERATE(range(0, 5));
predicate_mask_ = get_predicate_mask(test_case, this->warp_size_);
INFO("Predicate mask: " << predicate_mask_);
@@ -62,7 +63,7 @@ class WarpAll : public WarpVoteTest<WarpAll, int> {
predicate_mask_);
}
void validate(const int* const arr) {
void validate(const uint64_t* const arr) {
ArrayAllOf(arr, this->grid_.thread_count_, [this](unsigned int i) -> std::optional<int> {
const auto rank_in_block = this->grid_.thread_rank_in_block(i).value();
const auto rank_in_warp = rank_in_block % this->warp_size_;
+6 -5
Просмотреть файл
@@ -32,7 +32,7 @@ THE SOFTWARE.
namespace cg = cooperative_groups;
__global__ void kernel_any(int* const out, const uint64_t* const active_masks, uint64_t predicate) {
__global__ void kernel_any(uint64_t* const out, const uint64_t* const active_masks, uint64_t predicate) {
if (deactivate_thread(active_masks)) {
return;
}
@@ -40,12 +40,13 @@ __global__ void kernel_any(int* const out, const uint64_t* const active_masks, u
const auto grid = cg::this_grid();
const auto warp = cg::tiled_partition(cg::this_thread_block(), warpSize);
out[grid.thread_rank()] = __any((predicate & (static_cast<uint64_t>(1) << warp.thread_rank())));
int pred = MASK_SHIFT(predicate, warp.thread_rank());
out[grid.thread_rank()] = __any(pred);
}
class WarpAny : public WarpVoteTest<WarpAny, int> {
class WarpAny : public WarpVoteTest<WarpAny, uint64_t> {
public:
void launch_kernel(int* const arr_dev, const uint64_t* const active_masks) {
void launch_kernel(uint64_t* const arr_dev, const uint64_t* const active_masks) {
auto test_case = GENERATE(range(0, 5));
predicate_mask_ = get_predicate_mask(test_case, this->warp_size_);
INFO("Predicate mask: " << predicate_mask_);
@@ -53,7 +54,7 @@ class WarpAny : public WarpVoteTest<WarpAny, int> {
predicate_mask_);
}
void validate(const int* const arr) {
void validate(const uint64_t* const arr) {
ArrayAllOf(arr, this->grid_.thread_count_, [this](unsigned int i) -> std::optional<int> {
const auto rank_in_block = this->grid_.thread_rank_in_block(i).value();
const auto rank_in_warp = rank_in_block % this->warp_size_;
+2 -2
Просмотреть файл
@@ -40,8 +40,8 @@ __global__ void kernel_ballot(uint64_t* const out, const uint64_t* const active_
const auto grid = cg::this_grid();
const auto warp = cg::tiled_partition(cg::this_thread_block(), warpSize);
out[grid.thread_rank()] =
__ballot((predicate & (static_cast<uint64_t>(1) << warp.thread_rank())));
int pred = MASK_SHIFT(predicate, warp.thread_rank());
out[grid.thread_rank()] = __ballot(pred);
}
class WarpBallot : public WarpVoteTest<WarpBallot, uint64_t> {
+3 -1
Просмотреть файл
@@ -25,6 +25,9 @@ THE SOFTWARE.
#include <hip/hip_cooperative_groups.h>
#include <hip/hip_fp16.h>
#define MASK_SHIFT(x, n) \
(x & (static_cast<uint64_t>(1) << n)) >> n
const unsigned long long Every5thBit = 0x1084210842108421;
const unsigned long long Every9thBit = 0x8040201008040201;
const unsigned long long Every5thBut9th = Every5thBit & ~Every9thBit;
@@ -37,7 +40,6 @@ inline __device__ bool deactivate_thread(const uint64_t* const active_masks) {
const auto warps_per_block = (block.size() + warpSize - 1) / warpSize;
const auto block_rank = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
const auto idx = block_rank * warps_per_block + block.thread_rank() / warpSize;
return !(active_masks[idx] & (static_cast<uint64_t>(1) << warp.thread_rank()));
}