Fix bug reported in SWDEV-251619
Unit testing of CG threadRank api in multi-grid environment requires
some changes as grid id of particular device is not known apriori.
Change-Id: Ie7941a09337653f3ada5eedfb7c64a93a234498b
[ROCm/clr commit: 9c86cdb89d]
Este commit está contenido en:
cometido por
Mahesha Shivamallappa
padre
41e2601b0d
commit
d3bcef604a
+17
-3
@@ -34,6 +34,8 @@ THE SOFTWARE.
|
||||
#include <climits>
|
||||
|
||||
#define ASSERT_EQUAL(lhs, rhs) assert(lhs == rhs)
|
||||
#define ASSERT_LE(lhs, rhs) assert(lhs <= rhs)
|
||||
#define ASSERT_GE(lhs, rhs) assert(lhs >= rhs)
|
||||
|
||||
using namespace cooperative_groups;
|
||||
|
||||
@@ -193,15 +195,27 @@ static void test_cg_multi_grid_group_type(int blockSize)
|
||||
}
|
||||
|
||||
// Validate results
|
||||
int gridsSeen[MaxGPUs];
|
||||
for (int i = 0; i < nGpu; ++i) {
|
||||
for (int j = 0; j < 2 * blockSize; ++j) {
|
||||
//ASSERT_EQUAL(numGridsTestH[i][j], nGpu);
|
||||
//ASSERT_EQUAL(gridRankTestH[i][j], i);
|
||||
ASSERT_EQUAL(numGridsTestH[i][j], nGpu);
|
||||
ASSERT_GE(gridRankTestH[i][j], 0);
|
||||
ASSERT_LE(gridRankTestH[i][j], nGpu-1);
|
||||
ASSERT_EQUAL(gridRankTestH[i][j], gridRankTestH[i][0]);
|
||||
ASSERT_EQUAL(sizeTestH[i][j], nGpu * 2 * blockSize);
|
||||
ASSERT_EQUAL(thdRankTestH[i][j], (i * 2 * blockSize) + j);
|
||||
int gridRank = gridRankTestH[i][j];
|
||||
ASSERT_EQUAL(thdRankTestH[i][j], (gridRank * 2 * blockSize) + j);
|
||||
ASSERT_EQUAL(isValidTestH[i][j], 1);
|
||||
}
|
||||
ASSERT_EQUAL(syncResultD[i+1], 2 * blockSize);
|
||||
|
||||
// Validate uniqueness property of grid rank
|
||||
gridsSeen[i] = gridRankTestH[i][0];
|
||||
for (int k = 0; k < i; ++k) {
|
||||
if (gridsSeen[k] == gridsSeen[i]) {
|
||||
assert (false && "Grid rank in multi-gpu setup should be unique");
|
||||
}
|
||||
}
|
||||
}
|
||||
ASSERT_EQUAL(syncResultD[0], nGpu * 2 * blockSize);
|
||||
|
||||
|
||||
+31
-6
@@ -34,11 +34,14 @@ THE SOFTWARE.
|
||||
#include <climits>
|
||||
|
||||
#define ASSERT_EQUAL(lhs, rhs) assert(lhs == rhs)
|
||||
#define ASSERT_LE(lhs, rhs) assert(lhs <= rhs)
|
||||
#define ASSERT_GE(lhs, rhs) assert(lhs >= rhs)
|
||||
|
||||
using namespace cooperative_groups;
|
||||
|
||||
static __global__
|
||||
void kernel_cg_multi_grid_group_type_via_base_type(int *sizeTestD,
|
||||
int* gridRankTestD,
|
||||
int *thdRankTestD,
|
||||
int *isValidTestD,
|
||||
int *syncTestD,
|
||||
@@ -51,6 +54,7 @@ void kernel_cg_multi_grid_group_type_via_base_type(int *sizeTestD,
|
||||
sizeTestD[gIdx] = tg.size();
|
||||
|
||||
// Test thread_rank
|
||||
gridRankTestD[gIdx] = this_multi_grid().grid_rank();
|
||||
thdRankTestD[gIdx] = tg.thread_rank();
|
||||
|
||||
// Test is_valid
|
||||
@@ -110,6 +114,7 @@ static void test_cg_multi_grid_group_type_via_base_type(int blockSize)
|
||||
// Allocate host and device memory
|
||||
int nBytes = sizeof(int) * 2 * blockSize;
|
||||
int *sizeTestD[MaxGPUs], *sizeTestH[MaxGPUs];
|
||||
int *gridRankTestD[MaxGPUs], *gridRankTestH[MaxGPUs];
|
||||
int *thdRankTestD[MaxGPUs], *thdRankTestH[MaxGPUs];
|
||||
int *isValidTestD[MaxGPUs], *isValidTestH[MaxGPUs];
|
||||
int *syncTestD[MaxGPUs], *syncResultD;
|
||||
@@ -117,11 +122,13 @@ static void test_cg_multi_grid_group_type_via_base_type(int blockSize)
|
||||
ASSERT_EQUAL(hipSetDevice(i), hipSuccess);
|
||||
|
||||
ASSERT_EQUAL(hipMalloc(&sizeTestD[i], nBytes), hipSuccess);
|
||||
ASSERT_EQUAL(hipMalloc(&gridRankTestD[i], nBytes), hipSuccess);
|
||||
ASSERT_EQUAL(hipMalloc(&thdRankTestD[i], nBytes), hipSuccess);
|
||||
ASSERT_EQUAL(hipMalloc(&isValidTestD[i], nBytes), hipSuccess);
|
||||
ASSERT_EQUAL(hipMalloc(&syncTestD[i], nBytes), hipSuccess);
|
||||
|
||||
ASSERT_EQUAL(hipHostMalloc(&sizeTestH[i], nBytes), hipSuccess);
|
||||
ASSERT_EQUAL(hipHostMalloc(&gridRankTestH[i], nBytes), hipSuccess);
|
||||
ASSERT_EQUAL(hipHostMalloc(&thdRankTestH[i], nBytes), hipSuccess);
|
||||
ASSERT_EQUAL(hipHostMalloc(&isValidTestH[i], nBytes), hipSuccess);
|
||||
|
||||
@@ -135,17 +142,18 @@ static void test_cg_multi_grid_group_type_via_base_type(int blockSize)
|
||||
}
|
||||
|
||||
// Launch Kernel
|
||||
constexpr int NumKernelArgs = 5;
|
||||
constexpr int NumKernelArgs = 6;
|
||||
hipLaunchParams* launchParamsList = new hipLaunchParams[nGpu];
|
||||
void* args[MaxGPUs * NumKernelArgs];
|
||||
for (int i = 0; i < nGpu; i++) {
|
||||
ASSERT_EQUAL(hipSetDevice(i), hipSuccess);
|
||||
|
||||
args[i * NumKernelArgs ] = &sizeTestD[i];
|
||||
args[i * NumKernelArgs + 1] = &thdRankTestD[i];
|
||||
args[i * NumKernelArgs + 2] = &isValidTestD[i];
|
||||
args[i * NumKernelArgs + 3] = &syncTestD[i];
|
||||
args[i * NumKernelArgs + 4] = &syncResultD;
|
||||
args[i * NumKernelArgs + 1] = &gridRankTestD[i];
|
||||
args[i * NumKernelArgs + 2] = &thdRankTestD[i];
|
||||
args[i * NumKernelArgs + 3] = &isValidTestD[i];
|
||||
args[i * NumKernelArgs + 4] = &syncTestD[i];
|
||||
args[i * NumKernelArgs + 5] = &syncResultD;
|
||||
|
||||
launchParamsList[i].func = reinterpret_cast<void*>(kernel_cg_multi_grid_group_type_via_base_type);
|
||||
launchParamsList[i].gridDim = 2;
|
||||
@@ -164,6 +172,8 @@ static void test_cg_multi_grid_group_type_via_base_type(int blockSize)
|
||||
|
||||
ASSERT_EQUAL(hipMemcpy(sizeTestH[i], sizeTestD[i], nBytes, hipMemcpyDeviceToHost),
|
||||
hipSuccess);
|
||||
ASSERT_EQUAL(hipMemcpy(gridRankTestH[i], gridRankTestD[i], nBytes, hipMemcpyDeviceToHost),
|
||||
hipSuccess);
|
||||
ASSERT_EQUAL(hipMemcpy(thdRankTestH[i], thdRankTestD[i], nBytes, hipMemcpyDeviceToHost),
|
||||
hipSuccess);
|
||||
ASSERT_EQUAL(hipMemcpy(isValidTestH[i], isValidTestD[i], nBytes, hipMemcpyDeviceToHost),
|
||||
@@ -173,13 +183,26 @@ static void test_cg_multi_grid_group_type_via_base_type(int blockSize)
|
||||
}
|
||||
|
||||
// Validate results
|
||||
int gridsSeen[MaxGPUs];
|
||||
for (int i = 0; i < nGpu; ++i) {
|
||||
for (int j = 0; j < 2 * blockSize; ++j) {
|
||||
ASSERT_EQUAL(sizeTestH[i][j], nGpu * 2 * blockSize);
|
||||
ASSERT_EQUAL(thdRankTestH[i][j], (i * 2 * blockSize) + j);
|
||||
ASSERT_GE(gridRankTestH[i][j], 0);
|
||||
ASSERT_LE(gridRankTestH[i][j], nGpu-1);
|
||||
ASSERT_EQUAL(gridRankTestH[i][j], gridRankTestH[i][0]);
|
||||
int gridRank = gridRankTestH[i][j];
|
||||
ASSERT_EQUAL(thdRankTestH[i][j], (gridRank * 2 * blockSize) + j);
|
||||
ASSERT_EQUAL(isValidTestH[i][j], 1);
|
||||
}
|
||||
ASSERT_EQUAL(syncResultD[i+1], 2 * blockSize);
|
||||
|
||||
// Validate uniqueness property of grid rank
|
||||
gridsSeen[i] = gridRankTestH[i][0];
|
||||
for (int k = 0; k < i; ++k) {
|
||||
if (gridsSeen[k] == gridsSeen[i]) {
|
||||
assert (false && "Grid rank in multi-gpu setup should be unique");
|
||||
}
|
||||
}
|
||||
}
|
||||
ASSERT_EQUAL(syncResultD[0], nGpu * 2 * blockSize);
|
||||
|
||||
@@ -189,6 +212,7 @@ static void test_cg_multi_grid_group_type_via_base_type(int blockSize)
|
||||
ASSERT_EQUAL(hipSetDevice(i), hipSuccess);
|
||||
|
||||
ASSERT_EQUAL(hipFree(sizeTestD[i]), hipSuccess);
|
||||
ASSERT_EQUAL(hipFree(gridRankTestD[i]), hipSuccess);
|
||||
ASSERT_EQUAL(hipFree(thdRankTestD[i]), hipSuccess);
|
||||
ASSERT_EQUAL(hipFree(isValidTestD[i]), hipSuccess);
|
||||
ASSERT_EQUAL(hipFree(syncTestD[i]), hipSuccess);
|
||||
@@ -197,6 +221,7 @@ static void test_cg_multi_grid_group_type_via_base_type(int blockSize)
|
||||
ASSERT_EQUAL(hipFree(syncResultD), hipSuccess);
|
||||
|
||||
ASSERT_EQUAL(hipHostFree(sizeTestH[i]), hipSuccess);
|
||||
ASSERT_EQUAL(hipHostFree(gridRankTestH[i]), hipSuccess);
|
||||
ASSERT_EQUAL(hipHostFree(thdRankTestH[i]), hipSuccess);
|
||||
ASSERT_EQUAL(hipHostFree(isValidTestH[i]), hipSuccess);
|
||||
|
||||
|
||||
+31
-6
@@ -34,11 +34,14 @@ THE SOFTWARE.
|
||||
#include <climits>
|
||||
|
||||
#define ASSERT_EQUAL(lhs, rhs) assert(lhs == rhs)
|
||||
#define ASSERT_LE(lhs, rhs) assert(lhs <= rhs)
|
||||
#define ASSERT_GE(lhs, rhs) assert(lhs >= rhs)
|
||||
|
||||
using namespace cooperative_groups;
|
||||
|
||||
static __global__
|
||||
void kernel_cg_multi_grid_group_type_via_public_api(int *sizeTestD,
|
||||
int* gridRankTestD,
|
||||
int *thdRankTestD,
|
||||
int *isValidTestD,
|
||||
int *syncTestD,
|
||||
@@ -51,6 +54,7 @@ void kernel_cg_multi_grid_group_type_via_public_api(int *sizeTestD,
|
||||
sizeTestD[gIdx] = group_size(mg);
|
||||
|
||||
// Test thread_rank api
|
||||
gridRankTestD[gIdx] = this_multi_grid().grid_rank();
|
||||
thdRankTestD[gIdx] = thread_rank(mg);
|
||||
|
||||
// Test is_valid api
|
||||
@@ -110,6 +114,7 @@ static void test_cg_multi_grid_group_type_via_public_api(int blockSize)
|
||||
// Allocate host and device memory
|
||||
int nBytes = sizeof(int) * 2 * blockSize;
|
||||
int *sizeTestD[MaxGPUs], *sizeTestH[MaxGPUs];
|
||||
int *gridRankTestD[MaxGPUs], *gridRankTestH[MaxGPUs];
|
||||
int *thdRankTestD[MaxGPUs], *thdRankTestH[MaxGPUs];
|
||||
int *isValidTestD[MaxGPUs], *isValidTestH[MaxGPUs];
|
||||
int *syncTestD[MaxGPUs], *syncResultD;
|
||||
@@ -117,11 +122,13 @@ static void test_cg_multi_grid_group_type_via_public_api(int blockSize)
|
||||
ASSERT_EQUAL(hipSetDevice(i), hipSuccess);
|
||||
|
||||
ASSERT_EQUAL(hipMalloc(&sizeTestD[i], nBytes), hipSuccess);
|
||||
ASSERT_EQUAL(hipMalloc(&gridRankTestD[i], nBytes), hipSuccess);
|
||||
ASSERT_EQUAL(hipMalloc(&thdRankTestD[i], nBytes), hipSuccess);
|
||||
ASSERT_EQUAL(hipMalloc(&isValidTestD[i], nBytes), hipSuccess);
|
||||
ASSERT_EQUAL(hipMalloc(&syncTestD[i], nBytes), hipSuccess);
|
||||
|
||||
ASSERT_EQUAL(hipHostMalloc(&sizeTestH[i], nBytes), hipSuccess);
|
||||
ASSERT_EQUAL(hipHostMalloc(&gridRankTestH[i], nBytes), hipSuccess);
|
||||
ASSERT_EQUAL(hipHostMalloc(&thdRankTestH[i], nBytes), hipSuccess);
|
||||
ASSERT_EQUAL(hipHostMalloc(&isValidTestH[i], nBytes), hipSuccess);
|
||||
|
||||
@@ -135,17 +142,18 @@ static void test_cg_multi_grid_group_type_via_public_api(int blockSize)
|
||||
}
|
||||
|
||||
// Launch Kernel
|
||||
constexpr int NumKernelArgs = 5;
|
||||
constexpr int NumKernelArgs = 6;
|
||||
hipLaunchParams* launchParamsList = new hipLaunchParams[nGpu];
|
||||
void* args[MaxGPUs * NumKernelArgs];
|
||||
for (int i = 0; i < nGpu; i++) {
|
||||
ASSERT_EQUAL(hipSetDevice(i), hipSuccess);
|
||||
|
||||
args[i * NumKernelArgs ] = &sizeTestD[i];
|
||||
args[i * NumKernelArgs + 1] = &thdRankTestD[i];
|
||||
args[i * NumKernelArgs + 2] = &isValidTestD[i];
|
||||
args[i * NumKernelArgs + 3] = &syncTestD[i];
|
||||
args[i * NumKernelArgs + 4] = &syncResultD;
|
||||
args[i * NumKernelArgs + 1] = &gridRankTestD[i];
|
||||
args[i * NumKernelArgs + 2] = &thdRankTestD[i];
|
||||
args[i * NumKernelArgs + 3] = &isValidTestD[i];
|
||||
args[i * NumKernelArgs + 4] = &syncTestD[i];
|
||||
args[i * NumKernelArgs + 5] = &syncResultD;
|
||||
|
||||
launchParamsList[i].func = reinterpret_cast<void*>(kernel_cg_multi_grid_group_type_via_public_api);
|
||||
launchParamsList[i].gridDim = 2;
|
||||
@@ -164,6 +172,8 @@ static void test_cg_multi_grid_group_type_via_public_api(int blockSize)
|
||||
|
||||
ASSERT_EQUAL(hipMemcpy(sizeTestH[i], sizeTestD[i], nBytes, hipMemcpyDeviceToHost),
|
||||
hipSuccess);
|
||||
ASSERT_EQUAL(hipMemcpy(gridRankTestH[i], gridRankTestD[i], nBytes, hipMemcpyDeviceToHost),
|
||||
hipSuccess);
|
||||
ASSERT_EQUAL(hipMemcpy(thdRankTestH[i], thdRankTestD[i], nBytes, hipMemcpyDeviceToHost),
|
||||
hipSuccess);
|
||||
ASSERT_EQUAL(hipMemcpy(isValidTestH[i], isValidTestD[i], nBytes, hipMemcpyDeviceToHost),
|
||||
@@ -173,13 +183,26 @@ static void test_cg_multi_grid_group_type_via_public_api(int blockSize)
|
||||
}
|
||||
|
||||
// Validate results
|
||||
int gridsSeen[MaxGPUs];
|
||||
for (int i = 0; i < nGpu; ++i) {
|
||||
for (int j = 0; j < 2 * blockSize; ++j) {
|
||||
ASSERT_EQUAL(sizeTestH[i][j], nGpu * 2 * blockSize);
|
||||
ASSERT_EQUAL(thdRankTestH[i][j], (i * 2 * blockSize) + j);
|
||||
ASSERT_GE(gridRankTestH[i][j], 0);
|
||||
ASSERT_LE(gridRankTestH[i][j], nGpu-1);
|
||||
ASSERT_EQUAL(gridRankTestH[i][j], gridRankTestH[i][0]);
|
||||
int gridRank = gridRankTestH[i][j];
|
||||
ASSERT_EQUAL(thdRankTestH[i][j], (gridRank * 2 * blockSize) + j);
|
||||
ASSERT_EQUAL(isValidTestH[i][j], 1);
|
||||
}
|
||||
ASSERT_EQUAL(syncResultD[i+1], 2 * blockSize);
|
||||
|
||||
// Validate uniqueness property of grid rank
|
||||
gridsSeen[i] = gridRankTestH[i][0];
|
||||
for (int k = 0; k < i; ++k) {
|
||||
if (gridsSeen[k] == gridsSeen[i]) {
|
||||
assert (false && "Grid rank in multi-gpu setup should be unique");
|
||||
}
|
||||
}
|
||||
}
|
||||
ASSERT_EQUAL(syncResultD[0], nGpu * 2 * blockSize);
|
||||
|
||||
@@ -189,6 +212,7 @@ static void test_cg_multi_grid_group_type_via_public_api(int blockSize)
|
||||
ASSERT_EQUAL(hipSetDevice(i), hipSuccess);
|
||||
|
||||
ASSERT_EQUAL(hipFree(sizeTestD[i]), hipSuccess);
|
||||
ASSERT_EQUAL(hipFree(gridRankTestD[i]), hipSuccess);
|
||||
ASSERT_EQUAL(hipFree(thdRankTestD[i]), hipSuccess);
|
||||
ASSERT_EQUAL(hipFree(isValidTestD[i]), hipSuccess);
|
||||
ASSERT_EQUAL(hipFree(syncTestD[i]), hipSuccess);
|
||||
@@ -197,6 +221,7 @@ static void test_cg_multi_grid_group_type_via_public_api(int blockSize)
|
||||
ASSERT_EQUAL(hipFree(syncResultD), hipSuccess);
|
||||
|
||||
ASSERT_EQUAL(hipHostFree(sizeTestH[i]), hipSuccess);
|
||||
ASSERT_EQUAL(hipHostFree(gridRankTestH[i]), hipSuccess);
|
||||
ASSERT_EQUAL(hipHostFree(thdRankTestH[i]), hipSuccess);
|
||||
ASSERT_EQUAL(hipHostFree(isValidTestH[i]), hipSuccess);
|
||||
|
||||
|
||||
Referencia en una nueva incidencia
Block a user