Unit test fail check (#689)

* Adding fall-through on unit test failure

* Workaround for hipGraph validity check issue
Bu işleme şunda yer alıyor:
gilbertlee-amd
2023-02-18 09:50:46 -07:00
işlemeyi yapan: GitHub
ebeveyn 1c166046a2
işleme a640c6983f
2 değiştirilmiş dosya ile 11 ekleme ve 2 silme
+1 -1
Dosyayı Görüntüle
@@ -23,7 +23,7 @@ namespace RcclUnitTesting
std::vector<int> const numElements = {384 * 1024, 384};
std::vector<bool> const inPlaceList = {true, false};
std::vector<bool> const managedMemList = {true, false};
-std::vector<bool> const useHipGraphList = {true, false};
+std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
+10 -1
Dosyayı Görüntüle
@@ -40,6 +40,7 @@
if (response != TEST_SUCCESS) \
{ \
ERROR("Child %d reports failure\n", childId); \
ASSERT_EQ(response, TEST_SUCCESS); \
FAIL(); \
} \
}
@@ -476,6 +477,7 @@ namespace RcclUnitTesting
int const numChildren = isMultiProcess ? numGpus : 1;
int const numRanks = numGpus*ranksPerGpu;
this->InitComms(TestBed::GetDeviceIdsList(numChildren, numGpus, ranksPerGpu));
if (testing::Test::HasFailure()) continue;
for (int ftIdx = 0; ftIdx < funcTypes.size() && isCorrect; ++ftIdx)
for (int dtIdx = 0; dtIdx < dataTypes.size() && isCorrect; ++dtIdx)
@@ -499,9 +501,14 @@ namespace RcclUnitTesting
numInputElements,
numOutputElements,
optionalArgs);
+if (testing::Test::HasFailure()) continue;
// Only allocate once for largest size
-if (neIdx == 0) this->AllocateMem(inPlaceList[ipIdx], managedMemList[mmIdx]);
+if (neIdx == 0)
+{
+this->AllocateMem(inPlaceList[ipIdx], managedMemList[mmIdx]);
+if (testing::Test::HasFailure()) continue;
+}
for (int hgIdx = 0; hgIdx < useHipGraphList.size() && isCorrect; ++hgIdx)
{
@@ -512,6 +519,7 @@ namespace RcclUnitTesting
funcTypes[ftIdx] == ncclCollReduce ||
funcTypes[ftIdx] == ncclCollAllReduce));
if (!canSkip) this->PrepareData();
if (testing::Test::HasFailure()) continue;
std::string name = this->GetTestCaseName(numGpus, isMultiProcess,
funcTypes[ftIdx], dataTypes[dtIdx],
@@ -526,6 +534,7 @@ namespace RcclUnitTesting
std::vector<int> currentRanksEmpty = {};
this->ExecuteCollectives(currentRanksEmpty, useHipGraphList[hgIdx]);
if (testing::Test::HasFailure()) continue;
this->ValidateResults(isCorrect);
if (!isCorrect)
{