Update MP UT to support arbitrary # of GPUs; multiple bugfixes (#16)

* Fixing temp file creation/deletion for Clique kernel mode.

* Refactoring of MP unit tests; includes bugfixes and general support for any number of GPUs

* GroupCall MP UT properly quits when too many devices are specified

* MP UT will programmatically set NCCL_COMM_ID if not specified; updated install script
Šī revīzija ir iekļauta:
Stanley Tsang
2021-02-05 17:49:25 -07:00
revīziju iesūtīja GitHub
vecāks 6dfdfef98f
revīzija d00b7d17bd
23 mainīti faili ar 538 papildinājumiem un 716 dzēšanām
+14 -46
Parādīt failu
@@ -25,60 +25,28 @@ namespace CorrectnessTests
datasets[i] = (Dataset*)mmap(NULL, sizeof(Dataset), PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
datasets[i]->InitializeRootProcess(numDevices, numElements, dataType, inPlace, ncclFuncs[i]);
}
Barrier::ClearShmFiles(std::atoi(getenv("NCCL_COMM_ID")));
int pid1 = 0;
int pid2 = 0;
int pid3 = 0;
pid1 = fork();
std::vector<int> pids(numDevices);
// From this point on, ignore original process as we cannot have it create a HIP context
if (pid1 == 0)
int gpu = -1;
for (int i = 0; i < numDevices; i++)
{
pid2 = fork();
if (numDevices > 2)
gpu++;
int pid = fork();
if (pid == 0)
{
pid3 = fork();
}
if ((pid2 > 0 && pid3 == 0 && numDevices == 2) || (pid2 > 0 && pid3 > 0 && numDevices > 2))
{
// Process 0
TestCombinedCalls(0, datasets, ncclFuncs);
if (pid3 > 0)
{
waitpid(pid3, NULL, 0);
}
}
else if ((pid2 == 0 && pid3 == 0 && numDevices == 2) || (pid2 == 0 && pid3 > 0 && numDevices > 2))
{
// Process 1
TestCombinedCalls(1, datasets, ncclFuncs);
if (numDevices > 2)
{
waitpid(pid3, NULL, 0);
}
exit(0);
}
else if (pid2 > 0 && pid3 == 0 && numDevices > 2)
{
// Process 2 (available when numDevices > 2)
TestCombinedCalls(2, datasets, ncclFuncs);
exit(0);
}
else if (pid2 == 0 && pid3 == 0 && numDevices == 4)
{
// Process 3 (available when numDevices == 4)
TestCombinedCalls(3, datasets, ncclFuncs);
exit(0);
bool pass;
TestCombinedCalls(gpu, datasets, ncclFuncs, pass);
TerminateChildProcess(pass);
}
else
{
exit(0);
pids[gpu] = pid;
}
waitpid(pid2, NULL, 0);
exit(0);
}
waitpid(pid1, NULL, 0);
ValidateProcesses(pids);
for (int i = 0; i < datasets.size(); i++)
{
munmap(datasets[i], sizeof(Dataset));
@@ -104,7 +72,7 @@ namespace CorrectnessTests
// Number of elements
testing::Values(3072, 3145728),
// Number of devices
testing::Values(2,3,4),
testing::Values(2,3,4,8),
// In-place or not
testing::Values(false, true),
testing::Values("")),