Added a Process Isolated Test Runner (#1993)
* Added single process isolation support to execute tests * Address review comments * Update README * Removed requirement of explicit call to clear method * Added macros for simplified usage * Updated tests to use process isolation framework * Adjust summary output format for isolated tests * Updated rccl_wrap tests * Used process isolation in AllocTests * Used process isolation and fixed failing tests * Modified test output, added signal handling Updated macros to handle lambdas * Convert argcheck tests to isolated tests * Convert proxy tests to isolated tests * Remove non-supported test * Fixed file descriptor handling and clearing env vars for tests
Tento commit je obsažen v:
+593
-198
@@ -4,27 +4,120 @@
|
||||
* See LICENSE.txt for license information
|
||||
************************************************************************/
|
||||
#include <gtest/gtest.h>
|
||||
#include <cstring>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "comm.h"
|
||||
#include "info.h"
|
||||
#include "common/ProcessIsolatedTestRunner.hpp"
|
||||
#include "enqueue.h"
|
||||
#include "info.h"
|
||||
#include "utils.h"
|
||||
|
||||
class EnqueueTests : public ::testing::Test {
|
||||
protected:
|
||||
namespace RcclUnitTesting
|
||||
{
|
||||
|
||||
// Simple test kernel for validating ncclInitKernelsForDevice
|
||||
__global__ void simpleTestKernel(int* data)
|
||||
{
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if(data)
|
||||
data[tid] = tid;
|
||||
}
|
||||
|
||||
// Helper function to test ncclInitKernelsForDevice with a real kernel
|
||||
ncclResult_t testKernelAttributes(void* kernelFn, size_t* maxStackSize)
|
||||
{
|
||||
if(!kernelFn || !maxStackSize)
|
||||
return ncclInvalidArgument;
|
||||
|
||||
*maxStackSize = 0;
|
||||
hipFuncAttributes attr = {0};
|
||||
|
||||
hipError_t errcode = hipFuncGetAttributes(&attr, kernelFn);
|
||||
if(errcode != hipSuccess)
|
||||
return ncclSystemError;
|
||||
|
||||
*maxStackSize = attr.localSizeBytes;
|
||||
return ncclSuccess; // ncclSuccess
|
||||
}
|
||||
|
||||
// Helper function to test shared memory limit checking with a real kernel
|
||||
// ncclMaxSharedMem: For gfx906 (cudaArch 906) with WarpSize 64, this is typically 32832 bytes
|
||||
ncclResult_t testKernelSharedMemoryLimit(
|
||||
void* kernelFn, int cudaArch, int maxSharedMem, size_t* maxStackSize, int ncclMaxSharedMem
|
||||
)
|
||||
{
|
||||
if(!kernelFn)
|
||||
return ncclInvalidArgument;
|
||||
|
||||
ncclResult_t result = ncclSuccess;
|
||||
if(maxStackSize)
|
||||
*maxStackSize = 0;
|
||||
|
||||
hipFuncAttributes attr = {0};
|
||||
hipError_t errcode = hipFuncGetAttributes(&attr, kernelFn);
|
||||
if(errcode != hipSuccess)
|
||||
{
|
||||
return ncclSystemError;
|
||||
}
|
||||
|
||||
if(maxStackSize)
|
||||
{
|
||||
*maxStackSize = attr.localSizeBytes;
|
||||
}
|
||||
|
||||
// Test the shared memory limit check (mimics enqueue.cc lines 135-146)
|
||||
if(ncclMaxSharedMem != 0)
|
||||
{
|
||||
int sharedMemSize = ncclMaxSharedMem;
|
||||
|
||||
if(sharedMemSize > (maxSharedMem - attr.sharedSizeBytes))
|
||||
{
|
||||
WARN(
|
||||
"cudaArch %d ncclMaxSharedMem %d exceeds device/fn maxSharedMem %zu",
|
||||
cudaArch,
|
||||
sharedMemSize,
|
||||
maxSharedMem - attr.sharedSizeBytes
|
||||
);
|
||||
return ncclSystemError;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Helper structure to hold test environment
|
||||
struct EnqueueTestEnvironment
|
||||
{
|
||||
ncclComm* comm;
|
||||
ncclInfo* info;
|
||||
void* sendbuff;
|
||||
void* recvbuff;
|
||||
static uint32_t abortFlag0, abortFlag1;
|
||||
static int abortFlagRefCount;
|
||||
void* sendbuff;
|
||||
void* recvbuff;
|
||||
uint32_t abortFlag0;
|
||||
uint32_t abortFlag1;
|
||||
int abortFlagRefCount;
|
||||
|
||||
void SetUp() override {
|
||||
EnqueueTestEnvironment()
|
||||
: comm(nullptr)
|
||||
, info(nullptr)
|
||||
, sendbuff(nullptr)
|
||||
, recvbuff(nullptr)
|
||||
, abortFlag0(0)
|
||||
, abortFlag1(0)
|
||||
, abortFlagRefCount(0)
|
||||
{}
|
||||
|
||||
~EnqueueTestEnvironment()
|
||||
{
|
||||
cleanup();
|
||||
}
|
||||
|
||||
void setup()
|
||||
{
|
||||
// Allocate GPU memory for buffers
|
||||
size_t bufferSize = 1024 * sizeof(float);
|
||||
hipError_t hipErr = hipMalloc(&sendbuff, bufferSize);
|
||||
size_t bufferSize = 1024 * sizeof(float);
|
||||
hipError_t hipErr = hipMalloc(&sendbuff, bufferSize);
|
||||
ASSERT_EQ(hipErr, hipSuccess) << "Failed to allocate sendbuff";
|
||||
|
||||
hipErr = hipMalloc(&recvbuff, bufferSize);
|
||||
@@ -34,17 +127,17 @@ protected:
|
||||
comm = new ncclComm();
|
||||
memset(comm, 0, sizeof(ncclComm));
|
||||
|
||||
comm->startMagic = NCCL_MAGIC; // 0x0280028002800280
|
||||
comm->startMagic = NCCL_MAGIC; // 0x0280028002800280
|
||||
|
||||
// Initialize critical fields
|
||||
comm->rank = 0;
|
||||
comm->nRanks = 2;
|
||||
comm->cudaDev = 0;
|
||||
comm->rank = 0;
|
||||
comm->nRanks = 2;
|
||||
comm->cudaDev = 0;
|
||||
comm->localRank = 0;
|
||||
|
||||
// Initialize abort flags
|
||||
comm->abortFlag = &abortFlag0;
|
||||
comm->childAbortFlag = &abortFlag1;
|
||||
comm->abortFlag = &abortFlag0;
|
||||
comm->childAbortFlag = &abortFlag1;
|
||||
comm->abortFlagRefCount = &abortFlagRefCount;
|
||||
|
||||
// Initialize memory stack
|
||||
@@ -53,15 +146,15 @@ protected:
|
||||
|
||||
// Initialize intra-communication pointers
|
||||
comm->intraComm0 = nullptr;
|
||||
comm->intraNext = nullptr;
|
||||
comm->intraNext = nullptr;
|
||||
|
||||
// Initialize work FIFO structures
|
||||
comm->workFifoBytes = 1024; // Power of 2
|
||||
comm->workFifoBuf = nullptr;
|
||||
comm->workFifoBufDev = nullptr;
|
||||
comm->workFifoConsumed = 0;
|
||||
comm->workFifoBytes = 1024; // Power of 2
|
||||
comm->workFifoBuf = nullptr;
|
||||
comm->workFifoBufDev = nullptr;
|
||||
comm->workFifoConsumed = 0;
|
||||
comm->workFifoProducedLastRecorded = 0;
|
||||
comm->workFifoProduced = 0;
|
||||
comm->workFifoProduced = 0;
|
||||
|
||||
// Initialize planner
|
||||
memset(&comm->planner, 0, sizeof(comm->planner));
|
||||
@@ -69,254 +162,556 @@ protected:
|
||||
// Initialize config
|
||||
memset(&comm->config, 0, sizeof(comm->config));
|
||||
comm->config.blocking = 1;
|
||||
comm->checkPointers = 0; // Disable pointer validation for easier testing
|
||||
comm->checkPointers = 0; // Disable pointer validation for easier testing
|
||||
|
||||
// Initialize peer info arrays
|
||||
comm->peerInfo = new ncclPeerInfo[comm->nRanks];
|
||||
memset(comm->peerInfo, 0, comm->nRanks * sizeof(ncclPeerInfo));
|
||||
|
||||
comm->localRankToRank = new int[comm->nRanks];
|
||||
for (int i = 0; i < comm->nRanks; i++) {
|
||||
for(int i = 0; i < comm->nRanks; i++)
|
||||
{
|
||||
comm->localRankToRank[i] = i;
|
||||
}
|
||||
|
||||
comm->endMagic = NCCL_MAGIC; // 0x0280028002800280
|
||||
comm->endMagic = NCCL_MAGIC; // 0x0280028002800280
|
||||
|
||||
// Initialize operation info with valid GPU buffers
|
||||
info = new ncclInfo();
|
||||
memset(info, 0, sizeof(ncclInfo));
|
||||
info->comm = comm;
|
||||
info->opName = "AllReduce";
|
||||
info->count = 1024;
|
||||
info->comm = comm;
|
||||
info->opName = "AllReduce";
|
||||
info->count = 1024;
|
||||
info->datatype = ncclFloat;
|
||||
info->op = ncclSum;
|
||||
info->root = 0;
|
||||
info->sendbuff = sendbuff; // Use allocated GPU memory
|
||||
info->recvbuff = recvbuff; // Use allocated GPU memory
|
||||
info->stream = nullptr;
|
||||
info->op = ncclSum;
|
||||
info->root = 0;
|
||||
info->sendbuff = sendbuff; // Use allocated GPU memory
|
||||
info->recvbuff = recvbuff; // Use allocated GPU memory
|
||||
info->stream = nullptr;
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
if (sendbuff) {
|
||||
hipFree(sendbuff);
|
||||
void cleanup()
|
||||
{
|
||||
// Clean up info first (it references comm)
|
||||
if(info)
|
||||
{
|
||||
delete info;
|
||||
info = nullptr;
|
||||
}
|
||||
if (recvbuff) {
|
||||
hipFree(recvbuff);
|
||||
}
|
||||
if (comm) {
|
||||
|
||||
// Clean up comm and its allocated resources
|
||||
if(comm)
|
||||
{
|
||||
// Clean up memory stacks
|
||||
ncclMemoryStackDestruct(&comm->memScoped);
|
||||
ncclMemoryStackDestruct(&comm->memPermanent);
|
||||
delete[] comm->peerInfo;
|
||||
delete[] comm->localRankToRank;
|
||||
|
||||
// Clean up peer info arrays
|
||||
if(comm->peerInfo)
|
||||
{
|
||||
delete[] comm->peerInfo;
|
||||
comm->peerInfo = nullptr;
|
||||
}
|
||||
|
||||
if(comm->localRankToRank)
|
||||
{
|
||||
delete[] comm->localRankToRank;
|
||||
comm->localRankToRank = nullptr;
|
||||
}
|
||||
|
||||
delete comm;
|
||||
comm = nullptr;
|
||||
}
|
||||
if (info) {
|
||||
delete info;
|
||||
|
||||
// Clean up GPU buffers last
|
||||
if(sendbuff)
|
||||
{
|
||||
hipError_t err = hipFree(sendbuff);
|
||||
if(err != hipSuccess)
|
||||
{
|
||||
// Log error but don't throw in cleanup
|
||||
fprintf(stderr, "Warning: hipFree(sendbuff) failed with error %d\n", err);
|
||||
}
|
||||
sendbuff = nullptr;
|
||||
}
|
||||
|
||||
if(recvbuff)
|
||||
{
|
||||
hipError_t err = hipFree(recvbuff);
|
||||
if(err != hipSuccess)
|
||||
{
|
||||
// Log error but don't throw in cleanup
|
||||
fprintf(stderr, "Warning: hipFree(recvbuff) failed with error %d\n", err);
|
||||
}
|
||||
recvbuff = nullptr;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Static member definitions
|
||||
uint32_t EnqueueTests::abortFlag0 = 0;
|
||||
uint32_t EnqueueTests::abortFlag1 = 0;
|
||||
int EnqueueTests::abortFlagRefCount = 0;
|
||||
// Empty test fixture for test organization
|
||||
class EnqueueTests : public ::testing::Test
|
||||
{
|
||||
// No setup/teardown - all tests use process isolation
|
||||
};
|
||||
|
||||
// Test ncclInitKernelsForDevice function
|
||||
TEST_F(EnqueueTests, ncclInitKernelsForDevice_ValidInput) {
|
||||
size_t maxStackSize = 0;
|
||||
ncclResult_t result = ncclInitKernelsForDevice(906, 65536, &maxStackSize);
|
||||
TEST_F(EnqueueTests, ncclInitKernelsForDevice_ValidInput)
|
||||
{
|
||||
ProcessIsolatedTestRunner::ExecutionOptions options;
|
||||
options.stopOnFirstFailure = false; // Continue running all tests
|
||||
options.verboseLogging = true;
|
||||
|
||||
EXPECT_TRUE(result == ncclSuccess);
|
||||
EXPECT_GT(maxStackSize, 0);
|
||||
RUN_ISOLATED_TESTS_WITH_OPTIONS(
|
||||
options,
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclInitKernelsForDevice_ValidInput",
|
||||
[this]()
|
||||
{
|
||||
size_t maxStackSize = 0;
|
||||
ncclResult_t result = ncclInitKernelsForDevice(906, 65536, &maxStackSize);
|
||||
|
||||
EXPECT_TRUE(result == ncclSuccess);
|
||||
// maxStackSize should be set to a reasonable value (> 0)
|
||||
EXPECT_GT(maxStackSize, 0)
|
||||
<< "Expected maxStackSize to be computed and set to a positive value";
|
||||
}
|
||||
).withEnvironment({{"NCCL_DEBUG", "INFO"}, {"NCCL_DEBUG_SUBSYS", "ALL"}}),
|
||||
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclInitKernelsForDevice_ValidInputCarveout",
|
||||
[this]()
|
||||
{
|
||||
size_t maxStackSize = 0;
|
||||
ncclResult_t result = ncclInitKernelsForDevice(906, 65536, &maxStackSize);
|
||||
|
||||
EXPECT_TRUE(result == ncclSuccess);
|
||||
// maxStackSize should be set to a reasonable value (> 0)
|
||||
EXPECT_GT(maxStackSize, 0)
|
||||
<< "Expected maxStackSize to be computed and set to a positive value";
|
||||
}
|
||||
)
|
||||
.withEnvironment(
|
||||
{{"NCCL_L1_SHARED_MEMORY_CARVEOUT", "1"},
|
||||
{"NCCL_DEBUG", "INFO"},
|
||||
{"NCCL_DEBUG_SUBSYS", "ALL"}}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
TEST_F(EnqueueTests, ncclInitKernelsForDevice_NullStackSize) {
|
||||
ncclResult_t result = ncclInitKernelsForDevice(906, 65536, nullptr);
|
||||
TEST_F(EnqueueTests, ncclInitKernelsForDevice_NullStackSize)
|
||||
{
|
||||
ProcessIsolatedTestRunner::ExecutionOptions options;
|
||||
options.stopOnFirstFailure = false;
|
||||
options.verboseLogging = true;
|
||||
|
||||
EXPECT_EQ(result, ncclSuccess);
|
||||
RUN_ISOLATED_TESTS_WITH_OPTIONS(
|
||||
options,
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclInitKernelsForDevice_NullStackSize",
|
||||
[]()
|
||||
{
|
||||
ncclResult_t result = ncclInitKernelsForDevice(906, 65536, nullptr);
|
||||
EXPECT_EQ(result, ncclSuccess);
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
TEST_F(EnqueueTests, ncclInitKernelsForDevice_InvalidArch) {
|
||||
size_t maxStackSize = 0;
|
||||
ncclResult_t result = ncclInitKernelsForDevice(-1, 65536, &maxStackSize);
|
||||
EXPECT_EQ(result, ncclSuccess);
|
||||
// Test with a real compiled kernel to verify attribute retrieval works correctly
|
||||
TEST_F(EnqueueTests, KernelAttributes_WithRealKernel)
|
||||
{
|
||||
ProcessIsolatedTestRunner::ExecutionOptions options;
|
||||
options.stopOnFirstFailure = false;
|
||||
options.verboseLogging = true;
|
||||
|
||||
RUN_ISOLATED_TESTS_WITH_OPTIONS(
|
||||
options,
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"KernelAttributes_WithRealKernel",
|
||||
[]()
|
||||
{
|
||||
size_t maxStackSize = 0;
|
||||
ncclResult_t result = testKernelAttributes((void*)simpleTestKernel, &maxStackSize);
|
||||
|
||||
EXPECT_EQ(result, ncclSuccess)
|
||||
<< "Expected successful kernel attribute retrieval with a real compiled kernel";
|
||||
}
|
||||
).withEnvironment({{"NCCL_DEBUG", "INFO"}})
|
||||
);
|
||||
}
|
||||
|
||||
TEST_F(EnqueueTests, ncclInitKernelsForDevice_ExceedsSharedMemory) {
|
||||
size_t maxStackSize = 0;
|
||||
TEST_F(EnqueueTests, ncclInitKernelsForDevice_InvalidArch)
|
||||
{
|
||||
ProcessIsolatedTestRunner::ExecutionOptions options;
|
||||
options.stopOnFirstFailure = false;
|
||||
options.verboseLogging = true;
|
||||
|
||||
ncclResult_t result = ncclInitKernelsForDevice(906, 32832, &maxStackSize);
|
||||
EXPECT_TRUE(result == ncclSystemError);
|
||||
RUN_ISOLATED_TESTS_WITH_OPTIONS(
|
||||
options,
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclInitKernelsForDevice_InvalidArch",
|
||||
[]()
|
||||
{
|
||||
size_t maxStackSize = 0;
|
||||
ncclResult_t result = ncclInitKernelsForDevice(-1, 65536, &maxStackSize);
|
||||
EXPECT_EQ(result, ncclSuccess);
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
TEST_F(EnqueueTests, ncclInitKernelsForDevice_ExceedsSharedMemory)
|
||||
{
|
||||
ProcessIsolatedTestRunner::ExecutionOptions options;
|
||||
options.stopOnFirstFailure = false;
|
||||
options.verboseLogging = true;
|
||||
|
||||
RUN_ISOLATED_TESTS_WITH_OPTIONS(
|
||||
options,
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclInitKernelsForDevice_ExceedsSharedMemory",
|
||||
[]()
|
||||
{
|
||||
size_t maxStackSize = 0;
|
||||
// For gfx906, ncclMaxSharedMem is 32832 (as shown in test output)
|
||||
// Use a very small maxSharedMem (16000 bytes) to trigger the exceeds check
|
||||
ncclResult_t result = testKernelSharedMemoryLimit(
|
||||
(void*)simpleTestKernel, // Use our real compiled kernel
|
||||
906, // cudaArch
|
||||
16000, // maxSharedMem (intentionally too small)
|
||||
&maxStackSize,
|
||||
32832 // ncclMaxSharedMem for gfx906
|
||||
);
|
||||
|
||||
EXPECT_EQ(result, ncclSystemError)
|
||||
<< "Expected ncclSystemError when ncclMaxSharedMem exceeds maxSharedMem";
|
||||
}
|
||||
).withEnvironment({{"NCCL_DEBUG", "WARN"}})
|
||||
);
|
||||
}
|
||||
|
||||
// Test ncclEnqueueCheck function
|
||||
TEST_F(EnqueueTests, ncclEnqueueCheck_ValidInput) {
|
||||
ncclResult_t result = ncclEnqueueCheck(info);
|
||||
EXPECT_TRUE(result == ncclSuccess);
|
||||
TEST_F(EnqueueTests, ncclEnqueueCheck_ValidInput)
|
||||
{
|
||||
ProcessIsolatedTestRunner::ExecutionOptions options;
|
||||
options.stopOnFirstFailure = false;
|
||||
options.verboseLogging = true;
|
||||
|
||||
RUN_ISOLATED_TESTS_WITH_OPTIONS(
|
||||
options,
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclEnqueueCheck_ValidInput",
|
||||
[]()
|
||||
{
|
||||
EnqueueTestEnvironment env;
|
||||
env.setup();
|
||||
ncclResult_t result = ncclEnqueueCheck(env.info);
|
||||
EXPECT_TRUE(result == ncclSuccess);
|
||||
env.cleanup();
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
TEST_F(EnqueueTests, ncclEnqueueCheck_InvalidComm) {
|
||||
info->comm = nullptr;
|
||||
ncclResult_t result = ncclEnqueueCheck(info);
|
||||
EXPECT_EQ(result, ncclInvalidArgument);
|
||||
TEST_F(EnqueueTests, ncclEnqueueCheck_InvalidComm)
|
||||
{
|
||||
ProcessIsolatedTestRunner::ExecutionOptions options;
|
||||
options.stopOnFirstFailure = false;
|
||||
options.verboseLogging = true;
|
||||
|
||||
RUN_ISOLATED_TESTS_WITH_OPTIONS(
|
||||
options,
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclEnqueueCheck_InvalidComm",
|
||||
[]()
|
||||
{
|
||||
EnqueueTestEnvironment env;
|
||||
env.setup();
|
||||
env.info->comm = nullptr;
|
||||
ncclResult_t result = ncclEnqueueCheck(env.info);
|
||||
EXPECT_EQ(result, ncclInvalidArgument);
|
||||
env.cleanup();
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
TEST_F(EnqueueTests, ncclEnqueueCheck_InvalidBuffers) {
|
||||
// Test with null sendbuff
|
||||
comm->checkPointers = 1;
|
||||
info->sendbuff = nullptr;
|
||||
ncclResult_t result = ncclEnqueueCheck(info);
|
||||
EXPECT_EQ(result, ncclInvalidArgument);
|
||||
TEST_F(EnqueueTests, ncclEnqueueCheck_InvalidBuffers)
|
||||
{
|
||||
ProcessIsolatedTestRunner::ExecutionOptions options;
|
||||
options.stopOnFirstFailure = false;
|
||||
options.verboseLogging = true;
|
||||
|
||||
// Reset sendbuff and test with null recvbuff
|
||||
info->sendbuff = sendbuff;
|
||||
info->recvbuff = nullptr;
|
||||
result = ncclEnqueueCheck(info);
|
||||
EXPECT_EQ(result, ncclInvalidArgument);
|
||||
RUN_ISOLATED_TESTS_WITH_OPTIONS(
|
||||
options,
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclEnqueueCheck_InvalidBuffers",
|
||||
[]()
|
||||
{
|
||||
EnqueueTestEnvironment env;
|
||||
env.setup();
|
||||
|
||||
// Test with null sendbuff
|
||||
env.comm->checkPointers = 1;
|
||||
env.info->sendbuff = nullptr;
|
||||
ncclResult_t result = ncclEnqueueCheck(env.info);
|
||||
EXPECT_EQ(result, ncclInvalidArgument);
|
||||
|
||||
// Reset sendbuff and test with null recvbuff
|
||||
env.info->sendbuff = env.sendbuff;
|
||||
env.info->recvbuff = nullptr;
|
||||
result = ncclEnqueueCheck(env.info);
|
||||
EXPECT_EQ(result, ncclInvalidArgument);
|
||||
|
||||
env.cleanup();
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
// Test ncclFuncSendCount function
|
||||
TEST_F(EnqueueTests, ncclFuncSendCount_AllReduce) {
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
TEST_F(EnqueueTests, ncclFuncSendCount_AllTests)
|
||||
{
|
||||
ProcessIsolatedTestRunner::ExecutionOptions options;
|
||||
options.stopOnFirstFailure = false;
|
||||
options.verboseLogging = true;
|
||||
|
||||
size_t result = ncclFuncSendCount(ncclFuncAllReduce, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
RUN_ISOLATED_TESTS_WITH_OPTIONS(
|
||||
options,
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncSendCount_AllReduce",
|
||||
[]()
|
||||
{
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
size_t result = ncclFuncSendCount(ncclFuncAllReduce, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
),
|
||||
|
||||
TEST_F(EnqueueTests, ncclFuncSendCount_Broadcast) {
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncSendCount_Broadcast",
|
||||
[]()
|
||||
{
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
size_t result = ncclFuncSendCount(ncclFuncBroadcast, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
),
|
||||
|
||||
size_t result = ncclFuncSendCount(ncclFuncBroadcast, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncSendCount_Reduce",
|
||||
[]()
|
||||
{
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
size_t result = ncclFuncSendCount(ncclFuncReduce, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
),
|
||||
|
||||
TEST_F(EnqueueTests, ncclFuncSendCount_Reduce) {
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncSendCount_AllGather",
|
||||
[]()
|
||||
{
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
size_t result = ncclFuncSendCount(ncclFuncAllGather, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
),
|
||||
|
||||
size_t result = ncclFuncSendCount(ncclFuncReduce, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncSendCount_ReduceScatter",
|
||||
[]()
|
||||
{
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
size_t result = ncclFuncSendCount(ncclFuncReduceScatter, nRanks, count);
|
||||
EXPECT_EQ(result, count * nRanks);
|
||||
}
|
||||
),
|
||||
|
||||
TEST_F(EnqueueTests, ncclFuncSendCount_AllGather) {
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
|
||||
size_t result = ncclFuncSendCount(ncclFuncAllGather, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
|
||||
TEST_F(EnqueueTests, ncclFuncSendCount_ReduceScatter) {
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
|
||||
size_t result = ncclFuncSendCount(ncclFuncReduceScatter, nRanks, count);
|
||||
EXPECT_EQ(result, count * nRanks);
|
||||
}
|
||||
|
||||
TEST_F(EnqueueTests, ncclFuncSendCount_ZeroCount) {
|
||||
size_t result = ncclFuncSendCount(ncclFuncAllReduce, 4, 0);
|
||||
EXPECT_EQ(result, 0);
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncSendCount_ZeroCount",
|
||||
[]()
|
||||
{
|
||||
size_t result = ncclFuncSendCount(ncclFuncAllReduce, 4, 0);
|
||||
EXPECT_EQ(result, 0);
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
// Test ncclFuncRecvCount function
|
||||
TEST_F(EnqueueTests, ncclFuncRecvCount_AllReduce) {
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
TEST_F(EnqueueTests, ncclFuncRecvCount_AllTests)
|
||||
{
|
||||
ProcessIsolatedTestRunner::ExecutionOptions options;
|
||||
options.stopOnFirstFailure = false;
|
||||
options.verboseLogging = true;
|
||||
|
||||
size_t result = ncclFuncRecvCount(ncclFuncAllReduce, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
RUN_ISOLATED_TESTS_WITH_OPTIONS(
|
||||
options,
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncRecvCount_AllReduce",
|
||||
[]()
|
||||
{
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
size_t result = ncclFuncRecvCount(ncclFuncAllReduce, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
),
|
||||
|
||||
TEST_F(EnqueueTests, ncclFuncRecvCount_Broadcast) {
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncRecvCount_Broadcast",
|
||||
[]()
|
||||
{
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
size_t result = ncclFuncRecvCount(ncclFuncBroadcast, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
),
|
||||
|
||||
size_t result = ncclFuncRecvCount(ncclFuncBroadcast, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncRecvCount_Reduce",
|
||||
[]()
|
||||
{
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
size_t result = ncclFuncRecvCount(ncclFuncReduce, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
),
|
||||
|
||||
TEST_F(EnqueueTests, ncclFuncRecvCount_Reduce) {
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncRecvCount_AllGather",
|
||||
[]()
|
||||
{
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
size_t result = ncclFuncRecvCount(ncclFuncAllGather, nRanks, count);
|
||||
EXPECT_EQ(result, count * nRanks);
|
||||
}
|
||||
),
|
||||
|
||||
size_t result = ncclFuncRecvCount(ncclFuncReduce, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncRecvCount_ReduceScatter",
|
||||
[]()
|
||||
{
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
size_t result = ncclFuncRecvCount(ncclFuncReduceScatter, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
),
|
||||
|
||||
TEST_F(EnqueueTests, ncclFuncRecvCount_AllGather) {
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
|
||||
size_t result = ncclFuncRecvCount(ncclFuncAllGather, nRanks, count);
|
||||
EXPECT_EQ(result, count * nRanks);
|
||||
}
|
||||
|
||||
TEST_F(EnqueueTests, ncclFuncRecvCount_ReduceScatter) {
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
|
||||
size_t result = ncclFuncRecvCount(ncclFuncReduceScatter, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
|
||||
TEST_F(EnqueueTests, ncclFuncRecvCount_ZeroCount) {
|
||||
size_t result = ncclFuncRecvCount(ncclFuncAllReduce, 4, 0);
|
||||
EXPECT_EQ(result, 0);
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncRecvCount_ZeroCount",
|
||||
[]()
|
||||
{
|
||||
size_t result = ncclFuncRecvCount(ncclFuncAllReduce, 4, 0);
|
||||
EXPECT_EQ(result, 0);
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
// Test ncclFuncMaxSendRecvCount function
|
||||
TEST_F(EnqueueTests, ncclFuncMaxSendRecvCount_AllReduce) {
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
TEST_F(EnqueueTests, ncclFuncMaxSendRecvCount_AllTests)
|
||||
{
|
||||
ProcessIsolatedTestRunner::ExecutionOptions options;
|
||||
options.stopOnFirstFailure = false;
|
||||
options.verboseLogging = true;
|
||||
|
||||
size_t result = ncclFuncMaxSendRecvCount(ncclFuncAllReduce, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
RUN_ISOLATED_TESTS_WITH_OPTIONS(
|
||||
options,
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncMaxSendRecvCount_AllReduce",
|
||||
[]()
|
||||
{
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
size_t result = ncclFuncMaxSendRecvCount(ncclFuncAllReduce, nRanks, count);
|
||||
EXPECT_EQ(result, count);
|
||||
}
|
||||
),
|
||||
|
||||
TEST_F(EnqueueTests, ncclFuncMaxSendRecvCount_AllGather) {
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncMaxSendRecvCount_AllGather",
|
||||
[]()
|
||||
{
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
size_t result = ncclFuncMaxSendRecvCount(ncclFuncAllGather, nRanks, count);
|
||||
// For AllGather, receive count (count * nRanks) is larger than send count (count)
|
||||
EXPECT_EQ(result, count * nRanks);
|
||||
}
|
||||
),
|
||||
|
||||
size_t result = ncclFuncMaxSendRecvCount(ncclFuncAllGather, nRanks, count);
|
||||
// For AllGather, receive count (count * nRanks) is larger than send count (count)
|
||||
EXPECT_EQ(result, count * nRanks);
|
||||
}
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncMaxSendRecvCount_ReduceScatter",
|
||||
[]()
|
||||
{
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
size_t result = ncclFuncMaxSendRecvCount(ncclFuncReduceScatter, nRanks, count);
|
||||
// For ReduceScatter, send count (count) is larger than receive count (count/nRanks)
|
||||
EXPECT_EQ(result, count * nRanks);
|
||||
}
|
||||
),
|
||||
|
||||
TEST_F(EnqueueTests, ncclFuncMaxSendRecvCount_ReduceScatter) {
|
||||
size_t count = 1000;
|
||||
int nRanks = 4;
|
||||
|
||||
size_t result = ncclFuncMaxSendRecvCount(ncclFuncReduceScatter, nRanks, count);
|
||||
// For ReduceScatter, send count (count) is larger than receive count (count/nRanks)
|
||||
EXPECT_EQ(result, count * nRanks);
|
||||
}
|
||||
|
||||
TEST_F(EnqueueTests, ncclFuncMaxSendRecvCount_ZeroCount) {
|
||||
size_t result = ncclFuncMaxSendRecvCount(ncclFuncAllReduce, 4, 0);
|
||||
EXPECT_EQ(result, 0);
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncMaxSendRecvCount_ZeroCount",
|
||||
[]()
|
||||
{
|
||||
size_t result = ncclFuncMaxSendRecvCount(ncclFuncAllReduce, 4, 0);
|
||||
EXPECT_EQ(result, 0);
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
// Edge case tests
|
||||
TEST_F(EnqueueTests, ncclFuncCounts_SingleRank) {
|
||||
size_t count = 1000;
|
||||
int nRanks = 1;
|
||||
TEST_F(EnqueueTests, ncclFuncCounts_EdgeCases)
|
||||
{
|
||||
ProcessIsolatedTestRunner::ExecutionOptions options;
|
||||
options.stopOnFirstFailure = false;
|
||||
options.verboseLogging = true;
|
||||
|
||||
// Test with single rank
|
||||
EXPECT_EQ(ncclFuncSendCount(ncclFuncAllReduce, nRanks, count), count);
|
||||
EXPECT_EQ(ncclFuncRecvCount(ncclFuncAllReduce, nRanks, count), count);
|
||||
EXPECT_EQ(ncclFuncMaxSendRecvCount(ncclFuncAllReduce, nRanks, count), count);
|
||||
RUN_ISOLATED_TESTS_WITH_OPTIONS(
|
||||
options,
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncCounts_SingleRank",
|
||||
[]()
|
||||
{
|
||||
size_t count = 1000;
|
||||
int nRanks = 1;
|
||||
// Test with single rank
|
||||
EXPECT_EQ(ncclFuncSendCount(ncclFuncAllReduce, nRanks, count), count);
|
||||
EXPECT_EQ(ncclFuncRecvCount(ncclFuncAllReduce, nRanks, count), count);
|
||||
EXPECT_EQ(ncclFuncMaxSendRecvCount(ncclFuncAllReduce, nRanks, count), count);
|
||||
}
|
||||
),
|
||||
|
||||
ProcessIsolatedTestRunner::TestConfig(
|
||||
"ncclFuncCounts_LargeRankCount",
|
||||
[]()
|
||||
{
|
||||
size_t count = 1000;
|
||||
int nRanks = 1024;
|
||||
// Test with large number of ranks
|
||||
EXPECT_EQ(ncclFuncSendCount(ncclFuncAllGather, nRanks, count), count);
|
||||
EXPECT_EQ(ncclFuncRecvCount(ncclFuncAllGather, nRanks, count), count * nRanks);
|
||||
EXPECT_EQ(
|
||||
ncclFuncMaxSendRecvCount(ncclFuncAllGather, nRanks, count),
|
||||
count * nRanks
|
||||
);
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
TEST_F(EnqueueTests, ncclFuncCounts_LargeRankCount) {
|
||||
size_t count = 1000;
|
||||
int nRanks = 1024;
|
||||
|
||||
// Test with large number of ranks
|
||||
EXPECT_EQ(ncclFuncSendCount(ncclFuncAllGather, nRanks, count), count);
|
||||
EXPECT_EQ(ncclFuncRecvCount(ncclFuncAllGather, nRanks, count), count * nRanks);
|
||||
EXPECT_EQ(ncclFuncMaxSendRecvCount(ncclFuncAllGather, nRanks, count), count * nRanks);
|
||||
}
|
||||
} // namespace RcclUnitTesting
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele