Files
rocm-systems/test/RcclWrapTests.cpp
T
2025-09-15 18:00:26 -05:00

2320 خطوط
80 KiB
C++

/*************************************************************************
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#include "comm.h" // Ensure full definition of struct ncclComm
#include "debug.h"
#include "graph/topo.h"
#include <cstdlib>
#include <cstring>
#include <gtest/gtest.h>
#include <rccl/rccl.h>
namespace RcclUnitTesting {
// Process-wide latch used to ensure only one rcclSetP2pNetChunkSize test runs
// per execution of the test binary. It starts out false and is set to true by
// ShouldSkipP2pTestDueToExecutionOrder() the first time that helper is called;
// nothing in the visible code ever resets it.
static bool s_p2pNetChunkSizeTestExecuted = false;
// Decides whether a rcclSetP2pNetChunkSize test has to be skipped because an
// earlier test of the same family already claimed this process.
//
// The first caller latches s_p2pNetChunkSizeTestExecuted and gets `false`
// (run the test). Every later caller gets `true` (skip) after an explanatory
// message naming `testName` has been logged.
static bool ShouldSkipP2pTestDueToExecutionOrder(const std::string &testName) {
  // First caller: claim the slot and let the test proceed.
  if (!s_p2pNetChunkSizeTestExecuted) {
    s_p2pNetChunkSizeTestExecuted = true;
    return false;
  }

  // The slot is already taken: explain how to run this test on its own.
  const char *skippedTest = testName.c_str();
  INFO(NCCL_LOG_INFO,
       "\n=== IMPORTANT NOTE ===\n"
       "Test '%s' is being skipped because another rcclSetP2pNetChunkSize "
       "test\n"
       "has already executed in this run. The rcclSetP2pNetChunkSize "
       "function uses a static\n"
       "variable that gets initialized on first call, which affects "
       "subsequent tests.\n"
       "\nTo run this test properly, execute it individually using:\n"
       " --gtest_filter=Rcclwrap.%s\n"
       "\nOr run each rcclSetP2pNetChunkSize test in separate executions to "
       "ensure\n"
       "proper static variable initialization.\n"
       "========================\n",
       skippedTest, skippedTest);
  return true;
}
// Decides whether a P2P test must be skipped based on the
// NCCL_P2P_NET_CHUNKSIZE environment variable.
//
// requiredEnvValue == nullptr (architecture-logic tests):
//   skip whenever the variable is set at all, because a user-provided value
//   would prevent the architecture logic from being tested.
// requiredEnvValue != nullptr (override tests):
//   skip unless the variable is set to exactly that string.
//
// Note: this only inspects the environment. State cached from earlier tests
// in the same process cannot be observed from here, so for a guaranteed clean
// state run the tests individually or restart the binary.
static bool ShouldSkipP2pTest(const char *requiredEnvValue = nullptr) {
  const char *currentValue = getenv("NCCL_P2P_NET_CHUNKSIZE");

  if (requiredEnvValue == nullptr) {
    // Clean environment required: any value at all means "skip".
    return currentValue != nullptr;
  }

  // A specific value is required: run only on an exact match.
  const bool hasRequiredValue =
      currentValue != nullptr && strcmp(currentValue, requiredEnvValue) == 0;
  return !hasRequiredValue;
}
// Process-wide latch used to ensure only one rcclSetPxn test runs per
// execution of the test binary. It starts out false and is set to true by
// ShouldSkipPxnTestDueToExecutionOrder() the first time that helper is called;
// nothing in the visible code ever resets it.
static bool s_pxnTestExecuted = false;
// Decides whether a rcclSetPxn test has to be skipped because an earlier test
// of the same family already claimed this process.
//
// The first caller latches s_pxnTestExecuted and gets `false` (run the test).
// Every later caller gets `true` (skip) after an explanatory message naming
// `testName` has been logged.
static bool ShouldSkipPxnTestDueToExecutionOrder(const std::string &testName) {
  // First caller: claim the slot and let the test proceed.
  if (!s_pxnTestExecuted) {
    s_pxnTestExecuted = true;
    return false;
  }

  // The slot is already taken: explain how to run this test on its own.
  const char *skippedTest = testName.c_str();
  INFO(NCCL_LOG_INFO,
       "\n=== IMPORTANT NOTE ===\n"
       "Test '%s' is being skipped because another rcclSetPxn test\n"
       "has already executed in this run. The rcclSetPxn function uses a "
       "static\n"
       "variable that gets initialized on first call, which affects "
       "subsequent tests.\n"
       "\nTo run this test properly, execute it individually using:\n"
       " --gtest_filter=Rcclwrap.%s\n"
       "\nOr run each rcclSetPxn test in separate executions to ensure\n"
       "proper static variable initialization.\n"
       "========================\n",
       skippedTest, skippedTest);
  return true;
}
// Decides whether a PXN test must be skipped based on the NCCL_PXN_DISABLE
// environment variable.
//
// requiredEnvValue == nullptr (architecture-logic tests):
//   skip whenever the variable is set at all, because a user-provided value
//   would prevent the architecture logic from being tested.
// requiredEnvValue != nullptr (override tests):
//   skip unless the variable is set to exactly that string.
static bool ShouldSkipPxnTest(const char *requiredEnvValue = nullptr) {
  const char *currentValue = getenv("NCCL_PXN_DISABLE");

  if (requiredEnvValue == nullptr) {
    // Clean environment required: any value at all means "skip".
    return currentValue != nullptr;
  }

  // A specific value is required: run only on an exact match.
  const bool hasRequiredValue =
      currentValue != nullptr && strcmp(currentValue, requiredEnvValue) == 0;
  return !hasRequiredValue;
}
// Helper function to test the static expose check.
// Invokes the RCCL_STATIC_EXPOSE_CHECK() macro and returns ncclSuccess if
// control reaches the statement following it.
// NOTE(review): the macro presumably returns early with ncclInvalidUsage when
// RCCL_EXPOSE_STATIC is not defined (the RcclFuncMaxSendRecvCount test expects
// exactly that result) -- confirm against the macro definition.
ncclResult_t testStaticExposeCheck() {
RCCL_STATIC_EXPOSE_CHECK();
return ncclSuccess;
}
// Builds a minimal, zero-filled mock communicator for the P2P tests.
//
// mockComm    [out] receives a heap-allocated ncclComm; release it with
//                   CleanupMockComm().
// mockTopo    [out] caller-owned topology; it is zeroed here and mockComm->topo
//                   is pointed at it, so it must outlive the communicator.
// mockGpuNode [out] caller-owned scratch node; it is zeroed and given the
//                   architecture name. The topology stores a *copy* of it, so
//                   changes made to mockGpuNode afterwards are not reflected.
// arch              GPU architecture string (e.g. "gfx942"); truncated to fit
//                   the gcn buffer and always NUL-terminated.
// nRanks            value stored in mockComm->nRanks.
static void CreateMockComm(ncclComm_t &mockComm,
                           struct ncclTopoSystem &mockTopo,
                           struct ncclTopoNode &mockGpuNode, const char *arch,
                           int nRanks) {
  // Communicator: zero everything, then fill in the fields the tests need.
  mockComm = new ncclComm();
  memset(mockComm, 0, sizeof(ncclComm));
  mockComm->rank = 0;   // default rank
  mockComm->nNodes = 1; // single node by default for P2P tests
  mockComm->nRanks = nRanks;

  // Topology: a single GPU entry, owned by the caller.
  memset(&mockTopo, 0, sizeof(mockTopo));
  mockComm->topo = &mockTopo;
  mockTopo.nodes[GPU].count = 1;

  // GPU node: copy the architecture name, leaving room for the terminator.
  memset(&mockGpuNode, 0, sizeof(mockGpuNode));
  const size_t lastGcnIdx = sizeof(mockGpuNode.gpu.gcn) - 1;
  strncpy(mockGpuNode.gpu.gcn, arch, lastGcnIdx);
  mockGpuNode.gpu.gcn[lastGcnIdx] = '\0';

  // Store the node in the topology array by value.
  mockTopo.nodes[GPU].nodes[0] = mockGpuNode;

  // Explicitly clear the LL range table as well.
  memset(mockComm->minMaxLLRange, 0, sizeof(mockComm->minMaxLLRange));
}
// Releases a communicator created by CreateMockComm() and resets the handle to
// nullptr. Safe to call with a null handle (deleting nullptr is a no-op).
// The topology that mockComm->topo points to is not freed here.
static void CleanupMockComm(ncclComm_t &mockComm) {
  delete mockComm;
  mockComm = nullptr;
}
// Tells the rcclSetPipelining tests whether they must be skipped.
// Returns true when RCCL_DISABLE_REDUCE_COPY_PIPELINING is set to anything
// other than the exact string "0"; returns false when it is unset or "0".
static bool ShouldSkipRcclSetPipeliningTests() {
  const char *disableFlag = getenv("RCCL_DISABLE_REDUCE_COPY_PIPELINING");
  return disableFlag != nullptr && strcmp(disableFlag, "0") != 0;
}
// Returns true when `envStr` equals, ignoring case, one of the first
// NCCL_NUM_PROTOCOLS entries of ncclProtoStr. A null pointer is never valid.
static bool isProtoStrValid(const char *envStr) {
  if (envStr == nullptr) {
    return false;
  }
  bool matched = false;
  // Stop scanning as soon as one protocol name matches.
  for (int proto = 0; proto < NCCL_NUM_PROTOCOLS && !matched; ++proto) {
    matched = (strcasecmp(envStr, ncclProtoStr[proto]) == 0);
  }
  return matched;
}
// Returns true when `envStr` equals, ignoring case, one of the first
// NCCL_NUM_ALGORITHMS entries of ncclAlgoStr. A null pointer is never valid.
static bool isAlgoStrValid(const char *envStr) {
  if (envStr == nullptr) {
    return false;
  }
  bool matched = false;
  // Stop scanning as soon as one algorithm name matches.
  for (int algo = 0; algo < NCCL_NUM_ALGORITHMS && !matched; ++algo) {
    matched = (strcasecmp(envStr, ncclAlgoStr[algo]) == 0);
  }
  return matched;
}
// Checks the static-expose guard and then rcclFuncMaxSendRecvCount for an
// AllReduce called with (4, 1024): the test expects maxCount == 1024 and
// ncclSuccess.
TEST(Rcclwrap, RcclFuncMaxSendRecvCount) {
  // The guard only reports success when the build exposes static functions.
#ifdef RCCL_EXPOSE_STATIC
  const ncclResult_t expectedGuardStatus = ncclSuccess;
#else
  const ncclResult_t expectedGuardStatus = ncclInvalidUsage;
#endif
  const ncclResult_t guardStatus = testStaticExposeCheck();
  EXPECT_EQ(guardStatus, expectedGuardStatus);

  size_t maxCount = 0;
  const ncclResult_t status =
      rcclFuncMaxSendRecvCount(ncclFuncAllReduce, 4, 1024, maxCount);
  EXPECT_EQ(maxCount, 1024);
  EXPECT_EQ(status, ncclSuccess);
}
// With no NCCL_PROTO override and LL/LL128 ranges that both contain the
// message size (1024 bytes), the test accepts either LL128 or LL as the
// protocol chosen by rcclUpdateCollectiveProtocol.
TEST(Rcclwrap, RcclUpdateCollectiveProtocol_UsesLL128WhenInRange) {
  // Leave NCCL_PROTO unset so that automatic selection is used.
  setenv("NCCL_PROTO", "", 1);
  unsetenv("NCCL_PROTO");

  // Minimal communicator: one rank, two nodes (inter-node logic).
  ncclComm_t comm = new ncclComm();
  *comm = {};
  comm->rank = 0;
  comm->nRanks = 1;
  comm->nNodes = 2;

  // Minimal topology: a single gfx942 GPU with LL128 enabled.
  ncclTopoSystem *topo = new ncclTopoSystem();
  *topo = {};
  comm->topo = topo;
  topo->ll128Enabled = true;
  auto &gpu0 = topo->nodes[GPU].nodes[0];
  gpu0 = {};
  topo->nodes[GPU].count = 1;
  strncpy(gpu0.gpu.gcn, "gfx942", sizeof(gpu0.gpu.gcn));

  // Tuning ranges for AllReduce.
  const int tunableIdx = rcclGetTunableIndex(ncclFuncAllReduce);
  auto &ranges = comm->minMaxLLRange[tunableIdx];
  ranges[NCCL_PROTO_LL][RCCL_PROTOCOL_MIN_IDX] = 512;
  ranges[NCCL_PROTO_LL][RCCL_PROTOCOL_MAX_IDX] = 1024;
  ranges[NCCL_PROTO_LL128][RCCL_PROTOCOL_MIN_IDX] = 256;
  ranges[NCCL_PROTO_LL128][RCCL_PROTOCOL_MAX_IDX] = 2048;
  ranges[NCCL_PROTO_LL128][RCCL_PROTOCOL_FACTOR_IDX] = 1;

  // Collective task with the protocol still undecided.
  ncclTaskColl task = {};
  task.func = ncclFuncAllReduce;
  task.protocol = NCCL_PROTO_UNDEF;

  size_t messageBytes = 1024;
  rcclUpdateCollectiveProtocol(comm, messageBytes, &task);

  EXPECT_TRUE(task.protocol == NCCL_PROTO_LL128 ||
              task.protocol == NCCL_PROTO_LL);

  delete topo;
  delete comm;
}
// With no NCCL_PROTO override and every LL/LL128 limit left at
// RCCL_LL_LIMITS_UNDEFINED on a gfx942 topology, the test expects the protocol
// to stay NCCL_PROTO_UNDEF after rcclUpdateCollectiveProtocol.
// Note: unlike the sibling tests, this one does not set the GPU node count.
TEST(Rcclwrap, RcclUpdateCollectiveProtocol_WarnsOnGfx942Arch) {
  // Leave NCCL_PROTO unset so that automatic selection is used.
  setenv("NCCL_PROTO", "", 1);
  unsetenv("NCCL_PROTO");

  // Minimal communicator: one rank, two nodes (inter-node logic).
  ncclComm_t comm = new ncclComm();
  *comm = {};
  comm->rank = 0;
  comm->nRanks = 1;
  comm->nNodes = 2;

  // Minimal topology: gfx942 GPU in slot 0, LL128 enabled.
  ncclTopoSystem *topo = new ncclTopoSystem();
  comm->topo = topo;
  topo->ll128Enabled = true;
  auto &gpu0 = topo->nodes[GPU].nodes[0];
  gpu0 = {};
  strncpy(gpu0.gpu.gcn, "gfx942", sizeof(gpu0.gpu.gcn));

  // Mark every tuning limit for AllReduce as undefined.
  const int tunableIdx = rcclGetTunableIndex(ncclFuncAllReduce);
  auto &ranges = comm->minMaxLLRange[tunableIdx];
  ranges[NCCL_PROTO_LL][RCCL_PROTOCOL_MIN_IDX] = RCCL_LL_LIMITS_UNDEFINED;
  ranges[NCCL_PROTO_LL][RCCL_PROTOCOL_MAX_IDX] = RCCL_LL_LIMITS_UNDEFINED;
  ranges[NCCL_PROTO_LL128][RCCL_PROTOCOL_MIN_IDX] = RCCL_LL_LIMITS_UNDEFINED;
  ranges[NCCL_PROTO_LL128][RCCL_PROTOCOL_MAX_IDX] = RCCL_LL_LIMITS_UNDEFINED;
  ranges[NCCL_PROTO_LL128][RCCL_PROTOCOL_FACTOR_IDX] =
      RCCL_LL_LIMITS_UNDEFINED;

  // Collective task with the protocol still undecided.
  ncclTaskColl task = {};
  task.func = ncclFuncAllReduce;
  task.protocol = NCCL_PROTO_UNDEF;

  size_t messageBytes = 1024;
  rcclUpdateCollectiveProtocol(comm, messageBytes, &task);

  EXPECT_EQ(task.protocol, NCCL_PROTO_UNDEF);

  delete topo;
  delete comm;
}
// With NCCL_PROTO set by the user, the test expects
// rcclUpdateCollectiveProtocol to leave the task's protocol untouched
// (NCCL_PROTO_UNDEF).
// NOTE(review): the original author questioned why this passes without
// entering the "else if" branch of the function under test -- TODO confirm
// that the NCCL_PROTO value set here is actually observed by that function.
TEST(Rcclwrap, RcclUpdateCollectiveProtocol_HonorsUserProtocolEnv) {
  setenv("NCCL_PROTO", "1", 1); // Simulate manual override

  ncclComm_t comm = new ncclComm();
  *comm = {};
  // Manually populate minimal fields for comm
  comm->nRanks = 1;
  comm->nNodes = 2; // triggers inter-node logic
  comm->rank = 0;
  comm->topo = new ncclTopoSystem();
  *comm->topo = {};
  comm->topo->ll128Enabled = true;
  comm->topo->nodes[GPU].nodes[0] = {};
  strncpy(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx942",
          sizeof(comm->topo->nodes[GPU].nodes[0].gpu.gcn));

  ncclTaskColl info = {};
  // Manually populate minimal fields for info
  info.func = ncclFuncAllReduce;
  info.protocol = NCCL_PROTO_UNDEF;

  size_t nBytes = 1024;
  rcclUpdateCollectiveProtocol(comm, nBytes, &info);
  EXPECT_EQ(info.protocol, NCCL_PROTO_UNDEF);

  delete comm->topo;
  delete comm;

  // Do not leak the simulated override into tests that run later in the same
  // process; the sibling tests likewise leave NCCL_PROTO unset.
  unsetenv("NCCL_PROTO");
}
// With no NCCL_PROTO override, an LL range of [512, 1024] and a 2048-byte
// message (outside that range, no LL128 range configured), the test expects
// rcclUpdateCollectiveProtocol to select NCCL_PROTO_SIMPLE.
TEST(Rcclwrap, RcclUpdateCollectiveProtocol_SimpleFallbackWhenNoRanges) {
  // Leave NCCL_PROTO unset so that automatic selection is used.
  setenv("NCCL_PROTO", "", 1);
  unsetenv("NCCL_PROTO");

  // Minimal communicator: one rank, two nodes (inter-node logic).
  ncclComm_t comm = new ncclComm();
  *comm = {};
  comm->rank = 0;
  comm->nRanks = 1;
  comm->nNodes = 2;

  // Minimal topology: a single gfx942 GPU with LL128 enabled.
  ncclTopoSystem *topo = new ncclTopoSystem();
  *topo = {};
  comm->topo = topo;
  topo->ll128Enabled = true;
  auto &gpu0 = topo->nodes[GPU].nodes[0];
  gpu0 = {};
  topo->nodes[GPU].count = 1;
  strncpy(gpu0.gpu.gcn, "gfx942", sizeof(gpu0.gpu.gcn));

  // Only the LL range is configured for AllReduce.
  const int tunableIdx = rcclGetTunableIndex(ncclFuncAllReduce);
  auto &llRange = comm->minMaxLLRange[tunableIdx][NCCL_PROTO_LL];
  llRange[RCCL_PROTOCOL_MIN_IDX] = 512;
  llRange[RCCL_PROTOCOL_MAX_IDX] = 1024;

  // Collective task with the protocol still undecided.
  ncclTaskColl task = {};
  task.func = ncclFuncAllReduce;
  task.protocol = NCCL_PROTO_UNDEF;

  size_t messageBytes = 2048;
  rcclUpdateCollectiveProtocol(comm, messageBytes, &task);

  EXPECT_EQ(task.protocol, NCCL_PROTO_SIMPLE);

  delete topo;
  delete comm;
}
// Table-driven check of validHsaScratchEnvSetting.
// Rows with a non-null first argument model HSA_NO_SCRATCH_RECLAIM being set
// (always expected to be valid); rows with nullptr depend on the HIP version,
// firmware version and architecture name.
TEST(Rcclwrap, validHsaScratchEnvSettingTest) {
  struct ScratchCase {
    const char *hsaNoScratchReclaim; // nullptr == variable not set
    int hipVersion;
    int firmwareVersion;
    const char *arch;
    bool expectedValid;
  };

  const ScratchCase kCases[] = {
      // HSA_NO_SCRATCH_RECLAIM set: always valid
      {"1", 0, 0, "gfx950", true},
      {"1", 0, 0, "gfx942", true},
      // Not set: depends on hip version and firmware version
      {nullptr, 60443484, 24, "gfx950", true},
      {nullptr, 60443483, 24, "gfx950", false},
      {nullptr, 60443484, 23, "gfx950", false},
      {nullptr, 60443484, 177, "gfx942", true},
      {nullptr, 60443484, 176, "gfx942", false},
      {nullptr, 60443483, 177, "gfx942", false},
      {nullptr, 60443483, 0, "gfx000", true},
      {nullptr, 60300000, 0, "gfx000", true},
  };

  for (const ScratchCase &tc : kCases) {
    if (tc.expectedValid) {
      EXPECT_TRUE(validHsaScratchEnvSetting(
          tc.hsaNoScratchReclaim, tc.hipVersion, tc.firmwareVersion, tc.arch));
    } else {
      EXPECT_FALSE(validHsaScratchEnvSetting(
          tc.hsaNoScratchReclaim, tc.hipVersion, tc.firmwareVersion, tc.arch));
    }
  }
}
// When the user has set NCCL_THREAD_THRESHOLDS, the test expects
// rcclUpdateThreadThreshold to leave the caller's threshold untouched.
// The test is skipped unless that environment variable is present.
TEST(Rcclwrap, RcclUpdateThreadThreshold_UserEnvSet) {
  if (getenv("NCCL_THREAD_THRESHOLDS") == nullptr) {
    // The messages name the variable that is actually checked above
    // (NCCL_THREAD_THRESHOLDS, with a trailing S).
    INFO(NCCL_LOG_INFO, "[Rcclwrap] Test skipped. Set environment variable "
                        "NCCL_THREAD_THRESHOLDS");
    GTEST_SKIP() << "[Rcclwrap] Test skipped. Set environment variable "
                    "NCCL_THREAD_THRESHOLDS\n";
  }

  // Same setup style as the sibling RcclUpdateThreadThreshold tests:
  // value-initialize, then assign the fields that matter.
  ncclComm comm{};
  comm.nRanks = 8;
  comm.nNodes = 4;
  memset(comm.minMaxLLRange, 0, sizeof(comm.minMaxLLRange));

  ncclTaskColl info{};
  info.func = ncclFuncReduceScatter;
  info.protocol = 0;

  // Any number should do; we only verify that it does not change.
  int threadThreshold = 5;
  rcclUpdateThreadThreshold(&comm, 0, &info, threadThreshold);
  EXPECT_EQ(threadThreshold, 5);
}
// When NCCL_MIN_NCHANNELS is set, the test expects rcclUpdateThreadThreshold
// to leave the caller's threshold untouched. Skipped unless that environment
// variable is present.
TEST(Rcclwrap, RcclUpdateThreadThreshold_MinNChannelsSet) {
  if (getenv("NCCL_MIN_NCHANNELS") == nullptr) {
    INFO(
        NCCL_LOG_INFO,
        "[Rcclwrap] Test skipped. Set environment variable NCCL_MIN_NCHANNELS");
    GTEST_SKIP() << "[Rcclwrap] Test skipped. Set environment variable "
                    "NCCL_MIN_NCHANNELS\n";
  }

  // Four ranks spread over four nodes, all tuning ranges zeroed.
  ncclComm comm{};
  comm.nRanks = 4;
  comm.nNodes = 4;
  memset(comm.minMaxLLRange, 0, sizeof(comm.minMaxLLRange));

  ncclTaskColl task{};
  task.func = ncclFuncAllGather;
  task.protocol = 0;

  int threshold = 5;
  rcclUpdateThreadThreshold(&comm, 0, &task, threshold);
  EXPECT_EQ(threshold, 5);
}
// When NCCL_MAX_NCHANNELS is set, the test expects rcclUpdateThreadThreshold
// to leave the caller's threshold untouched. Skipped unless that environment
// variable is present.
TEST(Rcclwrap, RcclUpdateThreadThreshold_MNChannelsSet) {
  if (getenv("NCCL_MAX_NCHANNELS") == nullptr) {
    INFO(
        NCCL_LOG_INFO,
        "[Rcclwrap] Test skipped. Set environment variable NCCL_MAX_NCHANNELS");
    GTEST_SKIP() << "[Rcclwrap] Test skipped. Set environment variable "
                    "NCCL_MAX_NCHANNELS\n";
  }

  // Four ranks spread over four nodes, all tuning ranges zeroed.
  ncclComm comm{};
  comm.nRanks = 4;
  comm.nNodes = 4;
  memset(comm.minMaxLLRange, 0, sizeof(comm.minMaxLLRange));

  ncclTaskColl task{};
  task.func = ncclFuncAllGather;
  task.protocol = 0;

  int threshold = 5;
  rcclUpdateThreadThreshold(&comm, 0, &task, threshold);
  EXPECT_EQ(threshold, 5);
}
// Single-node communicator (nNodes < 2): the test expects the threshold to be
// left unchanged by rcclUpdateThreadThreshold.
TEST(Rcclwrap, RcclUpdateThreadThreshold_NoEnv_nNodesLessThan2) {
  ncclComm comm{};
  comm.nNodes = 1; // fewer than two nodes
  comm.nRanks = 4;
  memset(comm.minMaxLLRange, 0, sizeof(comm.minMaxLLRange));

  ncclTaskColl task{};
  task.func = ncclFuncReduceScatter;
  task.protocol = 0;

  int threshold = 5;
  rcclUpdateThreadThreshold(&comm, 0, &task, threshold);
  EXPECT_EQ(threshold, 5); // unchanged
}
// AllReduce is treated by this test as a function rcclUpdateThreadThreshold
// does not adjust: the threshold is expected to stay unchanged.
TEST(Rcclwrap, RcclUpdateThreadThreshold_NoEnv_FuncUnsupported) {
  ncclComm comm{};
  comm.nNodes = 2;
  comm.nRanks = 4;
  memset(comm.minMaxLLRange, 0, sizeof(comm.minMaxLLRange));

  ncclTaskColl task{};
  task.func = ncclFuncAllReduce; // unsupported func
  task.protocol = 0;

  int threshold = 5;
  rcclUpdateThreadThreshold(&comm, 0, &task, threshold);
  EXPECT_EQ(threshold, 5);
}
// Multi-node ReduceScatter with a configured per-protocol thread threshold of
// 10 and four ranks: the test expects the resulting threshold to be 40.
TEST(Rcclwrap, RcclUpdateThreadThreshold_NoEnv_UpdateOccurs) {
  ncclTaskColl task{};
  task.func = ncclFuncReduceScatter;
  task.protocol = 0;

  ncclComm comm{};
  comm.nNodes = 2;
  comm.nRanks = 4;
  memset(comm.minMaxLLRange, 0, sizeof(comm.minMaxLLRange));

  // Configure the tunable threshold for this function/protocol pair.
  const int tunableIdx = rcclGetTunableIndex(task.func);
  auto &protoEntry = comm.minMaxLLRange[tunableIdx][task.protocol];
  protoEntry[RCCL_PROTOCOL_THREAD_THRESHOLD_IDX] = 10;

  int threshold = 5;
  rcclUpdateThreadThreshold(&comm, 0, &task, threshold);
  EXPECT_EQ(threshold, 40); // 10 * 4
}
// Multi-node AllGather whose tunable thread threshold is explicitly
// RCCL_LL_LIMITS_UNDEFINED: the test expects the threshold to stay unchanged.
TEST(Rcclwrap, RcclUpdateThreadThreshold_NoEnv_ThresholdUndefined) {
  ncclTaskColl task{};
  task.func = ncclFuncAllGather;
  task.protocol = 0;

  ncclComm comm{};
  comm.nNodes = 3;
  comm.nRanks = 4;
  memset(comm.minMaxLLRange, 0, sizeof(comm.minMaxLLRange));

  // Mark the tunable threshold for this function/protocol pair as undefined.
  const int tunableIdx = rcclGetTunableIndex(task.func);
  auto &protoEntry = comm.minMaxLLRange[tunableIdx][task.protocol];
  protoEntry[RCCL_PROTOCOL_THREAD_THRESHOLD_IDX] = RCCL_LL_LIMITS_UNDEFINED;

  int threshold = 5;
  rcclUpdateThreadThreshold(&comm, 0, &task, threshold);
  EXPECT_EQ(threshold, 5);
}
// rcclSetP2pNetChunkSize on a gfx942 mock communicator with 32 ranks:
// the test expects a chunk size of 1 << 17 (ranks < 64).
TEST(Rcclwrap, GFX942_SmallRanks) {
  // Check the environment precondition first. It has no side effects, whereas
  // the execution-order helper latches a process-wide flag as soon as it lets
  // a test through; consuming that latch in a test that is then skipped for
  // the environment would block every later rcclSetP2pNetChunkSize test.
  if (ShouldSkipP2pTest()) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
        << "which would override the static variable behavior. "
        << "This test requires clean environment to test architecture logic.";
  }
  // Only one rcclSetP2pNetChunkSize test may actually run per process.
  if (ShouldSkipP2pTestDueToExecutionOrder("GFX942_SmallRanks")) {
    GTEST_SKIP() << "Skipping due to execution order - another "
                    "rcclSetP2pNetChunkSize test already ran";
  }
  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize for GFX942 with small ranks");
  ncclComm_t mockComm = nullptr;
  struct ncclTopoSystem mockTopo;
  struct ncclTopoNode mockGpuNode;
  CreateMockComm(mockComm, mockTopo, mockGpuNode, "gfx942", 32);
  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(mockComm, chunkSize);
  // Expected: 1 << 17 = 131072 for ranks < 64
  EXPECT_EQ(chunkSize, 1 << 17)
      << "GFX942 with ranks < 64 should set chunk size to 131072";
  INFO(NCCL_LOG_INFO, "GFX942 small ranks test completed - chunk size: %d",
       chunkSize);
  CleanupMockComm(mockComm);
}
// rcclSetP2pNetChunkSize on a gfx942 mock communicator with 128 ranks:
// the test expects a chunk size of 1 << 19 (ranks >= 64).
TEST(Rcclwrap, GFX942_LargeRanks) {
  // Check the environment precondition first. It has no side effects, whereas
  // the execution-order helper latches a process-wide flag as soon as it lets
  // a test through; consuming that latch in a test that is then skipped for
  // the environment would block every later rcclSetP2pNetChunkSize test.
  if (ShouldSkipP2pTest()) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
        << "which would override the static variable behavior. "
        << "This test requires clean environment to test architecture logic.";
  }
  // Only one rcclSetP2pNetChunkSize test may actually run per process.
  if (ShouldSkipP2pTestDueToExecutionOrder("GFX942_LargeRanks")) {
    GTEST_SKIP() << "Skipping due to execution order - another "
                    "rcclSetP2pNetChunkSize test already ran";
  }
  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize for GFX942 with large ranks");
  ncclComm_t mockComm = nullptr;
  struct ncclTopoSystem mockTopo;
  struct ncclTopoNode mockGpuNode;
  CreateMockComm(mockComm, mockTopo, mockGpuNode, "gfx942", 128);
  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(mockComm, chunkSize);
  // Expected: 1 << 19 = 524288 for ranks >= 64
  EXPECT_EQ(chunkSize, 1 << 19)
      << "GFX942 with ranks >= 64 should set chunk size to 524288";
  INFO(NCCL_LOG_INFO, "GFX942 large ranks test completed - chunk size: %d",
       chunkSize);
  CleanupMockComm(mockComm);
}
// rcclSetP2pNetChunkSize on a gfx942 mock communicator with exactly 64 ranks:
// the test expects a chunk size of 1 << 19 (boundary of ranks >= 64).
TEST(Rcclwrap, GFX942_BoundaryRank64) {
  // Check the environment precondition first. It has no side effects, whereas
  // the execution-order helper latches a process-wide flag as soon as it lets
  // a test through; consuming that latch in a test that is then skipped for
  // the environment would block every later rcclSetP2pNetChunkSize test.
  if (ShouldSkipP2pTest()) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
        << "which would override the static variable behavior. "
        << "This test requires clean environment to test architecture logic.";
  }
  // Only one rcclSetP2pNetChunkSize test may actually run per process.
  if (ShouldSkipP2pTestDueToExecutionOrder("GFX942_BoundaryRank64")) {
    GTEST_SKIP() << "Skipping due to execution order - another "
                    "rcclSetP2pNetChunkSize test already ran";
  }
  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize for GFX942 with boundary rank 64");
  ncclComm_t mockComm = nullptr;
  struct ncclTopoSystem mockTopo;
  struct ncclTopoNode mockGpuNode;
  CreateMockComm(mockComm, mockTopo, mockGpuNode, "gfx942", 64);
  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(mockComm, chunkSize);
  // Expected: 1 << 19 = 524288 for ranks >= 64
  EXPECT_EQ(chunkSize, 1 << 19)
      << "GFX942 with ranks = 64 should set chunk size to 524288";
  INFO(NCCL_LOG_INFO, "GFX942 boundary rank 64 test completed - chunk size: %d",
       chunkSize);
  CleanupMockComm(mockComm);
}
// rcclSetP2pNetChunkSize on a gfx942 mock communicator with 63 ranks:
// the test expects a chunk size of 1 << 17 (just below the 64-rank boundary).
TEST(Rcclwrap, GFX942_BoundaryRank63) {
  // Check the environment precondition first. It has no side effects, whereas
  // the execution-order helper latches a process-wide flag as soon as it lets
  // a test through; consuming that latch in a test that is then skipped for
  // the environment would block every later rcclSetP2pNetChunkSize test.
  if (ShouldSkipP2pTest()) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
        << "which would override the static variable behavior. "
        << "This test requires clean environment to test architecture logic.";
  }
  // Only one rcclSetP2pNetChunkSize test may actually run per process.
  if (ShouldSkipP2pTestDueToExecutionOrder("GFX942_BoundaryRank63")) {
    GTEST_SKIP() << "Skipping due to execution order - another "
                    "rcclSetP2pNetChunkSize test already ran";
  }
  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize for GFX942 with boundary rank 63");
  ncclComm_t mockComm = nullptr;
  struct ncclTopoSystem mockTopo;
  struct ncclTopoNode mockGpuNode;
  CreateMockComm(mockComm, mockTopo, mockGpuNode, "gfx942", 63);
  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(mockComm, chunkSize);
  // Expected: 1 << 17 = 131072 for ranks < 64
  EXPECT_EQ(chunkSize, 1 << 17)
      << "GFX942 with ranks = 63 should set chunk size to 131072";
  INFO(NCCL_LOG_INFO, "GFX942 boundary rank 63 test completed - chunk size: %d",
       chunkSize);
  CleanupMockComm(mockComm);
}
// rcclSetP2pNetChunkSize on a gfx950 mock communicator with 8 ranks:
// the test expects a chunk size of 1 << 17 (ranks < 16).
TEST(Rcclwrap, GFX950_SmallRanks) {
  // Check the environment precondition first. It has no side effects, whereas
  // the execution-order helper latches a process-wide flag as soon as it lets
  // a test through; consuming that latch in a test that is then skipped for
  // the environment would block every later rcclSetP2pNetChunkSize test.
  if (ShouldSkipP2pTest()) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
        << "which would override the static variable behavior. "
        << "This test requires clean environment to test architecture logic.";
  }
  // Only one rcclSetP2pNetChunkSize test may actually run per process.
  if (ShouldSkipP2pTestDueToExecutionOrder("GFX950_SmallRanks")) {
    GTEST_SKIP() << "Skipping due to execution order - another "
                    "rcclSetP2pNetChunkSize test already ran";
  }
  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize for GFX950 with small ranks");
  ncclComm_t mockComm = nullptr;
  struct ncclTopoSystem mockTopo;
  struct ncclTopoNode mockGpuNode;
  CreateMockComm(mockComm, mockTopo, mockGpuNode, "gfx950", 8);
  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(mockComm, chunkSize);
  // Expected: 1 << 17 = 131072 for ranks < 16
  EXPECT_EQ(chunkSize, 1 << 17)
      << "GFX950 with ranks < 16 should set chunk size to 131072";
  INFO(NCCL_LOG_INFO, "GFX950 small ranks test completed - chunk size: %d",
       chunkSize);
  CleanupMockComm(mockComm);
}
// rcclSetP2pNetChunkSize on a gfx950 mock communicator with 24 ranks:
// the test expects a chunk size of 1 << 18 (16 <= ranks < 32).
TEST(Rcclwrap, GFX950_MediumRanks) {
  // Check the environment precondition first. It has no side effects, whereas
  // the execution-order helper latches a process-wide flag as soon as it lets
  // a test through; consuming that latch in a test that is then skipped for
  // the environment would block every later rcclSetP2pNetChunkSize test.
  if (ShouldSkipP2pTest()) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
        << "which would override the static variable behavior. "
        << "This test requires clean environment to test architecture logic.";
  }
  // Only one rcclSetP2pNetChunkSize test may actually run per process.
  if (ShouldSkipP2pTestDueToExecutionOrder("GFX950_MediumRanks")) {
    GTEST_SKIP() << "Skipping due to execution order - another "
                    "rcclSetP2pNetChunkSize test already ran";
  }
  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize for GFX950 with medium ranks");
  ncclComm_t mockComm = nullptr;
  struct ncclTopoSystem mockTopo;
  struct ncclTopoNode mockGpuNode;
  CreateMockComm(mockComm, mockTopo, mockGpuNode, "gfx950", 24);
  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(mockComm, chunkSize);
  // Expected: 1 << 18 = 262144 for 16 <= ranks < 32
  EXPECT_EQ(chunkSize, 1 << 18)
      << "GFX950 with 16 <= ranks < 32 should set chunk size to 262144";
  INFO(NCCL_LOG_INFO, "GFX950 medium ranks test completed - chunk size: %d",
       chunkSize);
  CleanupMockComm(mockComm);
}
// rcclSetP2pNetChunkSize on a gfx950 mock communicator with 64 ranks:
// the test expects a chunk size of 1 << 19 (ranks >= 32).
TEST(Rcclwrap, GFX950_LargeRanks) {
  // Check the environment precondition first. It has no side effects, whereas
  // the execution-order helper latches a process-wide flag as soon as it lets
  // a test through; consuming that latch in a test that is then skipped for
  // the environment would block every later rcclSetP2pNetChunkSize test.
  if (ShouldSkipP2pTest()) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
        << "which would override the static variable behavior. "
        << "This test requires clean environment to test architecture logic.";
  }
  // Only one rcclSetP2pNetChunkSize test may actually run per process.
  if (ShouldSkipP2pTestDueToExecutionOrder("GFX950_LargeRanks")) {
    GTEST_SKIP() << "Skipping due to execution order - another "
                    "rcclSetP2pNetChunkSize test already ran";
  }
  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize for GFX950 with large ranks");
  ncclComm_t mockComm = nullptr;
  struct ncclTopoSystem mockTopo;
  struct ncclTopoNode mockGpuNode;
  CreateMockComm(mockComm, mockTopo, mockGpuNode, "gfx950", 64);
  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(mockComm, chunkSize);
  // Expected: 1 << 19 = 524288 for ranks >= 32
  EXPECT_EQ(chunkSize, 1 << 19)
      << "GFX950 with ranks >= 32 should set chunk size to 524288";
  INFO(NCCL_LOG_INFO, "GFX950 large ranks test completed - chunk size: %d",
       chunkSize);
  CleanupMockComm(mockComm);
}
// rcclSetP2pNetChunkSize on a gfx950 mock communicator with exactly 16 ranks:
// the test expects a chunk size of 1 << 18 (boundary of ranks >= 16).
TEST(Rcclwrap, GFX950_BoundaryRank16) {
  // Check the environment precondition first. It has no side effects, whereas
  // the execution-order helper latches a process-wide flag as soon as it lets
  // a test through; consuming that latch in a test that is then skipped for
  // the environment would block every later rcclSetP2pNetChunkSize test.
  if (ShouldSkipP2pTest()) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
        << "which would override the static variable behavior. "
        << "This test requires clean environment to test architecture logic.";
  }
  // Only one rcclSetP2pNetChunkSize test may actually run per process.
  if (ShouldSkipP2pTestDueToExecutionOrder("GFX950_BoundaryRank16")) {
    GTEST_SKIP() << "Skipping due to execution order - another "
                    "rcclSetP2pNetChunkSize test already ran";
  }
  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize for GFX950 with boundary rank 16");
  ncclComm_t mockComm = nullptr;
  struct ncclTopoSystem mockTopo;
  struct ncclTopoNode mockGpuNode;
  CreateMockComm(mockComm, mockTopo, mockGpuNode, "gfx950", 16);
  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(mockComm, chunkSize);
  // Expected: 1 << 18 = 262144 for ranks >= 16
  EXPECT_EQ(chunkSize, 1 << 18)
      << "GFX950 with ranks = 16 should set chunk size to 262144";
  INFO(NCCL_LOG_INFO, "GFX950 boundary rank 16 test completed - chunk size: %d",
       chunkSize);
  CleanupMockComm(mockComm);
}
// rcclSetP2pNetChunkSize on a gfx950 mock communicator with 15 ranks:
// the test expects a chunk size of 1 << 17 (just below the 16-rank boundary).
TEST(Rcclwrap, GFX950_BoundaryRank15) {
  // Check the environment precondition first. It has no side effects, whereas
  // the execution-order helper latches a process-wide flag as soon as it lets
  // a test through; consuming that latch in a test that is then skipped for
  // the environment would block every later rcclSetP2pNetChunkSize test.
  if (ShouldSkipP2pTest()) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
        << "which would override the static variable behavior. "
        << "This test requires clean environment to test architecture logic.";
  }
  // Only one rcclSetP2pNetChunkSize test may actually run per process.
  if (ShouldSkipP2pTestDueToExecutionOrder("GFX950_BoundaryRank15")) {
    GTEST_SKIP() << "Skipping due to execution order - another "
                    "rcclSetP2pNetChunkSize test already ran";
  }
  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize for GFX950 with boundary rank 15");
  ncclComm_t mockComm = nullptr;
  struct ncclTopoSystem mockTopo;
  struct ncclTopoNode mockGpuNode;
  CreateMockComm(mockComm, mockTopo, mockGpuNode, "gfx950", 15);
  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(mockComm, chunkSize);
  // Expected: 1 << 17 = 131072 for ranks < 16
  EXPECT_EQ(chunkSize, 1 << 17)
      << "GFX950 with ranks = 15 should set chunk size to 131072";
  INFO(NCCL_LOG_INFO, "GFX950 boundary rank 15 test completed - chunk size: %d",
       chunkSize);
  CleanupMockComm(mockComm);
}
// rcclSetP2pNetChunkSize on a gfx950 mock communicator with exactly 32 ranks:
// the test expects a chunk size of 1 << 19 (boundary of ranks >= 32).
TEST(Rcclwrap, GFX950_BoundaryRank32) {
  // Check the environment precondition first. It has no side effects, whereas
  // the execution-order helper latches a process-wide flag as soon as it lets
  // a test through; consuming that latch in a test that is then skipped for
  // the environment would block every later rcclSetP2pNetChunkSize test.
  if (ShouldSkipP2pTest()) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
        << "which would override the static variable behavior. "
        << "This test requires clean environment to test architecture logic.";
  }
  // Only one rcclSetP2pNetChunkSize test may actually run per process.
  if (ShouldSkipP2pTestDueToExecutionOrder("GFX950_BoundaryRank32")) {
    GTEST_SKIP() << "Skipping due to execution order - another "
                    "rcclSetP2pNetChunkSize test already ran";
  }
  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize for GFX950 with boundary rank 32");
  ncclComm_t mockComm = nullptr;
  struct ncclTopoSystem mockTopo;
  struct ncclTopoNode mockGpuNode;
  CreateMockComm(mockComm, mockTopo, mockGpuNode, "gfx950", 32);
  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(mockComm, chunkSize);
  // Expected: 1 << 19 = 524288 for ranks >= 32
  EXPECT_EQ(chunkSize, 1 << 19)
      << "GFX950 with ranks = 32 should set chunk size to 524288";
  INFO(NCCL_LOG_INFO, "GFX950 boundary rank 32 test completed - chunk size: %d",
       chunkSize);
  CleanupMockComm(mockComm);
}
// rcclSetP2pNetChunkSize on a gfx950 mock communicator with 31 ranks:
// the test expects a chunk size of 1 << 18 (just below the 32-rank boundary).
TEST(Rcclwrap, GFX950_BoundaryRank31) {
  // Check the environment precondition first. It has no side effects, whereas
  // the execution-order helper latches a process-wide flag as soon as it lets
  // a test through; consuming that latch in a test that is then skipped for
  // the environment would block every later rcclSetP2pNetChunkSize test.
  if (ShouldSkipP2pTest()) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
        << "which would override the static variable behavior. "
        << "This test requires clean environment to test architecture logic.";
  }
  // Only one rcclSetP2pNetChunkSize test may actually run per process.
  if (ShouldSkipP2pTestDueToExecutionOrder("GFX950_BoundaryRank31")) {
    GTEST_SKIP() << "Skipping due to execution order - another "
                    "rcclSetP2pNetChunkSize test already ran";
  }
  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize for GFX950 with boundary rank 31");
  ncclComm_t mockComm = nullptr;
  struct ncclTopoSystem mockTopo;
  struct ncclTopoNode mockGpuNode;
  CreateMockComm(mockComm, mockTopo, mockGpuNode, "gfx950", 31);
  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(mockComm, chunkSize);
  // Expected: 1 << 18 = 262144 for 16 <= ranks < 32
  EXPECT_EQ(chunkSize, 1 << 18)
      << "GFX950 with ranks = 31 should set chunk size to 262144";
  INFO(NCCL_LOG_INFO, "GFX950 boundary rank 31 test completed - chunk size: %d",
       chunkSize);
  CleanupMockComm(mockComm);
}
// rcclSetP2pNetChunkSize on a gfx908 mock communicator (32 ranks):
// the test expects RCCL_VALUE_INVALID for this architecture.
TEST(Rcclwrap, UnsupportedArch_GFX908) {
  // Check the environment precondition first. It has no side effects, whereas
  // the execution-order helper latches a process-wide flag as soon as it lets
  // a test through; consuming that latch in a test that is then skipped for
  // the environment would block every later rcclSetP2pNetChunkSize test.
  if (ShouldSkipP2pTest()) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
        << "which would override the static variable behavior. "
        << "This test requires clean environment to test architecture logic.";
  }
  // Only one rcclSetP2pNetChunkSize test may actually run per process.
  if (ShouldSkipP2pTestDueToExecutionOrder("UnsupportedArch_GFX908")) {
    GTEST_SKIP() << "Skipping due to execution order - another "
                    "rcclSetP2pNetChunkSize test already ran";
  }
  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize for unsupported architecture GFX908");
  ncclComm_t mockComm = nullptr;
  struct ncclTopoSystem mockTopo;
  struct ncclTopoNode mockGpuNode;
  CreateMockComm(mockComm, mockTopo, mockGpuNode, "gfx908", 32);
  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(mockComm, chunkSize);
  // Expected: RCCL_VALUE_INVALID for unsupported architectures
  EXPECT_EQ(chunkSize, RCCL_VALUE_INVALID)
      << "Unsupported architecture GFX908 should set chunk size to "
         "RCCL_VALUE_INVALID";
  INFO(NCCL_LOG_INFO,
       "Unsupported architecture GFX908 test completed - chunk size: %d",
       chunkSize);
  CleanupMockComm(mockComm);
}
// rcclSetP2pNetChunkSize on a gfx90a mock communicator (32 ranks):
// the test expects RCCL_VALUE_INVALID for this architecture.
TEST(Rcclwrap, UnsupportedArch_GFX90A) {
  // Check the environment precondition first. It has no side effects, whereas
  // the execution-order helper latches a process-wide flag as soon as it lets
  // a test through; consuming that latch in a test that is then skipped for
  // the environment would block every later rcclSetP2pNetChunkSize test.
  if (ShouldSkipP2pTest()) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
        << "which would override the static variable behavior. "
        << "This test requires clean environment to test architecture logic.";
  }
  // Only one rcclSetP2pNetChunkSize test may actually run per process.
  if (ShouldSkipP2pTestDueToExecutionOrder("UnsupportedArch_GFX90A")) {
    GTEST_SKIP() << "Skipping due to execution order - another "
                    "rcclSetP2pNetChunkSize test already ran";
  }
  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize for unsupported architecture GFX90A");
  ncclComm_t mockComm = nullptr;
  struct ncclTopoSystem mockTopo;
  struct ncclTopoNode mockGpuNode;
  CreateMockComm(mockComm, mockTopo, mockGpuNode, "gfx90a", 32);
  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(mockComm, chunkSize);
  // Expected: RCCL_VALUE_INVALID for unsupported architectures
  EXPECT_EQ(chunkSize, RCCL_VALUE_INVALID)
      << "Unsupported architecture GFX90A should set chunk size to "
         "RCCL_VALUE_INVALID";
  INFO(NCCL_LOG_INFO,
       "Unsupported architecture GFX90A test completed - chunk size: %d",
       chunkSize);
  CleanupMockComm(mockComm);
}
// This test specifically tests the environment variable behavior: with
// NCCL_P2P_NET_CHUNKSIZE set to "123456" the test expects
// rcclSetP2pNetChunkSize to report RCCL_VALUE_INVALID (user override).
TEST(Rcclwrap, WithEnvironmentVariable) {
  // Check the environment precondition first. It has no side effects, whereas
  // the execution-order helper latches a process-wide flag as soon as it lets
  // a test through; consuming that latch in a test that is then skipped for
  // the environment would block every later rcclSetP2pNetChunkSize test.
  // This test requires the variable to be set to a specific value.
  if (ShouldSkipP2pTest("123456")) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is not "
           "set to '123456'. "
        << "Please set: export NCCL_P2P_NET_CHUNKSIZE=123456 to run this test. "
        << "This test verifies that user override via environment variable "
           "works correctly.";
  }
  // Only one rcclSetP2pNetChunkSize test may actually run per process.
  if (ShouldSkipP2pTestDueToExecutionOrder("WithEnvironmentVariable")) {
    GTEST_SKIP() << "Skipping due to execution order - another "
                    "rcclSetP2pNetChunkSize test already ran";
  }
  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize with environment variable set");
  // Environment variable is confirmed to be set to "123456"
  const char *envVar = getenv("NCCL_P2P_NET_CHUNKSIZE");
  INFO(NCCL_LOG_INFO, "Environment variable found: NCCL_P2P_NET_CHUNKSIZE=%s",
       envVar);
  ncclComm_t mockComm = nullptr;
  struct ncclTopoSystem mockTopo;
  struct ncclTopoNode mockGpuNode;
  CreateMockComm(mockComm, mockTopo, mockGpuNode, "gfx942", 32);
  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(mockComm, chunkSize);
  // Expected: RCCL_VALUE_INVALID when environment variable is set (user
  // override)
  EXPECT_EQ(chunkSize, RCCL_VALUE_INVALID)
      << "When env var is set, should return RCCL_VALUE_INVALID";
  INFO(NCCL_LOG_INFO, "Environment variable test completed - chunk size: %d",
       chunkSize);
  INFO(NCCL_LOG_INFO,
       "User override via NCCL_P2P_NET_CHUNKSIZE=%s was respected", envVar);
  CleanupMockComm(mockComm);
}
// rcclSetP2pNetChunkSize with an empty architecture name: the test expects
// RCCL_VALUE_INVALID.
TEST(Rcclwrap, EmptyArchString) {
  // Has to be the first check: the helper also records that a
  // rcclSetP2pNetChunkSize test has now executed in this run.
  const bool ranAfterAnotherP2pTest =
      ShouldSkipP2pTestDueToExecutionOrder("EmptyArchString");
  if (ranAfterAnotherP2pTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetP2pNetChunkSize test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipP2pTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize with empty architecture string");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "", 32);

  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(comm, chunkSize);

  // An empty name is expected to be handled like an unknown architecture.
  EXPECT_EQ(chunkSize, RCCL_VALUE_INVALID)
      << "Empty architecture should set chunk size to RCCL_VALUE_INVALID";
  INFO(NCCL_LOG_INFO, "Empty architecture test completed - chunk size: %d",
       chunkSize);

  CleanupMockComm(comm);
}
// rcclSetP2pNetChunkSize with the truncated architecture name "gfx94": the
// test expects no match, i.e. RCCL_VALUE_INVALID.
TEST(Rcclwrap, PartialArchMatch) {
  // Has to be the first check: the helper also records that a
  // rcclSetP2pNetChunkSize test has now executed in this run.
  const bool ranAfterAnotherP2pTest =
      ShouldSkipP2pTestDueToExecutionOrder("PartialArchMatch");
  if (ranAfterAnotherP2pTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetP2pNetChunkSize test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipP2pTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize with partial architecture match");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx94", 32);

  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(comm, chunkSize);

  // A prefix of a supported name must not count as that architecture.
  EXPECT_EQ(chunkSize, RCCL_VALUE_INVALID)
      << "Partial architecture match should set chunk size to "
         "RCCL_VALUE_INVALID";
  INFO(NCCL_LOG_INFO,
       "Partial architecture match test completed - chunk size: %d",
       chunkSize);

  CleanupMockComm(comm);
}
// rcclSetP2pNetChunkSize on a mock gfx942 communicator created with a rank
// count of 0: the test expects the small-rank chunk size, 1 << 17 (131072).
TEST(Rcclwrap, ZeroRanks_GFX942) {
  // Has to be the first check: the helper also records that a
  // rcclSetP2pNetChunkSize test has now executed in this run.
  const bool ranAfterAnotherP2pTest =
      ShouldSkipP2pTestDueToExecutionOrder("ZeroRanks_GFX942");
  if (ranAfterAnotherP2pTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetP2pNetChunkSize test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipP2pTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize with zero ranks for GFX942");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx942", 0);

  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(comm, chunkSize);

  // 0 falls below the 64-rank threshold this test assumes for gfx942.
  EXPECT_EQ(chunkSize, 1 << 17)
      << "Zero ranks should be treated as < 64, setting chunk size to 131072";
  INFO(NCCL_LOG_INFO, "Zero ranks test completed - chunk size: %d", chunkSize);

  CleanupMockComm(comm);
}
// rcclSetP2pNetChunkSize on a mock gfx950 communicator created with a rank
// count of 0: the test expects the small-rank chunk size, 1 << 17 (131072).
TEST(Rcclwrap, ZeroRanks_GFX950) {
  // Has to be the first check: the helper also records that a
  // rcclSetP2pNetChunkSize test has now executed in this run.
  const bool ranAfterAnotherP2pTest =
      ShouldSkipP2pTestDueToExecutionOrder("ZeroRanks_GFX950");
  if (ranAfterAnotherP2pTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetP2pNetChunkSize test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipP2pTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize with zero ranks for GFX950");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx950", 0);

  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(comm, chunkSize);

  // 0 falls below the 16-rank threshold this test assumes for gfx950.
  EXPECT_EQ(chunkSize, 1 << 17)
      << "Zero ranks should be treated as < 16, setting chunk size to 131072";
  INFO(NCCL_LOG_INFO, "Zero ranks GFX950 test completed - chunk size: %d",
       chunkSize);

  CleanupMockComm(comm);
}
// rcclSetP2pNetChunkSize on a mock gfx950 communicator created with a rank
// count of 1000000: the test expects the large-rank chunk size, 1 << 19
// (524288).
TEST(Rcclwrap, LargeRankValues_GFX950) {
  // Has to be the first check: the helper also records that a
  // rcclSetP2pNetChunkSize test has now executed in this run.
  const bool ranAfterAnotherP2pTest =
      ShouldSkipP2pTestDueToExecutionOrder("LargeRankValues_GFX950");
  if (ranAfterAnotherP2pTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetP2pNetChunkSize test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipP2pTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize with very large rank values for GFX950");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx950", 1000000);

  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(comm, chunkSize);

  // 1000000 is far above the 32-rank threshold this test assumes for gfx950.
  EXPECT_EQ(chunkSize, 1 << 19)
      << "Very large ranks should be treated as >= 32, "
         "setting chunk size to 524288";
  INFO(NCCL_LOG_INFO, "Large rank values test completed - chunk size: %d",
       chunkSize);

  CleanupMockComm(comm);
}
// rcclSetP2pNetChunkSize with the upper-case architecture name "GFX942".
// Despite the test's name, the expectation is case-SENSITIVE matching: the
// upper-case spelling must not match, so RCCL_VALUE_INVALID is expected.
TEST(Rcclwrap, CaseInsensitiveArch) {
  // Has to be the first check: the helper also records that a
  // rcclSetP2pNetChunkSize test has now executed in this run.
  const bool ranAfterAnotherP2pTest =
      ShouldSkipP2pTestDueToExecutionOrder("CaseInsensitiveArch");
  if (ranAfterAnotherP2pTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetP2pNetChunkSize test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipP2pTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_P2P_NET_CHUNKSIZE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO,
       "Testing rcclSetP2pNetChunkSize with case variations in architecture");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  // Upper-case spelling on purpose.
  CreateMockComm(comm, topo, gpuNode, "GFX942", 32);

  int chunkSize = RCCL_VALUE_UNSET;
  rcclSetP2pNetChunkSize(comm, chunkSize);

  EXPECT_EQ(chunkSize, RCCL_VALUE_INVALID)
      << "Uppercase architecture should not match (case sensitive)";
  INFO(NCCL_LOG_INFO,
       "Case insensitive architecture test completed - chunk size: %d",
       chunkSize);

  CleanupMockComm(comm);
}
// rcclSetPxn tests (these follow the rcclSetP2pNetChunkSize tests above)
// rcclSetPxn on a mock gfx942 communicator created with a rank count of 32:
// the test expects pxnDisable == 1 (PXN disabled below 64 ranks).
TEST(Rcclwrap, PXN_GFX942_SmallRanks) {
  // Execution-order guard is evaluated before anything else.
  const bool ranAfterAnotherPxnTest =
      ShouldSkipPxnTestDueToExecutionOrder("PXN_GFX942_SmallRanks");
  if (ranAfterAnotherPxnTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetPxn test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipPxnTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_PXN_DISABLE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO, "Testing rcclSetPxn for GFX942 with small ranks");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx942", 32);

  int pxnDisable = RCCL_VALUE_UNSET;
  rcclSetPxn(comm, pxnDisable);

  // 32 < 64, so PXN is expected to be switched off.
  EXPECT_EQ(pxnDisable, 1)
      << "GFX942 with ranks < 64 should disable PXN (pxnDisable = 1)";
  INFO(NCCL_LOG_INFO, "GFX942 small ranks PXN test completed - pxnDisable: %d",
       pxnDisable);

  CleanupMockComm(comm);
}
// rcclSetPxn on a mock gfx942 communicator created with a rank count of 128:
// the test expects pxnDisable == 0 (PXN enabled at 64 ranks and above).
TEST(Rcclwrap, PXN_GFX942_LargeRanks) {
  // Execution-order guard is evaluated before anything else.
  const bool ranAfterAnotherPxnTest =
      ShouldSkipPxnTestDueToExecutionOrder("PXN_GFX942_LargeRanks");
  if (ranAfterAnotherPxnTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetPxn test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipPxnTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_PXN_DISABLE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO, "Testing rcclSetPxn for GFX942 with large ranks");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx942", 128);

  int pxnDisable = RCCL_VALUE_UNSET;
  rcclSetPxn(comm, pxnDisable);

  // 128 >= 64, so PXN is expected to stay on.
  EXPECT_EQ(pxnDisable, 0)
      << "GFX942 with ranks >= 64 should enable PXN (pxnDisable = 0)";
  INFO(NCCL_LOG_INFO, "GFX942 large ranks PXN test completed - pxnDisable: %d",
       pxnDisable);

  CleanupMockComm(comm);
}
// Boundary case for rcclSetPxn on gfx942: a rank count of exactly 64 is
// expected to land on the "enabled" side (pxnDisable == 0).
TEST(Rcclwrap, PXN_GFX942_BoundaryRank64) {
  // Execution-order guard is evaluated before anything else.
  const bool ranAfterAnotherPxnTest =
      ShouldSkipPxnTestDueToExecutionOrder("PXN_GFX942_BoundaryRank64");
  if (ranAfterAnotherPxnTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetPxn test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipPxnTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_PXN_DISABLE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO, "Testing rcclSetPxn for GFX942 with boundary rank 64");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx942", 64);

  int pxnDisable = RCCL_VALUE_UNSET;
  rcclSetPxn(comm, pxnDisable);

  // The threshold itself counts as "large": PXN enabled.
  EXPECT_EQ(pxnDisable, 0)
      << "GFX942 with ranks = 64 should enable PXN (pxnDisable = 0)";
  INFO(NCCL_LOG_INFO,
       "GFX942 boundary rank 64 PXN test completed - pxnDisable: %d",
       pxnDisable);

  CleanupMockComm(comm);
}
// Boundary case for rcclSetPxn on gfx942: a rank count of 63, one below the
// threshold, is expected to land on the "disabled" side (pxnDisable == 1).
TEST(Rcclwrap, PXN_GFX942_BoundaryRank63) {
  // Execution-order guard is evaluated before anything else.
  const bool ranAfterAnotherPxnTest =
      ShouldSkipPxnTestDueToExecutionOrder("PXN_GFX942_BoundaryRank63");
  if (ranAfterAnotherPxnTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetPxn test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipPxnTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_PXN_DISABLE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO, "Testing rcclSetPxn for GFX942 with boundary rank 63");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx942", 63);

  int pxnDisable = RCCL_VALUE_UNSET;
  rcclSetPxn(comm, pxnDisable);

  // One below the threshold still counts as "small": PXN disabled.
  EXPECT_EQ(pxnDisable, 1)
      << "GFX942 with ranks = 63 should disable PXN (pxnDisable = 1)";
  INFO(NCCL_LOG_INFO,
       "GFX942 boundary rank 63 PXN test completed - pxnDisable: %d",
       pxnDisable);

  CleanupMockComm(comm);
}
// rcclSetPxn on a mock gfx950 communicator created with a rank count of 16:
// the test expects pxnDisable == 1 (PXN disabled below 32 ranks).
TEST(Rcclwrap, PXN_GFX950_SmallRanks) {
  // Execution-order guard is evaluated before anything else.
  const bool ranAfterAnotherPxnTest =
      ShouldSkipPxnTestDueToExecutionOrder("PXN_GFX950_SmallRanks");
  if (ranAfterAnotherPxnTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetPxn test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipPxnTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_PXN_DISABLE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO, "Testing rcclSetPxn for GFX950 with small ranks");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx950", 16);

  int pxnDisable = RCCL_VALUE_UNSET;
  rcclSetPxn(comm, pxnDisable);

  // 16 < 32, so PXN is expected to be switched off.
  EXPECT_EQ(pxnDisable, 1)
      << "GFX950 with ranks < 32 should disable PXN (pxnDisable = 1)";
  INFO(NCCL_LOG_INFO, "GFX950 small ranks PXN test completed - pxnDisable: %d",
       pxnDisable);

  CleanupMockComm(comm);
}
// rcclSetPxn on a mock gfx950 communicator created with a rank count of 64:
// the test expects pxnDisable == 0 (PXN enabled at 32 ranks and above).
TEST(Rcclwrap, PXN_GFX950_LargeRanks) {
  // Execution-order guard is evaluated before anything else.
  const bool ranAfterAnotherPxnTest =
      ShouldSkipPxnTestDueToExecutionOrder("PXN_GFX950_LargeRanks");
  if (ranAfterAnotherPxnTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetPxn test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipPxnTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_PXN_DISABLE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO, "Testing rcclSetPxn for GFX950 with large ranks");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx950", 64);

  int pxnDisable = RCCL_VALUE_UNSET;
  rcclSetPxn(comm, pxnDisable);

  // 64 >= 32, so PXN is expected to stay on.
  EXPECT_EQ(pxnDisable, 0)
      << "GFX950 with ranks >= 32 should enable PXN (pxnDisable = 0)";
  INFO(NCCL_LOG_INFO, "GFX950 large ranks PXN test completed - pxnDisable: %d",
       pxnDisable);

  CleanupMockComm(comm);
}
// Boundary case for rcclSetPxn on gfx950: a rank count of exactly 32 is
// expected to land on the "enabled" side (pxnDisable == 0).
TEST(Rcclwrap, PXN_GFX950_BoundaryRank32) {
  // Execution-order guard is evaluated before anything else.
  const bool ranAfterAnotherPxnTest =
      ShouldSkipPxnTestDueToExecutionOrder("PXN_GFX950_BoundaryRank32");
  if (ranAfterAnotherPxnTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetPxn test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipPxnTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_PXN_DISABLE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO, "Testing rcclSetPxn for GFX950 with boundary rank 32");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx950", 32);

  int pxnDisable = RCCL_VALUE_UNSET;
  rcclSetPxn(comm, pxnDisable);

  // The threshold itself counts as "large": PXN enabled.
  EXPECT_EQ(pxnDisable, 0)
      << "GFX950 with ranks = 32 should enable PXN (pxnDisable = 0)";
  INFO(NCCL_LOG_INFO,
       "GFX950 boundary rank 32 PXN test completed - pxnDisable: %d",
       pxnDisable);

  CleanupMockComm(comm);
}
// Boundary case for rcclSetPxn on gfx950: a rank count of 31, one below the
// threshold, is expected to land on the "disabled" side (pxnDisable == 1).
TEST(Rcclwrap, PXN_GFX950_BoundaryRank31) {
  // Execution-order guard is evaluated before anything else.
  const bool ranAfterAnotherPxnTest =
      ShouldSkipPxnTestDueToExecutionOrder("PXN_GFX950_BoundaryRank31");
  if (ranAfterAnotherPxnTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetPxn test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipPxnTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_PXN_DISABLE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO, "Testing rcclSetPxn for GFX950 with boundary rank 31");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx950", 31);

  int pxnDisable = RCCL_VALUE_UNSET;
  rcclSetPxn(comm, pxnDisable);

  // One below the threshold still counts as "small": PXN disabled.
  EXPECT_EQ(pxnDisable, 1)
      << "GFX950 with ranks = 31 should disable PXN (pxnDisable = 1)";
  INFO(NCCL_LOG_INFO,
       "GFX950 boundary rank 31 PXN test completed - pxnDisable: %d",
       pxnDisable);

  CleanupMockComm(comm);
}
// rcclSetPxn on a mock gfx908 communicator: the test treats gfx908 as an
// unsupported architecture and expects pxnDisable == RCCL_VALUE_INVALID.
TEST(Rcclwrap, PXN_UnsupportedArch_GFX908) {
  // Execution-order guard is evaluated before anything else.
  const bool ranAfterAnotherPxnTest =
      ShouldSkipPxnTestDueToExecutionOrder("PXN_UnsupportedArch_GFX908");
  if (ranAfterAnotherPxnTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetPxn test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipPxnTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_PXN_DISABLE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO,
       "Testing rcclSetPxn for unsupported architecture GFX908");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx908", 32);

  int pxnDisable = RCCL_VALUE_UNSET;
  rcclSetPxn(comm, pxnDisable);

  EXPECT_EQ(pxnDisable, RCCL_VALUE_INVALID)
      << "Unsupported architecture GFX908 should set pxnDisable to "
         "RCCL_VALUE_INVALID";
  INFO(NCCL_LOG_INFO,
       "Unsupported architecture GFX908 PXN test completed - pxnDisable: %d",
       pxnDisable);

  CleanupMockComm(comm);
}
// rcclSetPxn on a mock gfx90a communicator: the test treats gfx90a as an
// unsupported architecture and expects pxnDisable == RCCL_VALUE_INVALID.
TEST(Rcclwrap, PXN_UnsupportedArch_GFX90A) {
  // Execution-order guard is evaluated before anything else.
  const bool ranAfterAnotherPxnTest =
      ShouldSkipPxnTestDueToExecutionOrder("PXN_UnsupportedArch_GFX90A");
  if (ranAfterAnotherPxnTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetPxn test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipPxnTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_PXN_DISABLE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO,
       "Testing rcclSetPxn for unsupported architecture GFX90A");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx90a", 32);

  int pxnDisable = RCCL_VALUE_UNSET;
  rcclSetPxn(comm, pxnDisable);

  EXPECT_EQ(pxnDisable, RCCL_VALUE_INVALID)
      << "Unsupported architecture GFX90A should set pxnDisable to "
         "RCCL_VALUE_INVALID";
  INFO(NCCL_LOG_INFO,
       "Unsupported architecture GFX90A PXN test completed - pxnDisable: %d",
       pxnDisable);

  CleanupMockComm(comm);
}
// User-override path of rcclSetPxn: meant to run only when NCCL_PXN_DISABLE
// is exported as "1", and expects pxnDisable to be reported as
// RCCL_VALUE_INVALID in that case.
TEST(Rcclwrap, PXN_WithEnvironmentVariable) {
  // Execution-order guard is evaluated before anything else.
  const bool ranAfterAnotherPxnTest =
      ShouldSkipPxnTestDueToExecutionOrder("PXN_WithEnvironmentVariable");
  if (ranAfterAnotherPxnTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetPxn test already ran";
  }

  // Skip unless the environment carries the required value "1".
  const bool envMismatch = ShouldSkipPxnTest("1");
  if (envMismatch) {
    GTEST_SKIP()
        << "Skipping test: NCCL_PXN_DISABLE environment variable is not set "
           "to '1'. "
           "Please set: export NCCL_PXN_DISABLE=1 to run this test. "
           "This test verifies that user override via environment variable "
           "works correctly.";
  }

  INFO(NCCL_LOG_INFO, "Testing rcclSetPxn with environment variable set");

  // NOTE(review): relies on the guard above to guarantee the variable is set
  // before it is logged with %s - confirm against ShouldSkipPxnTest().
  const char *overrideValue = getenv("NCCL_PXN_DISABLE");
  INFO(NCCL_LOG_INFO, "Environment variable found: NCCL_PXN_DISABLE=%s",
       overrideValue);

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx942", 128);

  int pxnDisable = RCCL_VALUE_UNSET;
  rcclSetPxn(comm, pxnDisable);

  // With a user override in place the test expects RCCL_VALUE_INVALID.
  EXPECT_EQ(pxnDisable, RCCL_VALUE_INVALID)
      << "When env var is set, should return RCCL_VALUE_INVALID";
  INFO(NCCL_LOG_INFO,
       "Environment variable PXN test completed - pxnDisable: %d", pxnDisable);
  INFO(NCCL_LOG_INFO, "User override via NCCL_PXN_DISABLE=%s was respected",
       overrideValue);

  CleanupMockComm(comm);
}
// rcclSetPxn on a mock gfx942 communicator created with a rank count of 0:
// the test expects pxnDisable == 1, i.e. 0 is handled like any count < 64.
TEST(Rcclwrap, PXN_ZeroRanks_GFX942) {
  // Execution-order guard is evaluated before anything else.
  const bool ranAfterAnotherPxnTest =
      ShouldSkipPxnTestDueToExecutionOrder("PXN_ZeroRanks_GFX942");
  if (ranAfterAnotherPxnTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetPxn test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipPxnTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_PXN_DISABLE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO, "Testing rcclSetPxn with zero ranks for GFX942");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx942", 0);

  int pxnDisable = RCCL_VALUE_UNSET;
  rcclSetPxn(comm, pxnDisable);

  EXPECT_EQ(pxnDisable, 1)
      << "Zero ranks should be treated as < 64, disabling PXN (pxnDisable = 1)";
  INFO(NCCL_LOG_INFO, "Zero ranks GFX942 PXN test completed - pxnDisable: %d",
       pxnDisable);

  CleanupMockComm(comm);
}
// rcclSetPxn on a mock gfx950 communicator created with a rank count of 0:
// the test expects pxnDisable == 1, i.e. 0 is handled like any count < 32.
TEST(Rcclwrap, PXN_ZeroRanks_GFX950) {
  // Execution-order guard is evaluated before anything else.
  const bool ranAfterAnotherPxnTest =
      ShouldSkipPxnTestDueToExecutionOrder("PXN_ZeroRanks_GFX950");
  if (ranAfterAnotherPxnTest) {
    GTEST_SKIP() << "Skipping due to execution order - "
                    "another rcclSetPxn test already ran";
  }

  // The architecture-based path is only exercised without a user override.
  const bool envUnsuitable = ShouldSkipPxnTest();
  if (envUnsuitable) {
    GTEST_SKIP()
        << "Skipping test: NCCL_PXN_DISABLE environment variable is set, "
           "which would override the static variable behavior. "
           "This test requires clean environment to test architecture logic.";
  }

  INFO(NCCL_LOG_INFO, "Testing rcclSetPxn with zero ranks for GFX950");

  struct ncclTopoNode gpuNode;
  struct ncclTopoSystem topo;
  ncclComm_t comm = nullptr;
  CreateMockComm(comm, topo, gpuNode, "gfx950", 0);

  int pxnDisable = RCCL_VALUE_UNSET;
  rcclSetPxn(comm, pxnDisable);

  EXPECT_EQ(pxnDisable, 1)
      << "Zero ranks should be treated as < 32, disabling PXN (pxnDisable = 1)";
  INFO(NCCL_LOG_INFO, "Zero ranks GFX950 PXN test completed - pxnDisable: %d",
       pxnDisable);

  CleanupMockComm(comm);
}
// rcclSetPipelining with a non-bf16 datatype (ncclFloat32): with default
// settings the test expects info.pipeline to stay 0, even on a mock gfx950
// communicator configured as multi-node.
TEST(Rcclwrap, RcclSetPipelining_Invalid_DType) {
  // Skip the test if pipelining has been disabled
  // (RCCL_DISABLE_REDUCE_COPY_PIPELINING=1)
  if (ShouldSkipRcclSetPipeliningTests()) {
    GTEST_SKIP()
        << "Skipping test: RCCL_DISABLE_REDUCE_COPY_PIPELINING environment "
           "variable is set. Unset this variable to enable pipelining.";
  }

  // Skip the test if pipelining has been enabled for all data types
  // (RCCL_PIPELINE_ALL_DATA_TYPES set to anything other than "0"); the
  // expectation below only holds for the bf16-only default.
  const char *allowAllDTypes = getenv("RCCL_PIPELINE_ALL_DATA_TYPES");
  if (allowAllDTypes && strcmp(allowAllDTypes, "0") != 0) {
    GTEST_SKIP() << "Skipping test: RCCL_PIPELINE_ALL_DATA_TYPES environment "
                    "variable is set. Unset this variable to enable pipelining "
                    "only for bf16 data type.";
  }

  ncclComm_t comm = nullptr;
  struct ncclTopoSystem topo;
  struct ncclTopoNode gpu;
  CreateMockComm(comm, topo, gpu, "gfx950", 8);
  // comm is dereferenced right below; fail this test cleanly instead of
  // crashing the whole test binary if the mock could not be created.
  ASSERT_NE(comm, nullptr) << "CreateMockComm did not produce a communicator";
  comm->nNodes = 2; // Multi node

  ncclTaskColl info = {};
  info.func = ncclFuncAllReduce;
  info.datatype = ncclFloat32;
  size_t nBytes = 16 * 1024 * 1024; // 16MB

  rcclSetPipelining(comm, nBytes, &info);

  // Pipeline should not be set for non-bf16 datatypes, unless
  // rcclParamPipelineAllDTypes() returns true (per the original test notes).
  EXPECT_EQ(info.pipeline, 0) << "Non-bf16 should not set pipeline by default";

  CleanupMockComm(comm);
}
// rcclSetPipelining on a mock gfx950 communicator configured with nNodes = 2:
// a bf16 AllReduce of 16MB is expected to set info.pipeline to 1.
TEST(Rcclwrap, RcclSetPipelining_GFX950_MultiNode_Enable) {
  // Skip the test if pipelining has been disabled
  // (RCCL_DISABLE_REDUCE_COPY_PIPELINING=1)
  if (ShouldSkipRcclSetPipeliningTests()) {
    GTEST_SKIP()
        << "Skipping test: RCCL_DISABLE_REDUCE_COPY_PIPELINING environment "
           "variable is set. Unset this variable to enable pipelining.";
  }

  ncclComm_t comm = nullptr;
  struct ncclTopoSystem topo;
  struct ncclTopoNode gpu;
  CreateMockComm(comm, topo, gpu, "gfx950", 8);
  // comm is dereferenced right below; fail this test cleanly instead of
  // crashing the whole test binary if the mock could not be created.
  ASSERT_NE(comm, nullptr) << "CreateMockComm did not produce a communicator";
  comm->nNodes = 2; // Multi node

  ncclTaskColl info = {};
  // Per the original test notes: in rcclSetPipelining(), ncclFuncAllReduce,
  // ncclFuncReduceScatter, and ncclFuncReduce share the same case body, so
  // testing any one of them is sufficient to validate that code path.
  info.func = ncclFuncAllReduce;
  info.datatype = ncclBfloat16;
  size_t nBytes = 16 * 1024 * 1024; // 16MB

  rcclSetPipelining(comm, nBytes, &info);

  EXPECT_EQ(info.pipeline, 1)
      << "gfx950 multi-node AllReduce bf16 should enable pipelining";

  CleanupMockComm(comm);
}
// rcclSetPipelining on a mock gfx950 communicator configured with nNodes = 1:
// a bf16 AllReduce of 16MB is expected to leave info.pipeline at 0.
TEST(Rcclwrap, RcclSetPipelining_GFX950_SingleNode_Disable) {
  // Skip the test if pipelining has been disabled
  // (RCCL_DISABLE_REDUCE_COPY_PIPELINING=1)
  if (ShouldSkipRcclSetPipeliningTests()) {
    GTEST_SKIP()
        << "Skipping test: RCCL_DISABLE_REDUCE_COPY_PIPELINING environment "
           "variable is set. Unset this variable to enable pipelining.";
  }

  ncclComm_t comm = nullptr;
  struct ncclTopoSystem topo;
  struct ncclTopoNode gpu;
  CreateMockComm(comm, topo, gpu, "gfx950", 8);
  // comm is dereferenced right below; fail this test cleanly instead of
  // crashing the whole test binary if the mock could not be created.
  ASSERT_NE(comm, nullptr) << "CreateMockComm did not produce a communicator";
  comm->nNodes = 1; // Single node

  ncclTaskColl info = {};
  // Per the original test notes: in rcclSetPipelining(), ncclFuncAllReduce,
  // ncclFuncReduceScatter, and ncclFuncReduce share the same case body, so
  // testing any one of them is sufficient to validate that code path.
  info.func = ncclFuncAllReduce;
  info.datatype = ncclBfloat16;
  size_t nBytes = 16 * 1024 * 1024; // 16MB

  rcclSetPipelining(comm, nBytes, &info);

  EXPECT_EQ(info.pipeline, 0)
      << "gfx950 single-node should not enable pipelining";

  CleanupMockComm(comm);
}
// rcclSetPipelining on a mock gfx942 communicator configured with nNodes = 1:
// a bf16 AllReduce of 16MB is expected to set info.pipeline to 1.
TEST(Rcclwrap, RcclSetPipelining_GFX942_SingleNode_AllReduce_Enable) {
  // Skip the test if pipelining has been disabled
  // (RCCL_DISABLE_REDUCE_COPY_PIPELINING=1)
  if (ShouldSkipRcclSetPipeliningTests()) {
    GTEST_SKIP()
        << "Skipping test: RCCL_DISABLE_REDUCE_COPY_PIPELINING environment "
           "variable is set. Unset this variable to enable pipelining.";
  }

  ncclComm_t comm = nullptr;
  struct ncclTopoSystem topo;
  struct ncclTopoNode gpu;
  CreateMockComm(comm, topo, gpu, "gfx942", 8);
  // comm is dereferenced right below; fail this test cleanly instead of
  // crashing the whole test binary if the mock could not be created.
  ASSERT_NE(comm, nullptr) << "CreateMockComm did not produce a communicator";
  comm->nNodes = 1; // Single node

  ncclTaskColl info = {};
  info.func = ncclFuncAllReduce;
  info.datatype = ncclBfloat16;
  size_t nBytes = 16 * 1024 * 1024; // 16MB

  rcclSetPipelining(comm, nBytes, &info);

  EXPECT_EQ(info.pipeline, 1)
      << "gfx942 single-node AllReduce bf16 should enable pipelining";

  CleanupMockComm(comm);
}
// rcclSetPipelining on a mock gfx942 communicator configured with nNodes = 4:
// a bf16 AllReduce of exactly 1GB is expected to set info.pipeline to 1.
//
// Per the original test notes, multi-node AllReduce with bf16 is pipelined
// when nBytes <= 512MB * 2^(log2(nNodes)-1); for nNodes = 4 that threshold is
// 512MB * 2^(2-1) = 1GB, so this input sits exactly on the boundary.
TEST(Rcclwrap, RcclSetPipelining_GFX942_MultiNode_AllReduce_Enable) {
  // Skip the test if pipelining has been disabled
  // (RCCL_DISABLE_REDUCE_COPY_PIPELINING=1)
  if (ShouldSkipRcclSetPipeliningTests()) {
    GTEST_SKIP()
        << "Skipping test: RCCL_DISABLE_REDUCE_COPY_PIPELINING environment "
           "variable is set. Unset this variable to enable pipelining.";
  }

  ncclComm_t comm = nullptr;
  struct ncclTopoSystem topo;
  struct ncclTopoNode gpu;
  CreateMockComm(comm, topo, gpu, "gfx942", 8);
  // comm is dereferenced right below; fail this test cleanly instead of
  // crashing the whole test binary if the mock could not be created.
  ASSERT_NE(comm, nullptr) << "CreateMockComm did not produce a communicator";
  comm->nNodes = 4;

  ncclTaskColl info = {};
  info.func = ncclFuncAllReduce;
  info.datatype = ncclBfloat16;
  size_t nBytes = (1ULL << 30); // 1GB, exactly at threshold

  rcclSetPipelining(comm, nBytes, &info);

  EXPECT_EQ(info.pipeline, 1)
      << "gfx942 4-node AllReduce at threshold should enable pipelining";

  CleanupMockComm(comm);
}
// rcclSetPipelining on a mock gfx942 communicator configured with nNodes = 4:
// a bf16 AllReduce of 1GB + 1KB, just above the size threshold the tests
// assume for 4 nodes, is expected to leave info.pipeline at 0.
TEST(Rcclwrap, RcclSetPipelining_GFX942_MultiNode_AllReduce_Disable) {
  // Skip the test if pipelining has been disabled
  // (RCCL_DISABLE_REDUCE_COPY_PIPELINING=1)
  if (ShouldSkipRcclSetPipeliningTests()) {
    GTEST_SKIP()
        << "Skipping test: RCCL_DISABLE_REDUCE_COPY_PIPELINING environment "
           "variable is set. Unset this variable to enable pipelining.";
  }

  ncclComm_t comm = nullptr;
  struct ncclTopoSystem topo;
  struct ncclTopoNode gpu;
  CreateMockComm(comm, topo, gpu, "gfx942", 8);
  // comm is dereferenced right below; fail this test cleanly instead of
  // crashing the whole test binary if the mock could not be created.
  ASSERT_NE(comm, nullptr) << "CreateMockComm did not produce a communicator";
  comm->nNodes = 4;

  ncclTaskColl info = {};
  info.func = ncclFuncAllReduce;
  info.datatype = ncclBfloat16;
  size_t nBytes = (1ULL << 30) + 1024; // 1GB + 1KB, just above threshold

  rcclSetPipelining(comm, nBytes, &info);

  EXPECT_EQ(info.pipeline, 0)
      << "gfx942 4-node AllReduce above threshold should disable pipelining";

  CleanupMockComm(comm);
}
// rcclSetPipelining on a mock gfx942 communicator configured with nNodes = 8:
// a bf16 ReduceScatter of 16MB is expected to set info.pipeline to 1. The
// test's intent is that ReduceScatter and Reduce enable pipelining regardless
// of the number of nodes.
TEST(Rcclwrap, RcclSetPipelining_GFX942_Enable) {
  // Skip the test if pipelining has been disabled
  // (RCCL_DISABLE_REDUCE_COPY_PIPELINING=1)
  if (ShouldSkipRcclSetPipeliningTests()) {
    GTEST_SKIP()
        << "Skipping test: RCCL_DISABLE_REDUCE_COPY_PIPELINING environment "
           "variable is set. Unset this variable to enable pipelining.";
  }

  ncclComm_t comm = nullptr;
  struct ncclTopoSystem topo;
  struct ncclTopoNode gpu;
  CreateMockComm(comm, topo, gpu, "gfx942", 8);
  // comm is dereferenced right below; fail this test cleanly instead of
  // crashing the whole test binary if the mock could not be created.
  ASSERT_NE(comm, nullptr) << "CreateMockComm did not produce a communicator";
  comm->nNodes = 8;

  ncclTaskColl info = {};
  // Per the original test notes: in rcclSetPipelining(), ncclFuncReduceScatter
  // and ncclFuncReduce share the same case body, so testing either one is
  // sufficient to validate that code path.
  info.func = ncclFuncReduceScatter;
  info.datatype = ncclBfloat16;
  size_t nBytes = 16 * 1024 * 1024; // 16MB

  rcclSetPipelining(comm, nBytes, &info);

  EXPECT_EQ(info.pipeline, 1)
      << "gfx942 ReduceScatter and Reduce should enable "
         "pipelining with single or multi-node";

  CleanupMockComm(comm);
}
// rcclOverrideProtocol with RCCL_OVERRIDE_PROTO unset: the call is expected
// to return ncclSuccess (no override applied).
TEST(Rcclwrap, RcclOverrideProtocol_NoOverride) {
  // Skip the test if RCCL_OVERRIDE_PROTO is set; this test only covers the
  // no-override path.
  const char *protoOverrideEnv = getenv("RCCL_OVERRIDE_PROTO");
  if (protoOverrideEnv) {
    GTEST_SKIP() << "Skipping test: Variable RCCL_OVERRIDE_PROTO is set. Unset "
                    "it to run this test.";
  }

  // Value-initialize the table so the function under test never sees
  // indeterminate floats, matching the sibling tests that fill it explicitly.
  float table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS] = {};
  ncclTaskColl info = {};

  ncclResult_t result = rcclOverrideProtocol(ncclProtoStr, table, &info);

  EXPECT_EQ(result, ncclSuccess)
      << "Expected ncclSuccess when RCCL_OVERRIDE_PROTO is unset, indicating "
         "no override should be applied.";
}
// rcclOverrideProtocol with a valid RCCL_OVERRIDE_PROTO but a table in which
// every algorithm/protocol pair is marked NCCL_ALGO_PROTO_IGNORE: the call is
// expected to return ncclInternalError.
TEST(Rcclwrap, RcclOverrideProtocol_UnsupportedOverride) {
  // Runs only when RCCL_OVERRIDE_PROTO passes isProtoStrValid(); an unset or
  // invalid value skips the test.
  const char *overrideEnv = getenv("RCCL_OVERRIDE_PROTO");
  const bool overrideUsable = isProtoStrValid(overrideEnv);
  if (!overrideUsable) {
    GTEST_SKIP()
        << "Skipping test: Variable RCCL_OVERRIDE_PROTO is not set or set to "
           "an invalid value. Set it to a valid protocol value to run this "
           "test.";
  }

  // Flag every combination as unsupported for the purpose of this test.
  float table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
  for (auto &algoRow : table) {
    for (float &entry : algoRow) {
      entry = NCCL_ALGO_PROTO_IGNORE;
    }
  }

  ncclTaskColl info = {};
  info.algorithm = NCCL_ALGO_RING; // Any algorithm will do
  info.datatype = ncclBfloat16;
  info.func = ncclFuncReduceScatter;

  ncclResult_t result = rcclOverrideProtocol(ncclProtoStr, table, &info);

  EXPECT_EQ(result, ncclInternalError)
      << "Expected ncclInternalError when the override protocol is valid, "
         "but not enabled for the selected algorithm.";
}
// rcclOverrideProtocol with a valid RCCL_OVERRIDE_PROTO and a table in which
// every entry is 0.0: the call is expected to return ncclSuccess and to set
// info.protocol to the index of the protocol named in the environment.
TEST(Rcclwrap, RcclOverrideProtocol_ValidOverride) {
  // Runs only when RCCL_OVERRIDE_PROTO passes isProtoStrValid(); an unset or
  // invalid value skips the test.
  const char *overrideEnv = getenv("RCCL_OVERRIDE_PROTO");
  const bool overrideUsable = isProtoStrValid(overrideEnv);
  if (!overrideUsable) {
    GTEST_SKIP()
        << "Skipping test: RCCL_OVERRIDE_PROTO is not set or set to an "
           "invalid value. Set it to a valid protocol name (e.g., 'Simple') "
           "to run this test.";
  }

  // Resolve the protocol name to its index up front; it is the reference
  // value for the final comparison.
  int protoIndex = NCCL_PROTO_UNDEF;
  ncclResult_t idxResult = rcclGetAlgoProtoIndex(
      overrideEnv, ncclProtoStr, NCCL_NUM_PROTOCOLS, protoIndex);
  ASSERT_EQ(idxResult, ncclSuccess)
      << "Failed to get protocol index from string";

  // All-zero table: every combination is treated as valid for this test.
  float table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS] = {};

  ncclTaskColl info = {};
  info.protocol = NCCL_PROTO_UNDEF;
  info.algorithm = NCCL_ALGO_RING; // Any algorithm will do
  info.datatype = ncclBfloat16;
  info.func = ncclFuncAllReduce;

  ncclResult_t result = rcclOverrideProtocol(ncclProtoStr, table, &info);

  EXPECT_EQ(result, ncclSuccess)
      << "Expected ncclSuccess when override is applied successfully.";
  EXPECT_EQ(info.protocol, protoIndex)
      << "Protocol index should match the override value from environment.";
}
// Calls rcclOverrideProtocol twice with the same arguments while
// RCCL_OVERRIDE_PROTO holds a value accepted by isProtoStrValid. Both calls
// are expected to return ncclSuccess and leave info.protocol equal to the
// index that rcclGetAlgoProtoIndex resolves for the environment string.
// The test is skipped when isProtoStrValid rejects the environment value.
TEST(Rcclwrap, RcclOverrideProtocol_ValidOverridePersists) {
  const char *overrideValue = getenv("RCCL_OVERRIDE_PROTO");
  // Nothing to test unless the environment carries a value that passes
  // validation.
  if (!isProtoStrValid(overrideValue)) {
    GTEST_SKIP()
        << "Skipping test: RCCL_OVERRIDE_PROTO is not set or set to an invalid "
           "value. Set it to a valid protocol name (e.g., 'Simple') to run "
           "this test.";
  }

  // Resolve the expected protocol index up front; abort if that fails.
  int protoIndex = NCCL_PROTO_UNDEF;
  const ncclResult_t idxResult = rcclGetAlgoProtoIndex(
      overrideValue, ncclProtoStr, NCCL_NUM_PROTOCOLS, protoIndex);
  ASSERT_EQ(idxResult, ncclSuccess)
      << "Failed to get protocol index from string";

  // Treat every (algorithm, protocol) pair as valid for this test.
  float table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
  for (auto &protoRow : table) {
    for (float &slot : protoRow) {
      slot = 0.0;
    }
  }

  ncclTaskColl info = {};
  info.protocol = NCCL_PROTO_UNDEF;
  info.algorithm = NCCL_ALGO_RING; // Any algorithm will do
  info.datatype = ncclFloat16;
  info.func = ncclFuncAllReduce;

  // Initial invocation.
  const ncclResult_t result1 = rcclOverrideProtocol(ncclProtoStr, table, &info);
  EXPECT_EQ(result1, ncclSuccess)
      << "Expected rcclOverrideProtocol to succeed with valid override";
  EXPECT_EQ(info.protocol, protoIndex)
      << "Expected protocol to match override after first call";

  // Repeat invocation with untouched inputs; the outcome must be the same.
  const ncclResult_t result2 = rcclOverrideProtocol(ncclProtoStr, table, &info);
  EXPECT_EQ(result2, ncclSuccess)
      << "Expected rcclOverrideProtocol to succeed again on second call";
  EXPECT_EQ(info.protocol, protoIndex)
      << "Expected protocol to match override after second call";
}
// Verifies that rcclOverrideProtocol returns ncclInvalidUsage when
// RCCL_OVERRIDE_PROTO is set to a value that isProtoStrValid rejects.
// The test is skipped when the variable is unset or holds a valid value.
TEST(Rcclwrap, RcclOverrideProtocol_InvalidProtocol) {
  const char *protoOverrideEnv = getenv("RCCL_OVERRIDE_PROTO");
  // Skip the test if RCCL_OVERRIDE_PROTO is not set or if it's set to a valid
  // value
  if (!protoOverrideEnv || isProtoStrValid(protoOverrideEnv)) {
    GTEST_SKIP()
        << "Skipping test: Variable RCCL_OVERRIDE_PROTO is not set or set to a "
           "valid value. Set it to an invalid protocol value to run this test.";
  }
  // Value-initialize the table: it is handed to the function under test, so
  // it must never carry indeterminate floats, whichever entries get read.
  float table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS] = {};
  ncclTaskColl info = {};
  ncclResult_t result = rcclOverrideProtocol(ncclProtoStr, table, &info);
  EXPECT_EQ(result, ncclInvalidUsage) << "Expected ncclInvalidUsage when the "
                                         "override protocol is invalid.";
}
// Calls rcclOverrideProtocol twice while RCCL_OVERRIDE_PROTO holds a value
// that isProtoStrValid rejects, and expects ncclInvalidUsage from both calls.
// The test is skipped when the variable is unset or holds a valid value.
TEST(Rcclwrap, RcclOverrideProtocol_InvalidOverridePersists) {
  const char *protoOverrideEnv = getenv("RCCL_OVERRIDE_PROTO");
  // Skip the test if RCCL_OVERRIDE_PROTO is not set or if it's set to a valid
  // value
  if (!protoOverrideEnv || isProtoStrValid(protoOverrideEnv)) {
    GTEST_SKIP()
        << "Skipping test: Variable RCCL_OVERRIDE_PROTO is not set or set to a "
           "valid value. Set it to an invalid protocol value to run this test.";
  }
  // Value-initialize the table: it is handed to the function under test, so
  // it must never carry indeterminate floats, whichever entries get read.
  float table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS] = {};
  ncclTaskColl info = {};
  // First call should fail due to invalid proto string
  ncclResult_t result1 = rcclOverrideProtocol(ncclProtoStr, table, &info);
  EXPECT_EQ(result1, ncclInvalidUsage)
      << "Expected rcclOverrideProtocol to fail with invalid "
         "RCCL_OVERRIDE_PROTO.";
  // Second call should still fail; presumably the implementation remembers
  // the rejected value in static state (not visible from this file).
  ncclResult_t result2 = rcclOverrideProtocol(ncclProtoStr, table, &info);
  EXPECT_EQ(result2, ncclInvalidUsage)
      << "Expected rcclOverrideProtocol to continue returning failure after "
         "invalid proto was set.";
}
// Verifies that rcclOverrideAlgorithm returns ncclSuccess when the
// RCCL_OVERRIDE_ALGO environment variable is not present.
// The test is skipped whenever the variable is set.
TEST(Rcclwrap, RcclOverrideAlgorithm_NoOverride) {
  const char *algoOverrideEnv = getenv("RCCL_OVERRIDE_ALGO");
  // Skip the test if RCCL_OVERRIDE_ALGO is set
  if (algoOverrideEnv) {
    GTEST_SKIP() << "Skipping test: Variable RCCL_OVERRIDE_ALGO is set. Unset "
                    "it to run this test.";
  }
  // Value-initialize the table: it is handed to the function under test, so
  // it must never carry indeterminate floats, whichever entries get read.
  float table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS] = {};
  ncclTaskColl info = {};
  ncclResult_t result = rcclOverrideAlgorithm(ncclAlgoStr, table, &info);
  // Since no override is set, it should return success and do nothing
  EXPECT_EQ(result, ncclSuccess)
      << "Expected ncclSuccess when RCCL_OVERRIDE_ALGO is unset, indicating no "
         "override should be applied.";
}
// With RCCL_OVERRIDE_ALGO accepted by isAlgoStrValid, every
// algorithm/protocol slot of the table is filled with NCCL_ALGO_PROTO_IGNORE
// and rcclOverrideAlgorithm is expected to return ncclInternalError.
// The test is skipped when isAlgoStrValid rejects the environment value.
TEST(Rcclwrap, RcclOverrideAlgorithm_UnsupportedOverride) {
  const char *overrideValue = getenv("RCCL_OVERRIDE_ALGO");
  // Nothing to test unless the environment carries a value that passes
  // validation.
  if (!isAlgoStrValid(overrideValue)) {
    GTEST_SKIP() << "Skipping test: RCCL_OVERRIDE_ALGO is not set or "
                    "set to an invalid value. Set it to a valid algorithm to "
                    "run this test.";
  }

  // Flag every (algorithm, protocol) pair as unsupported.
  float table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
  for (auto &protoRow : table) {
    for (float &slot : protoRow) {
      slot = NCCL_ALGO_PROTO_IGNORE;
    }
  }

  ncclTaskColl info = {};
  info.protocol = NCCL_PROTO_SIMPLE; // Any protocol will do
  info.datatype = ncclBfloat16;
  info.func = ncclFuncReduceScatter;

  const ncclResult_t result = rcclOverrideAlgorithm(ncclAlgoStr, table, &info);
  EXPECT_EQ(result, ncclInternalError)
      << "Expected ncclInternalError when the override algorithm is valid, but "
         "not enabled for the selected protocol.";
}
// With RCCL_OVERRIDE_ALGO accepted by isAlgoStrValid and a table whose
// entries are all 0.0, rcclOverrideAlgorithm is expected to return ncclSuccess
// and to store in info.algorithm the index that rcclGetAlgoProtoIndex resolves
// for the environment string.
// The test is skipped when isAlgoStrValid rejects the environment value.
TEST(Rcclwrap, RcclOverrideAlgorithm_ValidOverride) {
  const char *overrideValue = getenv("RCCL_OVERRIDE_ALGO");
  // Nothing to test unless the environment carries a value that passes
  // validation.
  if (!isAlgoStrValid(overrideValue)) {
    GTEST_SKIP() << "Skipping test: RCCL_OVERRIDE_ALGO is not set or set to "
                    "an invalid value. Set it to a valid algorithm name (e.g., "
                    "'Ring') to run this test.";
  }

  // Resolve the expected algorithm index up front; abort if that fails.
  int algoIndex = NCCL_ALGO_UNDEF;
  const ncclResult_t idxResult = rcclGetAlgoProtoIndex(
      overrideValue, ncclAlgoStr, NCCL_NUM_ALGORITHMS, algoIndex);
  ASSERT_EQ(idxResult, ncclSuccess)
      << "Failed to get algorithm index from string";

  // Treat every (algorithm, protocol) pair as valid for this test.
  float table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
  for (auto &protoRow : table) {
    for (float &slot : protoRow) {
      slot = 0.0;
    }
  }

  ncclTaskColl info = {};
  info.algorithm = NCCL_ALGO_UNDEF;
  info.protocol = NCCL_PROTO_SIMPLE; // Any protocol will do
  info.datatype = ncclBfloat16;
  info.func = ncclFuncAllReduce;

  const ncclResult_t result = rcclOverrideAlgorithm(ncclAlgoStr, table, &info);
  EXPECT_EQ(result, ncclSuccess)
      << "Expected ncclSuccess when override is applied successfully.";
  EXPECT_EQ(info.algorithm, algoIndex)
      << "Algorithm index should match the override value from environment.";
}
// Calls rcclOverrideAlgorithm twice with the same arguments while
// RCCL_OVERRIDE_ALGO holds a value accepted by isAlgoStrValid. Both calls are
// expected to return ncclSuccess and leave info.algorithm equal to the index
// that rcclGetAlgoProtoIndex resolves for the environment string.
// The test is skipped when isAlgoStrValid rejects the environment value.
TEST(Rcclwrap, RcclOverrideAlgorithm_ValidOverridePersists) {
  const char *overrideValue = getenv("RCCL_OVERRIDE_ALGO");
  // Nothing to test unless the environment carries a value that passes
  // validation.
  if (!isAlgoStrValid(overrideValue)) {
    GTEST_SKIP()
        << "Skipping test: RCCL_OVERRIDE_ALGO is not set or set to an invalid "
           "value. Set it to a valid algorithm name (e.g., 'Ring') to run this "
           "test.";
  }

  // Resolve the expected algorithm index up front; abort if that fails.
  int algoIndex = NCCL_ALGO_UNDEF;
  const ncclResult_t idxResult = rcclGetAlgoProtoIndex(
      overrideValue, ncclAlgoStr, NCCL_NUM_ALGORITHMS, algoIndex);
  ASSERT_EQ(idxResult, ncclSuccess)
      << "Failed to get algorithm index from string";

  // Treat every (algorithm, protocol) pair as valid for this test.
  float table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
  for (auto &protoRow : table) {
    for (float &slot : protoRow) {
      slot = 0.0;
    }
  }

  ncclTaskColl info = {};
  info.algorithm = NCCL_ALGO_UNDEF;
  info.protocol = NCCL_PROTO_SIMPLE; // Any protocol will do
  info.datatype = ncclFloat16;
  info.func = ncclFuncAllReduce;

  // Initial invocation.
  const ncclResult_t result1 = rcclOverrideAlgorithm(ncclAlgoStr, table, &info);
  EXPECT_EQ(result1, ncclSuccess)
      << "Expected rcclOverrideAlgorithm to succeed with valid override.";
  EXPECT_EQ(info.algorithm, algoIndex)
      << "Expected algorithm to match override after first call.";

  // Repeat invocation with untouched inputs; the outcome must be the same.
  const ncclResult_t result2 = rcclOverrideAlgorithm(ncclAlgoStr, table, &info);
  EXPECT_EQ(result2, ncclSuccess)
      << "Expected rcclOverrideAlgorithm to succeed again on second call.";
  EXPECT_EQ(info.algorithm, algoIndex)
      << "Expected algorithm to match override after second call.";
}
// Verifies that rcclOverrideAlgorithm returns ncclInvalidUsage when
// RCCL_OVERRIDE_ALGO is set to a value that isAlgoStrValid rejects.
// The test is skipped when the variable is unset or holds a valid value.
TEST(Rcclwrap, RcclOverrideAlgorithm_InvalidAlgorithm) {
  const char *algoOverrideEnv = getenv("RCCL_OVERRIDE_ALGO");
  // Skip the test if RCCL_OVERRIDE_ALGO is not set or if it's set to a valid
  // value
  if (!algoOverrideEnv || isAlgoStrValid(algoOverrideEnv)) {
    GTEST_SKIP() << "Skipping test: RCCL_OVERRIDE_ALGO is not set or set to a "
                    "valid value. Set it to an invalid algorithm value to run "
                    "this test.";
  }
  // Value-initialize the table: it is handed to the function under test, so
  // it must never carry indeterminate floats, whichever entries get read.
  float table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS] = {};
  ncclTaskColl info = {};
  ncclResult_t result = rcclOverrideAlgorithm(ncclAlgoStr, table, &info);
  EXPECT_EQ(result, ncclInvalidUsage)
      << "Expected ncclInvalidUsage when the override algorithm is invalid.";
}
// Calls rcclOverrideAlgorithm twice while RCCL_OVERRIDE_ALGO holds a value
// that isAlgoStrValid rejects, and expects ncclInvalidUsage from both calls.
// The test is skipped when the variable is unset or holds a valid value.
TEST(Rcclwrap, RcclOverrideAlgorithm_InvalidOverridePersists) {
  const char *algoOverrideEnv = getenv("RCCL_OVERRIDE_ALGO");
  // Skip the test if RCCL_OVERRIDE_ALGO is not set or if it's set to a valid
  // value
  if (!algoOverrideEnv || isAlgoStrValid(algoOverrideEnv)) {
    GTEST_SKIP()
        << "Skipping test: RCCL_OVERRIDE_ALGO is not set or set to a valid "
           "value. Set it to an invalid algorithm name to run this test.";
  }
  // Value-initialize the table: it is handed to the function under test, so
  // it must never carry indeterminate floats, whichever entries get read.
  float table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS] = {};
  ncclTaskColl info = {};
  // First call should fail due to invalid algo string
  ncclResult_t result1 = rcclOverrideAlgorithm(ncclAlgoStr, table, &info);
  EXPECT_EQ(result1, ncclInvalidUsage)
      << "Expected rcclOverrideAlgorithm to fail with invalid "
         "RCCL_OVERRIDE_ALGO.";
  // Second call should also fail; presumably the implementation remembers the
  // rejected value in static state (not visible from this file).
  ncclResult_t result2 = rcclOverrideAlgorithm(ncclAlgoStr, table, &info);
  EXPECT_EQ(result2, ncclInvalidUsage)
      << "Expected rcclOverrideAlgorithm to continue returning failure after "
         "invalid algo was set.";
}
} // namespace RcclUnitTesting