Created coverage tests for rccl_wrap (#1694)

* Created coverage tests for rccl_wrap

RCCL_EXPOSE_STATIC off by default

Coverage tests for rccl_wrap.cc

* Remove RCCL_EXPOSE_STATIC dependency

* Removed Rcclwrap.RcclGetAlgoInfoTest

* Remove comments

* Corrected RCCL_EXPOSE_STATIC definition logic

---------

Co-authored-by: Welling <awelling@ctr2-alola-login-01.amd.com>
Co-authored-by: Atul Kulkarni <atul.kulkarni@amd.com>

[ROCm/rccl commit: 82bea39280]
这个提交包含在:
awelling2801
2025-08-06 14:48:00 -05:00
提交者 GitHub
父节点 f34d760613
当前提交 c5b4e1bc78
修改 3 个文件,包含 177 行新增4 行删除
+3 -4
查看文件
@@ -40,7 +40,6 @@ option(TRACE "Enable additional tracing"
option(FAULT_INJECTION "Enable fault injection" ON)
option(FORCE_REDUCE_PIPELINING "Force reduce pipelining" OFF)
option(DISABLE_CHEAP_THREADFENCE "Compile-time killswitch for simpler fence" OFF)
option(RCCL_EXPOSE_STATIC "Expose internal static functions for testing" OFF)
# Default GPU architectures to build
#==================================================================================================
@@ -372,9 +371,6 @@ if(ROCTX)
endif()
endif()
if(RCCL_EXPOSE_STATIC)
add_definitions(-DRCCL_EXPOSE_STATIC)
endif()
# Determine version from makefiles/version.mk and fill in templates
#==================================================================================================
## parse version from Makefile NCCL_MAJOR, NCCL_MINOR, NCCL_PATCH must exist
@@ -739,6 +735,9 @@ add_custom_target(hipify_all DEPENDS ${HIP_SOURCES})
if (BUILD_TESTS)
if (ROCM_VERSION VERSION_GREATER_EQUAL "60400" AND CMAKE_BUILD_TYPE MATCHES "Debug")
## Set definition for exposing rccl static function
add_definitions(-DRCCL_EXPOSE_STATIC)
set(HIPIFY_SRC_DIR "${PROJECT_BINARY_DIR}/hipify/src")
set(REPLACE_SCRIPT "${CMAKE_SOURCE_DIR}/tools/scripts/replace_static.sh")
message ("Replacing static functions in ${HIPIFY_SRC_DIR} with ${REPLACE_SCRIPT} for unit tests")
+1
查看文件
@@ -143,6 +143,7 @@ if(BUILD_TESTS)
ArgCheckTests.cpp
IpcsocketTests.cpp
CollRegTests.cpp
RcclWrapTests.cpp
ShmTests.cpp
P2pTests.cpp
BitOpsTests.cpp
+173
查看文件
@@ -0,0 +1,173 @@
/*************************************************************************
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#include <gtest/gtest.h>
#include <rccl/rccl.h>
#include <comm.h>
#include <graph/topo.h>
#include "TestBed.hpp"
namespace RcclUnitTesting
{
// Helper function to test the static expose check
ncclResult_t testStaticExposeCheck() {
RCCL_STATIC_EXPOSE_CHECK();
return ncclSuccess;
}
TEST(Rcclwrap, RcclFuncMaxSendRecvCount) {
ncclResult_t staticCheckResult = testStaticExposeCheck();
#ifdef RCCL_EXPOSE_STATIC
EXPECT_EQ(staticCheckResult, ncclSuccess);
#else
EXPECT_EQ(staticCheckResult, ncclInvalidUsage);
#endif
size_t maxCount = 0;
ncclResult_t result = rcclFuncMaxSendRecvCount(ncclFuncAllReduce, 4, 1024, maxCount);
EXPECT_EQ(maxCount, 1024);
EXPECT_EQ(result, ncclSuccess);
}
TEST(Rcclwrap, RcclUpdateCollectiveProtocol_UsesLL128WhenInRange) {
setenv("NCCL_PROTO", "", 1); // Trigger auto selection mode
unsetenv("NCCL_PROTO");
ncclComm_t comm = new ncclComm();
*comm = {};
// Manually populate minimal fields for comm
comm->nRanks = 1;
comm->nNodes = 2; // triggers inter-node logic
comm->rank=0;
comm->topo = new ncclTopoSystem();
*comm->topo = {};
comm->topo->ll128Enabled=true;
comm->topo->nodes[GPU].nodes[0] = {};
comm->topo->nodes[GPU].count = 1;
strncpy(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx942", sizeof(comm->topo->nodes[GPU].nodes[0].gpu.gcn));
int idx = rcclGetTunableIndex(ncclFuncAllReduce);
comm->minMaxLLRange[idx][NCCL_PROTO_LL][RCCL_PROTOCOL_MIN_IDX] = 512;
comm->minMaxLLRange[idx][NCCL_PROTO_LL][RCCL_PROTOCOL_MAX_IDX] = 1024;
comm->minMaxLLRange[idx][NCCL_PROTO_LL128][RCCL_PROTOCOL_MIN_IDX] = 256;
comm->minMaxLLRange[idx][NCCL_PROTO_LL128][RCCL_PROTOCOL_MAX_IDX] = 2048;
comm->minMaxLLRange[idx][NCCL_PROTO_LL128][RCCL_PROTOCOL_FACTOR_IDX] = 1;
ncclTaskColl info = {};
// Manually populate minimal fields for info
info.func = ncclFuncAllReduce;
info.protocol = NCCL_PROTO_UNDEF;
size_t nBytes = 1024;
rcclUpdateCollectiveProtocol(comm, nBytes, &info);
EXPECT_TRUE(info.protocol == NCCL_PROTO_LL128 || info.protocol == NCCL_PROTO_LL);
delete comm->topo;
delete comm;
}
TEST(Rcclwrap, RcclUpdateCollectiveProtocol_WarnsOnGfx942Arch) {
setenv("NCCL_PROTO", "", 1);
unsetenv("NCCL_PROTO");
ncclComm_t comm = new ncclComm();
*comm = {};
// Manually populate minimal fields for comm
comm->nRanks = 1;
comm->nNodes = 2; // triggers inter-node logic
comm->rank=0;
comm->topo = new ncclTopoSystem();
comm->topo->ll128Enabled=true;
comm->topo->nodes[GPU].nodes[0] = {};
strncpy(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx942", sizeof(comm->topo->nodes[GPU].nodes[0].gpu.gcn));
int idx = rcclGetTunableIndex(ncclFuncAllReduce);
comm->minMaxLLRange[idx][NCCL_PROTO_LL][RCCL_PROTOCOL_MIN_IDX] = RCCL_LL_LIMITS_UNDEFINED;
comm->minMaxLLRange[idx][NCCL_PROTO_LL][RCCL_PROTOCOL_MAX_IDX] = RCCL_LL_LIMITS_UNDEFINED;
comm->minMaxLLRange[idx][NCCL_PROTO_LL128][RCCL_PROTOCOL_MIN_IDX] = RCCL_LL_LIMITS_UNDEFINED;
comm->minMaxLLRange[idx][NCCL_PROTO_LL128][RCCL_PROTOCOL_MAX_IDX] = RCCL_LL_LIMITS_UNDEFINED;
comm->minMaxLLRange[idx][NCCL_PROTO_LL128][RCCL_PROTOCOL_FACTOR_IDX] = RCCL_LL_LIMITS_UNDEFINED;
ncclTaskColl info = {};
// Manually populate minimal fields for info
info.func = ncclFuncAllReduce;
info.protocol = NCCL_PROTO_UNDEF;
size_t nBytes = 1024; // 1024 per rank for 4 ranks
rcclUpdateCollectiveProtocol(comm, nBytes, &info);
EXPECT_EQ(info.protocol, NCCL_PROTO_UNDEF);
delete comm->topo;
delete comm;
}
TEST(Rcclwrap, RcclUpdateCollectiveProtocol_HonorsUserProtocolEnv) { //Why does this pass if it does not enter the else if block
setenv("NCCL_PROTO", "1", 1); // Simulate manual override
ncclComm_t comm = new ncclComm();
*comm = {};
// Manually populate minimal fields for comm
comm->nRanks = 1;
comm->nNodes = 2; // triggers inter-node logic
comm->rank=0;
comm->topo = new ncclTopoSystem(); //(struct ncclTopoSystem*)calloc(1, sizeof(struct ncclTopoSystem));
*comm->topo = {};
comm->topo->ll128Enabled=true;
comm->topo->nodes[GPU].nodes[0] = {};
strncpy(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx942", sizeof(comm->topo->nodes[GPU].nodes[0].gpu.gcn));
ncclTaskColl info = {};
// Manually populate minimal fields for info
info.func = ncclFuncAllReduce;
info.protocol = NCCL_PROTO_UNDEF;
size_t nBytes = 1024; // 1024 per rank for 4 ranks
rcclUpdateCollectiveProtocol(comm, nBytes, &info);
EXPECT_EQ(info.protocol, NCCL_PROTO_UNDEF);
delete comm->topo;
delete comm;
}
TEST(Rcclwrap, RcclUpdateCollectiveProtocol_SimpleFallbackWhenNoRanges) {
setenv("NCCL_PROTO", "", 1); // Trigger auto selection mode
unsetenv("NCCL_PROTO");
ncclComm_t comm = new ncclComm();
*comm = {};
// Manually populate minimal fields for comm
comm->nRanks = 1;
comm->nNodes = 2; // triggers inter-node logic
comm->rank=0;
comm->topo = new ncclTopoSystem(); //(struct ncclTopoSystem*)calloc(1, sizeof(struct ncclTopoSystem));
*comm->topo = {};
comm->topo->ll128Enabled=true;
comm->topo->nodes[GPU].nodes[0] = {};
comm->topo->nodes[GPU].count = 1;
strncpy(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx942", sizeof(comm->topo->nodes[GPU].nodes[0].gpu.gcn));
int idx = rcclGetTunableIndex(ncclFuncAllReduce);
comm->minMaxLLRange[idx][NCCL_PROTO_LL][RCCL_PROTOCOL_MIN_IDX] = 512;
comm->minMaxLLRange[idx][NCCL_PROTO_LL][RCCL_PROTOCOL_MAX_IDX] = 1024;
// Manually populate minimal fields for info
ncclTaskColl info = {};
info.func = ncclFuncAllReduce;
info.protocol = NCCL_PROTO_UNDEF;
size_t nBytes = 2048; // 1024 per rank for 4 ranks
rcclUpdateCollectiveProtocol(comm, nBytes, &info);
EXPECT_EQ(info.protocol, NCCL_PROTO_SIMPLE);
delete comm->topo;
delete comm;
}
} //RcclUnitTesting