Created coverage tests for rccl_wrap (#1694)
* Created coverage tests for rccl_wrap
RCCL_EXPOSE_STATIC off by default
Coverage tests for rccl_wrap.cc
* Remove RCCL_EXPOSE_STATIC dependency
* Removed Rcclwrap.RcclGetAlgoInfoTest
* Remove comments
* Corrected RCCL_EXPOSE_STATIC definition logic
---------
Co-authored-by: Welling <awelling@ctr2-alola-login-01.amd.com>
Co-authored-by: Atul Kulkarni <atul.kulkarni@amd.com>
[ROCm/rccl commit: 82bea39280]
这个提交包含在:
@@ -40,7 +40,6 @@ option(TRACE "Enable additional tracing"
|
||||
option(FAULT_INJECTION "Enable fault injection" ON)
|
||||
option(FORCE_REDUCE_PIPELINING "Force reduce pipelining" OFF)
|
||||
option(DISABLE_CHEAP_THREADFENCE "Compile-time killswitch for simpler fence" OFF)
|
||||
option(RCCL_EXPOSE_STATIC "Expose internal static functions for testing" OFF)
|
||||
|
||||
# Default GPU architectures to build
|
||||
#==================================================================================================
|
||||
@@ -372,9 +371,6 @@ if(ROCTX)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(RCCL_EXPOSE_STATIC)
|
||||
add_definitions(-DRCCL_EXPOSE_STATIC)
|
||||
endif()
|
||||
# Determine version from makefiles/version.mk and fill in templates
|
||||
#==================================================================================================
|
||||
## parse version from Makefile NCCL_MAJOR, NCCL_MINOR, NCCL_PATCH must exist
|
||||
@@ -739,6 +735,9 @@ add_custom_target(hipify_all DEPENDS ${HIP_SOURCES})
|
||||
|
||||
if (BUILD_TESTS)
|
||||
if (ROCM_VERSION VERSION_GREATER_EQUAL "60400" AND CMAKE_BUILD_TYPE MATCHES "Debug")
|
||||
## Set definition for exposing rccl static function
|
||||
add_definitions(-DRCCL_EXPOSE_STATIC)
|
||||
|
||||
set(HIPIFY_SRC_DIR "${PROJECT_BINARY_DIR}/hipify/src")
|
||||
set(REPLACE_SCRIPT "${CMAKE_SOURCE_DIR}/tools/scripts/replace_static.sh")
|
||||
message ("Replacing static functions in ${HIPIFY_SRC_DIR} with ${REPLACE_SCRIPT} for unit tests")
|
||||
|
||||
@@ -143,6 +143,7 @@ if(BUILD_TESTS)
|
||||
ArgCheckTests.cpp
|
||||
IpcsocketTests.cpp
|
||||
CollRegTests.cpp
|
||||
RcclWrapTests.cpp
|
||||
ShmTests.cpp
|
||||
P2pTests.cpp
|
||||
BitOpsTests.cpp
|
||||
|
||||
@@ -0,0 +1,173 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* See LICENSE.txt for license information
|
||||
************************************************************************/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <rccl/rccl.h>
|
||||
#include <comm.h>
|
||||
#include <graph/topo.h>
|
||||
|
||||
#include "TestBed.hpp"
|
||||
|
||||
|
||||
namespace RcclUnitTesting
|
||||
{
|
||||
|
||||
// Helper function to test the static expose check
|
||||
ncclResult_t testStaticExposeCheck() {
|
||||
RCCL_STATIC_EXPOSE_CHECK();
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
// Checks the static-expose guard and then rcclFuncMaxSendRecvCount() for an
// AllReduce with the arguments (4, 1024).
TEST(Rcclwrap, RcclFuncMaxSendRecvCount) {
    // The guard helper must succeed only in builds that define
    // RCCL_EXPOSE_STATIC; otherwise it must report invalid usage.
    const ncclResult_t staticCheckResult = testStaticExposeCheck();
#ifdef RCCL_EXPOSE_STATIC
    EXPECT_EQ(staticCheckResult, ncclSuccess);
#else
    EXPECT_EQ(staticCheckResult, ncclInvalidUsage);
#endif

    size_t maxCount = 0;
    const ncclResult_t result = rcclFuncMaxSendRecvCount(ncclFuncAllReduce, 4, 1024, maxCount);

    // Check the status first: the out-parameter is only meaningful on success.
    EXPECT_EQ(result, ncclSuccess);
    // Compare against a size_t value; EXPECT_EQ(size_t, int) mixes signedness
    // and trips -Wsign-compare inside the gtest comparison helper.
    EXPECT_EQ(maxCount, static_cast<size_t>(1024));
}
|
||||
|
||||
// With no NCCL_PROTO override and LL / LL128 size ranges that both contain
// the message size, rcclUpdateCollectiveProtocol() must pick a low-latency
// protocol for AllReduce.
TEST(Rcclwrap, RcclUpdateCollectiveProtocol_UsesLL128WhenInRange) {
    // Auto-selection mode: make sure no user override is present. (A preceding
    // setenv to "" would be undone by this call, so it is not needed.)
    unsetenv("NCCL_PROTO");

    // Minimal hand-built communicator; only the fields set below are populated.
    ncclComm_t comm = new ncclComm();
    *comm = {};
    comm->nRanks = 1;
    comm->nNodes = 2;  // multi-node; per the original author this triggers the inter-node logic
    comm->rank = 0;

    comm->topo = new ncclTopoSystem();
    *comm->topo = {};
    comm->topo->ll128Enabled = true;
    comm->topo->nodes[GPU].count = 1;
    auto& gpuNode = comm->topo->nodes[GPU].nodes[0];
    gpuNode = {};
    strncpy(gpuNode.gpu.gcn, "gfx942", sizeof(gpuNode.gpu.gcn));

    // LL covers [512, 1024] and LL128 covers [256, 2048] for AllReduce.
    const int idx = rcclGetTunableIndex(ncclFuncAllReduce);
    auto& llRange = comm->minMaxLLRange[idx];
    llRange[NCCL_PROTO_LL][RCCL_PROTOCOL_MIN_IDX] = 512;
    llRange[NCCL_PROTO_LL][RCCL_PROTOCOL_MAX_IDX] = 1024;
    llRange[NCCL_PROTO_LL128][RCCL_PROTOCOL_MIN_IDX] = 256;
    llRange[NCCL_PROTO_LL128][RCCL_PROTOCOL_MAX_IDX] = 2048;
    llRange[NCCL_PROTO_LL128][RCCL_PROTOCOL_FACTOR_IDX] = 1;

    // Minimal task description: AllReduce with no protocol chosen yet.
    ncclTaskColl info = {};
    info.func = ncclFuncAllReduce;
    info.protocol = NCCL_PROTO_UNDEF;

    const size_t nBytes = 1024;  // inside both configured ranges

    rcclUpdateCollectiveProtocol(comm, nBytes, &info);

    // NOTE(review): the test name says LL128, but the assertion also accepts
    // LL because 1024 bytes lies inside both ranges — confirm which one the
    // selection logic is meant to prefer and tighten this check.
    EXPECT_TRUE(info.protocol == NCCL_PROTO_LL128 || info.protocol == NCCL_PROTO_LL);

    delete comm->topo;
    delete comm;
}
|
||||
|
||||
// With no NCCL_PROTO override and every LL / LL128 limit left undefined,
// rcclUpdateCollectiveProtocol() must leave the task's protocol untouched.
TEST(Rcclwrap, RcclUpdateCollectiveProtocol_WarnsOnGfx942Arch) {
    // Auto-selection mode: make sure no user override is present. (A preceding
    // setenv to "" would be undone by this call, so it is not needed.)
    unsetenv("NCCL_PROTO");

    // Minimal hand-built communicator; only the fields set below are populated.
    ncclComm_t comm = new ncclComm();
    *comm = {};
    comm->nRanks = 1;
    comm->nNodes = 2;  // multi-node; per the original author this triggers the inter-node logic
    comm->rank = 0;

    comm->topo = new ncclTopoSystem();
    *comm->topo = {};  // explicit zeroing, matching the sibling tests
    comm->topo->ll128Enabled = true;
    // NOTE(review): unlike the sibling tests, nodes[GPU].count is left at 0
    // here — confirm whether that is intentional.
    auto& gpuNode = comm->topo->nodes[GPU].nodes[0];
    gpuNode = {};
    strncpy(gpuNode.gpu.gcn, "gfx942", sizeof(gpuNode.gpu.gcn));

    // Mark every tunable limit for AllReduce as undefined.
    const int idx = rcclGetTunableIndex(ncclFuncAllReduce);
    auto& llRange = comm->minMaxLLRange[idx];
    llRange[NCCL_PROTO_LL][RCCL_PROTOCOL_MIN_IDX] = RCCL_LL_LIMITS_UNDEFINED;
    llRange[NCCL_PROTO_LL][RCCL_PROTOCOL_MAX_IDX] = RCCL_LL_LIMITS_UNDEFINED;
    llRange[NCCL_PROTO_LL128][RCCL_PROTOCOL_MIN_IDX] = RCCL_LL_LIMITS_UNDEFINED;
    llRange[NCCL_PROTO_LL128][RCCL_PROTOCOL_MAX_IDX] = RCCL_LL_LIMITS_UNDEFINED;
    llRange[NCCL_PROTO_LL128][RCCL_PROTOCOL_FACTOR_IDX] = RCCL_LL_LIMITS_UNDEFINED;

    // Minimal task description: AllReduce with no protocol chosen yet.
    ncclTaskColl info = {};
    info.func = ncclFuncAllReduce;
    info.protocol = NCCL_PROTO_UNDEF;

    const size_t nBytes = 1024;  // total message size; the communicator has a single rank

    rcclUpdateCollectiveProtocol(comm, nBytes, &info);

    // No limits are defined, so the protocol must remain unselected.
    EXPECT_EQ(info.protocol, NCCL_PROTO_UNDEF);

    delete comm->topo;
    delete comm;
}
|
||||
|
||||
// With NCCL_PROTO set by the user, rcclUpdateCollectiveProtocol() is expected
// to leave the task's protocol untouched.
// NOTE(review): the original author questioned why this test passes without
// entering the "else if" branch of the function under test. That function is
// not visible here — confirm which path is actually exercised.
TEST(Rcclwrap, RcclUpdateCollectiveProtocol_HonorsUserProtocolEnv) {
    setenv("NCCL_PROTO", "1", 1);  // simulate a manual user override

    // Minimal hand-built communicator; only the fields set below are populated.
    ncclComm_t comm = new ncclComm();
    *comm = {};
    comm->nRanks = 1;
    comm->nNodes = 2;  // multi-node; per the original author this triggers the inter-node logic
    comm->rank = 0;

    comm->topo = new ncclTopoSystem();
    *comm->topo = {};
    comm->topo->ll128Enabled = true;
    auto& gpuNode = comm->topo->nodes[GPU].nodes[0];
    gpuNode = {};
    strncpy(gpuNode.gpu.gcn, "gfx942", sizeof(gpuNode.gpu.gcn));

    // Minimal task description: AllReduce with no protocol chosen yet.
    ncclTaskColl info = {};
    info.func = ncclFuncAllReduce;
    info.protocol = NCCL_PROTO_UNDEF;

    const size_t nBytes = 1024;  // total message size; the communicator has a single rank

    rcclUpdateCollectiveProtocol(comm, nBytes, &info);

    EXPECT_EQ(info.protocol, NCCL_PROTO_UNDEF);

    delete comm->topo;
    delete comm;

    // Do not leak the override into tests that run after this one.
    unsetenv("NCCL_PROTO");
}
|
||||
|
||||
// With no NCCL_PROTO override, an LL range of [512, 1024] and a message size
// outside that range, rcclUpdateCollectiveProtocol() must select the SIMPLE
// protocol for AllReduce.
TEST(Rcclwrap, RcclUpdateCollectiveProtocol_SimpleFallbackWhenNoRanges) {
    // Auto-selection mode: make sure no user override is present. (A preceding
    // setenv to "" would be undone by this call, so it is not needed.)
    unsetenv("NCCL_PROTO");

    // Minimal hand-built communicator; only the fields set below are populated.
    ncclComm_t comm = new ncclComm();
    *comm = {};
    comm->nRanks = 1;
    comm->nNodes = 2;  // multi-node; per the original author this triggers the inter-node logic
    comm->rank = 0;

    comm->topo = new ncclTopoSystem();
    *comm->topo = {};
    comm->topo->ll128Enabled = true;
    comm->topo->nodes[GPU].count = 1;
    auto& gpuNode = comm->topo->nodes[GPU].nodes[0];
    gpuNode = {};
    strncpy(gpuNode.gpu.gcn, "gfx942", sizeof(gpuNode.gpu.gcn));

    // Only the LL range is configured; the LL128 entries keep the zero values
    // from the `*comm = {}` reset above.
    const int idx = rcclGetTunableIndex(ncclFuncAllReduce);
    auto& llRange = comm->minMaxLLRange[idx];
    llRange[NCCL_PROTO_LL][RCCL_PROTOCOL_MIN_IDX] = 512;
    llRange[NCCL_PROTO_LL][RCCL_PROTOCOL_MAX_IDX] = 1024;

    // Minimal task description: AllReduce with no protocol chosen yet.
    ncclTaskColl info = {};
    info.func = ncclFuncAllReduce;
    info.protocol = NCCL_PROTO_UNDEF;

    const size_t nBytes = 2048;  // above the configured LL maximum of 1024

    rcclUpdateCollectiveProtocol(comm, nBytes, &info);

    EXPECT_EQ(info.protocol, NCCL_PROTO_SIMPLE);

    delete comm->topo;
    delete comm;
}
|
||||
} //RcclUnitTesting
|
||||
在新工单中引用
屏蔽一个用户