From 18e9ad913b6a589ad9515885fdf9f82598eda310 Mon Sep 17 00:00:00 2001 From: corey-derochie-amd <161367113+corey-derochie-amd@users.noreply.github.com> Date: Wed, 10 Dec 2025 10:06:44 -0700 Subject: [PATCH] Fixed unit-test env var list parsing and improved filtered test run speed (#1626) * Fixed parsing of env var lists which were overwriting the mutable env var string and polluting future parses. * Fixed all tests to obey UT_DATATYPES and UT_REDOPS filters. * Allow tests to bail early via `GTEST_SKIP` if UT_DATATYPES or UT_REDOPS filters give a test size of zero. This allows tests to run much faster with filters on. * Wrapped the support checks in helper functions on `TestBed`. --- test/AllToAllVTests.cpp | 16 +++++++++-- test/GroupCallTests.cpp | 50 +++++++++++++++++++++++++++----- test/SendRecvTests.cpp | 18 ++++++++++-- test/common/EnvVars.cpp | 10 +++++-- test/common/TestBed.cpp | 64 +++++++++++++++++++++++++---------------- test/common/TestBed.hpp | 6 ++++ 6 files changed, 126 insertions(+), 38 deletions(-) diff --git a/test/AllToAllVTests.cpp b/test/AllToAllVTests.cpp index aa6615fc33..b42da15c2a 100644 --- a/test/AllToAllVTests.cpp +++ b/test/AllToAllVTests.cpp @@ -79,13 +79,19 @@ namespace RcclUnitTesting TestBed testBed; // Configuration - std::vector const& dataTypes = {ncclInt32, ncclFloat64, ncclFloat16}; + std::vector const& testDataTypes = {ncclInt32, ncclFloat64, ncclFloat16}; bool const inPlace = false; bool const useManagedMem = false; bool const useHipGraph = false; OptionalColArgs options; + std::vector dataTypes; + testBed.GetSupportedDataTypes(dataTypes, testDataTypes); + if (dataTypes.empty()) { + GTEST_SKIP() << "Skipping... test datatypes excluded by UT_DATATYPES."; + } + bool isCorrect = true; for (int totalRanks : testBed.ev.GetNumGpusList()) for (int isMultiProcess : testBed.ev.GetIsMultiProcessList()) @@ -137,13 +143,19 @@ namespace RcclUnitTesting TestBed testBed; // Configuration - std::vector const& dataTypes = {ncclFloat32, ncclInt8}; + std::vector const& testDataTypes = {ncclFloat32, ncclInt8}; bool const inPlace = false; bool const useManagedMem = false; bool const useHipGraph = false; OptionalColArgs options; + std::vector dataTypes; + testBed.GetSupportedDataTypes(dataTypes, testDataTypes); + if (dataTypes.empty()) { + GTEST_SKIP() << "Skipping... test datatypes excluded by UT_DATATYPES."; + } + bool isCorrect = true; for (int totalRanks : testBed.ev.GetNumGpusList()) for (int isMultiProcess : testBed.ev.GetIsMultiProcessList()) diff --git a/test/GroupCallTests.cpp b/test/GroupCallTests.cpp index 888854d904..87b8d50940 100644 --- a/test/GroupCallTests.cpp +++ b/test/GroupCallTests.cpp @@ -14,14 +14,26 @@ namespace RcclUnitTesting // Configuration std::vector const funcTypes = {ncclCollAllReduce, ncclCollAllReduce, ncclCollAllReduce}; - std::vector const redOps = {ncclSum, ncclSum, ncclSum}; - std::vector const dataTypes = {ncclFloat, ncclFloat, ncclFloat}; + std::vector const testRedOps = {ncclSum, ncclSum, ncclSum}; + std::vector const testDataTypes = {ncclFloat, ncclFloat, ncclFloat}; std::vector const numElements = {1048576, 384 * 1024, 384}; int const numCollPerGroup = numElements.size(); bool const inPlace = false; bool const useManagedMem = false; + std::vector dataTypes; + testBed.GetSupportedDataTypes(dataTypes, testDataTypes); + if (dataTypes.empty()) { + GTEST_SKIP() << "Skipping... test datatypes excluded by UT_DATATYPES."; + } + + std::vector redOps; + testBed.GetSupportedRedOps(redOps, testRedOps); + if (redOps.empty()) { + GTEST_SKIP() << "Skipping... test reduction operations excluded by UT_REDOPS."; + } + bool isCorrect = true; for (int totalRanks : testBed.ev.GetNumGpusList()) for (int isMultiProcess : testBed.ev.GetIsMultiProcessList()) @@ -127,14 +139,26 @@ namespace RcclUnitTesting // Configuration std::vector const funcTypes = {ncclCollAllReduce, ncclCollAllReduce, ncclCollAllReduce}; - std::vector const redOps = {ncclSum, ncclSum, ncclSum}; - std::vector const dataTypes = {ncclFloat16, ncclFloat32, ncclFloat64}; + std::vector const testRedOps = {ncclSum, ncclSum, ncclSum}; + std::vector const testDataTypes = {ncclFloat16, ncclFloat32, ncclFloat64}; std::vector const numElements = {1048576, 384 * 1024, 384}; int const numCollPerGroup = numElements.size(); bool const inPlace = false; bool const useManagedMem = false; + std::vector dataTypes; + testBed.GetSupportedDataTypes(dataTypes, testDataTypes); + if (dataTypes.empty()) { + GTEST_SKIP() << "Skipping... test datatypes excluded by UT_DATATYPES."; + } + + std::vector redOps; + testBed.GetSupportedRedOps(redOps, testRedOps); + if (redOps.empty()) { + GTEST_SKIP() << "Skipping... test reduction operations excluded by UT_REDOPS."; + } + bool isCorrect = true; for (int totalRanks : testBed.ev.GetNumGpusList()) for (int isMultiProcess : testBed.ev.GetIsMultiProcessList()) @@ -230,8 +254,8 @@ namespace RcclUnitTesting {ncclCollAllToAll, ncclCollGather}, {ncclCollBroadcast, ncclCollReduceScatter}}; std::vector> const numElements = {{1250, 1048576}, {384, 384 * 1024}, {1048576, 127}}; - std::vector const dataTypes = {ncclFloat16, ncclFloat32, ncclBfloat16}; - std::vector const redops = {ncclSum, ncclProd, ncclMax}; + std::vector const testDataTypes = {ncclFloat16, ncclFloat32, ncclBfloat16}; + std::vector const testRedOps = {ncclSum, ncclProd, ncclMax}; std::vector const numCollsPerGroup = {2, 2, 2}; std::vector const numStreamsPerGroup = {1, 1, 1}; std::vector const useHipGraphList = {true, false, true}; @@ -241,6 +265,18 @@ namespace RcclUnitTesting int const numGroupCalls = groupCalls.size(); int const numIterations = 10; + std::vector dataTypes; + testBed.GetSupportedDataTypes(dataTypes, testDataTypes); + if (dataTypes.empty()) { + GTEST_SKIP() << "Skipping... test datatypes excluded by UT_DATATYPES."; + } + + std::vector redOps; + testBed.GetSupportedRedOps(redOps, testRedOps); + if (redOps.empty()) { + GTEST_SKIP() << "Skipping... test reduction operations excluded by UT_REDOPS."; + } + bool isCorrect = true; for (int totalRanks : testBed.ev.GetNumGpusList()) for (int isMultiProcess : testBed.ev.GetIsMultiProcessList()) @@ -258,7 +294,7 @@ namespace RcclUnitTesting { std::vector funcTypes = groupCalls[groupCallIdx]; OptionalColArgs options; - options.redOp = redops[groupCallIdx]; + options.redOp = redOps[groupCallIdx]; options.root = 0; for (int collIdx = 0; collIdx < numCollsPerGroup[groupCallIdx]; ++collIdx) diff --git a/test/SendRecvTests.cpp b/test/SendRecvTests.cpp index 9c7b21361c..c12d060f08 100644 --- a/test/SendRecvTests.cpp +++ b/test/SendRecvTests.cpp @@ -12,12 +12,19 @@ namespace RcclUnitTesting TestBed testBed; // Configuration - std::vector const& dataTypes = {ncclInt32, ncclFloat16, ncclFloat64}; + std::vector const& testDataTypes = {ncclInt32, ncclFloat16, ncclFloat64}; std::vector const numElements = {1048576, 53327, 1024, 0}; bool const inPlace = false; bool const useManagedMem = false; OptionalColArgs options; + + std::vector dataTypes; + testBed.GetSupportedDataTypes(dataTypes, testDataTypes); + if (dataTypes.empty()) { + GTEST_SKIP() << "Skipping... test datatypes excluded by UT_DATATYPES."; + } + bool isCorrect = true; int numGpus = testBed.ev.maxGpus; for (int rpg=0; rpg < 2 && isCorrect; ++rpg) @@ -104,13 +111,20 @@ namespace RcclUnitTesting TestBed testBed; // Configuration - std::vector const& dataTypes = {ncclInt32, ncclFloat16, ncclFloat64}; + std::vector const& testDataTypes = {ncclInt32, ncclFloat16, ncclFloat64}; std::vector const numElements = {1048576, 53327, 1024}; bool const inPlace = false; bool const useManagedMem = false; bool const userRegistered = true; OptionalColArgs options; + + std::vector dataTypes; + testBed.GetSupportedDataTypes(dataTypes, testDataTypes); + if (dataTypes.empty()) { + GTEST_SKIP() << "Skipping... test datatypes excluded by UT_DATATYPES."; + } + bool isCorrect = true; int numGpus = testBed.ev.maxGpus; for (int rpg=0; rpg < 2 && isCorrect; ++rpg) diff --git a/test/common/EnvVars.cpp b/test/common/EnvVars.cpp index de8b44ef51..d666b783c2 100644 --- a/test/common/EnvVars.cpp +++ b/test/common/EnvVars.cpp @@ -9,7 +9,9 @@ #include #include #include +#include #include +#include #include namespace RcclUnitTesting @@ -337,11 +339,13 @@ namespace RcclUnitTesting std::vector result; if (getenv(varname.c_str())) { - char* token = strtok(getenv(varname.c_str()), ",;"); - while (token != NULL) + std::string env = getenv(varname.c_str()); + std::replace(env.begin(), env.end(), ';', ','); + std::istringstream ss(env); + std::string token; + while (std::getline(ss, token, ',')) { result.push_back(token); - token = strtok(NULL, ",;"); } } return result; diff --git a/test/common/TestBed.cpp b/test/common/TestBed.cpp index f870ae0735..daeef85ce6 100644 --- a/test/common/TestBed.cpp +++ b/test/common/TestBed.cpp @@ -564,6 +564,40 @@ namespace RcclUnitTesting return ev.GetAllSupportedDataTypes(); } + void TestBed::GetSupportedRedOps(std::vector& redOps, const std::vector& testRedOps) + { + // Filter out any unsupported reduction ops, in case only subset has been compiled for + auto& supportedOps = ev.GetAllSupportedRedOps(); + for (auto redop : testRedOps) + { + for (int i = 0; i < supportedOps.size(); ++i) + { + if (supportedOps[i] == redop) + { + redOps.push_back(redop); + break; + } + } + } + } + + void TestBed::GetSupportedDataTypes(std::vector& dataTypes, const std::vector& testDataTypes) + { + // Filter out any unsupported datatypes, in case only subset has been compiled for + auto& supportedDataTypes = ev.GetAllSupportedDataTypes(); + for (auto dt : testDataTypes) + { + for (int i = 0; i < supportedDataTypes.size(); ++i) + { + if (supportedDataTypes[i] == dt) + { + dataTypes.push_back(dt); + break; + } + } + } + } + std::vector const TestBed::GetNumCollsPerGroup(int numCollectivesInGroup, int numGroupCalls) { @@ -642,34 +676,16 @@ namespace RcclUnitTesting std::vector sortedN = numElements; std::sort(sortedN.rbegin(), sortedN.rend()); OptionalColArgs optionalArgs; - // Filter out any unsupported datatypes, in case only subset has been compiled for - std::vector const& supportedDataTypes = this->GetAllSupportedDataTypes(); std::vector dataTypes; - for (auto dt : tmpDataTypes) - { - for (int i = 0; i < supportedDataTypes.size(); ++i) - { - if (supportedDataTypes[i] == dt) - { - dataTypes.push_back(dt); - break; - } - } + this->GetSupportedDataTypes(dataTypes, tmpDataTypes); + if (dataTypes.empty()) { + GTEST_SKIP() << "Skipping... test datatypes excluded by UT_DATATYPES."; } - // Filter out any unsupported reduction ops, in case only subset has been compiled for - std::vector const& supportedOps = this->GetAllSupportedRedOps(); std::vector redOps; - for (auto redop : tmpRedOps) - { - for (int i = 0; i < supportedOps.size(); ++i) - { - if (supportedOps[i] == redop) - { - redOps.push_back(redop); - break; - } - } + this->GetSupportedRedOps(redOps, tmpRedOps); + if (redOps.empty()) { + GTEST_SKIP() << "Skipping... test reduction operations excluded by UT_REDOPS."; } bool isCorrect = true; diff --git a/test/common/TestBed.hpp b/test/common/TestBed.hpp index e1a3215da0..26c8edbc68 100644 --- a/test/common/TestBed.hpp +++ b/test/common/TestBed.hpp @@ -127,6 +127,12 @@ namespace RcclUnitTesting // Return all the supported data types based on build settings std::vector const& GetAllSupportedDataTypes(); + // Returns the intersection of testRedOps with supported reduction operations as redOps. + void GetSupportedRedOps(std::vector& redOps, const std::vector& testRedOps); + + // Returns the intersection of testDataTypes with supported data types as dataTypes. + void GetSupportedDataTypes(std::vector& dataTypes, const std::vector& testDataTypes); + // Return a list for # of collectives per group std::vector const GetNumCollsPerGroup(int const numCollectivesInGroup, int const numGroupCalls);