Files
rocm-systems/ext-tuner/example/test/test_plugin.c
T
2026-01-20 13:04:02 -06:00

982 líneas
37 KiB
C

/*************************************************************************
* Unit tests for NCCL Tuner Plugin
************************************************************************/
#define _GNU_SOURCE // Enable setenv/unsetenv and other GNU extensions
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <sys/stat.h>
#include <stdarg.h>
// Include NCCL tuner header (which includes common.h and err.h)
#include "tuner.h"
// Include plugin source for testing
#include "../plugin.c"
// Test framework macros
#define TEST_ASSERT(condition, message) \
do { \
if (!(condition)) { \
printf("FAIL: %s - %s\n", __func__, message); \
return 0; \
} \
} while(0)
#define TEST_PASS() \
do { \
printf("PASS: %s\n", __func__); \
return 1; \
} while(0)
// Global test state
static int test_log_count = 0;
// Mock logger function
void mock_logger(ncclDebugLogLevel level, unsigned long flags,
const char* file, int line, const char* fmt, ...) {
(void)flags; // Suppress unused parameter warning
test_log_count++;
// Check if we should print based on NCCL_DEBUG level
const char* debug_level = getenv("NCCL_DEBUG");
int should_print = 0;
if (debug_level) {
if (strcmp(debug_level, "TRACE") == 0) {
should_print = 1; // Print everything
} else if (strcmp(debug_level, "INFO") == 0 && level <= NCCL_LOG_INFO) {
should_print = 1; // Print INFO and below
} else if (strcmp(debug_level, "WARN") == 0 && level <= NCCL_LOG_WARN) {
should_print = 1; // Print WARN and below
}
}
if (!should_print) return;
// Convert log level to string
const char* level_str;
switch(level) {
case NCCL_LOG_NONE: level_str = "NONE"; break;
case NCCL_LOG_ERROR: level_str = "ERROR"; break;
case NCCL_LOG_VERSION: level_str = "VERSION"; break;
case NCCL_LOG_WARN: level_str = "WARN"; break;
case NCCL_LOG_INFO: level_str = "INFO"; break;
case NCCL_LOG_ABORT: level_str = "ABORT"; break;
case NCCL_LOG_TRACE: level_str = "TRACE"; break;
default: level_str = "UNKNOWN"; break;
}
// Print log header
printf("[TUNER:%s:%s:%d] ", level_str, file, line);
// Print formatted message
va_list args;
va_start(args, fmt);
vprintf(fmt, args);
va_end(args);
printf("\n");
}
// Helper function to create test config file
void create_test_config(const char* filename, const char* content) {
FILE* f = fopen(filename, "w");
if (f) {
fprintf(f, "%s", content);
fclose(f);
}
}
// Test 1: Plugin initialization
int test_plugin_init() {
void* context = NULL;
// Test successful initialization
ncclResult_t result = pluginInit(&context, 0, 8, 2, mock_logger, NULL, NULL);
TEST_ASSERT(result == ncclSuccess, "Plugin init should succeed");
TEST_ASSERT(context != NULL, "Context should be allocated");
// Clean up
pluginFinalize(context);
TEST_PASS();
}
// Test 2: Configuration file parsing - valid CSV
int test_config_parsing_valid() {
const char* test_config =
"# Test configuration\n"
"allreduce,0,65536,tree,simple,2,1,-1,-1,-1\n"
"broadcast,0,32768,ring,ll128,4,2,16,-1,-1\n"
"# Comment line\n"
"\n" // Empty line
"reduce,1024,2048,tree,simple,-1,-1,-1,-1,-1\n";
create_test_config("test_valid.conf", test_config);
// Set environment variable to use our test config
setenv("NCCL_TUNER_CONFIG_FILE", "test_valid.conf", 1);
void* context = NULL;
ncclResult_t result = pluginInit(&context, 0, 16, 2, mock_logger, NULL, NULL);
TEST_ASSERT(result == ncclSuccess, "Plugin init with valid config should succeed");
// Clean up
pluginFinalize(context);
unlink("test_valid.conf");
unsetenv("NCCL_TUNER_CONFIG_FILE");
TEST_PASS();
}
// Test 3: Configuration file parsing - invalid CSV
int test_config_parsing_invalid() {
const char* test_config =
"allreduce,0,65536,tree,simple,2,1 # Missing nRanks and other fields\n"
"invalid_collective,0,1024,ring,simple,1,1,1,-1,-1\n"
"broadcast,abc,def,ring,simple,1,1,1,-1,-1\n"; // Invalid numbers
create_test_config("test_invalid.conf", test_config);
setenv("NCCL_TUNER_CONFIG_FILE", "test_invalid.conf", 1);
void* context = NULL;
ncclResult_t result = pluginInit(&context, 0, 8, 1, mock_logger, NULL, NULL);
// Should still succeed but with no valid configs loaded
TEST_ASSERT(result == ncclSuccess, "Plugin init should succeed even with invalid config");
// Clean up
pluginFinalize(context);
unlink("test_invalid.conf");
unsetenv("NCCL_TUNER_CONFIG_FILE");
TEST_PASS();
}
// Test 4: Collective type matching
int test_collective_matching() {
const char* test_config =
"allreduce,0,65536,tree,simple,8,1,-1,-1,-1\n"
"broadcast,0,32768,ring,ll128,4,-1,-1,-1,-1\n";
create_test_config("test_match.conf", test_config);
setenv("NCCL_TUNER_CONFIG_FILE", "test_match.conf", 1);
void* context = NULL;
pluginInit(&context, 0, 8, 1, mock_logger, NULL, NULL);
// Create mock cost table
float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
float* cost_table_ptr[NCCL_NUM_ALGORITHMS];
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
cost_table_ptr[i] = cost_table[i];
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0; // Default high cost
}
}
int nChannels;
// Test allreduce matching (should match first config)
ncclResult_t result = pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
0, &nChannels);
TEST_ASSERT(result == ncclSuccess, "GetCollInfo should succeed");
mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__,
"DEBUG: Checking cost_table[TREE][SIMPLE] (%p) = %.1f (expecting 0.0)",
&cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE], cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE]);
TEST_ASSERT(cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE] == 0.0, "Tree/Simple should have low cost");
TEST_ASSERT(nChannels == 8, "Should set 8 channels");
// Test broadcast matching (should match second config)
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0; // Reset costs
}
}
result = pluginGetCollInfo(context, ncclFuncBroadcast, 16384, 1,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
0, &nChannels);
TEST_ASSERT(result == ncclSuccess, "GetCollInfo should succeed");
mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__,
"DEBUG: Checking cost_table[RING][LL128] (%p) = %.1f (expecting 0.0)",
&cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128], cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128]);
TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128] == 0.0, "Ring/LL128 should have low cost");
TEST_ASSERT(nChannels == 4, "Should set 4 channels");
// Clean up
pluginFinalize(context);
unlink("test_match.conf");
unsetenv("NCCL_TUNER_CONFIG_FILE");
TEST_PASS();
}
// Test 5: Size range matching
int test_size_matching() {
const char* test_config =
"allreduce,0,1024,tree,simple,2,-1,-1,-1,-1\n"
"allreduce,1025,65536,ring,simple,4,-1,-1,-1,-1\n"
"allreduce,65537,4294967295,ring,ll128,8,-1,-1,-1,-1\n";
create_test_config("test_size.conf", test_config);
setenv("NCCL_TUNER_CONFIG_FILE", "test_size.conf", 1);
void* context = NULL;
pluginInit(&context, 0, 8, 1, mock_logger, NULL, NULL);
float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
float* cost_table_ptr[NCCL_NUM_ALGORITHMS];
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
cost_table_ptr[i] = cost_table[i];
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0;
}
}
int nChannels = 1;
pluginGetCollInfo(context, ncclFuncAllReduce, 512, 1,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
0, &nChannels);
mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__,
"DEBUG: Small message - checking cost_table[TREE][SIMPLE] (%p) = %.1f (expecting 0.0)",
&cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE], cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE]);
TEST_ASSERT(cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE] == 0.0, "Small: Tree/Simple should have low cost");
TEST_ASSERT(nChannels == 2, "Small: Should set 2 channels");
// Test medium message (should match second config)
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0;
}
}
pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
0, &nChannels);
mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__,
"DEBUG: Medium message - checking cost_table[RING][SIMPLE] (%p) = %.1f (expecting 0.0)",
&cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE], cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE]);
TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE] == 0.0, "Medium: Ring/Simple should have low cost");
TEST_ASSERT(nChannels == 4, "Medium: Should set 4 channels");
// Test large message (should match third config)
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0;
}
}
pluginGetCollInfo(context, ncclFuncAllReduce, 1048576, 1,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
0, &nChannels);
mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__,
"DEBUG: Large message - checking cost_table[RING][LL128] (%p) = %.1f (expecting 0.0)",
&cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128], cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128]);
TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128] == 0.0, "Large: Ring/LL128 should have low cost");
TEST_ASSERT(nChannels == 8, "Large: Should set 8 channels");
// Clean up
pluginFinalize(context);
unlink("test_size.conf");
unsetenv("NCCL_TUNER_CONFIG_FILE");
TEST_PASS();
}
// Test 6: Topology matching
int test_topology_matching() {
const char* test_config =
"allreduce,0,65536,tree,simple,2,1,-1,-1,-1\n" // Single node only
"allreduce,0,65536,ring,simple,4,4,32,-1,-1\n" // 4 nodes, 32 ranks exactly
"allreduce,0,65536,ring,ll128,8,-1,-1,-1,-1\n"; // Any topology
create_test_config("test_topo.conf", test_config);
setenv("NCCL_TUNER_CONFIG_FILE", "test_topo.conf", 1);
// Test with single node setup
void* context1 = NULL;
pluginInit(&context1, 0, 8, 1, mock_logger, NULL, NULL); // 8 ranks, 1 node
float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
float* cost_table_ptr[NCCL_NUM_ALGORITHMS];
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
cost_table_ptr[i] = cost_table[i];
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0;
}
}
int nChannels;
pluginGetCollInfo(context1, ncclFuncAllReduce, 32768, 1,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
0, &nChannels);
TEST_ASSERT(cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE] == 0.0, "Single node: Should match tree config");
TEST_ASSERT(nChannels == 2, "Single node: Should set 2 channels");
pluginFinalize(context1);
// Test with 4 nodes, 32 ranks setup
void* context2 = NULL;
pluginInit(&context2, 0, 32, 4, mock_logger, NULL, NULL); // 32 ranks, 4 nodes
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0;
}
}
pluginGetCollInfo(context2, ncclFuncAllReduce, 32768, 1,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
0, &nChannels);
TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE] == 0.0, "4-node: Should match ring/simple config");
TEST_ASSERT(nChannels == 4, "4-node: Should set 4 channels");
// Clean up
unlink("test_topo.conf");
unsetenv("NCCL_TUNER_CONFIG_FILE");
TEST_PASS();
}
// Test 7: Default channels behavior (-1)
int test_default_channels() {
const char* test_config =
"allreduce,0,65536,tree,simple,-1,-1,-1,-1,-1\n"; // Use default channels
create_test_config("test_default.conf", test_config);
setenv("NCCL_TUNER_CONFIG_FILE", "test_default.conf", 1);
void* context = NULL;
pluginInit(&context, 0, 8, 1, mock_logger, NULL, NULL);
float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
float* cost_table_ptr[NCCL_NUM_ALGORITHMS];
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
cost_table_ptr[i] = cost_table[i];
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0;
}
}
int nChannels = 99; // Set to known value
pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
0, &nChannels);
TEST_ASSERT(cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE] == 0.0, "Should apply algorithm/protocol");
TEST_ASSERT(nChannels == 1, "Should keep default channels (1) when config has -1");
// Clean up
pluginFinalize(context);
unlink("test_default.conf");
unsetenv("NCCL_TUNER_CONFIG_FILE");
TEST_PASS();
}
// Test 8: regBuff matching
int test_regbuff_matching() {
const char* test_config =
"allreduce,0,65536,tree,simple,2,-1,-1,-1,1\n" // Registered buffers only
"allreduce,0,65536,ring,simple,4,-1,-1,-1,0\n" // Non-registered buffers only
"allreduce,0,65536,ring,ll128,8,-1,-1,-1,-1\n"; // Any buffer type (backward compatible)
create_test_config("test_regbuff.conf", test_config);
setenv("NCCL_TUNER_CONFIG_FILE", "test_regbuff.conf", 1);
void* context = NULL;
pluginInit(&context, 0, 8, 1, mock_logger, NULL, NULL);
float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
float* cost_table_ptr[NCCL_NUM_ALGORITHMS];
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
cost_table_ptr[i] = cost_table[i];
}
int nChannels;
// Test registered buffer (should match first config)
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0;
}
}
pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
1, &nChannels); // regBuff = 1 (registered)
TEST_ASSERT(cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE] == 0.0, "Registered buffer: Tree/Simple should have low cost");
TEST_ASSERT(nChannels == 2, "Registered buffer: Should set 2 channels");
// Test non-registered buffer (should match second config)
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0;
}
}
pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
0, &nChannels); // regBuff = 0 (non-registered)
TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE] == 0.0, "Non-registered buffer: Ring/Simple should have low cost");
TEST_ASSERT(nChannels == 4, "Non-registered buffer: Should set 4 channels");
// Test backward compatibility - config without regBuff should match any regBuff value
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0;
}
}
// First try with regBuff=2 (unusual value, should match third config)
pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
2, &nChannels); // regBuff = 2 (only third config should match)
TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128] == 0.0, "Any regBuff: Ring/LL128 should have low cost");
TEST_ASSERT(nChannels == 8, "Any regBuff: Should set 8 channels");
// Clean up
pluginFinalize(context);
unlink("test_regbuff.conf");
unsetenv("NCCL_TUNER_CONFIG_FILE");
TEST_PASS();
}
// Test 9: numPipeOps matching
int test_pipeops_matching() {
const char* test_config =
"allreduce,0,65536,tree,simple,2,-1,-1,1,-1\n" // Single pipeline op
"allreduce,0,65536,ring,simple,4,-1,-1,4,-1\n" // Multiple pipeline ops
"allreduce,0,65536,ring,ll128,8,-1,-1,-1,-1\n"; // Any pipeline ops (backward compatible)
create_test_config("test_pipeops.conf", test_config);
setenv("NCCL_TUNER_CONFIG_FILE", "test_pipeops.conf", 1);
void* context = NULL;
pluginInit(&context, 0, 8, 1, mock_logger, NULL, NULL);
float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
float* cost_table_ptr[NCCL_NUM_ALGORITHMS];
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
cost_table_ptr[i] = cost_table[i];
}
int nChannels;
// Test single pipeline op (should match first config)
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0;
}
}
pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
0, &nChannels);
TEST_ASSERT(cost_table[NCCL_ALGO_TREE][NCCL_PROTO_SIMPLE] == 0.0, "Single pipeOp: Tree/Simple should have low cost");
TEST_ASSERT(nChannels == 2, "Single pipeOp: Should set 2 channels");
// Test multiple pipeline ops (should match second config)
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0;
}
}
pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 4,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
0, &nChannels);
TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE] == 0.0, "Multiple pipeOps: Ring/Simple should have low cost");
TEST_ASSERT(nChannels == 4, "Multiple pipeOps: Should set 4 channels");
// Test different number of pipeline ops (should match third config - backward compatible)
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0;
}
}
pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 2,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
0, &nChannels);
TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_LL128] == 0.0, "Any pipeOps: Ring/LL128 should have low cost");
TEST_ASSERT(nChannels == 8, "Any pipeOps: Should set 8 channels");
// Clean up
pluginFinalize(context);
unlink("test_pipeops.conf");
unsetenv("NCCL_TUNER_CONFIG_FILE");
TEST_PASS();
}
// Test 10: No matching configuration (fallback behavior)
int test_no_match_fallback() {
const char* test_config =
"broadcast,0,1024,tree,simple,2,-1,-1,-1,-1\n"; // Only broadcast config
create_test_config("test_fallback.conf", test_config);
setenv("NCCL_TUNER_CONFIG_FILE", "test_fallback.conf", 1);
void* context = NULL;
pluginInit(&context, 0, 8, 1, mock_logger, NULL, NULL);
float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
float* cost_table_ptr[NCCL_NUM_ALGORITHMS];
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
cost_table_ptr[i] = cost_table[i];
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0;
}
}
int nChannels;
// Try allreduce (should not match, use fallback)
pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
0, &nChannels);
mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__,
"DEBUG: Fallback test - checking cost_table[RING][SIMPLE] (%p) = %.1f (expecting 0.0)",
&cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE], cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE]);
TEST_ASSERT(cost_table[NCCL_ALGO_RING][NCCL_PROTO_SIMPLE] == 1.0, "Should use pass through unmodified");
TEST_ASSERT(nChannels == 1, "Should use default channels");
// Clean up
pluginFinalize(context);
unlink("test_fallback.conf");
unsetenv("NCCL_TUNER_CONFIG_FILE");
TEST_PASS();
}
// Test 11: Large configuration files (testing dynamic allocation)
int test_large_config() {
const char* large_config_file = "test_large.conf";
// Create a large configuration file with many entries
// This tests the dynamic allocation functionality
FILE* f = fopen(large_config_file, "w");
TEST_ASSERT(f != NULL, "Should be able to create large config file");
// Write header comment
fprintf(f, "# Large configuration file for testing dynamic allocation\n");
fprintf(f, "# This file contains many configurations to test memory allocation\n");
// Generate a large number of configurations (much more than the old MAX_CONFIGS=100)
const int num_configs = 500; // 5x the old static limit
const char* collectives[] = {"allreduce", "broadcast", "reduce", "allgather", "reducescatter"};
const char* algorithms[] = {"tree", "ring", "collnet_direct", "nvls"};
const char* protocols[] = {"simple", "ll", "ll128"};
for (int i = 0; i < num_configs; i++) {
// Vary the configurations to create realistic test data
const char* coll = collectives[i % 5];
const char* algo = algorithms[i % 4];
const char* proto = protocols[i % 3];
size_t min_bytes = (i * 1024) % 1048576; // Vary from 0 to 1MB
size_t max_bytes = min_bytes + 65536; // 64KB range
int channels = (i % 8) + 1; // 1-8 channels
int nodes = (i % 4) == 0 ? -1 : (i % 4); // Mix of -1 and 1-3 nodes
int ranks = (i % 8) == 0 ? -1 : (i % 32) + 1; // Mix of -1 and 1-32 ranks
int pipeOps = (i % 3) == 0 ? -1 : (i % 4) + 1; // Mix of -1 and 1-4 pipeOps
int regBuff = (i % 3) == 0 ? -1 : (i % 2); // Mix of -1, 0, 1
fprintf(f, "%s,%zu,%zu,%s,%s,%d,%d,%d,%d,%d\n",
coll, min_bytes, max_bytes, algo, proto, channels, nodes, ranks, pipeOps, regBuff);
}
fclose(f);
// Set environment to use our large config file
setenv("NCCL_TUNER_CONFIG_FILE", large_config_file, 1);
// Initialize plugin with large config
void* context = NULL;
ncclResult_t result = pluginInit(&context, 0, 16, 4, mock_logger, NULL, NULL);
TEST_ASSERT(result == ncclSuccess, "Plugin init with large config should succeed");
TEST_ASSERT(context != NULL, "Context should be allocated");
// Verify that configurations were loaded
TunerContext* ctx = (TunerContext*)context;
TEST_ASSERT(ctx->numConfigs == num_configs, "Should load all configurations from large file");
TEST_ASSERT(ctx->maxConfigs == num_configs, "maxConfigs should match allocated size");
TEST_ASSERT(ctx->configs != NULL, "Configs array should be dynamically allocated");
// Test that we can access configurations throughout the array
// (This would have failed with the old static MAX_CONFIGS=100 limit)
for (int i = 0; i < ctx->numConfigs; i++) {
TuningConfig* config = &ctx->configs[i];
// Basic sanity checks on the loaded configurations
TEST_ASSERT(config->collType >= ncclFuncBroadcast && config->collType <= ncclFuncAllReduce,
"Collective type should be valid");
TEST_ASSERT(config->maxBytes >= config->minBytes, "maxBytes should be >= minBytes");
TEST_ASSERT(config->nChannels > 0, "nChannels should be positive");
}
// Test specific configuration access at various indices
// Index 0 (first config)
TuningConfig* first_config = &ctx->configs[0];
TEST_ASSERT(first_config != NULL, "First config should be accessible");
// Index in middle
TuningConfig* mid_config = &ctx->configs[num_configs / 2];
TEST_ASSERT(mid_config != NULL, "Middle config should be accessible");
// Index near end (this would have crashed with static array of 100)
TuningConfig* late_config = &ctx->configs[num_configs - 1];
TEST_ASSERT(late_config != NULL, "Last config should be accessible");
// Test memory allocation size - verify we didn't over-allocate
mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__,
"Successfully loaded %d configurations (dynamic allocation)", ctx->numConfigs);
mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__,
"Memory allocated for %d configurations (%zu bytes total)",
ctx->maxConfigs, ctx->maxConfigs * sizeof(TuningConfig));
// Test that the plugin can still find matching configurations from the large set
float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
float* cost_table_ptr[NCCL_NUM_ALGORITHMS];
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
cost_table_ptr[i] = cost_table[i];
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0; // Default high cost
}
}
int nChannels;
// Try to find a matching configuration - should work with large config set
result = pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
0, &nChannels);
TEST_ASSERT(result == ncclSuccess, "GetCollInfo should work with large config set");
// Clean up
pluginFinalize(context);
unlink(large_config_file);
unsetenv("NCCL_TUNER_CONFIG_FILE");
TEST_PASS();
}
// Test 12: Very large configuration stress test
int test_very_large_config_stress() {
const char* stress_config_file = "test_stress.conf";
// Create an even larger configuration file to stress test the implementation
FILE* f = fopen(stress_config_file, "w");
TEST_ASSERT(f != NULL, "Should be able to create stress test config file");
fprintf(f, "# Stress test configuration with very large number of entries\n");
// Generate an extremely large number of configurations
const int stress_configs = 2000; // 20x the old static limit
for (int i = 0; i < stress_configs; i++) {
// Create varied but valid configurations
fprintf(f, "allreduce,%d,%d,ring,simple,4,-1,-1,-1,-1\n",
i * 512, (i * 512) + 1024);
}
fclose(f);
setenv("NCCL_TUNER_CONFIG_FILE", stress_config_file, 1);
// Test initialization with stress config
void* context = NULL;
ncclResult_t result = pluginInit(&context, 0, 8, 2, mock_logger, NULL, NULL);
TEST_ASSERT(result == ncclSuccess, "Plugin should handle very large config files");
TunerContext* ctx = (TunerContext*)context;
TEST_ASSERT(ctx->numConfigs == stress_configs, "Should load all stress test configurations");
TEST_ASSERT(ctx->configs != NULL, "Stress test configs should be allocated");
mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__,
"Stress test - loaded %d configurations successfully", stress_configs);
mock_logger(NCCL_LOG_INFO, NCCL_ALL, __FILE__, __LINE__,
"Memory usage: %zu bytes for configuration array",
stress_configs * sizeof(TuningConfig));
// Verify we can access configurations throughout the entire range
for (int i = 0; i < stress_configs; i += 100) { // Sample every 100th config
TuningConfig* config = &ctx->configs[i];
TEST_ASSERT(config->collType == ncclFuncAllReduce, "Config should have correct collective type");
TEST_ASSERT(config->minBytes == (size_t)(i * 512), "Config should have correct minBytes");
}
// Clean up
pluginFinalize(context);
unlink(stress_config_file);
unsetenv("NCCL_TUNER_CONFIG_FILE");
TEST_PASS();
}
// Test 13: Edge case - empty config file
int test_empty_config() {
const char* empty_config_file = "test_empty.conf";
// Create empty config file (only comments)
create_test_config(empty_config_file,
"# Empty configuration file\n"
"# No actual configurations\n"
"\n"
"\n");
setenv("NCCL_TUNER_CONFIG_FILE", empty_config_file, 1);
void* context = NULL;
ncclResult_t result = pluginInit(&context, 0, 8, 2, mock_logger, NULL, NULL);
TEST_ASSERT(result == ncclSuccess, "Plugin should handle empty config files");
TunerContext* ctx = (TunerContext*)context;
TEST_ASSERT(ctx->numConfigs == 0, "Should have zero configurations");
TEST_ASSERT(ctx->maxConfigs == 0, "Should have zero max configurations");
TEST_ASSERT(ctx->configs == NULL, "Should not allocate memory for empty config");
// Test that plugin still works with no configurations (fallback behavior)
float cost_table[NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
float* cost_table_ptr[NCCL_NUM_ALGORITHMS];
for (int i = 0; i < NCCL_NUM_ALGORITHMS; i++) {
cost_table_ptr[i] = cost_table[i];
for (int j = 0; j < NCCL_NUM_PROTOCOLS; j++) {
cost_table[i][j] = 1.0;
}
}
int nChannels;
result = pluginGetCollInfo(context, ncclFuncAllReduce, 32768, 1,
cost_table_ptr, NCCL_NUM_ALGORITHMS, NCCL_NUM_PROTOCOLS,
0, &nChannels);
TEST_ASSERT(result == ncclSuccess, "GetCollInfo should work with empty config");
// Clean up
pluginFinalize(context);
unlink(empty_config_file);
unsetenv("NCCL_TUNER_CONFIG_FILE");
TEST_PASS();
}
// Test NVLink domain info handling
int test_nvl_domain_info() {
printf("Testing NVLink domain info handling...\n");
// Test NVLink domain structure with min/max ranks per domain
ncclNvlDomainInfo_v5_t nvl_domain = {
.nNvlDomains = 2, // 2 nodes = 2 domains
.minRanksPerNvlDomain = 3, // minimum ranks across all domains (bottleneck)
.maxRanksPerNvlDomain = 5 // maximum ranks across all domains (capacity)
};
void* context = NULL;
ncclResult_t result = pluginInit(&context, 0, 8, 2, mock_logger, &nvl_domain, NULL);
TEST_ASSERT(result == ncclSuccess, "Plugin init with NVLink domains should succeed");
// Validate NVLD info structure
TEST_ASSERT(nvl_domain.nNvlDomains == 2, "Should have 2 domains (nodes)");
TEST_ASSERT(nvl_domain.minRanksPerNvlDomain == 3, "Should have minimum 3 ranks per domain");
TEST_ASSERT(nvl_domain.maxRanksPerNvlDomain == 5, "Should have maximum 5 ranks per domain");
// Clean up
pluginFinalize(context);
printf("NVLink domain info test passed!\n");
TEST_PASS();
}
int test_tuner_constants() {
// Initialize constants to -1.0 for testing purposes
ncclTunerConstants_v5_t constants = {
// Base latencies: [NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS]
.baseLatencies = {
{-1.0, -1.0, -1.0}, // NCCL_ALGO_TREE: LL, LL128, Simple
{-1.0, -1.0, -1.0}, // NCCL_ALGO_RING: LL, LL128, Simple
{-1.0, -1.0, -1.0}, // NCCL_ALGO_COLLNET_DIRECT
{-1.0, -1.0, -1.0}, // NCCL_ALGO_COLLNET_CHAIN
{-1.0, -1.0, -1.0}, // NCCL_ALGO_NVLS
{-1.0, -1.0, -1.0}, // NCCL_ALGO_NVLS_TREE
{-1.0, -1.0, -1.0} // NCCL_ALGO_PAT
},
// Hardware latencies: [NCCL_NUM_HW_LINKS][NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS]
.hwLatencies = {
// NCCL_HW_NVLINK
{
{-1.0, -1.0, -1.0}, // TREE
{-1.0, -1.0, -1.0}, // RING
{-1.0, -1.0, -1.0}, // COLLNET_DIRECT
{-1.0, -1.0, -1.0}, // COLLNET_CHAIN
{-1.0, -1.0, -1.0}, // NVLS
{-1.0, -1.0, -1.0}, // NVLS_TREE
{-1.0, -1.0, -1.0} // PAT
},
// NCCL_HW_PCI
{
{-1.0, -1.0, -1.0}, // TREE
{-1.0, -1.0, -1.0}, // RING
{-1.0, -1.0, -1.0}, // COLLNET_DIRECT
{-1.0, -1.0, -1.0}, // COLLNET_CHAIN
{-1.0, -1.0, -1.0}, // NVLS
{-1.0, -1.0, -1.0}, // NVLS_TREE
{-1.0, -1.0, -1.0} // PAT
},
// NCCL_HW_NET
{
{-1.0, -1.0, -1.0}, // TREE
{-1.0, -1.0, -1.0}, // RING
{-1.0, -1.0, -1.0}, // COLLNET_DIRECT
{-1.0, -1.0, -1.0}, // COLLNET_CHAIN
{-1.0, -1.0, -1.0}, // NVLS
{-1.0, -1.0, -1.0}, // NVLS_TREE
{-1.0, -1.0, -1.0} // PAT
}
},
// LL maximum bandwidths: [NCCL_NUM_COMPCAPS][NCCL_NUM_TUNING_SCALES]
.llMaxBws = {
{-1.0, -1.0, -1.0}, // Volta: 1node, 2nodes, 4nodes
{-1.0, -1.0, -1.0}, // Ampere: 1node, 2nodes, 4nodes
{-1.0, -1.0, -1.0}, // Hopper: 1node, 2nodes, 4nodes
{-1.0, -1.0, -1.0} // Blackwell: 1node, 2nodes, 4nodes
},
// Per-channel maximum Ring LL128 bandwidths: [NCCL_NUM_COMPCAPS][NCCL_NUM_TUNING_SCALES]
.perChMaxRingLL128Bws = {
{-1.0, -1.0, -1.0}, // Volta: 1node, 2nodes, 4nodes
{-1.0, -1.0, -1.0}, // Ampere: 1node, 2nodes, 4nodes
{-1.0, -1.0, -1.0}, // Hopper: 1node, 2nodes, 4nodes
{-1.0, -1.0, -1.0} // Blackwell: 1node, 2nodes, 4nodes
},
// Per-channel maximum Tree LL128 bandwidths: [NCCL_NUM_COMPCAPS][NCCL_NUM_TUNING_SCALES]
.perChMaxTreeLL128Bws = {
{-1.0, -1.0, -1.0}, // Volta: 1node, 2nodes, 4nodes
{-1.0, -1.0, -1.0}, // Ampere: 1node, 2nodes, 4nodes
{-1.0, -1.0, -1.0}, // Hopper: 1node, 2nodes, 4nodes
{-1.0, -1.0, -1.0} // Blackwell: 1node, 2nodes, 4nodes
},
// Per-channel maximum Tree bandwidths: [NCCL_NUM_COMPCAPS][NCCL_NUM_TUNING_SCALES]
.perChMaxTreeBws = {
{-1.0, -1.0, -1.0}, // Volta: 1node, 2nodes, 4nodes
{-1.0, -1.0, -1.0}, // Ampere: 1node, 2nodes, 4nodes
{-1.0, -1.0, -1.0}, // Hopper: 1node, 2nodes, 4nodes
{-1.0, -1.0, -1.0} // Blackwell: 1node, 2nodes, 4nodes
}
};
void* context = NULL;
ncclResult_t result = pluginInit(&context, 0, 8, 2, mock_logger, NULL, &constants);
TEST_ASSERT(result == ncclSuccess, "Plugin init with constants should succeed");
// Test that the constants were set correctly
TEST_ASSERT(constants.perChMaxTreeBws[NCCL_BLACKWELL_COMPCAP_IDX][NCCL_TUNING_SCALE_4NODES] == 15.0, "Tree bandwidth should be 15GB/s");
TEST_ASSERT(constants.perChMaxRingLL128Bws[NCCL_BLACKWELL_COMPCAP_IDX][NCCL_TUNING_SCALE_4NODES] == 20.0, "Ring bandwidth should be 20GB/s");
TEST_ASSERT(constants.hwLatencies[NCCL_HW_NET][NCCL_ALGO_NVLS][NCCL_PROTO_SIMPLE] == 24.0, "NVLSTree base network latency should be 24us");
// Clean up
pluginFinalize(context);
TEST_PASS();
}
// Test runner function pointer type
typedef int (*TestFunction)(void);
// Test registry
typedef struct {
const char* name;
TestFunction func;
const char* description;
} TestCase;
// All available tests
TestCase test_cases[] = {
{"init", test_plugin_init, "Plugin initialization"},
{"config-valid", test_config_parsing_valid, "Valid configuration parsing"},
{"config-invalid", test_config_parsing_invalid, "Invalid configuration parsing"},
{"collective", test_collective_matching, "Collective type matching"},
{"size", test_size_matching, "Size range matching"},
{"topology", test_topology_matching, "Topology matching"},
{"channels", test_default_channels, "Default channels behavior"},
{"regbuff", test_regbuff_matching, "Registered buffer matching"},
{"pipeops", test_pipeops_matching, "Pipeline operations matching"},
{"fallback", test_no_match_fallback, "Fallback behavior"},
{"large-config", test_large_config, "Large configuration files (dynamic allocation)"},
{"stress-config", test_very_large_config_stress, "Very large configuration stress test"},
{"empty-config", test_empty_config, "Empty configuration file handling"},
{"nvl-domain", test_nvl_domain_info, "NVL domain info handling"},
{"constants", test_tuner_constants, "Tuner constants initialization"},
{NULL, NULL, NULL} // End marker
};
// Show help/usage information
void show_help(const char* program_name) {
printf("Usage: %s [test_name ...]\n\n", program_name);
printf("Available tests:\n");
for (int i = 0; test_cases[i].name != NULL; i++) {
printf(" %-15s - %s\n", test_cases[i].name, test_cases[i].description);
}
printf("\nExamples:\n");
printf(" %s # Run all tests\n", program_name);
printf(" %s init # Run only initialization test\n", program_name);
printf(" %s init collective # Run initialization and collective tests\n", program_name);
printf(" %s --help # Show this help\n", program_name);
}
// Find test by name
TestFunction find_test(const char* name) {
for (int i = 0; test_cases[i].name != NULL; i++) {
if (strcmp(test_cases[i].name, name) == 0) {
return test_cases[i].func;
}
}
return NULL;
}
// Main test runner
int main(int argc, char* argv[]) {
int passed = 0, total = 0;
// Check for help
if (argc > 1 && (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0)) {
show_help(argv[0]);
return 0;
}
printf("Running NCCL Tuner Plugin Unit Tests\n");
printf("=====================================\n");
if (argc == 1) {
// No arguments - run all tests
for (int i = 0; test_cases[i].name != NULL; i++) {
printf("Running test: %s\n", test_cases[i].name);
total++;
passed += test_cases[i].func();
}
} else {
// Run specific tests
for (int arg = 1; arg < argc; arg++) {
TestFunction test_func = find_test(argv[arg]);
if (test_func) {
total++;
passed += test_func();
} else {
printf("ERROR: Unknown test '%s'\n", argv[arg]);
printf("Use --help to see available tests\n");
return 1;
}
}
}
printf("\n=====================================\n");
printf("Test Results: %d/%d tests passed\n", passed, total);
if (passed == total) {
printf("All tests PASSED!\n");
return 0;
} else {
printf("Some tests FAILED!\n");
return 1;
}
}