2025-03-12 13:46:21 -07:00
/*************************************************************************
* Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
# include <stdlib.h>
# include <string.h>
# include <errno.h>
# include <dlfcn.h>
# include "debug.h"
# define MAX_STR_LEN 255
/*
 * Kinds of plugin library handled by this file.
 * The enumerator values are used directly as indices into the per-plugin
 * arrays of this file (e.g. libHandles[type]), so they start at 0 and are
 * contiguous.
 */
enum ncclPluginType {
  ncclPluginTypeNet = 0,      /* network plugin */
  ncclPluginTypeTuner = 1,    /* tuner plugin */
  ncclPluginTypeProfiler = 2  /* profiler plugin */
};
# define NUM_LIBS 3
static void * libHandles [ NUM_LIBS ] ;
static const char * pluginNames [ NUM_LIBS ] = { " NET " , " TUNER " , " PROFILER " } ;
static const char * pluginPrefix [ NUM_LIBS ] = { " libnccl-net " , " libnccl-tuner " , " libnccl-profiler " } ;
2025-05-29 20:56:40 -07:00
static const char * pluginFallback [ NUM_LIBS ] = { " " , " Using internal tuner plugin. " , " " } ;
2025-03-12 13:46:21 -07:00
static unsigned long subsys [ NUM_LIBS ] = { NCCL_INIT | NCCL_NET , NCCL_INIT | NCCL_TUNING , NCCL_INIT } ;
/*
 * Try to dlopen() one library.
 *
 * name    Library to open. nullptr or "" means "nothing to try". The keyword
 *         "STATIC_PLUGIN" (compared case-insensitively, whole string) requests
 *         the handle of the running program itself, i.e. dlopen(NULL).
 * err     Out: always reset to 0 first; set to ENOENT when the loader message
 *         mentions the requested name together with "No such file or directory".
 * errStr  Out: receives the dlerror() text when dlopen() fails. Must provide
 *         room for MAX_STR_LEN + 1 bytes. Left untouched on success and when
 *         there was nothing to try.
 *
 * Returns the dlopen() handle, or nullptr when nothing was opened.
 */
static void* tryOpenLib(char* name, int* err, char* errStr) {
  *err = 0;
  if (nullptr == name || name[0] == '\0') {
    return nullptr;
  }

  // Compare the whole string: a length-limited compare bounded by strlen(name)
  // would also accept every prefix of the keyword (e.g. "S" or "STATIC").
  const char* target = name;
  if (strcasecmp(name, "STATIC_PLUGIN") == 0) {
    target = nullptr;
  }

  void* handle = dlopen(target, RTLD_NOW | RTLD_LOCAL);
  if (nullptr == handle) {
    // dlerror() may return NULL when no error is pending; never hand that to a string function.
    const char* loaderMsg = dlerror();
    snprintf(errStr, MAX_STR_LEN + 1, "%s", loaderMsg ? loaderMsg : "");
    // Only a named library can be "not found"; skipping the search when target is
    // nullptr also keeps strstr() from being called with a null needle.
    if (target != nullptr && strstr(errStr, target) && strstr(errStr, "No such file or directory")) {
      *err = ENOENT;
    }
  }
  return handle;
}
2025-05-29 20:56:40 -07:00
/*
 * Append " <name>" to the list of names kept in nameList.
 *
 * nameList   Buffer of PATH_MAX bytes holding a NUL-terminated list.
 * leftChars  In/out: number of bytes still unused in nameList, counting the
 *            byte occupied by the current terminator. PATH_MAX for an empty list.
 * name       Name to append.
 *
 * Every entry is preceded by a single space, which is the extra character
 * accounted for on top of the name length. When the remaining room is too
 * small the entry is truncated; once no room is left the call does nothing.
 */
static void appendNameToList(char* nameList, int* leftChars, char* name) {
  if (!nameList || !leftChars || !name) {
    return;
  }
  // At least the separator and a terminator must fit, and the counter must still
  // describe a position inside the buffer. A non-positive count would otherwise be
  // converted to a huge size_t by snprintf() and move the write position out of bounds.
  if (*leftChars <= 1 || *leftChars > PATH_MAX) {
    return;
  }

  char* writePos = nameList + (PATH_MAX - *leftChars);
  int produced = snprintf(writePos, (size_t)*leftChars, " %s", name);
  if (produced < 0) {
    *writePos = '\0';  // keep the list as it was before this call
    return;
  }
  if (produced >= *leftChars) {
    // Output was truncated: only leftChars - 1 characters were stored.
    produced = *leftChars - 1;
  }
  // The next entry starts on the terminator just written.
  *leftChars -= produced;
}
static void * openPluginLib ( enum ncclPluginType type , const char * libName ) {
int openErr , len = PATH_MAX ;
char libName_ [ MAX_STR_LEN ] = { 0 } ;
char openErrStr [ MAX_STR_LEN + 1 ] = { 0 } ;
char eNoEntNameList [ PATH_MAX ] = { 0 } ;
if ( libName & & strlen ( libName ) ) {
2025-06-18 10:34:47 -07:00
snprintf ( libName_ , MAX_STR_LEN , " %s " , libName ) ;
libHandles [ type ] = tryOpenLib ( libName_ , & openErr , openErrStr ) ;
if ( libHandles [ type ] ) {
INFO ( subsys [ type ] , " %s/Plugin: Plugin name set by env to %s " , pluginNames [ type ] , libName_ ) ;
return libHandles [ type ] ;
}
if ( openErr = = ENOENT ) {
appendNameToList ( eNoEntNameList , & len , libName_ ) ;
2025-03-12 13:46:21 -07:00
} else {
2025-06-18 10:34:47 -07:00
INFO ( subsys [ type ] , " %s/Plugin: %s " , pluginNames [ type ] , openErrStr ) ;
}
// libName can't be a relative or absolute path (start with '.' or contain any '/'). It can't be a library name either (start with 'lib' or end with '.so')
if ( strchr ( libName , ' / ' ) = = nullptr & & ( strncmp ( libName , " lib " , strlen ( " lib " ) ) | | strlen ( libName ) < strlen ( " .so " ) | | strncmp ( libName + strlen ( libName ) - strlen ( " .so " ) , " .so " , strlen ( " .so " ) ) ) ) {
2025-05-29 20:56:40 -07:00
snprintf ( libName_ , MAX_STR_LEN , " %s-%s.so " , pluginPrefix [ type ] , libName ) ;
libHandles [ type ] = tryOpenLib ( libName_ , & openErr , openErrStr ) ;
if ( libHandles [ type ] ) {
INFO ( subsys [ type ] , " %s/Plugin: Plugin name set by env to %s " , pluginNames [ type ] , libName_ ) ;
return libHandles [ type ] ;
}
if ( openErr = = ENOENT ) {
appendNameToList ( eNoEntNameList , & len , libName_ ) ;
} else {
INFO ( subsys [ type ] , " %s/Plugin: %s " , pluginNames [ type ] , openErrStr ) ;
}
2025-03-12 13:46:21 -07:00
}
} else {
snprintf ( libName_ , MAX_STR_LEN , " %s.so " , pluginPrefix [ type ] ) ;
libHandles [ type ] = tryOpenLib ( libName_ , & openErr , openErrStr ) ;
if ( libHandles [ type ] ) {
return libHandles [ type ] ;
}
if ( openErr = = ENOENT ) {
appendNameToList ( eNoEntNameList , & len , libName_ ) ;
} else {
INFO ( subsys [ type ] , " %s/Plugin: %s " , pluginNames [ type ] , openErrStr ) ;
}
}
if ( strlen ( eNoEntNameList ) ) {
INFO ( subsys [ type ] , " %s/Plugin: Could not find:%s. %s " , pluginNames [ type ] , eNoEntNameList , pluginFallback [ type ] ) ;
} else if ( strlen ( pluginFallback [ type ] ) ) {
INFO ( subsys [ type ] , " %s/Plugin: %s " , pluginNames [ type ] , pluginFallback [ type ] ) ;
}
return nullptr ;
}
// Open the network plugin library.
// `name` is forwarded unchanged to openPluginLib() together with ncclPluginTypeNet;
// the value returned by openPluginLib() is handed back to the caller.
void* ncclOpenNetPluginLib(const char* name) {
  void* netLib = openPluginLib(ncclPluginTypeNet, name);
  return netLib;
}
// Open the tuner plugin library.
// `name` is forwarded unchanged to openPluginLib() together with ncclPluginTypeTuner;
// the value returned by openPluginLib() is handed back to the caller.
void* ncclOpenTunerPluginLib(const char* name) {
  void* tunerLib = openPluginLib(ncclPluginTypeTuner, name);
  return tunerLib;
}
// Open the profiler plugin library.
// `name` is forwarded unchanged to openPluginLib() together with ncclPluginTypeProfiler;
// the value returned by openPluginLib() is handed back to the caller.
void* ncclOpenProfilerPluginLib(const char* name) {
  void* profilerLib = openPluginLib(ncclPluginTypeProfiler, name);
  return profilerLib;
}
// Return the handle currently stored for the network plugin
// (the ncclPluginTypeNet entry of libHandles). Nothing is opened here.
void* ncclGetNetPluginLib(void) {
  void* netLib = libHandles[ncclPluginTypeNet];
  return netLib;
}
// Forget `handle` in every libHandles slot that holds it and close it.
// dlclose() is called at most once per call, on the first matching slot, no
// matter how many slots hold the same handle; a null handle is never passed
// to dlclose(). A handle that is stored in no slot is left alone.
// Always returns ncclSuccess.
ncclResult_t ncclClosePluginLib(void* handle) {
  bool alreadyClosed = false;
  for (int slot = 0; slot < NUM_LIBS; ++slot) {
    if (libHandles[slot] != handle) {
      continue;
    }
    libHandles[slot] = nullptr;
    if (alreadyClosed) {
      continue;
    }
    alreadyClosed = true;
    if (handle != nullptr) {
      dlclose(handle);
    }
  }
  return ncclSuccess;
}