ASAN build excluding additional files, Algodir support for share folder
* ASAN build excluding additional files, Algodir support for share folder (#786) * Algodir support for share folder
Этот коммит содержится в:
@@ -613,8 +613,9 @@ rocm_install(FILES ${PROJECT_BINARY_DIR}/include/rccl/rccl.h src/include/n
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rccl)
|
||||
# Copy the MSCCL algorithm directory into the project binary directory.
# file(COPY) executes when CMake configures the project (not at build time),
# and the relative source path is resolved against the current source directory.
file(COPY tools/msccl-algorithms DESTINATION ${PROJECT_BINARY_DIR})
|
||||
# Same configure-time copy for the algorithm files used by the unit tests.
file(COPY tools/msccl-unit-test-algorithms DESTINATION ${PROJECT_BINARY_DIR})
|
||||
install(DIRECTORY ${PROJECT_BINARY_DIR}/msccl-algorithms DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||
install(DIRECTORY ${PROJECT_BINARY_DIR}/msccl-unit-test-algorithms DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||
## Install Algorithm files under share folder
|
||||
install(DIRECTORY ${PROJECT_BINARY_DIR}/msccl-algorithms DESTINATION ${CMAKE_INSTALL_DATADIR}/rccl)
|
||||
install(DIRECTORY ${PROJECT_BINARY_DIR}/msccl-unit-test-algorithms DESTINATION ${CMAKE_INSTALL_DATADIR}/rccl)
|
||||
|
||||
rocm_export_targets(
|
||||
NAMESPACE roc::
|
||||
|
||||
@@ -70,11 +70,16 @@ static const char* mscclAlgoDirEnv = "MSCCL_ALGO_DIR";
|
||||
static const char* mscclAlgoDefaultDir = "msccl-algorithms";
|
||||
extern "C" bool mscclUnitTestMode() __attribute__((__weak__));
|
||||
static const char* mscclUnitTestAlgoDefaultDir = "msccl-unit-test-algorithms";
|
||||
static const char* mscclAlgoShareDirPath = "share/rccl/msccl-algorithms";
|
||||
static const char* mscclUnitTestAlgoShareDirPath = "share/rccl/msccl-unit-test-algorithms";
|
||||
|
||||
static ncclResult_t mscclInternalSchedulerInit() {
|
||||
mscclStatus& status = mscclGetStatus();
|
||||
const char* mscclAlgoDir = getenv(mscclAlgoDirEnv);
|
||||
const char* mscclAlgoShareDir = nullptr;
|
||||
std::string mscclAlgoDirStr;
|
||||
std::string mscclAlgoShareDirStr;
|
||||
const char *fullDirPath = nullptr;
|
||||
if (mscclAlgoDir == nullptr) {
|
||||
// Try to find default algorithm directory based on librccl.so path
|
||||
Dl_info dl_info;
|
||||
@@ -87,20 +92,31 @@ static ncclResult_t mscclInternalSchedulerInit() {
|
||||
mscclAlgoDirStr = selfLibPath.substr(0, selfLibPath.find_last_of("/\\") + 1);
|
||||
mscclAlgoDirStr += (mscclUnitTestMode && mscclUnitTestMode()) ? mscclUnitTestAlgoDefaultDir : mscclAlgoDefaultDir;
|
||||
mscclAlgoDir = mscclAlgoDirStr.c_str();
|
||||
// Get share Directory Paths
|
||||
// Keep everything up to the "lib..." directory component that contains the library.
// find_first_of("lib") would match the first 'l', 'i' or 'b' CHARACTER anywhere in the
// path (e.g. the 'b' in "/home/bob/..."), so search for the substring instead, and only
// at or before the last path separator so the "lib" in the file name itself is not matched.
mscclAlgoShareDirStr = selfLibPath.substr(0, selfLibPath.rfind("lib", selfLibPath.find_last_of("/\\")));
|
||||
mscclAlgoShareDirStr += (mscclUnitTestMode && mscclUnitTestMode()) ? mscclUnitTestAlgoShareDirPath : mscclAlgoShareDirPath;
|
||||
mscclAlgoShareDir = mscclAlgoShareDirStr.c_str();
|
||||
}
|
||||
struct dirent *entry = nullptr;
|
||||
DIR *dp = nullptr;
|
||||
dp = opendir(mscclAlgoDir);
|
||||
if (dp == nullptr) {
|
||||
WARN("MSCCL Internal Scheduler: open algorithm directory %s failed", mscclAlgoDir);
|
||||
return ncclInvalidUsage;
|
||||
//Try to find the algorithm directory under share folder based on librccl.so path
|
||||
dp = opendir(mscclAlgoShareDir);
|
||||
if (dp == nullptr) {
|
||||
WARN("MSCCL Internal Scheduler: open algorithm in share directory %s failed", mscclAlgoShareDir);
|
||||
return ncclInvalidUsage;
|
||||
}
|
||||
fullDirPath = mscclAlgoShareDir;
|
||||
} else {
|
||||
fullDirPath = mscclAlgoDir;
|
||||
}
|
||||
while ((entry = readdir(dp))) {
|
||||
if (entry->d_type != DT_LNK && entry->d_type != DT_REG) {
|
||||
continue;
|
||||
}
|
||||
status.algoMetas.emplace_back();
|
||||
std::string fullPath = mscclAlgoDir;
|
||||
std::string fullPath = fullDirPath;
|
||||
fullPath += "/";
|
||||
fullPath += entry->d_name;
|
||||
NCCLCHECK(mscclGetAlgoMetaFromXmlFile(fullPath.c_str(), &(status.algoMetas.back())));
|
||||
|
||||
Ссылка в новой задаче
Block a user