ASAN build excluding additional files, Algodir support for share folder

* ASAN build excluding additional files, Algodir support for share folder (#786)
* Algodir support for share folder
Этот коммит содержится в:
arvindcheru
2023-06-23 10:57:20 -04:00
коммит произвёл GitHub
родитель bb55848450
Коммит bd14ac8b59
2 изменённых файлов: 22 добавлений и 5 удалений
+3 -2
Просмотреть файл
@@ -613,8 +613,9 @@ rocm_install(FILES ${PROJECT_BINARY_DIR}/include/rccl/rccl.h src/include/n
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rccl)
file(COPY tools/msccl-algorithms DESTINATION ${PROJECT_BINARY_DIR})
file(COPY tools/msccl-unit-test-algorithms DESTINATION ${PROJECT_BINARY_DIR})
install(DIRECTORY ${PROJECT_BINARY_DIR}/msccl-algorithms DESTINATION ${CMAKE_INSTALL_LIBDIR})
install(DIRECTORY ${PROJECT_BINARY_DIR}/msccl-unit-test-algorithms DESTINATION ${CMAKE_INSTALL_LIBDIR})
## Install Algorithm files under share folder
install(DIRECTORY ${PROJECT_BINARY_DIR}/msccl-algorithms DESTINATION ${CMAKE_INSTALL_DATADIR}/rccl)
install(DIRECTORY ${PROJECT_BINARY_DIR}/msccl-unit-test-algorithms DESTINATION ${CMAKE_INSTALL_DATADIR}/rccl)
rocm_export_targets(
NAMESPACE roc::
+19 -3
Просмотреть файл
@@ -70,11 +70,16 @@ static const char* mscclAlgoDirEnv = "MSCCL_ALGO_DIR";
// Directory name appended to the directory portion of the library path
// (everything up to the last '/' or '\\') when the MSCCL_ALGO_DIR environment
// variable is not set.
static const char* mscclAlgoDefaultDir = "msccl-algorithms";
// Declared weak, so the symbol may be absent at runtime; the caller tests the
// function's address for null before invoking it.
extern "C" bool mscclUnitTestMode() __attribute__((__weak__));
// Used instead of mscclAlgoDefaultDir when mscclUnitTestMode is present and
// returns true.
static const char* mscclUnitTestAlgoDefaultDir = "msccl-unit-test-algorithms";
// Fallback algorithm locations, tried only when opening the primary directory
// fails. They are appended to a prefix cut from the library path; the second
// one is selected under the same unit-test condition as above.
// NOTE(review): that prefix is computed with find_first_of("lib"), which stops
// at the first 'l', 'i' or 'b' character in the path rather than at the
// substring "lib" -- confirm whether a substring search was intended.
static const char* mscclAlgoShareDirPath = "share/rccl/msccl-algorithms";
static const char* mscclUnitTestAlgoShareDirPath = "share/rccl/msccl-unit-test-algorithms";
static ncclResult_t mscclInternalSchedulerInit() {
mscclStatus& status = mscclGetStatus();
const char* mscclAlgoDir = getenv(mscclAlgoDirEnv);
const char* mscclAlgoShareDir = nullptr;
std::string mscclAlgoDirStr;
std::string mscclAlgoShareDirStr;
const char *fullDirPath = nullptr;
if (mscclAlgoDir == nullptr) {
// Try to find default algorithm directory based on librccl.so path
Dl_info dl_info;
@@ -87,20 +92,31 @@ static ncclResult_t mscclInternalSchedulerInit() {
mscclAlgoDirStr = selfLibPath.substr(0, selfLibPath.find_last_of("/\\") + 1);
mscclAlgoDirStr += (mscclUnitTestMode && mscclUnitTestMode()) ? mscclUnitTestAlgoDefaultDir : mscclAlgoDefaultDir;
mscclAlgoDir = mscclAlgoDirStr.c_str();
// Get share Directory Paths
mscclAlgoShareDirStr = selfLibPath.substr(0, selfLibPath.find_first_of("lib") );
mscclAlgoShareDirStr += (mscclUnitTestMode && mscclUnitTestMode()) ? mscclUnitTestAlgoShareDirPath : mscclAlgoShareDirPath;
mscclAlgoShareDir = mscclAlgoShareDirStr.c_str();
}
struct dirent *entry = nullptr;
DIR *dp = nullptr;
dp = opendir(mscclAlgoDir);
if (dp == nullptr) {
WARN("MSCCL Internal Scheduler: open algorithm directory %s failed", mscclAlgoDir);
return ncclInvalidUsage;
//Try to find the algorithm directory under share folder based on librccl.so path
dp = opendir(mscclAlgoShareDir);
if (dp == nullptr) {
WARN("MSCCL Internal Scheduler: open algorithm in share directory %s failed", mscclAlgoShareDir);
return ncclInvalidUsage;
}
fullDirPath = mscclAlgoShareDir;
} else {
fullDirPath = mscclAlgoDir;
}
while ((entry = readdir(dp))) {
if (entry->d_type != DT_LNK && entry->d_type != DT_REG) {
continue;
}
status.algoMetas.emplace_back();
std::string fullPath = mscclAlgoDir;
std::string fullPath = fullDirPath;
fullPath += "/";
fullPath += entry->d_name;
NCCLCHECK(mscclGetAlgoMetaFromXmlFile(fullPath.c_str(), &(status.algoMetas.back())));