diff --git a/projects/rccl/.jenkins/common.groovy b/projects/rccl/.jenkins/common.groovy index 827dde71ff..c24ec7151b 100644 --- a/projects/rccl/.jenkins/common.groovy +++ b/projects/rccl/.jenkins/common.groovy @@ -23,7 +23,7 @@ def runTestCommand (platform, project, gfilter) cd ${project.paths.project_build_prefix}/build/release/test ${sudo} ulimit -l unlimited ulimit -a - ${sudo} RCCL_ENABLE_SIGNALHANDLER=1 NCCL_DEBUG=INFO HSA_FORCE_FINE_GRAIN_PCIE=1 ./rccl-UnitTests --gtest_filter=${gfilter} --gtest_output=xml --gtest_color=yes + ${sudo} RCCL_ENABLE_SIGNALHANDLER=0 NCCL_DEBUG=INFO HSA_FORCE_FINE_GRAIN_PCIE=1 ./rccl-UnitTests --gtest_filter=${gfilter} --gtest_output=xml --gtest_color=yes """ platform.runCommand(this, command) diff --git a/projects/rccl/src/graph/search.cc b/projects/rccl/src/graph/search.cc index f680619ab5..5a431c17ed 100644 --- a/projects/rccl/src/graph/search.cc +++ b/projects/rccl/src/graph/search.cc @@ -1187,7 +1187,7 @@ ncclResult_t ncclTopoGetLinkType(struct ncclTopoSystem* system, int cudaDev1, in struct ncclTopoNode* remNode = link->remNode; if (remNode->gpu.dev == cudaDev2) { *isXGMI = (link->type == LINK_NVL); - return ncclSuccess; + if (*isXGMI) return ncclSuccess; } } } diff --git a/projects/rccl/src/init.cc b/projects/rccl/src/init.cc index c0b856e12f..28c11023e1 100644 --- a/projects/rccl/src/init.cc +++ b/projects/rccl/src/init.cc @@ -1431,7 +1431,7 @@ fail: #ifdef USE_INDIRECT_FUNCTION_CALL NCCL_PARAM(SetStackSize, "SET_STACK_SIZE", 1); -RCCL_PARAM(StackSizeOverride, "STACK_SIZE_OVERRIDE", 256); +RCCL_PARAM(StackSizeOverride, "STACK_SIZE_OVERRIDE", 512); #else NCCL_PARAM(SetStackSize, "SET_STACK_SIZE", 0); RCCL_PARAM(StackSizeOverride, "STACK_SIZE_OVERRIDE", 0);