Merge remote-tracking branch 'nccl/master' into develop

这个提交包含在:
Marzieh Berenjkoub
2026-01-20 13:01:49 -06:00
当前提交 858b4e76eb
修改 240 个文件,包含 16266 行新增和 3578 行删除
+39 -3
查看文件
@@ -26,7 +26,7 @@ option(BUILD_TESTS "Build unit test programs"
# User-facing build switches. Each option() declares a boolean cache entry with the
# given help text and default; override on the command line with -D<NAME>=ON|OFF.
option(COLLTRACE "Collective Trace Option" ON)
option(DUMP_ASM "Disassemble and dump" OFF)
option(ENABLE_CODE_COVERAGE "Enable code coverage" OFF)
# NOTE(review): ENABLE_MSCCL_KERNEL is listed twice with different defaults (ON, then OFF).
# This looks like the removed/added pair of a diff hunk whose +/- markers were lost,
# i.e. the default is being changed from ON to OFF -- confirm against the real file.
# If both lines were really present, the first option() call would set the cache entry
# and the second would have no effect.
option(ENABLE_MSCCL_KERNEL "Enable MSCCL while compiling" ON)
option(ENABLE_MSCCL_KERNEL "Enable MSCCL while compiling" OFF)
# MSCCL++ feature switches, all disabled by default.
option(ENABLE_MSCCLPP "Enable MSCCL++" OFF)
option(ENABLE_MSCCLPP_CLIP "Enable MSCCL++ CLIP" OFF)
option(ENABLE_MSCCLPP_EXECUTOR "Enable MSCCL++ Executor" OFF)
@@ -463,10 +463,12 @@ configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/include/nccl.h) # Used b
set(SRC_FILES
src/allocator.cc
src/bootstrap.cc
src/ce_coll.cc
src/channel.cc
src/collectives.cc
src/commDump.cc
src/debug.cc
src/dev_runtime.cc
src/enqueue.cc
src/group.cc
src/init.cc
@@ -475,7 +477,7 @@ set(SRC_FILES
src/msccl.cc
src/proxy.cc
src/rccl_wrap.cc
src/symmetric.cc
src/sym_kernels.cc
src/transport.cc
src/device/all_gather.h
src/device/all_reduce.h
@@ -526,6 +528,7 @@ set(SRC_FILES
src/include/BfdBacktrace.hpp
src/include/bitops.h
src/include/bootstrap.h
src/include/ce_coll.h
src/include/channel.h
src/include/checks.h
src/include/collectives.h
@@ -535,6 +538,7 @@ set(SRC_FILES
src/include/cpuset.h
# src/include/cudawrap.h
src/include/debug.h
src/include/dev_runtime.h
src/include/device.h
src/include/enqueue.h
src/include/gdrwrap.h
@@ -549,6 +553,7 @@ set(SRC_FILES
src/include/ipcsocket.h
src/include/mnnvl.h
src/include/nccl_common.h
src/include/nccl_device.h
src/include/net_device.h
src/include/net.h
src/include/nvmlwrap.h
@@ -569,12 +574,13 @@ set(SRC_FILES
src/include/rocmwrap.h
src/include/roctx.h
src/include/recorder.h
src/include/scheduler.h
src/include/shm.h
src/include/shmutils.h
src/include/signals.h
src/include/socket.h
src/include/strongstream.h
src/include/symmetric.h
src/include/sym_kernels.h
src/include/timer.h
src/include/transport.h
src/include/trees.h
@@ -592,6 +598,23 @@ set(SRC_FILES
src/include/msccl/msccl_setup.h
src/include/msccl/msccl_status.h
src/include/msccl/msccl_struct.h
src/include/nccl_device/comm.h
src/include/nccl_device/coop.h
src/include/nccl_device/core.h
src/include/nccl_device/ll_a2a.h
src/include/nccl_device/mem_barrier.h
src/include/nccl_device/ptr.h
src/include/nccl_device/utility.h
src/include/nccl_device/impl/comm__funcs.h
src/include/nccl_device/impl/comm__types.h
src/include/nccl_device/impl/core__funcs.h
src/include/nccl_device/impl/core__types.h
src/include/nccl_device/impl/ll_a2a__funcs.h
src/include/nccl_device/impl/ll_a2a__types.h
src/include/nccl_device/impl/mem_barrier__funcs.h
src/include/nccl_device/impl/mem_barrier__types.h
src/include/nccl_device/impl/ptr__funcs.h
src/include/nccl_device/impl/ptr__types.h
src/include/npkit/npkit.h
src/include/npkit/npkit_event.h
src/include/npkit/npkit_struct.h
@@ -639,6 +662,7 @@ set(SRC_FILES
src/include/plugin/net/net_v8.h
src/include/plugin/net/net_v9.h
src/include/plugin/net/net_v10.h
src/include/plugin/net/net_v11.h
src/include/plugin/profiler/net_ib_v1.h
src/include/plugin/profiler/net_ib.h
src/include/plugin/profiler/net_socket_v1.h
@@ -647,9 +671,11 @@ set(SRC_FILES
src/include/plugin/profiler/profiler_v2.h
src/include/plugin/profiler/profiler_v3.h
src/include/plugin/profiler/profiler_v4.h
src/include/plugin/profiler/profiler_v5.h
src/include/plugin/tuner/tuner_v2.h
src/include/plugin/tuner/tuner_v3.h
src/include/plugin/tuner/tuner_v4.h
src/include/plugin/tuner/tuner_v5.h
src/misc/alt_rsmi.cc
src/misc/archinfo.cc
src/misc/argcheck.cc
@@ -682,6 +708,9 @@ set(SRC_FILES
src/misc/msccl/msccl_setup.cc
src/misc/msccl/msccl_status.cc
src/misc/proxy_trace/proxy_trace.cc
src/nccl_device/core.cc
src/nccl_device/ll_a2a.cc
src/nccl_device/mem_barrier.cc
src/plugin/net.cc
src/plugin/plugin_open.cc
src/plugin/profiler.cc
@@ -691,13 +720,16 @@ set(SRC_FILES
src/plugin/net/net_v8.cc
src/plugin/net/net_v9.cc
src/plugin/net/net_v10.cc
src/plugin/net/net_v11.cc
src/plugin/profiler/profiler_v1.cc
src/plugin/profiler/profiler_v2.cc
src/plugin/profiler/profiler_v3.cc
src/plugin/profiler/profiler_v4.cc
src/plugin/profiler/profiler_v5.cc
src/plugin/tuner/tuner_v2.cc
src/plugin/tuner/tuner_v3.cc
src/plugin/tuner/tuner_v4.cc
src/plugin/tuner/tuner_v5.cc
src/ras/client.cc
src/ras/client_support.cc
src/ras/collectives.cc
@@ -708,6 +740,7 @@ set(SRC_FILES
src/register/coll_reg.cc
src/register/register.cc
src/register/sendrecv_reg.cc
src/scheduler/symmetric_sched.cc
src/transport/coll_net.cc
src/transport/generic.cc
src/transport/net.cc
@@ -880,6 +913,7 @@ target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/device)
# Private include search paths for the rccl target, all rooted at ${HIPIFY_DIR}.
# PRIVATE means they apply only when compiling rccl itself and are not propagated
# to targets that link against it.
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/device/network/unpack)
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include)
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include/mlx5)
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include/nccl_device)
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include/ionic)
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/src/include/plugin)
# NOTE(review): gensrc presumably holds generated sources/headers -- confirm.
target_include_directories(rccl PRIVATE ${HIPIFY_DIR}/gensrc)
@@ -899,6 +933,7 @@ if(COLLTRACE)
target_compile_definitions(rccl PRIVATE ENABLE_COLLTRACE)
endif()
# When ENABLE_MSCCL_KERNEL is on, warn at configure time that MSCCL is deprecated
# and define COMPILE_MSCCL_KERNEL for the rccl target's own sources (PRIVATE, so the
# definition is not propagated to consumers of rccl).
if(ENABLE_MSCCL_KERNEL)
message(WARNING "MSCCL is deprecated and will be removed in a future version of RCCL.")
target_compile_definitions(rccl PRIVATE COMPILE_MSCCL_KERNEL)
endif()
if(ENABLE_MSCCLPP)
@@ -939,6 +974,7 @@ endif()
# NPKit flags
## May be better to move these to a separate file
if(ENABLE_NPKIT)
message(WARNING "NPKit is deprecated and will be removed in a future version of RCCL. Please consider using alternative profiling tools.")
target_compile_definitions(rccl PRIVATE ENABLE_NPKIT)
target_compile_definitions(rccl PRIVATE ENABLE_NPKIT_EVENT_TIME_SYNC_GPU)
target_compile_definitions(rccl PRIVATE ENABLE_NPKIT_EVENT_TIME_SYNC_CPU)