From 626dc50ab54f0dd97ed4596767875ea12bf0717f Mon Sep 17 00:00:00 2001 From: gilbertlee-amd <44450918+gilbertlee-amd@users.noreply.github.com> Date: Thu, 20 Mar 2025 18:10:01 -0600 Subject: [PATCH] Removing the experimental clique kernel files (#1610) --- CMakeLists.txt | 18 +- src/clique/AllReduceCliqueKernel.h | 75 ---- src/clique/CliqueCommon.h | 93 ----- src/clique/CliqueManager.cc | 577 ----------------------------- src/clique/CliqueManager.h | 127 ------- src/clique/CliqueShmNames.h | 37 -- src/clique/HandleCache.cc | 31 -- src/clique/HandleCache.h | 142 ------- src/clique/HandleShm.cc | 69 ---- src/clique/HandleShm.h | 53 --- src/clique/Hash.cc | 34 -- src/clique/Hash.h | 28 -- src/clique/MsgQueue.cc | 101 ----- src/clique/MsgQueue.h | 39 -- src/clique/SharedMemHelper.h | 43 --- src/clique/ShmObject.cc | 45 --- src/clique/ShmObject.h | 247 ------------ src/device/all_reduce.h | 2 - src/init.cc | 8 +- test/AllReduceTests.cpp | 30 +- tools/HelloRccl/runTest.sh | 13 +- tools/topo_expl/utils.cpp | 1 - 22 files changed, 9 insertions(+), 1804 deletions(-) delete mode 100644 src/clique/AllReduceCliqueKernel.h delete mode 100644 src/clique/CliqueCommon.h delete mode 100644 src/clique/CliqueManager.cc delete mode 100644 src/clique/CliqueManager.h delete mode 100644 src/clique/CliqueShmNames.h delete mode 100644 src/clique/HandleCache.cc delete mode 100644 src/clique/HandleCache.h delete mode 100644 src/clique/HandleShm.cc delete mode 100644 src/clique/HandleShm.h delete mode 100644 src/clique/Hash.cc delete mode 100644 src/clique/Hash.h delete mode 100644 src/clique/MsgQueue.cc delete mode 100644 src/clique/MsgQueue.h delete mode 100644 src/clique/SharedMemHelper.h delete mode 100644 src/clique/ShmObject.cc delete mode 100644 src/clique/ShmObject.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 177012a21d..a4197636b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -424,22 +424,6 @@ set(SRC_FILES src/proxy.cc src/register.cc src/transport.cc -# src/clique/AllReduceCliqueKernel.h -# src/clique/CliqueCommon.h -# src/clique/CliqueManager.cc -# src/clique/CliqueManager.h -# src/clique/CliqueShmNames.h -# src/clique/HandleCache.cc -# src/clique/HandleCache.h -# src/clique/HandleShm.cc -# src/clique/HandleShm.h -# src/clique/Hash.cc -# src/clique/Hash.h -# src/clique/MsgQueue.cc -# src/clique/MsgQueue.h -# src/clique/SharedMemHelper.h -# src/clique/ShmObject.cc -# src/clique/ShmObject.h src/device/all_gather.h src/device/all_reduce.h src/device/alltoall_pivot.h @@ -888,7 +872,7 @@ else() execute_process( COMMAND bash "-c" "free | grep -o '[[:digit:]]*' | head -1" OUTPUT_VARIABLE memory_max_string) - ## memory_max_string holds the free memory in KB + ## memory_max_string holds the free memory in KB if (${memory_max_string} MATCHES "^[0-9]+") math(EXPR memory_in_gb "${memory_max_string} / (1024 * 1024)") ## KB to GB conversion else() diff --git a/src/clique/AllReduceCliqueKernel.h b/src/clique/AllReduceCliqueKernel.h deleted file mode 100644 index 1e2e579c09..0000000000 --- a/src/clique/AllReduceCliqueKernel.h +++ /dev/null @@ -1,75 +0,0 @@ -/* -Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef ALLREDUCECLIQUEKERNEL_H -#define ALLREDUCECLIQUEKERNEL_H - -#include "CliqueCommon.h" -#include "devcomm.h" -#include "reduce_kernel.h" -#include "common_kernel.h" - -template -__device__ void AllReduceCliqueSplitKernel(struct ncclWorkElem* args) -{ - // Clique-specific kernel arguments - cliqueDevicePtrs_t* cliquePtrs = args->clique.ptrs; // Collection of all input/output pointers across ranks in clique - size_t const N = args->clique.count; // Total number of elements to reduce - int const nBlocks = args->clique.nChannels; // Total number of blocks assigned to this kernel (may be different than gridDim.x) - int const blockId = args->clique.bid; // 0-indexed blockIdx for this threadblock (may be different than blockIdx.x) - int const rank = args->comm->rank; // Current rank - - // Each threadblock works independently of others on a subsection of the input - // First split evently across ranks, while maintaining multiples of blocksize - size_t const perRankN = RoundUp((N + NUM_RANKS - 1) / NUM_RANKS, blockDim.x); - size_t const perBlockN = RoundUp((perRankN + nBlocks - 1) / nBlocks, blockDim.x); - size_t const currBlockStart = min((rank * nBlocks + blockId) * perBlockN, N); - size_t const currBlockStop = min(currBlockStart + perBlockN, N); - size_t const blockN = currBlockStop - currBlockStart; - - if (blockN > 0) - { - // Prepare input / output subarrays - T const** inputs = (T const**)cliquePtrs->inputs; - T** outputs = (T **)cliquePtrs->outputs; - T const* srcs[NUM_RANKS]; - T* dsts[NUM_RANKS]; - - #pragma unroll - for (int r = 0; r < NUM_RANKS; r++) - { - srcs[r] = inputs[r] + currBlockStart; - dsts[r] = outputs[r] + currBlockStart; - } - - // Perform the reduction - #define ALL_REDUCE_CLIQUE_UNROLL 1 - ReduceOrCopyMulti( - threadIdx.x, blockDim.x, nullptr, false, NUM_RANKS, srcs, NUM_RANKS, dsts, blockN); - } - - // Even if there was nothing for this GPU to do, it must participate in a barrier - // because other GPUs may be modifying this GPUs output buffer still - if (blockId == 0) WaitForBarrier(cliquePtrs->barrier); -} - -#endif diff --git a/src/clique/CliqueCommon.h b/src/clique/CliqueCommon.h deleted file mode 100644 index 13fd5a92ed..0000000000 --- a/src/clique/CliqueCommon.h +++ /dev/null @@ -1,93 +0,0 @@ -/* -Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef CLIQUE_COMMON_H -#define CLIQUE_COMMON_H - -#include "nccl.h" -#include - -#define MIN_CLIQUE_SIZE 2 -#define MAX_CLIQUE_SIZE 8 - -typedef struct -{ - int* globalCount; // Shared across GPUs - int* globalSense; // Shared across GPUs - int* localSense; // Local to this GPU -} gpuBarrier_t; - -typedef struct -{ - // Input/output pointers from participating ranks - void const* inputs[MAX_CLIQUE_SIZE]; - void* outputs[MAX_CLIQUE_SIZE]; - - // Barrier variable - gpuBarrier_t barrier; -} cliqueDevicePtrs_t; - -// Helper macro to launch an appropriate kernel by converting rank to a template argument -#define LAUNCH_CLIQUE_KERNEL(kernelname, FUNC, T, args) \ - { \ - switch (args->comm->nRanks){ \ - case 2: kernelname(args); break; \ - case 3: kernelname(args); break; \ - case 4: kernelname(args); break; \ - case 5: kernelname(args); break; \ - case 6: kernelname(args); break; \ - case 7: kernelname(args); break; \ - case 8: kernelname(args); break; \ - } \ - } - -// Multi-GPU (on same node) barrier. One thread per grid per GPU updates barrier / waits -template -__forceinline__ __device__ void WaitForBarrier(gpuBarrier_t const& barrier) -{ - if (threadIdx.x == 0) - { - // Sense inversion barrier - *barrier.localSense = 1 - *barrier.localSense; - int localSense = *barrier.localSense; - - int val = __atomic_add_fetch(barrier.globalCount, 1, __ATOMIC_SEQ_CST); - if (val == NUM_RANKS) - { - // Last arrival resets barrier - __atomic_store_n(barrier.globalCount, 0, __ATOMIC_SEQ_CST); - __atomic_store_n(barrier.globalSense, localSense, __ATOMIC_SEQ_CST); - } - else - { - // Wait for all ranks to reach barrier - while (__atomic_load_n(barrier.globalSense, __ATOMIC_SEQ_CST) != localSense); - } - } -} - -__forceinline__ __host__ __device__ size_t RoundUp(size_t X, size_t Y) -{ - return (X+Y-1)/Y * Y; -} - -#endif diff --git a/src/clique/CliqueManager.cc b/src/clique/CliqueManager.cc deleted file mode 100644 index 368d99a797..0000000000 --- a/src/clique/CliqueManager.cc +++ /dev/null @@ -1,577 +0,0 @@ -/* -Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CliqueManager.h" -#include "CliqueShmNames.h" -#include "MsgQueue.h" - -#include "nccl.h" -#include "core.h" - -#include "Hash.h" - -#include "AllReduceCliqueKernel.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -cliqueDevicePtrs_t CliqueManager::m_staticCliquePtrs[NCCL_MAX_OPS] = {}; -int CliqueManager::m_staticBarrierCount[NCCL_MAX_OPS*2] = {}; -int* CliqueManager::m_staticGpuBarrierMem = NULL; - -// Define some environment variables that affect clique-based kernels -RCCL_PARAM(EnableClique, "ENABLE_CLIQUE", 0); // Opt-in environment variable for clique-based kernels -RCCL_PARAM(AllReduceCliqueByteLimit, "CLIQUE_ALLREDUCE_BYTE_LIMIT", 0); // Max number of bytes to use clique-based kernels for all reduce (0 for auto-select) -RCCL_PARAM(AllReduceNumChannels, "CLIQUE_ALLREDUCE_NCHANNELS", 0); // Number of channels to use for all-reduce. (0 for auto-select) - -CliqueManager::CliqueManager(int const rank, - int const numRanks, - cliqueMode_t const cliqueMode) : - m_rank(rank), - m_numRanks(numRanks), - m_hash(0), - m_cliqueMode(cliqueMode), - m_opIndexHead(0), - m_opIndexTail(0), - m_init(false), - m_gcnArchName(char[256]), - m_allReduceByteLimit(0), - m_pinnedCliquePtrs(NULL), - m_gpuBarrierGlobalCount(NULL), - m_gpuBarrierGlobalSense(NULL), - m_gpuBarrierLocalSense(NULL), - m_cpuBarrierCount(NULL), - m_shmHandles(), - m_ipcHandleSendCache(), - m_ipcHandleRecvCache(), - m_sharedCpuMemory(), - m_sharedIpcHandle(), - m_fineGrainBarrierMem(NULL), - m_sharedBarrierCount(NULL) -{} - -CliqueManager::~CliqueManager() -{ - if (m_init) - { - CleanUp(); - } -} - -void CliqueManager::CleanUp() -{ - if (m_cliqueMode == CLIQUE_DISABLED) return; - - // Free variables that are shared between SINGLE_PROCESS / SINGLE_NODE - if (m_pinnedCliquePtrs) hipHostFree(m_pinnedCliquePtrs); - if (m_gpuBarrierLocalSense) hipFree(m_gpuBarrierLocalSense); - - if (m_cliqueMode == CLIQUE_SINGLE_NODE) - { - // Release caches - INFO(NCCL_COLL, "Rank %d deleting IPC caches", m_rank); - if (m_ipcHandleSendCache) delete m_ipcHandleSendCache; - if (m_ipcHandleRecvCache) delete m_ipcHandleRecvCache; - - // Close shared memory - m_shmHandles.Close(); - m_sharedCpuMemory.Close(); - m_sharedIpcHandle.Close(); - - if (m_fineGrainBarrierMem) - { - if (m_rank == 0) - hipFree(m_fineGrainBarrierMem); - else - hipIpcCloseMemHandle(m_fineGrainBarrierMem); - } - } - else if (m_cliqueMode == CLIQUE_SINGLE_PROCESS) - { - if (m_rank == 0 && m_staticGpuBarrierMem) - hipFree(m_staticGpuBarrierMem); - } - - m_init = false; -} - -ncclResult_t CliqueManager::Init(ncclUniqueId const* commId, int suffix) -{ - ncclResult_t res; - if (m_init) return ncclSuccess; - m_init = true; - - m_hash = djb2Hash(commId->internal); - if (m_cliqueMode == CLIQUE_DISABLED) - { - INFO(NCCL_INIT, "Clique kernels disabled"); - return ncclSuccess; - } - - // Check parameters - if (m_rank < 0 || m_rank >= m_numRanks) - { - WARN("Invalid rank specified. Expected 0 <= %d < %d for CliqueManager", m_rank, m_numRanks); - return ncclInvalidUsage; - } - - // For now, opt-into clique based kernels via RCCL_ENABLE_CLIQUE env var - if (!rcclParamEnableClique()) - { - INFO(NCCL_INIT, "Disabling clique-based kernels (did not find env var RCCL_ENABLE_CLIQUE)"); - m_cliqueMode = CLIQUE_DISABLED; - return ncclSuccess; - } - - // Allocate pinned CPU memory for holding clique pointers, which kernels will have access to - if (hipHostMalloc(&m_pinnedCliquePtrs, sizeof(cliqueDevicePtrs_t) * NCCL_MAX_OPS) != hipSuccess) - { - WARN("Unable to allocated pinned host memory for clique pointers. Disabling clique-based kernels"); - m_cliqueMode = CLIQUE_DISABLED; - m_init = true; - return ncclSuccess; - } - - std::string shmSuffix = std::to_string(m_hash) + "_" + std::to_string(suffix); - - // Allocate sense barrier variable on local GPU - NCCLCHECKGOTO(ncclCudaCalloc(&m_gpuBarrierLocalSense, NCCL_MAX_OPS * sizeof(int)), res, dropback); - - if (m_cliqueMode == CLIQUE_SINGLE_NODE) - { - // Initialize shared memory file for IPC handles (based on commId hash) - m_shmHandles = NcclIpcHandleShm(m_rank, m_numRanks, m_hash, NUM_HANDLES_PER_RANK, NCCL_MAX_OPS, shmSuffix); - NCCLCHECKGOTO(m_shmHandles.Open(), res, dropback); - - // Initialize IPC caches - m_ipcHandleSendCache = new NcclIpcHandleSendCache(m_numRanks * NUM_HANDLES_PER_RANK * NCCL_MAX_OPS); - m_ipcHandleRecvCache = new NcclIpcHandleRecvCache(m_numRanks * NUM_HANDLES_PER_RANK * NCCL_MAX_OPS, - 100, - hipIpcMemHandleHash, - hipIpcMemHandleEqual); - - // Initialize shared object for GPU barrier IPC handle - m_sharedIpcHandle = ShmObject(std::max(4096LU, sizeof(hipIpcMemHandle_t)), - CliqueShmNames["Barriers"] + shmSuffix, - m_rank, - m_numRanks, - m_hash); - NCCLCHECKGOTO(m_sharedIpcHandle.Open(), res, dropback); - - if (m_rank == 0) - { - hipIpcMemHandle_t handle; - // Allocate fine-grained device memory on rank 0 and get IPC handle for it - // Re-usable barrier consists of (globalCount / globalSense) pair of integers - NCCLCHECKGOTO(ncclCudaCalloc(&m_fineGrainBarrierMem, NCCL_MAX_OPS * 2 * sizeof(int), nullptr, hipDeviceMallocFinegrained), res, dropback); - if (hipIpcGetMemHandle(&handle, m_fineGrainBarrierMem) != hipSuccess) - { - WARN("Unable to get IPC handle for barrier memory"); - goto dropback; - } - // Write IPC handle to shared memory for other ranks to receive - *m_sharedIpcHandle.Get() = handle; - - // Set up global count/sense for first rank - m_gpuBarrierGlobalCount = &m_fineGrainBarrierMem[0]; - m_gpuBarrierGlobalSense = &m_fineGrainBarrierMem[NCCL_MAX_OPS]; - } - - // Initialize shared CPU memory to be used for barrier variables - m_sharedCpuMemory = ShmObject(NCCL_MAX_OPS * 2 * sizeof(int32_t), - CliqueShmNames["SharedCounters"] + shmSuffix, - m_rank, - m_numRanks, - m_hash); - NCCLCHECKGOTO(m_sharedCpuMemory.Open(), res, dropback); - - // Split up the shared CPU memory for barrier counters / global sense - m_cpuBarrierCount = m_sharedCpuMemory.Get(); - - // Initialize CPU barriers - if (m_rank == 0) - { - memset(m_cpuBarrierCount, 0, NCCL_MAX_OPS * 2 * sizeof(int32_t)); - } - } - else if (m_cliqueMode == CLIQUE_SINGLE_PROCESS) - { - m_cpuBarrierCount = &m_staticBarrierCount[0]; - - // First rank prepares fine-grained memory shared across ranks used for the two barrier variables - if (m_rank == 0) - { - NCCLCHECKGOTO(ncclCudaCalloc(&m_staticGpuBarrierMem, NCCL_MAX_OPS * 2 * sizeof(int), nullptr, hipDeviceMallocFinegrained), res, dropback); - // Prepare all barriers - for (int opIndex = 0; opIndex < NCCL_MAX_OPS; opIndex++) - { - m_staticCliquePtrs[opIndex].barrier.globalCount = &m_staticGpuBarrierMem[opIndex]; - m_staticCliquePtrs[opIndex].barrier.globalSense = &m_staticGpuBarrierMem[opIndex + NCCL_MAX_OPS];; - } - } - } - - // Figure out device arch for tuning - int deviceId; - CUDACHECK(hipGetDevice(&deviceId)); - hipDeviceProp_t devProp; - CUDACHECK(hipGetDeviceProperties(&devProp, deviceId)); - m_gcnArchName = devProp.gcnArchName; - - // Establish when to use clique-based kernels based on input size - SetByteLimits(); - - m_init = true; - INFO(NCCL_INIT, "Clique-based kernels enabled (mode %d) [GCN %d]", m_cliqueMode, m_gcnArchName); - return ncclSuccess; - -dropback: - // NOTE: This currently assumes that all ranks will fail the same way - // Additional support is required to handle cases when some processes succeed while others fail - WARN("Unable to initialize shared memory. Disabling clique-based kernels"); - CleanUp(); - m_cliqueMode = CLIQUE_DISABLED; - return ncclSuccess; -} - -void CliqueManager::SetByteLimits() -{ - m_allReduceByteLimit = rcclParamAllReduceCliqueByteLimit(); - if (m_allReduceByteLimit == 0) - { - if (IsArchMatch(m_gcnArchName, "gfx906")) - m_allReduceByteLimit = 16777216; - else if (IsArchMatch(m_gcnArchName, "gfx908")) - m_allReduceByteLimit = 8388608; - else - m_allReduceByteLimit = 16777216; - } -} - -bool CliqueManager::IsSupported(ncclFunc_t const coll, - size_t const count, - ncclDataType_t const datatype, - ncclRedOp_t const op) const -{ - if (m_cliqueMode == CLIQUE_DISABLED) return false; - - // Filter based on total input size for each collective type and ops sum/prod/min/max - size_t totalBytes = count * ncclTypeSize(datatype); - if (coll == ncclFuncAllReduce && (totalBytes <= m_allReduceByteLimit) && op < ncclAvg) return true; - return false; -} - -ncclResult_t CliqueManager::DeclarePointers(void const* inputPtr, void* outputPtr) -{ - // Do nothing if disabled - if (m_cliqueMode == CLIQUE_DISABLED) return ncclSuccess; - - if (!m_init) - { - WARN("CliqueManager must be initialized before use"); - return ncclInvalidUsage; - } - - // Add to queue of in-progress collectives - int32_t const opIndex = m_opIndexTail; - m_opIndexTail = (m_opIndexTail + 1) % NCCL_MAX_OPS; - - INFO(NCCL_COLL, "Rank %d declaring pointers for opIndex %d", m_rank, opIndex); - if (m_cliqueMode == CLIQUE_SINGLE_NODE) - { - // Get fine-grained device memory if not already done - if (m_fineGrainBarrierMem == NULL) - { - hipIpcMemHandle_t handle = *m_sharedIpcHandle.Get(); - CUDACHECK(hipIpcOpenMemHandle((void**)&m_fineGrainBarrierMem, handle, hipIpcMemLazyEnablePeerAccess)); - - // Prepare global count/sense barrier variables used the ipc-shared gpu device memory - m_gpuBarrierGlobalCount = &m_fineGrainBarrierMem[0]; - m_gpuBarrierGlobalSense = &m_fineGrainBarrierMem[NCCL_MAX_OPS]; - } - - std::vector> handles(NUM_HANDLES_PER_RANK); - - // Get IPC handles for input/output pointers from cache - NCCLCHECK(CheckCacheForPtr(const_cast(inputPtr), m_ipcHandleSendCache, m_rank, &handles[0])); - NCCLCHECK(CheckCacheForPtr(outputPtr , m_ipcHandleSendCache, m_rank, &handles[1])); - - // Prepare barrier pointers (done after the IpcOpenMemory) - m_pinnedCliquePtrs[opIndex].barrier.globalCount = &m_gpuBarrierGlobalCount[opIndex]; - m_pinnedCliquePtrs[opIndex].barrier.globalSense = &m_gpuBarrierGlobalSense[opIndex]; - m_pinnedCliquePtrs[opIndex].barrier.localSense = &m_gpuBarrierLocalSense[opIndex]; - - // Write IPC handles to shared memory for given rank / opCount - NCCLCHECK(m_shmHandles.WriteHandles(opIndex, handles)); - } - else if (m_cliqueMode == CLIQUE_SINGLE_PROCESS) - { - // Store this rank's input/output pointers into static member - m_staticCliquePtrs[opIndex].inputs[m_rank] = inputPtr; - m_staticCliquePtrs[opIndex].outputs[m_rank] = outputPtr; - } - - // Increment entry barrier counter - must not block - volatile int* entryCounter = &m_cpuBarrierCount[2 * opIndex]; - int entryVal = LOAD(entryCounter); - // Loop until successful atomic update to counter - bool done = false; - while (done == false) { - // Last rank resets exit barrier counter prior to incrementing entry count to numRanks - if (entryVal+1 == m_numRanks) - m_cpuBarrierCount[2 * opIndex + 1] = 0; - done = __sync_bool_compare_and_swap(entryCounter, entryVal, entryVal+1); - entryVal++; - } - return ncclSuccess; -} - -ncclResult_t CliqueManager::GetNumChannelsToUse(ncclFunc_t const coll, - size_t const count, - ncclDataType_t const datatype, - ncclRedOp_t const op, - int const totalNumChannels, - uint8_t* numChannelstoUse) const -{ - size_t const totalBytes = count * ncclTypeSize(datatype); - *numChannelstoUse = 1; - - if (coll == ncclFuncAllReduce) { - if (rcclParamAllReduceNumChannels() == 0) - { - // NOTE: These are currently based on collected data and not necessarily ideal for all hardware - int numChannels; - if (IsArchMatch(m_gcnArchName, "gfx906")) { - if (totalBytes <= 16384) numChannels = 1; - else numChannels = 2; - } else if (IsArchMatch(m_gcnArchName, "gfx908")) { - if (totalBytes <= 131072) numChannels = 2; - else if (totalBytes <= 524288) numChannels = 6; - else if (totalBytes <= 1048576) numChannels = 13; - else numChannels = 16; - } else if (IsArchMatch(m_gcnArchName, "gfx90a")) { - if (totalBytes <= 262144) numChannels = 4; - else numChannels = 8; - } else { - if (totalBytes <= 65536) numChannels = 1; - else if (totalBytes <= 262144) numChannels = 2; - else if (totalBytes <= 524288) numChannels = 4; - else if (totalBytes <= 2097152) numChannels = 8; - else numChannels = 11; - } - *numChannelstoUse = std::min(numChannels, totalNumChannels); - } - else - { - *numChannelstoUse = std::min((int)rcclParamAllReduceNumChannels(), totalNumChannels); - } - } - return ncclSuccess; -} - -ncclResult_t CliqueManager::WaitForPointers(ncclWorkElem* args) -{ - // Do nothing if disabled - if (m_cliqueMode == CLIQUE_DISABLED) return ncclSuccess; - - if (!m_init) - { - WARN("CliqueManager must be initialized before use"); - return ncclInvalidUsage; - } - - // Check that collective queue is not empty - if (m_opIndexHead == m_opIndexTail) - { - WARN("WaitForPointers must be called after DeclarePointers"); - return ncclInvalidUsage; - } - - // Pop first collective off queue - int32_t const opIndex = m_opIndexHead; - INFO(NCCL_COLL, "Rank %d waiting for pointers for opIndex %d", m_rank, opIndex); - - m_opIndexHead = (m_opIndexHead + 1) % NCCL_MAX_OPS; - args->clique.ptrs = &m_pinnedCliquePtrs[opIndex]; - - // Wait for all ranks to declare pointers for this opIndex - volatile int* entryCounter = (volatile int*)(&m_cpuBarrierCount[2 * opIndex]); - int entryVal = LOAD(entryCounter); - while (entryVal != m_numRanks) entryVal = LOAD(entryCounter); - - // Last rank to past barrier resets entry barrier - // NOTE: There is another GPU-barrier performed during the kernels therefore it should - // not be possible for any rank to modify entry count prior to being reset - volatile int* exitCounter = &m_cpuBarrierCount[2 * opIndex + 1]; - int exitVal = LOAD(exitCounter); - // Loop until successful atomic update to counter - bool done = false; - while (done == false) { - // Last rank resets entry counter - if (exitVal+1 == m_numRanks) - m_cpuBarrierCount[2 * opIndex] = 0; - done = __sync_bool_compare_and_swap(exitCounter, exitVal, exitVal+1); - exitVal++; - } - INFO(NCCL_COLL, "Rank %d past opIndex barrier %d", m_rank, opIndex); - - // Collect pointers - if (m_cliqueMode == CLIQUE_SINGLE_NODE) - { - int numHandles = m_numRanks * NUM_HANDLES_PER_RANK; - std::vector> handles(numHandles); - - // Collect the ready handles from shared memory and convert them to device pointers - NCCLCHECK(m_shmHandles.ReadHandles(opIndex, handles)); - for (int i = 0; i < m_numRanks; i++) - { - void *input; - NCCLCHECK(CheckCacheForHandle(handles[i * NUM_HANDLES_PER_RANK], - m_ipcHandleRecvCache, &input)); - m_pinnedCliquePtrs[opIndex].inputs[i] = const_cast(input); - - NCCLCHECK(CheckCacheForHandle(handles[(i * NUM_HANDLES_PER_RANK) + 1], - m_ipcHandleRecvCache, &m_pinnedCliquePtrs[opIndex].outputs[i])); - } - } - else if (m_cliqueMode == CLIQUE_SINGLE_PROCESS) - { - // Copy from static memory to pinned host memory and set local sense - memcpy(&m_pinnedCliquePtrs[opIndex], &m_staticCliquePtrs[opIndex], sizeof(cliqueDevicePtrs_t)); - m_pinnedCliquePtrs[opIndex].barrier.localSense = &m_gpuBarrierLocalSense[opIndex]; - } - return ncclSuccess; -} - -ncclResult_t CliqueManager::CheckCacheForPtr(void* devPtr, - NcclIpcHandleSendCache* cache, - int rank, - std::pair* handlePair) -{ - // Get the base address for this device allocation - hsa_status_t status; - hsa_amd_pointer_info_t info; - info.size = sizeof(hsa_amd_pointer_info_t); - status = hsa_amd_pointer_info(devPtr, &info, NULL, NULL, NULL); - if (status != HSA_STATUS_SUCCESS) { - WARN("Uanble to get pointer information for %p", devPtr); - return ncclInvalidArgument; - } - - // Compute the offset between the device addres and the base address - uint64_t baseAddr = (uint64_t)info.agentBaseAddress; - uint64_t realAddr = (uint64_t)devPtr; - handlePair->second = realAddr - baseAddr; - - CUDACHECK(hipIpcGetMemHandle(&handlePair->first, (void*)baseAddr)); - - /* Disabling cache until proper deallocation methods are available - // IPC handles are only supported for base address pointers - NcclIpcHandleSendCache::iterator it = cache->find(baseAddr); - - if (it == cache->end()) - { - INFO(NCCL_COLL, "Rank %d searching IPC handle cache for %p (not found)", rank, devPtr); - CUDACHECK(hipIpcGetMemHandle(&handlePair->first, (void*)baseAddr)); - cache->insert(baseAddr, handlePair->first); - } - else - { - INFO(NCCL_COLL, "Rank %d searching IPC handle cache for %p (found!)", rank, devPtr); - handlePair->first = (it->second).first; - } - */ - return ncclSuccess; -} - -ncclResult_t CliqueManager::CheckCacheForHandle(std::pair const& handlePair, - NcclIpcHandleRecvCache* cache, - void** ptr) -{ - // Until proper deallocation hooks are implemented, receive cache can not be used - // Handles will need to be extract each time - void* baseAddr; - CUDACHECK(hipIpcOpenMemHandle(&baseAddr, handlePair.first, hipIpcMemLazyEnablePeerAccess)); - - /* - NcclIpcHandleRecvCache::iterator it = cache->find(handlePair.first); - - // Get base address pointer from cache if it exists - - if (it == cache->end()) - { - CUDACHECK(hipIpcOpenMemHandle(&baseAddr, handlePair.first, hipIpcMemLazyEnablePeerAccess)); - cache->insert(handlePair.first, baseAddr); - } - else - { - baseAddr = (it->second).first; - } - */ - - // Modify base address pointer with offset - uint64_t realAddr = (uint64_t)baseAddr + handlePair.second; - *ptr = (void*)realAddr; - return ncclSuccess; -} - -ncclResult_t CliqueManager::BootstrapRootInit(int pid, unsigned long hash) -{ - if (rcclParamEnableClique()) - { - for (auto it = CliqueShmNames.begin(); it != CliqueShmNames.end(); it++) - { - mqd_t mq_desc; - std::string msgQueueName = it->second + std::to_string(hash) + "_" + std::to_string(pid); - NCCLCHECK(MsgQueueGetId(msgQueueName, true, mq_desc)); - NCCLCHECK(MsgQueueClose(msgQueueName, mq_desc, true)); - } - - std::string shmDir = "/dev/shm/"; - - for (auto it = CliqueShmNames.begin(); it != CliqueShmNames.end(); it++) - { - struct stat fileStatus; - std::string shmFileName = it->second + std::to_string(hash) + "_" + std::to_string(pid); - std::string shmFullPath = shmDir + shmFileName; - - // Check if shm file already exists; if so, unlink it - if (stat(shmFullPath.c_str(), &fileStatus) == 0) - { - NCCLCHECK(shmUnlink(shmFileName.c_str())); - } - } - } - else - { - INFO(NCCL_INIT, "Not performing bootstrap root for clique kernels as clique mode not enabled."); - } - return ncclSuccess; -} diff --git a/src/clique/CliqueManager.h b/src/clique/CliqueManager.h deleted file mode 100644 index 33264759e7..0000000000 --- a/src/clique/CliqueManager.h +++ /dev/null @@ -1,127 +0,0 @@ -/* -Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef RCCL_CLIQUE_MANAGER_HPP_ -#define RCCL_CLIQUE_MANAGER_HPP_ - -#include -#include - -#include "nccl.h" -#include "devcomm.h" -#include "CliqueCommon.h" -#include "HandleCache.h" -#include "HandleShm.h" - -#define NUM_HANDLES_PER_RANK 2 - -class CliqueManager -{ -public: - typedef enum - { - CLIQUE_DISABLED = 0, - CLIQUE_SINGLE_PROCESS = 1, - CLIQUE_SINGLE_NODE = 2 - } cliqueMode_t; - - CliqueManager(int const rank, int const numRanks, cliqueMode_t const cliqueMode); - - ~CliqueManager(); - - void CleanUp(); - - ncclResult_t Init(ncclUniqueId const* commId, int suffix); - - void SetByteLimits(); - - // Returns true if the collective is supported via a clique-based kernel - bool IsSupported(ncclFunc_t const coll, - size_t const count, - ncclDataType_t const datatype, - ncclRedOp_t const op) const; - - // Provide the pointers to be exchanged across the clique for the given rank / opCount - ncclResult_t DeclarePointers(void const* inputPtr, void* outputPtr); - - // Determine the number of channels / CUs to use for this call - ncclResult_t GetNumChannelsToUse(ncclFunc_t const coll, - size_t const count, - ncclDataType_t const datatype, - ncclRedOp_t const op, - int const totalNumChannels, - uint8_t* numChannelstoUse) const; - - // Blocking call that only returns the in-progress clique pointers are ready - // This needs to be called in same order as DeclarePointers - ncclResult_t WaitForPointers(ncclWorkElem* args); - - // Prepares shared memory files upon initialization - static ncclResult_t BootstrapRootInit(int pid, unsigned long hash); - -protected: - ncclResult_t CheckCacheForPtr(void* devPtr, - NcclIpcHandleSendCache* cache, - int rank, - std::pair* handlePair); - - ncclResult_t CheckCacheForHandle(std::pair const& handlePair, - NcclIpcHandleRecvCache* cache, - void** ptr); - - int m_rank; // Associated rank - int m_numRanks; // Total number of ranks - unsigned long m_hash; // Hash used for identifying message queues & shared memory - cliqueMode_t m_cliqueMode; // Clique mode (off/single process/single node) - int32_t m_opIndexHead; // Track start of outstanding requests - int32_t m_opIndexTail; // Track end of outstanding requests - bool m_init; // Whether CliqueManager has been initialized - char[256] m_gcnArchName; // Device GCN arch value - size_t m_allReduceByteLimit; // Byte limit for AllReduce - cliqueDevicePtrs_t* m_pinnedCliquePtrs; // Pinned-host-memory (device accessible) containing device pointers - int* m_gpuBarrierGlobalCount; // Part of GPU barrier (count variable shared across ranks) - int* m_gpuBarrierGlobalSense; // Part of GPU barrier (reset variable shared across ranks) - int* m_gpuBarrierLocalSense; // Part of GPU barrier (reset variable local to this rank) - int* m_cpuBarrierCount; // Points to either m_sharedBarrierCount or m_staticBarrierCount - - // IPC-related (CLIQUE_SINGLE_NODE) - NcclIpcHandleShm m_shmHandles; // Used to exchange IPC handles between ranks - NcclIpcHandleSendCache* m_ipcHandleSendCache; // Caches pointers to IPC handles (to send to other processes) - NcclIpcHandleRecvCache* m_ipcHandleRecvCache; // Caches IPC handles to pointers (received from other processes) - ShmObject m_sharedCpuMemory; // Used to pass shared memory used for CPU barrier - ShmObject m_sharedIpcHandle; // Used to pass fine-grained device memory buffer IPC handle - int* m_fineGrainBarrierMem; // Fine-grained GPU memory barrier (allocated only on 1st rank, shared on others) - int* m_sharedBarrierCount; // Part of CPU barrier (count variable shared across ranks) - - // Single-process (CLIQUE_SINGLE_PROCESS) - static cliqueDevicePtrs_t m_staticCliquePtrs[NCCL_MAX_OPS]; // Use shared static memory to exchange pointer info - static int m_staticBarrierCount[2*NCCL_MAX_OPS]; // Part of CPU barrier (count variable shared across ranks) - static int* m_staticGpuBarrierMem; // Static storage backing for fine-grained gpu barrier -}; - -// For use in bootstrapping code -struct bootstrapRootStruct { - int listenFd; - unsigned long hash; -}; - -#endif diff --git a/src/clique/CliqueShmNames.h b/src/clique/CliqueShmNames.h deleted file mode 100644 index 577af8be8d..0000000000 --- a/src/clique/CliqueShmNames.h +++ /dev/null @@ -1,37 +0,0 @@ -/* -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef NCCL_CLIQUE_SHM_NAMES_H_ -#define NCCL_CLIQUE_SHM_NAMES_H_ - -#include -#include - -static std::map CliqueShmNames = -{ - {"SharedCounters", "RcclCounters" }, - {"Mutexes" , "RcclMutexes" }, - {"IpcHandles" , "RcclIpcHandles"}, - {"Barriers" , "RcclBarriers" } -}; - -#endif diff --git a/src/clique/HandleCache.cc b/src/clique/HandleCache.cc deleted file mode 100644 index 8fe4b6ab4b..0000000000 --- a/src/clique/HandleCache.cc +++ /dev/null @@ -1,31 +0,0 @@ -/* -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "HandleCache.h" - -#include "Hash.h" - -// djb2 hash function for hashing char array in hipIpcMemHandle_t -unsigned long hipIpcMemHandleHash(const hipIpcMemHandle_t& handle) -{ - return djb2Hash(handle.reserved); -} diff --git a/src/clique/HandleCache.h b/src/clique/HandleCache.h deleted file mode 100644 index 978e19d41f..0000000000 --- a/src/clique/HandleCache.h +++ /dev/null @@ -1,142 +0,0 @@ -/* -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef NCCL_HANDLE_CACHE_H_ -#define NCCL_HANDLE_CACHE_H_ - -#include -#include -#include - -#include "core.h" - -//#include "llvm/ADT/DenseMap.h" - -template < - class Key, - class Value, - class Hash, - class KeyEqual, - class Allocator -> -class NcclIpcHandleCache -{ -public: - typedef std::pair::iterator> NcclIpcHandleCacheValueType; - typedef std::unordered_map LRUCache; - using iterator = typename LRUCache::iterator; - NcclIpcHandleCache(size_t size, - size_t bucket_count = 100, - const Hash& hash = Hash(), - const KeyEqual& eql = KeyEqual(), - const Allocator& alloc = Allocator() ) : m_cache(bucket_count, hash, eql, alloc) - { - m_capacity = size; - } - - ~NcclIpcHandleCache() - { - m_lruHistory.clear(); - m_cache.clear(); - } - - iterator begin() - { - return m_cache.begin(); - } - - iterator end() - { - return m_cache.end(); - } - - iterator find(const Key& key) - { - iterator it = m_cache.find(key); - if (it != m_cache.end()) - { - updateHistory(it); - } - - return it; - } - - std::pair insert(const Key& key, const Value& value) - { - if (m_cache.size() == m_capacity) - { - // remove entry - pop(); - } - - typename LRUCache::iterator it = m_cache.find(key); - bool inserted; - if (it == m_cache.end()) - { - typename std::list::iterator it = m_lruHistory.insert(m_lruHistory.end(), key); - m_cache.insert(std::make_pair(key, std::make_pair(value, it))); - inserted = true; - } - else - { - inserted = false; - } - - return std::pair(it, inserted); - } - -private: - void pop() - { - typename LRUCache::iterator it = m_cache.find(m_lruHistory.front()); - m_cache.erase(it); - m_lruHistory.pop_front(); - } - - void updateHistory(const iterator& it) - { - if (!m_lruHistory.empty()) - { - m_lruHistory.splice(m_lruHistory.end(), m_lruHistory, (it->second).second); - } - } - size_t m_capacity; - std::list m_lruHistory; - LRUCache m_cache; -}; - -// djb2 hash function for hashing char array in hipIpcMemHandle_t -unsigned long hipIpcMemHandleHash(const hipIpcMemHandle_t& handle); - -// equality function required for unordered_map -auto hipIpcMemHandleEqual = [](const hipIpcMemHandle_t& l, const hipIpcMemHandle_t& r) -{ - return memcmp(l.reserved, r.reserved, sizeof(l.reserved)) == 0; -}; - -//typedef llvm::DenseMap SendCache; -//typedef llvm::DenseMap RecvCache; - -typedef NcclIpcHandleCache, std::equal_to, std::allocator< std::pair::iterator>>>> NcclIpcHandleSendCache; -typedef NcclIpcHandleCache::iterator>>>> NcclIpcHandleRecvCache; - -#endif diff --git a/src/clique/HandleShm.cc b/src/clique/HandleShm.cc deleted file mode 100644 index 4e73fb4e3d..0000000000 --- a/src/clique/HandleShm.cc +++ /dev/null @@ -1,69 +0,0 @@ -/* -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include - -#include "HandleShm.h" -#include "CliqueShmNames.h" -#include "core.h" -#include "Hash.h" -#include "shm.h" - -NcclIpcHandleShm::NcclIpcHandleShm(int rank, int numRanks, int projid, int numHandlesPerRank, int capacity, std::string const& suffix) : - ShmObject>(numRanks * numHandlesPerRank * capacity * sizeof(std::pair), - CliqueShmNames["IpcHandles"] + suffix, - rank, - numRanks, - projid), - m_numHandlesPerRank(numHandlesPerRank), - m_numHandlesPerOpCount(numRanks * numHandlesPerRank) -{ -} - -NcclIpcHandleShm::NcclIpcHandleShm() : - m_numHandlesPerRank(0), - m_numHandlesPerOpCount(0) -{ -} - -NcclIpcHandleShm::~NcclIpcHandleShm() -{ -} - -ncclResult_t NcclIpcHandleShm::Open() -{ - return ShmObject::Open(); -} - -ncclResult_t NcclIpcHandleShm::WriteHandles(uint64_t opCount, std::vector> const& sendHandles) -{ - size_t idx = (opCount * m_numHandlesPerOpCount) + (m_rank * m_numHandlesPerRank); - memcpy(m_shmPtr + idx, sendHandles.data(), sizeof(std::pair) * m_numHandlesPerRank); - return ncclSuccess; -} - -ncclResult_t NcclIpcHandleShm::ReadHandles(uint64_t opCount, std::vector>& recvHandles) -{ - size_t idx = opCount * m_numHandlesPerOpCount; - memcpy(recvHandles.data(), m_shmPtr + idx, m_numHandlesPerOpCount * sizeof(std::pair)); - return ncclSuccess; -} diff --git a/src/clique/HandleShm.h b/src/clique/HandleShm.h deleted file mode 100644 index c3f47cb458..0000000000 --- a/src/clique/HandleShm.h +++ /dev/null @@ -1,53 +0,0 @@ -/* -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef NCCL_IPC_HANDLE_SHM_H_ -#define NCCL_IPC_HANDLE_SHM_H_ - -#include -#include -#include - -#include "nccl.h" -#include "ShmObject.h" - -class NcclIpcHandleShm : public ShmObject> -{ -public: - NcclIpcHandleShm(int rank, int numRanks, int projid, int numHandlesPerRank, int capacity, std::string const& suffix); - - NcclIpcHandleShm(); - - ~NcclIpcHandleShm(); - - ncclResult_t Open(); - - ncclResult_t WriteHandles(uint64_t opCount, std::vector> const& sendHandles); - - ncclResult_t ReadHandles(uint64_t opCount, std::vector>& recvHandles); - -private: - int m_numHandlesPerRank; - int m_numHandlesPerOpCount; -}; - -#endif diff --git a/src/clique/Hash.cc b/src/clique/Hash.cc deleted file mode 100644 index 249c66b329..0000000000 --- a/src/clique/Hash.cc +++ /dev/null @@ -1,34 +0,0 @@ -/* -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "Hash.h" - -unsigned long djb2Hash(const char* data) -{ - unsigned long hash = 5381; - int c; - - while ((c = *(data)++)) - hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ - - return hash; -} diff --git a/src/clique/Hash.h b/src/clique/Hash.h deleted file mode 100644 index e6cbbaa569..0000000000 --- a/src/clique/Hash.h +++ /dev/null @@ -1,28 +0,0 @@ -/* -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef NCCL_HASH_H_ -#define NCCL_HASH_H_ - -unsigned long djb2Hash(const char* data); - -#endif diff --git a/src/clique/MsgQueue.cc b/src/clique/MsgQueue.cc deleted file mode 100644 index 22cfbbfb20..0000000000 --- a/src/clique/MsgQueue.cc +++ /dev/null @@ -1,101 +0,0 @@ -/* -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "MsgQueue.h" -#include - -#define MSG_QUEUE_PERM S_IRUSR | S_IWUSR -#define MSG_QUEUE_MODE O_RDWR -#define MSG_SIZE 1 -#define MSG_QUEUE_TIMEOUT 60 - -ncclResult_t MsgQueueGetId(std::string const& name, bool exclusive, mqd_t& mq_desc) -{ - int flag = (exclusive == true ? O_CREAT | O_EXCL : O_CREAT); - struct mq_attr attr; - attr.mq_maxmsg = 10; - attr.mq_msgsize = MSG_SIZE; - attr.mq_flags = 0; - - std::string mq_name = "/" + name; - mq_desc = mq_open(mq_name.c_str(), flag | MSG_QUEUE_MODE, MSG_QUEUE_PERM, &attr); - - // Check if we're trying to create message queue and it already exists; if so, delete existing queue - if (mq_desc == -1 && exclusive == true && errno == EBUSY) - { - NCCLCHECK(MsgQueueClose(name, mq_desc, true)); - SYSCHECKVAL(mq_open(mq_name.c_str(), flag | MSG_QUEUE_MODE, MSG_QUEUE_PERM, attr), "mq_open", mq_desc); - } - else if (mq_desc == -1) - { - WARN("Call to MsgQueueGetId failed : %s", strerror(errno)); - return ncclSystemError; - } - return ncclSuccess; -} - -ncclResult_t MsgQueueSend(mqd_t const& mq_desc, const char* msgp, size_t msgsz) -{ - SYSCHECK(mq_send(mq_desc, msgp, msgsz, 0), "mq_send"); - return ncclSuccess; -} - -ncclResult_t MsgQueueRecv(mqd_t const& mq_desc, char* msgp, size_t msgsz) -{ - SYSCHECK(mq_receive(mq_desc, msgp, msgsz, NULL), "mq_receive"); - return ncclSuccess; -} - -ncclResult_t MsgQueueWaitUntilEmpty(mqd_t const& mq_desc) -{ - mq_attr attr; - mq_getattr(mq_desc, &attr); - - auto start = std::chrono::steady_clock::now(); - while(attr.mq_curmsgs > 0) - { - SYSCHECK(mq_getattr(mq_desc, &attr), "mq_getattr"); - if(std::chrono::steady_clock::now() - start > std::chrono::seconds(MSG_QUEUE_TIMEOUT)) - { - WARN("Message Queue timed out waiting for all ranks to receive messages."); - return ncclSystemError; - } - } - return ncclSuccess; -} - -ncclResult_t MsgQueueClose(std::string const& name, mqd_t& mq_desc, bool unlink) -{ - if (unlink) - { - NCCLCHECK(MsgQueueUnlink(name)); - } - SYSCHECK(mq_close(mq_desc), "mq_close"); - return ncclSuccess; -} - -ncclResult_t MsgQueueUnlink(std::string const& name) -{ - std::string mq_name = "/" + name; - SYSCHECK(mq_unlink(mq_name.c_str()), "mq_unlink"); - return ncclSuccess; -} diff --git a/src/clique/MsgQueue.h b/src/clique/MsgQueue.h deleted file mode 100644 index 74513d9125..0000000000 --- a/src/clique/MsgQueue.h +++ /dev/null @@ -1,39 +0,0 @@ -/* -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef RCCL_MSG_QUEUE_HPP_ -#define RCCL_MSG_QUEUE_HPP_ - -#include -#include - -#include "nccl.h" -#include "core.h" - -ncclResult_t MsgQueueGetId(std::string const& name, bool exclusive, mqd_t& mq_desc); -ncclResult_t MsgQueueSend(mqd_t const& mq_desc, const char* msgp, size_t msgsz); -ncclResult_t MsgQueueRecv(mqd_t const& mq_desc, char* msgp, size_t msgsz); -ncclResult_t MsgQueueWaitUntilEmpty(mqd_t const& mq_desc); -ncclResult_t MsgQueueClose(std::string const& name, mqd_t& mq_desc, bool unlink); -ncclResult_t MsgQueueUnlink(std::string const& name); - -#endif diff --git a/src/clique/SharedMemHelper.h b/src/clique/SharedMemHelper.h deleted file mode 100644 index 6fc269e7fc..0000000000 --- a/src/clique/SharedMemHelper.h +++ /dev/null @@ -1,43 +0,0 @@ -/* -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef SHAREDMEMHELPER_H -#define SHAREDMEMHELPER_H - - -class SharedMemHelper -{ -public: - SharedMemHelper(int rank, int numRanks, int numEntries); - - ncclStatus_t Init(std::string const& baseFilename); - - ncclStatus_t - - -protected: - bool m_initialized; - int m_rank; - int m_numRanks; -}; - -#endif diff --git a/src/clique/ShmObject.cc b/src/clique/ShmObject.cc deleted file mode 100644 index 353779e41d..0000000000 --- a/src/clique/ShmObject.cc +++ /dev/null @@ -1,45 +0,0 @@ -/* -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "ShmObject.h" -#include - -// Template specializations for sem_t objects which require additional initialization -template<> -ncclResult_t ShmObject::Close() -{ - size_t numMutexes = m_shmSize / sizeof(sem_t); - - for (size_t i = 0; i < numMutexes; i++) - { - sem_destroy(static_cast(&m_shmPtr[i])); - } - - int retVal = shm_unlink(m_shmName.c_str()); - if (retVal == -1 && errno != ENOENT) - { - WARN("Call to shm_unlink in ShmObject failed : %s", strerror(errno)); - return ncclSystemError; - } - - return ncclSuccess; -} diff --git a/src/clique/ShmObject.h b/src/clique/ShmObject.h deleted file mode 100644 index 4663084f91..0000000000 --- a/src/clique/ShmObject.h +++ /dev/null @@ -1,247 +0,0 @@ -/* -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef NCCL_SHM_OBJECT_H_ -#define NCCL_SHM_OBJECT_H_ - -#include -#include -#include -#include -#include -#include - -#include "MsgQueue.h" -#include "nccl.h" -#include "core.h" -#include "shm.h" - -// ShmObject abstracts away the nitty-gritty when multiple processes need to handle opening a shared -// memory object at the same time. - -static ncclResult_t shmSetupExclusive(const char* shmname, const int shmsize, int* fd, void** ptr, int create) { - *fd = shm_open(shmname, O_CREAT | O_RDWR | O_EXCL, S_IRUSR | S_IWUSR); - if (*fd == -1) return ncclSystemError; - if (create) SYSCHECK(shm_allocate(*fd, shmsize), "posix_fallocate"); - SYSCHECK(shm_map(*fd, shmsize, ptr), "mmap"); - close(*fd); - *fd = -1; - if (create) memset(*ptr, 0, shmsize); - return ncclSuccess; -} - -template -class ShmObject -{ -public: - ShmObject(size_t size, std::string const& fileName, int rank, int numRanks, int projid) : - m_shmSize(size), - m_shmName(fileName), - m_rank(rank), - m_numRanks(numRanks), - m_projid(projid), - m_alloc(false), - m_shmPtr(nullptr) {} - - ShmObject() : - m_shmSize(0), - m_shmName(""), - m_rank(0), - m_numRanks(0), - m_projid(0), - m_alloc(false), - m_shmPtr(nullptr) {} - - ~ShmObject() {} - - ncclResult_t Open(); - - ncclResult_t Close() - { - if (m_alloc) - { - SYSCHECK(munmap(m_shmPtr, m_shmSize), "munmap"); - } - return ncclSuccess; - } - - T*& Get() - { - return m_shmPtr; - } -protected: - ncclResult_t BroadcastMessage(mqd_t& mq_desc, bool pass) const - { - char msg_text[1]; - msg_text[0] = (pass == 0 ? 'F': 'P'); - for (int rank = 0; rank < m_numRanks; rank++) - { - if (rank == m_rank) continue; - NCCLCHECK(MsgQueueSend(mq_desc, &msg_text[0], sizeof(msg_text))); - } - return ncclSuccess; - } - - ncclResult_t BroadcastAndCloseMessageQueue(mqd_t& mq_desc, bool pass) - { - ncclResult_t res; - NCCLCHECKGOTO(BroadcastMessage(mq_desc, pass), res, dropback); - NCCLCHECKGOTO(MsgQueueWaitUntilEmpty(mq_desc), res, dropback); - NCCLCHECK(MsgQueueClose(m_shmName, mq_desc, true)); - return ncclSuccess; - -dropback: - WARN("Root rank unable to broadcast across message queue. Closing message queue."); - NCCLCHECK(MsgQueueClose(m_shmName, mq_desc, true)); - return ncclSystemError; - } - - // tag for dispatch - template - struct OpenTag{}; - - static ncclResult_t InitIfSemaphore(OpenTag tag); - ncclResult_t InitIfSemaphore(OpenTag tag); - static ncclResult_t InitIfSemaphore(OpenTag tag); - ncclResult_t InitIfSemaphore(OpenTag tag); - static ncclResult_t InitIfSemaphore(OpenTag> tag); - - size_t m_shmSize; - std::string m_shmName; - int m_rank; - int m_numRanks; - int m_projid; - bool m_alloc; - T* m_shmPtr; -}; - -template -ncclResult_t ShmObject::Open() -{ - mqd_t mq_desc; - if (m_alloc == false) - { - int shmFd; - INFO(NCCL_INIT, "Rank %d Initializing message queue for %s\n", m_rank, m_shmName.c_str()); - - NCCLCHECK(MsgQueueGetId(m_shmName, false, mq_desc)); - if (m_rank == 0) - { - ncclResult_t resultSetup = shmSetupExclusive(m_shmName.c_str(), m_shmSize, &shmFd, (void**)&m_shmPtr, 1); - ncclResult_t resultSemInit = InitIfSemaphore(OpenTag{}); - if ((resultSetup != ncclSuccess && errno != EEXIST) || (resultSemInit != ncclSuccess)) - { - NCCLCHECK(BroadcastAndCloseMessageQueue(mq_desc, false)); - WARN("Call to ShmObject::Open in root rank failed : %s", strerror(errno)); - if (resultSetup == ncclSuccess) - { - Close(); - } - return ncclSystemError; - } - ncclResult_t result; - - // Broadcast two sets of messages: one set is consumed by the other ranks to acknowledge root rank - // has successfully opened shared memory; second set is consumed by the other ranks to indicate - // that they have successfully opened shared memory and root rank can now unlink shared memory - NCCLCHECK(BroadcastMessage(mq_desc, true)); - NCCLCHECK(BroadcastAndCloseMessageQueue(mq_desc, true)); - - int retVal = shm_unlink(m_shmName.c_str()); - if (retVal == -1 && errno != ENOENT) - { - WARN("Call to shm_unlink in ShmObject failed : %s", strerror(errno)); - return ncclSystemError; - } - } - else - { - char msg_text[1]; - ncclResult_t res; - NCCLCHECKGOTO(MsgQueueRecv(mq_desc, &msg_text[0], sizeof(msg_text)), res, dropback); - - if (msg_text[0] == 'P') - { - NCCLCHECK(shmSetup(m_shmName.c_str(), m_shmSize, &shmFd, (void**)&m_shmPtr, 0)); - NCCLCHECKGOTO(MsgQueueRecv(mq_desc, &msg_text[0], sizeof(msg_text)), res, dropback); - NCCLCHECK(MsgQueueClose(m_shmName, mq_desc, false)); - } - else - { - NCCLCHECK(MsgQueueClose(m_shmName, mq_desc, false)); - WARN("Call to shm_open from non-root rank in ShmObject failed : %s", strerror(errno)); - return ncclSystemError; - } - } - m_alloc = true; - } - else - { - WARN("Cannot allocate ShmObject twice.\n"); - return ncclInvalidUsage; - } - return ncclSuccess; - -dropback: - WARN("Rank %d failed ShmObject::Open(). Closing message queue.", m_rank); - NCCLCHECK(MsgQueueClose(m_shmName, mq_desc, false)); - SYSCHECK(shm_unlink(m_shmName.c_str()), "shm_unlink"); - NCCLCHECK(Close()); - return ncclSystemError; -} - -template -ncclResult_t ShmObject::InitIfSemaphore(OpenTag tag) -{ - return ncclSuccess; -} - -template -ncclResult_t ShmObject::InitIfSemaphore(OpenTag tag) -{ - return ncclSuccess; -} - -template -ncclResult_t ShmObject::InitIfSemaphore(OpenTag tag) -{ - return ncclSuccess; -} - -template -ncclResult_t ShmObject::InitIfSemaphore(OpenTag> tag) -{ - return ncclSuccess; -} - -template -ncclResult_t ShmObject::InitIfSemaphore(OpenTag tag) -{ - size_t numMutexes = m_shmSize / sizeof(sem_t); - - for (size_t i = 0; i < numMutexes; i++) - { - SYSCHECK(sem_init(static_cast(&m_shmPtr[i]), 1, 1), "sem_init"); - } - return ncclSuccess; -} -#endif diff --git a/src/device/all_reduce.h b/src/device/all_reduce.h index 6a394fb2bd..d938298a59 100644 --- a/src/device/all_reduce.h +++ b/src/device/all_reduce.h @@ -1059,7 +1059,6 @@ template struct RunWorkColl { __device__ __forceinline__ void run(int tid, int nthreads, struct ncclDevWorkColl* work) { runRing(tid, nthreads, work); - //LAUNCH_CLIQUE_KERNEL(AllReduceCliqueSplitKernel, RedOp, T, work); } }; @@ -1067,6 +1066,5 @@ template struct RunWorkColl { __device__ __forceinline__ void run(int tid, int nthreads, struct ncclDevWorkColl* work) { runTreeSplit(tid, nthreads, work); - //LAUNCH_CLIQUE_KERNEL(AllReduceCliqueSplitKernel, RedOp, T, worrk); } }; diff --git a/src/init.cc b/src/init.cc index d0427f187d..8b554a3d17 100644 --- a/src/init.cc +++ b/src/init.cc @@ -43,7 +43,6 @@ #include "git_version.h" #include "rccl_vars.h" #include "hip_rocm_version_info.h" -//#include "clique/CliqueManager.h" //#include #ifdef ENABLE_MSCCLPP #include "mscclpp/mscclpp_nccl.h" @@ -485,7 +484,6 @@ static ncclResult_t commFree(ncclComm_t comm) { return ncclSuccess; } -RCCL_PARAM(CliqueIgnoreTopo, "CLIQUE_IGNORE_TOPO", 0); RCCL_PARAM(P2pNetDisable, "P2P_NET_DISABLE", 0); RCCL_PARAM(PivotAlltoallEnable, "PIVOT_ALLTOALL_ENABLE", 1); RCCL_PARAM(LL128ForceEnable, "LL128_FORCE_ENABLE", 0); @@ -1578,7 +1576,7 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, struct ncclComm* p } else { NCCLCHECKGOTO(ncclProxyCreate(comm), ret, fail); } - + timers[TIMER_INIT_CONNECT] = clockNano(); do { // Build p2p schedule int node = comm->node; @@ -1979,7 +1977,7 @@ static ncclResult_t ncclCommInitRankFunc(struct ncclAsyncJob* job_) { if (rcclParamMscclppForceEnabled()) { comm->mscclppForceEnable = true; } else { - comm->mscclppForceEnable = false; + comm->mscclppForceEnable = false; } } else { WARN("MSCCL++: Cannot enable MSCCL++ on %s architecture", devProp.gcnArchName); @@ -2410,7 +2408,7 @@ static ncclResult_t commDestroySync(struct ncclAsyncJob* job_) { // And keep polling until all graphs referencing us die. while (comm->persistentRefs != 0) { NCCLCHECKGOTO(ncclCommPollCallbacks(comm, /*waitSome=*/true), ret, fail); - } + } } if ((ret = ncclProxyStop(comm)) != ncclSuccess) { diff --git a/test/AllReduceTests.cpp b/test/AllReduceTests.cpp index 2753c56c47..3a49411026 100644 --- a/test/AllReduceTests.cpp +++ b/test/AllReduceTests.cpp @@ -118,7 +118,7 @@ namespace RcclUnitTesting std::vector const managedMemList = {false}; std::vector const useHipGraphList = {false, true}; std::vector const channelList = {"84", "112"}; - bool const enableSweep = false; + bool const enableSweep = false; for (auto channel : channelList) { setenv("NCCL_MIN_NCHANNELS", channel, 1); testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, @@ -149,30 +149,6 @@ namespace RcclUnitTesting testBed.Finalize(); } - TEST(AllReduce, DISABLED_Clique) - { - // Set clique env var prior to TestBed - setenv("RCCL_ENABLE_CLIQUE", "1", 1); - - TestBed testBed; - - // Configuration - std::vector const funcTypes = {ncclCollAllReduce}; - std::vector const dataTypes = testBed.GetAllSupportedDataTypes(); - std::vector const redOps = testBed.GetAllSupportedRedOps(); - std::vector const roots = {0}; - std::vector const numElements = {1048576, 1024}; - std::vector const inPlaceList = {false, true}; - std::vector const managedMemList = {false}; - std::vector const useHipGraphList = {false, true}; - - testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, - inPlaceList, managedMemList, useHipGraphList); - testBed.Finalize(); - - unsetenv("RCCL_ENABLE_CLIQUE"); - } - // This tests using custom pre-mult scalars reductions TEST(AllReduce, PreMultScalar) { @@ -245,7 +221,7 @@ namespace RcclUnitTesting } TEST(AllReduce, UserBufferRegistration) - { + { const int nranks = 8; size_t count = 2048; std::vector sendBuff(count, 0); @@ -260,7 +236,7 @@ namespace RcclUnitTesting } TEST(AllReduce, ManagedMemUserBufferRegistration) - { + { const int nranks = 8; size_t count = 2048; std::vector sendBuff(count, 0); diff --git a/tools/HelloRccl/runTest.sh b/tools/HelloRccl/runTest.sh index c6e832e3c4..9b24b42f12 100755 --- a/tools/HelloRccl/runTest.sh +++ b/tools/HelloRccl/runTest.sh @@ -3,19 +3,10 @@ RCCL_INSTALL=../../build/release EXE=$PWD/HelloRccl LDPATH=$LD_LIBRARY_PATH:$RCCL_INSTALL -echo "Single process - With clique-based kernels:" -RCCL_CLIQUE_ALLREDUCE_BYTE_LIMIT=1073741824 RCCL_FORCE_ENABLE_CLIQUE=1 NCCL_DEBUG=INFO RCCL_ENABLE_CLIQUE=1 LD_LIBRARY_PATH=$LDPATH $EXE 4 - -echo "Single process - Without clique-based kernels:" +echo "Single process:" NCCL_DEBUG=INFO LD_LIBRARY_PATH=$LDPATH $EXE 4 -echo "With clique-based kernels:" -RCCL_CLIQUE_ALLREDUCE_BYTE_LIMIT=1073741824 RCCL_FORCE_ENABLE_CLIQUE=1 NCCL_DEBUG=INFO RCCL_ENABLE_CLIQUE=1 NCCL_COMM_ID=$HOSTNAME:12345 LD_LIBRARY_PATH=$LDPATH $EXE 4 0 & -RCCL_CLIQUE_ALLREDUCE_BYTE_LIMIT=1073741824 RCCL_FORCE_ENABLE_CLIQUE=1 NCCL_DEBUG=INFO RCCL_ENABLE_CLIQUE=1 NCCL_COMM_ID=$HOSTNAME:12345 LD_LIBRARY_PATH=$LDPATH $EXE 4 1 & -RCCL_CLIQUE_ALLREDUCE_BYTE_LIMIT=1073741824 RCCL_FORCE_ENABLE_CLIQUE=1 NCCL_DEBUG=INFO RCCL_ENABLE_CLIQUE=1 NCCL_COMM_ID=$HOSTNAME:12345 LD_LIBRARY_PATH=$LDPATH $EXE 4 2 & -RCCL_CLIQUE_ALLREDUCE_BYTE_LIMIT=1073741824 RCCL_FORCE_ENABLE_CLIQUE=1 NCCL_DEBUG=INFO RCCL_ENABLE_CLIQUE=1 NCCL_COMM_ID=$HOSTNAME:12345 LD_LIBRARY_PATH=$LDPATH $EXE 4 3 - -echo "Without clique-based kernels:" +echo "Multi-process:" NCCL_COMM_ID=$HOSTNAME:12345 LD_LIBRARY_PATH=$LDPATH $EXE 4 0 & NCCL_COMM_ID=$HOSTNAME:12345 LD_LIBRARY_PATH=$LDPATH $EXE 4 1 & NCCL_COMM_ID=$HOSTNAME:12345 LD_LIBRARY_PATH=$LDPATH $EXE 4 2 & diff --git a/tools/topo_expl/utils.cpp b/tools/topo_expl/utils.cpp index 396bd270f0..2e793b8346 100644 --- a/tools/topo_expl/utils.cpp +++ b/tools/topo_expl/utils.cpp @@ -38,7 +38,6 @@ const char* ncclProtoStr[NCCL_NUM_PROTOCOLS] = { "LL", "LL128", "Simple" }; extern NodeModel *node_model; -RCCL_PARAM(CliqueIgnoreTopo, "CLIQUE_IGNORE_TOPO", 0); RCCL_PARAM(P2pNetDisable, "P2P_NET_DISABLE", 0); RCCL_PARAM(PivotAlltoallEnable, "PIVOT_ALLTOALL_ENABLE", 1); RCCL_PARAM(LL128ForceEnable, "LL128_FORCE_ENABLE", 0);