Files
rocm-systems/projects/rccl/src/include/enqueue.h
T
David Addison 17c8317cb1 NCCL 2.4.6-1
Added detection of IBM/Power NVLink bridge device.
    Add NUMA support to PCI distance calculations.
    Added NCCL_IGNORE_CPU_AFFINITY env var.
    Fix memory leaks; GithubIssue#180
    Compiler warning fix; GithubIssue#178
    Replace non-standard variable length arrays. GithubIssue#171
    Fix Tree+Shared Memory crash. GithubPR#185
    Fix LL cleanup hang during long running DL jobs.
    Fix NCCL_RINGS environment variable handling.
    Added extra checks to catch repeat calls to ncclCommDestroy() GithubIssue#191
    Improve bootstrap socket connection reliability at scale.
    Fix hostname hashing issue. GithubIssue#187
    Code cleanup to rename all non device files from *.cu to *.cc


[ROCm/rccl commit: f40ce73e89]
2019-04-05 13:05:45 -07:00

28 строки
1.1 KiB
C

/*************************************************************************
* Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#ifndef NCCL_ENQUEUE_H_
#define NCCL_ENQUEUE_H_
#include "core.h"
#include "group.h"
// Channels / LL tuning
#define NCCL_LL_CHANNEL_THRESHOLD 8 // Per thread size before we start increasing nrings
#define NCCL_THREAD_THRESHOLD 64 // Per thread size before we switch to non-LL
#define NCCL_THREAD_THRESHOLD_PREVOLTA 32 // Per thread size before we switch to non-LL for pre-Volta archs
#define NCCL_LL_MIN_NTHREADS 64
ncclResult_t ncclEnqueueCheck(struct ncclInfo* info);
ncclResult_t ncclCpuBarrierIn(ncclComm_t comm, int* isLast);
ncclResult_t ncclCpuBarrierLast(ncclComm_t comm);
ncclResult_t ncclCpuBarrierOut(ncclComm_t comm);
ncclResult_t ncclBarrierEnqueue(ncclComm_t comm);
ncclResult_t ncclBarrierEnqueueWait(ncclComm_t comm);
ncclResult_t ncclEnqueueEvents(ncclComm_t comm);
#endif // End include guard