Added RCCL env params to control setting the SO_REUSEADDR and SO_LINGER socket options (#1418)
* Added RCCL env params to control setting the SO_REUSEADDR and SO_LINGER socket options. This can allow control over the number of file descriptors created during bootstrapping.
* Cast the linger value to `int` earlier so that its type is explicit throughout the scope in which it is used.
* Added CHANGELOG entry for this feature.
[ROCm/rccl commit: 2e35417fe5]
This commit is contained in:
committed by
GitHub
vanhempi
cf907dbf61
commit
ebacc24598
@@ -2,6 +2,12 @@
|
||||
|
||||
Full documentation for RCCL is available at [https://rccl.readthedocs.io](https://rccl.readthedocs.io)
|
||||
|
||||
## Unreleased
|
||||
|
||||
### Added
|
||||
|
||||
* `RCCL_SOCKET_REUSEADDR` and `RCCL_SOCKET_LINGER` environment parameters
|
||||
|
||||
## RCCL 2.21.5 for ROCm 6.3.1
|
||||
|
||||
### Added
|
||||
|
||||
@@ -15,6 +15,9 @@
|
||||
#include <sys/syscall.h>
|
||||
#include "param.h"
|
||||
|
||||
// [RCCL] SOCKET_REUSEADDR parameter (default 0). When it evaluates to non-zero,
// ncclSocketInit sets SO_REUSEADDR on the newly created socket.
RCCL_PARAM(SocketReuseAddr, "SOCKET_REUSEADDR", 0);
|
||||
// [RCCL] SOCKET_LINGER parameter (default -1). A value greater than -1 makes
// ncclSocketInit enable SO_LINGER on the socket with that value as the linger
// timeout; the default of -1 leaves the socket's linger setting untouched.
RCCL_PARAM(SocketLinger, "SOCKET_LINGER", -1);
|
||||
|
||||
static ncclResult_t socketProgressOpt(int op, struct ncclSocket* sock, void* ptr, int size, int* offset, int block, int* closed) {
|
||||
int bytes = 0;
|
||||
*closed = 0;
|
||||
@@ -727,6 +730,17 @@ ncclResult_t ncclSocketInit(struct ncclSocket* sock, union ncclSocketAddress* ad
|
||||
ret = ncclSystemError;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
// [RCCL] Runtime socket options
|
||||
if (rcclParamSocketReuseAddr()) {
|
||||
int opt = 1;
|
||||
SYSCHECKGOTO(setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)), ret, fail);
|
||||
}
|
||||
int lingerParam = (int)rcclParamSocketLinger();
|
||||
if (lingerParam > -1) {
|
||||
linger linger_opt = { 1, lingerParam };
|
||||
SYSCHECKGOTO(setsockopt(sock->fd, SOL_SOCKET, SO_LINGER, &linger_opt, sizeof(linger_opt)), ret, fail);
|
||||
}
|
||||
} else {
|
||||
memset(&sock->addr, 0, sizeof(union ncclSocketAddress));
|
||||
}
|
||||
|
||||
Viittaa uudessa ongelmassa
Block a user