Added RCCL env params to control setting the SO_REUSEADDR and SO_LINGER socket options (#1418)

* Added RCCL env params to control setting the SO_REUSEADDR and SO_LINGER socket options. This can allow control over the number of file descriptors created during bootstrapping.

* Cast the linger value to `int` sooner so that its type is never ambiguous within its scope.

* Added CHANGELOG entry for this feature.

[ROCm/rccl commit: 2e35417fe5]
This commit is contained in:
corey-derochie-amd
2025-01-14 10:26:04 -07:00
committed by GitHub
vanhempi cf907dbf61
commit ebacc24598
2 muutettua tiedostoa jossa 20 lisäystä ja 0 poistoa
+6
Näytä tiedosto
@@ -2,6 +2,12 @@
Full documentation for RCCL is available at [https://rccl.readthedocs.io](https://rccl.readthedocs.io)
## Unreleased
### Added
* `RCCL_SOCKET_REUSEADDR` and `RCCL_SOCKET_LINGER` environment parameters to control setting the `SO_REUSEADDR` and `SO_LINGER` socket options
## RCCL 2.21.5 for ROCm 6.3.1
### Added
+14
Näytä tiedosto
@@ -15,6 +15,9 @@
#include <sys/syscall.h>
#include "param.h"
// [RCCL] Runtime-tunable socket options, read in ncclSocketInit.
// The third macro argument is presumably the default value -- confirm against param.h.
// SocketReuseAddr (RCCL_SOCKET_REUSEADDR): when non-zero, SO_REUSEADDR is set on the socket.
RCCL_PARAM(SocketReuseAddr, "SOCKET_REUSEADDR", 0);
// SocketLinger (RCCL_SOCKET_LINGER): when greater than -1, SO_LINGER is enabled with this
// value as the linger time; at -1 the SO_LINGER option is left untouched.
RCCL_PARAM(SocketLinger, "SOCKET_LINGER", -1);
static ncclResult_t socketProgressOpt(int op, struct ncclSocket* sock, void* ptr, int size, int* offset, int block, int* closed) {
int bytes = 0;
*closed = 0;
@@ -727,6 +730,17 @@ ncclResult_t ncclSocketInit(struct ncclSocket* sock, union ncclSocketAddress* ad
ret = ncclSystemError;
goto fail;
}
// [RCCL] Runtime socket options
if (rcclParamSocketReuseAddr()) {
int opt = 1;
SYSCHECKGOTO(setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)), ret, fail);
}
int lingerParam = (int)rcclParamSocketLinger();
if (lingerParam > -1) {
linger linger_opt = { 1, lingerParam };
SYSCHECKGOTO(setsockopt(sock->fd, SOL_SOCKET, SO_LINGER, &linger_opt, sizeof(linger_opt)), ret, fail);
}
} else {
memset(&sock->addr, 0, sizeof(union ncclSocketAddress));
}