Use better name for kernel collective trace enable (#357)

"NCCL_DEBUG=INFO NCCL_DEBUG_SUBSYS=INIT,COLL" enables the collectives API
trace. Additionally setting "RCCL_KERNEL_COLL_TRACE_ENABLE=1" enables kernel traces.
Этот коммит содержится в:
Wenkai Du
2021-04-26 08:35:53 -07:00
коммит произвёл GitHub
родитель 9cc9c3360b
Коммит ed237dcaa7
+3 -5
Просмотреть файл
@@ -165,14 +165,12 @@ void NCCL_NO_OPTIMIZE commPoison(ncclComm_t comm) {
comm->rank = comm->cudaDev = comm->busId = comm->nRanks = -1;
}
RCCL_PARAM(KernelCollEnable, "KERNEL_COLL_ENABLE", 0);
RCCL_PARAM(KernelCollPrint, "KERNEL_COLL_PRINT", 0);
RCCL_PARAM(KernelCollTraceEnable, "KERNEL_COLL_TRACE_ENABLE", 0);
#ifdef ENABLE_COLLTRACE
void *ncclCommThreadMain(void *arg) {
ncclComm_t comm = (ncclComm_t)arg;
int head = comm->hostDevComm.collTraceHead;
bool kern_print = rcclParamKernelCollPrint() && rcclParamKernelCollEnable();
#define MAX_NAME_LENGTH 32
char* func_names = (char *)malloc(MAX_NAME_LENGTH*(FUNC_INDEX_P2P+1));
for (int func = 0; func < NCCL_NUM_FUNCTIONS; func++) {
@@ -256,7 +254,7 @@ void *ncclCommThreadMain(void *arg) {
break;
}
}
if (kern_print) INFO(NCCL_INIT, "%s", line);
INFO(NCCL_COLL, "%s", line);
STORE(&(td->type), ncclCollTraceNotReady);
head ++;
head %= COLLTRACE_NUM_ITEMS;
@@ -420,7 +418,7 @@ static ncclResult_t commAlloc(ncclComm_t* comret, int ndev, int rank) {
NCCLCHECK(ncclCudaHostCalloc(&comm->hostDevComm.collTrace, COLLTRACE_NUM_ITEMS));
memset(comm->hostDevComm.collTrace, 0, sizeof(struct ncclCollTrace) * COLLTRACE_NUM_ITEMS);
comm->hostDevComm.collTraceExit = comm->hostDevComm.collTraceHead = *comm->hostDevComm.collTraceTail = 0;
if ((ncclDebugLevel >= NCCL_LOG_INFO) && rcclParamKernelCollEnable())
if ((ncclDebugLevel >= NCCL_LOG_INFO) && rcclParamKernelCollTraceEnable())
pthread_create(&comm->hostDevComm.collTraceThread, NULL, ncclCommThreadMain, (void *)comm);
else
comm->hostDevComm.collTraceThread = 0;