/************************************************************************* * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. * * See LICENSE.txt for license information ************************************************************************/ #include #include #include #include #include #include #include #include #include "profiler.h" #include "inspector.h" #define __hidden __attribute__ ((visibility("hidden"))) static int gInitialized; static pthread_mutex_t gLock = PTHREAD_MUTEX_INITIALIZER; /* * Description: * Records an event trace with timestamp and sequence number * * Thread Safety: * Not thread-safe - must be called with proper locking. This function * is designed to be called from within locked sections where the * collective info structure is already protected. * * Input: * struct inspectorEventTraceInfo* evtTrace - event trace array * int eventIndex - index in the event trace array (must be valid) * struct inspectorCollInfo* collInfo - collective info structure (must not be NULL) * * Output: * Event trace is updated with current timestamp and next sequence * number from collective * * Return: * uint64_t - the sequence number assigned to this event * * Preconditions: * - collInfo must not be NULL * - eventIndex must be within valid bounds for evtTrace array * - Function must be called from within a locked section */ static uint64_t inspectorRecordEventTrace(struct inspectorEventTraceInfo* evtTrace, int eventIndex, struct inspectorCollInfo* collInfo) { evtTrace[eventIndex].ts = inspectorGetTime(); evtTrace[eventIndex].sn = ++collInfo->collEvtTrk.sn; // Increment coll sequence counter return evtTrace[eventIndex].sn; } /* * Description: * * Initializes the NCCL Inspector plugin and global state for a * communicator. * * Thread Safety: * Thread-safe (uses mutex for initialization). * * Input: * void** context - pointer to plugin context. * int* eActivationMask - pointer to activation mask output. * const char* commName - communicator name. * uint64_t commHash - communicator hash. * int nNodes - number of nodes. * int nranks - number of ranks. * int rank - rank. * ncclDebugLogger_t logfn - logger function pointer. * * Output: * context is set to plugin context; eActivationMask is set. * * Return: * ncclResult_t - success or error code. * */ __hidden ncclResult_t inspectorPluginInit(void** context, uint64_t commHash, int* eActivationMask, const char* commName, int nNodes, int nranks, int rank, ncclDebugLogger_t logfn) { inspectorResult_t res = inspectorSuccess; *context = nullptr; logFn = logfn; pthread_mutex_lock(&gLock); if (++gInitialized == 1) { res = inspectorGlobalInit(rank); if (res != inspectorSuccess) { WARN("Inspector Init Failed %s:%d -> error %d: %s",__FILE__, __LINE__, res, inspectorErrorString(res)); gInitialized = 0; pthread_mutex_unlock(&gLock); return ncclInternalError; } } pthread_mutex_unlock(&gLock); INS_CHK_GOTO(inspectorAddComm((struct inspectorCommInfo **)context, commName, commHash, nNodes, nranks, rank), res, success); *eActivationMask = ncclProfileColl | ncclProfileKernelCh; INFO(NCCL_INIT, "PROFILER/Plugin: init commName: %s commHash: %lu nranks: %d rank: %d", commName ? commName : "", commHash, nranks, rank); success: if (res != inspectorSuccess) { return ncclInternalError; } else { return ncclSuccess; } } /* * Description: * * Finalizes the NCCL Inspector plugin and global state for a * communicator. * * Thread Safety: * Thread-safe (uses mutex for finalization). * * Input: * void* context - plugin context. * * Output: * Plugin context is finalized and cleaned up. * * Return: * ncclResult_t - success or error code. * */ __hidden ncclResult_t inspectorPluginFinalize(void* context) { inspectorDelComm((struct inspectorCommInfo *)context); pthread_mutex_lock(&gLock); if (--gInitialized == 0) { inspectorGlobalFinalize(); } pthread_mutex_unlock(&gLock); return ncclSuccess; } inspectorResult_t inspectorPluginCollInfoRef(struct inspectorCollInfo *collInfo) { collInfo->refCount += 1; return inspectorSuccess; } inspectorResult_t inspectorPluginCollInfoRefSafe(struct inspectorCollInfo *collInfo) { inspectorLockWr(&collInfo->guard); inspectorPluginCollInfoRef(collInfo); inspectorUnlockRWLock(&collInfo->guard); return inspectorSuccess; } inspectorResult_t inspectorPluginCollInfoDeRef(struct inspectorCollInfo *collInfo) { collInfo->refCount -= 1; if (collInfo->refCount == 0) { inspectorLockDestroy(&collInfo->guard); memset(collInfo, 0, sizeof(struct inspectorCollInfo)); free(collInfo); return inspectorReturn; } return inspectorSuccess; } inspectorResult_t inspectorPluginCollInfoDeRefSafe(struct inspectorCollInfo *collInfo) { inspectorLockWr(&collInfo->guard); inspectorResult_t res = inspectorPluginCollInfoDeRef(collInfo); inspectorUnlockRWLock(&collInfo->guard); return res; } /* * Description: * Initializes a new inspectorCollInfo structure for a collective * event. * * Thread Safety: * Not thread-safe (allocates and initializes a new collective info * structure). * * Input: * * struct inspectorCollInfo **collInfo - pointer to output * collective info struct. * ncclProfilerEventDescr_t *eDescr - event descriptor. * * Output: * collInfo is set to the new collective info struct. * * Return: * None. */ static void inspectorPluginCollInfoInit(struct inspectorCollInfo **collInfo, ncclProfilerEventDescr_t *eDescr, struct inspectorCommInfo *commInfo) { struct inspectorCollInfo *collInfoPtr = (struct inspectorCollInfo*)calloc(1, sizeof(struct inspectorCollInfo)); if (collInfoPtr == nullptr) { WARN("Inspector: Failed to allocate memory for collective info structure"); *collInfo = nullptr; return; } collInfoPtr->type = ncclProfileColl; collInfoPtr->refCount = 0; inspectorPluginCollInfoRef(collInfoPtr); //self ref; no locks needed collInfoPtr->func = eDescr->coll.func; collInfoPtr->sn = eDescr->coll.seqNumber; collInfoPtr->nChannels = eDescr->coll.nChannels; if (collInfoPtr->nChannels > 0) { inspectorPluginCollInfoRef(collInfoPtr); //extra ref for kernel completion } collInfoPtr->tsStartUsec = inspectorGetTime(); collInfoPtr->msgSizeBytes = ncclTypeSize(inspectorStringToDatatype(eDescr->coll.datatype)) * eDescr->coll.count; collInfoPtr->commInfo = commInfo; collInfoPtr->collEvtTrk.sn = 0; collInfoPtr->collEvtTrk.nChannels = collInfoPtr->nChannels; inspectorRecordEventTrace(collInfoPtr->collEvtTrk.evntTrace, NCCL_INSP_EVT_TRK_COLL_START, collInfoPtr); inspectorLockInit(&collInfoPtr->guard); *collInfo = collInfoPtr; } /* * Description: * * Initializes a new inspectorKernelChInfo structure for a kernel * channel event. * * Thread Safety: * Not thread-safe (initializes kernel channel info within a * collective info structure). * * Input: * struct inspectorKernelChInfo **kernelChInfo - pointer to output * kernel channel info struct. * ncclProfilerEventDescr_t *eDescr - event descriptor. * * Output: * * kernelChInfo is set to the new kernel channel info struct. * * Return: * None. */ static void inspectorPluginKernelChInfoInit(struct inspectorKernelChInfo **kernelChInfo, ncclProfilerEventDescr_t *eDescr) { if (eDescr->parentObj) { uint64_t parentType=*(uint64_t*)eDescr->parentObj; if (parentType == ncclProfileColl) { struct inspectorCollInfo *collInfo = (struct inspectorCollInfo*)eDescr->parentObj; if (collInfo && collInfo->type == ncclProfileColl) { inspectorLockWr(&collInfo->guard); struct inspectorEventTraceInfo *krnlEvtTrk = collInfo->collEvtTrk.kernelCh[eDescr->kernelCh.channelId].evntTrace; inspectorRecordEventTrace(krnlEvtTrk, NCCL_INSP_EVT_TRK_KERNEL_START, collInfo); struct inspectorKernelChInfo *kernelChInfoPtr = &collInfo->kernelCh[eDescr->kernelCh.channelId]; kernelChInfoPtr->type = ncclProfileKernelCh; kernelChInfoPtr->channelId = eDescr->kernelCh.channelId; kernelChInfoPtr->startGpuClk = eDescr->kernelCh.pTimer; if (kernelChInfoPtr->stopGpuClk == 0) { inspectorPluginCollInfoRef(collInfo); //Pairs with Record Kernel Stop event } kernelChInfoPtr->tsStartUsec = inspectorGetTime(); if (collInfo->nKernelChStarted == 0) { collInfo->tsStartUsec = kernelChInfoPtr->tsStartUsec; } collInfo->nKernelChStarted += 1; inspectorPluginCollInfoRef(collInfo); //Pairs with Stop Kernel Event kernelChInfoPtr->collInfo = collInfo; *kernelChInfo = kernelChInfoPtr; inspectorUnlockRWLock(&collInfo->guard); } } } } /* * Description: * * Starts a profiling event for the NCCL Inspector plugin. * * Thread Safety: * Thread-safe (allocates and initializes event structures). * * Input: * void* context - plugin context. * void** eHandle - pointer to event handle output. * ncclProfilerEventDescr_t* eDescr - event descriptor. * * Output: * eHandle is set to the new event structure. * * Return: * ncclResult_t - success or error code. * */ __hidden ncclResult_t inspectorPluginStartEvent(void* context, void** eHandle, ncclProfilerEventDescr_t* eDescr) { if (context == nullptr || eDescr == nullptr) { INFO(NCCL_INIT, "Profiler/Plugin: context/eDescr NULL for start event %s", __func__); return ncclSuccess; } *eHandle = nullptr; if (eDescr->type == ncclProfileColl) { struct inspectorCollInfo *collEvent = nullptr; struct inspectorCommInfo *commInfoCtx = (struct inspectorCommInfo*)context; inspectorPluginCollInfoInit(&collEvent, eDescr, commInfoCtx); *eHandle = collEvent; } else if (eDescr->type == ncclProfileKernelCh) { struct inspectorKernelChInfo *kernelChEvent = nullptr; inspectorPluginKernelChInfoInit(&kernelChEvent, eDescr); *eHandle = kernelChEvent; } else { return ncclSuccess; } return ncclSuccess; } /* * Description: * * Stops a profiling event for the NCCL Inspector plugin. * * Thread Safety: * * Thread-safe (updates event state and performance info). * * Input: * * void *eHandle - event handle. * * Output: * * Event is stopped and performance info may be updated. * * Return: * ncclResult_t - success or error code. * */ __hidden ncclResult_t inspectorPluginStopEvent(void *eHandle) { if (eHandle == nullptr) { INFO(NCCL_INIT, "Profiler/Plugin: Event Handle NULL for start event %s", __func__); return ncclSuccess; } uint64_t type = *(uint64_t *)eHandle; inspectorResult_t res = inspectorSuccess; if (type == ncclProfileColl) { struct inspectorCollInfo *collInfo = (struct inspectorCollInfo *)eHandle; // Record collective stop event inspectorLockWr(&collInfo->guard); inspectorRecordEventTrace(collInfo->collEvtTrk.evntTrace, NCCL_INSP_EVT_TRK_COLL_STOP, collInfo); res = inspectorPluginCollInfoDeRef(collInfo); if (res == inspectorReturn) { // WARN("NCCL Inspector unnatural return: inspectorPluginStopEvent:ncclProfileColl"); return ncclSuccess; } inspectorUnlockRWLock(&collInfo->guard); return ncclSuccess; } else if (type == ncclProfileKernelCh) { struct inspectorKernelChInfo *kernelChInfo = (struct inspectorKernelChInfo *)eHandle; struct inspectorCollInfo *collInfo = kernelChInfo->collInfo; if (collInfo && collInfo->type == ncclProfileColl) { inspectorLockWr(&collInfo->guard); struct inspectorEventTraceInfo *krnlEvtTrk = collInfo->collEvtTrk.kernelCh[kernelChInfo->channelId].evntTrace; inspectorRecordEventTrace(krnlEvtTrk, NCCL_INSP_EVT_TRK_KERNEL_STOP, collInfo); kernelChInfo->tsCompletedUsec = inspectorGetTime(); collInfo->nKernelChCompleted += 1; res = inspectorPluginCollInfoDeRef(collInfo); if (res == inspectorReturn) { WARN("NCCL Inspector unnatural return: inspectorPluginStopEvent:ncclProfileKernelCh"); return ncclSuccess; } if ((collInfo->nKernelChCompleted == collInfo->nKernelChStarted) && (collInfo->nKernelChCompleted == collInfo->nChannels)) { struct inspectorCompletedCollInfo completedColl; struct inspectorCommInfo *commInfo = collInfo->commInfo; collInfo->tsCompletedUsec = kernelChInfo->tsCompletedUsec; inspectorUpdateCollPerf(&completedColl, collInfo); res = inspectorPluginCollInfoDeRef(collInfo); if (res != inspectorReturn) { inspectorUnlockRWLock(&collInfo->guard); } if (commInfo != nullptr) { inspectorLockWr(&commInfo->guard); inspectorComputeCollBw(commInfo, &completedColl, completedColl.func); memcpy(&commInfo->completedCollInfo, &completedColl, sizeof(struct inspectorCompletedCollInfo)); commInfo->dump = true; inspectorUnlockRWLock(&commInfo->guard); } return ncclSuccess; } inspectorUnlockRWLock(&collInfo->guard); } return ncclSuccess; } return ncclSuccess; } /* * Description: * * Records the state of a profiling event for the NCCL Inspector * plugin. * * Thread Safety: * * Thread-safe (updates event state as needed). * * Input: * void* eHandle - event handle. * ncclProfilerEventState_t eState - event state. * ncclProfilerEventStateArgs_t* eStateArgs - event state arguments. * * Output: * Event state is updated as needed. * * Return: * ncclResult_t - success or error code. * */ __hidden ncclResult_t inspectorPluginRecordEventState(void* eHandle, ncclProfilerEventState_t eState, ncclProfilerEventStateArgs_t* eStateArgs) { if (eHandle == nullptr || eStateArgs == nullptr) return ncclSuccess; uint64_t type = *(uint64_t *)eHandle; if (type == ncclProfileKernelCh && eState == ncclProfilerKernelChStop) { struct inspectorKernelChInfo *kernelChInfo = (struct inspectorKernelChInfo *)eHandle; struct inspectorCollInfo *collInfo = kernelChInfo->collInfo; inspectorResult_t res = inspectorSuccess; if (collInfo && collInfo->type == ncclProfileColl) { inspectorLockWr(&collInfo->guard); struct inspectorEventTraceInfo *krnlEvtTrk = collInfo->collEvtTrk.kernelCh[kernelChInfo->channelId].evntTrace; inspectorRecordEventTrace(krnlEvtTrk, NCCL_INSP_EVT_TRK_KERNEL_RECORD, collInfo); kernelChInfo->stopGpuClk = eStateArgs->kernelCh.pTimer; if (kernelChInfo->startGpuClk != 0) { res = inspectorPluginCollInfoDeRef(collInfo); if (res == inspectorReturn) { WARN("NCCL Inspector unnatural return: inspectorPluginRecordEventState"); return ncclSuccess; } } inspectorUnlockRWLock(&collInfo->guard); } } return ncclSuccess; } ncclProfiler_t ncclProfiler_v5 = { "Inspector", inspectorPluginInit, inspectorPluginStartEvent, inspectorPluginStopEvent, inspectorPluginRecordEventState, inspectorPluginFinalize, };