/************************************************************************* * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. * * See LICENSE.txt for license information ************************************************************************/ #ifndef PROFILER_V5_H_ #define PROFILER_V5_H_ #include typedef struct { uint64_t type; // event type descriptor: ncclProfileGroupApi, ... void* parentObj; // pointer to the profiler parent object int rank; // originating rank union { struct { int graphCaptured; int groupDepth; } groupApi; struct { const char* func; size_t count; const char* datatype; int root; void* stream; bool graphCaptured; } collApi; struct { const char* func; size_t count; const char* datatype; void* stream; bool graphCaptured; } p2pApi; struct { void* stream; } kernelLaunch; struct { uint64_t seqNumber; const char* func; void const* sendBuff; void* recvBuff; size_t count; int root; const char* datatype; uint8_t nChannels; uint8_t nWarps; const char* algo; const char* proto; void* parentGroup; // for backward compatibility with v4 } coll; struct { const char* func; void* buff; const char* datatype; size_t count; int peer; uint8_t nChannels; void* parentGroup; // for backward compatibility with v4 } p2p; struct { pid_t pid; // pid of the originating process uint8_t channelId; // channel id for this proxy operation int peer; // remote rank for send/recv int nSteps; // number of steps for this proxy operation int chunkSize; // amount of data transferred by this proxy operation int isSend; } proxyOp; struct { int step; } proxyStep; struct { uint8_t channelId; uint64_t pTimer; // start timestamp from GPU globaltimer } kernelCh; struct { int64_t id; void* data; } netPlugin; }; } ncclProfilerEventDescr_v5_t; typedef union { struct { size_t transSize; } proxyStep; struct { int appendedProxyOps; } proxyCtrl; struct { void* data; } netPlugin; struct { uint64_t pTimer; } kernelCh; } ncclProfilerEventStateArgs_v5_t; typedef struct { const char* name; // init - initialize the profiler plugin // Input // - context : opaque profiler context object for separating profiler behavior across comms // - commId : communicator id // - commName : user assigned communicator name // - nNodes : number of nodes in communicator // - nranks : number of ranks in communicator // - rank : rank identifier in communicator // - logfn : logger function // Output // - eActivationMask: bitmask of active events set by the plugin ncclResult_t (*init)(void** context, uint64_t commId, int* eActivationMask, const char* commName, int nNodes, int nranks, int rank, ncclDebugLogger_t logfn); // startEvent - initialize and start a new event for the supplied event descriptor inside the eventset // Input // - context: opaque profiler context object // - eDescr : pointer to ncclProfilerEventDescr_t object // Output // - eHandle: return event handle for supplied event descriptor object ncclResult_t (*startEvent)(void* context, void** eHandle, ncclProfilerEventDescr_v5_t* eDescr); // stopEvent - stop/finalize an event inside and event set // Input // - eHandle: handle to event object ncclResult_t (*stopEvent)(void* eHandle); // recordEventState - record event state transitions and event attribute updates // Input // - eHandle : handle to event object created through startEvent // - eStateArgs: optional argument used to capture event attribute updates associated with the state transition // - eState : event state transition ncclResult_t (*recordEventState)(void* eHandle, ncclProfilerEventState_v5_t eState, ncclProfilerEventStateArgs_v5_t* eStateArgs); // finalize - finalize the profiler plugin // Input // - context: opaque profiler context object ncclResult_t (*finalize)(void* context); } ncclProfiler_v5_t; #endif