// File: rocm-systems/projects/rccl/ext-profiler/inspector/inspector.cc
// (C++ source, 1531 lines, 47 KiB)

#include "inspector.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include <errno.h>
#include <cstring>
#include "common.h"
// JSON_CHK: evaluates a JSON-writer expression once. If the result is not
// jsonSuccess, logs the JSON error string and returns inspectorJsonError from
// the ENCLOSING function (so it can only be used in functions returning
// inspectorResult_t).
#define JSON_CHK(expr) \
do { \
const jsonResult_t res = (expr); \
if (res != jsonSuccess) { \
INFO(NCCL_INSPECTOR, "jsonError: %s\n", jsonErrorString(res)); \
return inspectorJsonError; \
} \
} while (0)
// INS_CHK: evaluates an inspector call once. If the result is not
// inspectorSuccess, logs file/line plus the error string and returns that
// result from the enclosing function.
// NOTE(review): the expansion ends in "while (0);" (with a semicolon), so
// "INS_CHK(x);" expands to two statements and the macro is not safe as the
// unbraced body of an if/else. Callers outside this view may rely on the
// current form, so it is left untouched here.
#define INS_CHK(call) \
do { \
inspectorResult_t res = call; \
if (inspectorSuccess != res) { \
INFO(NCCL_INSPECTOR, "%s:%d -> error %d: %s", __FILE__, __LINE__, res, \
inspectorErrorString(res)); \
return res; \
} \
} while (0);
// JSON_CHK_GOTO: like JSON_CHK, but instead of returning it stores
// inspectorJsonError into `res` and jumps to `label`, so the caller can run
// cleanup code before returning.
#define JSON_CHK_GOTO(expr, res, label) \
do { \
const jsonResult_t macro_res = (expr); \
if (macro_res != jsonSuccess) { \
INFO(NCCL_INSPECTOR, "jsonError: %s\n", jsonErrorString(macro_res)); \
res = inspectorJsonError; \
goto label; \
} \
} while (0)
// INS_CUDA_CHK: evaluates a CUDA runtime call once. If the result is not
// cudaSuccess, logs cudaGetErrorString() for it and returns inspectorCudaError
// from the enclosing function.
#define INS_CUDA_CHK(cmd) \
do { \
cudaError_t err = cmd; \
if (err != cudaSuccess) { \
INFO(NCCL_INSPECTOR, "Cuda failure '%s'", cudaGetErrorString(err)); \
return inspectorCudaError; \
} \
} while (false)
// Global flag to control inspector use. Set by inspectorGlobalInit() from the
// NCCL_INSPECTOR_ENABLE environment variable (defaults to disabled).
static bool enableNcclInspector = false;
// Global flag to control starting the internal dump thread. Set by
// inspectorGlobalInit() from NCCL_INSPECTOR_DUMP_THREAD_ENABLE.
static bool enableNcclInspectorDumpThread = false;
// Global flag to control verbose dumping (event_trace). Set by
// inspectorGlobalInit() from NCCL_INSPECTOR_DUMP_VERBOSE.
static bool enableNcclInspectorDumpVerbose = false;
// Extra guard to prevent spurious messages for eager pollers that try to dump
// out results before we have initialized. Set to true by inspectorGlobalInit().
static bool ncclInspectorInit = false;
// Definition of the global logFn variable (logger callback; nullptr until
// something assigns it -- the assignment is not visible in this part of the
// file).
ncclDebugLogger_t logFn = nullptr;
/*
 * Description:
 *
 *   Returns the current wall-clock time in microseconds since the epoch.
 *
 * Thread Safety:
 *
 *   Thread-safe (uses gettimeofday on a local timeval).
 *
 * Input:
 *
 *   None.
 *
 * Output:
 *
 *   None.
 *
 * Return:
 *   uint64_t - current time in microseconds, or 0 if gettimeofday() fails.
 *
 * Error Handling:
 *   gettimeofday() rarely fails. When it does, the timeval contents are
 *   indeterminate, so the function returns 0 instead of computing a value
 *   from them. Callers should check for 0 if precise error handling is
 *   required.
 *
 */
uint64_t inspectorGetTime() {
  struct timeval tv;
  if (gettimeofday(&tv, NULL) != 0) {
    return 0;
  }
  // Widen before multiplying so the product is computed in 64 bits even on
  // platforms where time_t is narrower.
  return (uint64_t)tv.tv_sec * 1000000ULL + (uint64_t)tv.tv_usec;
}
/*
 * Description:
 *
 *   Converts a string to the corresponding ncclDataType_t enum value.
 *   Matching is exact and case-sensitive.
 *
 * Thread Safety:
 *   Thread-safe (read-only string input).
 *
 * Input:
 *
 *   const char* str - string representation of the datatype (may be NULL).
 *
 * Output:
 *
 *   None.
 *
 * Return:
 *
 *   ncclDataType_t - corresponding enum value, or (ncclDataType_t)-1 if the
 *   string is NULL or not one of the recognised names.
 *
 */
ncclDataType_t inspectorStringToDatatype(const char* str) {
  // strcmp() on a NULL pointer is undefined behaviour; treat it as "unknown".
  if (str == nullptr) return (ncclDataType_t)-1;
  if (strcmp(str, "ncclInt8") == 0) return ncclInt8;
  if (strcmp(str, "ncclInt32") == 0) return ncclInt32;
  if (strcmp(str, "ncclUint32") == 0) return ncclUint32;
  if (strcmp(str, "ncclInt64") == 0) return ncclInt64;
  if (strcmp(str, "ncclUint64") == 0) return ncclUint64;
  if (strcmp(str, "ncclFloat16") == 0) return ncclFloat16;
  if (strcmp(str, "ncclFloat32") == 0) return ncclFloat32;
  if (strcmp(str, "ncclFloat64") == 0) return ncclFloat64;
  if (strcmp(str, "ncclBfloat16") == 0) return ncclBfloat16;
  if (strcmp(str, "ncclFloat8e4m3") == 0) return ncclFloat8e4m3;
  if (strcmp(str, "ncclFloat8e5m2") == 0) return ncclFloat8e5m2;
  return (ncclDataType_t)-1;  // unknown name
}
/*
 * Description:
 *
 *   Maps a collective/point-to-point function name to its ncclFunc_t value
 *   using an exact, case-sensitive comparison.
 *
 * Thread Safety:
 *   Thread-safe (the lookup table is constant, input is only read).
 *
 * Input:
 *   const char* str - function name (must not be NULL).
 *
 * Output:
 *   None.
 *
 * Return:
 *   ncclFunc_t - matching enum value, or ncclNumFuncs if the name is unknown.
 *
 * Preconditions:
 *   - str must not be NULL
 */
ncclFunc_t ncclStringToFunc(const char* str) {
  static const struct {
    const char* label;
    ncclFunc_t func;
  } kFuncNames[] = {
    {"AllGather", ncclFuncAllGather},
    {"AllReduce", ncclFuncAllReduce},
    {"Broadcast", ncclFuncBroadcast},
    {"Recv", ncclFuncRecv},
    {"Reduce", ncclFuncReduce},
    {"ReduceScatter", ncclFuncReduceScatter},
    {"SendRecv", ncclFuncSendRecv},
    {"Send", ncclFuncSend},
  };
  const size_t numNames = sizeof(kFuncNames) / sizeof(kFuncNames[0]);
  for (size_t idx = 0; idx < numNames; ++idx) {
    if (strcmp(str, kFuncNames[idx].label) == 0) {
      return kFuncNames[idx].func;
    }
  }
  return ncclNumFuncs;  // Invalid / unknown
}
/*
 * Description:
 *   Returns the display name for an ncclFunc_t value; the inverse of
 *   ncclStringToFunc for the eight known functions.
 *
 * Return:
 *   const char* - static string naming the function, or "Invalid" for any
 *   other value.
 */
const char* ncclFuncToString(ncclFunc_t fn) {
  if (fn == ncclFuncAllGather) return "AllGather";
  if (fn == ncclFuncAllReduce) return "AllReduce";
  if (fn == ncclFuncBroadcast) return "Broadcast";
  if (fn == ncclFuncRecv) return "Recv";
  if (fn == ncclFuncReduce) return "Reduce";
  if (fn == ncclFuncReduceScatter) return "ReduceScatter";
  if (fn == ncclFuncSendRecv) return "SendRecv";
  if (fn == ncclFuncSend) return "Send";
  return "Invalid";
}
// Forward declaration; the full definition appears later in this file.
struct inspectorDumpThread;
// The single dump-thread object. Stays nullptr unless inspectorGlobalInit()
// creates it.
static inspectorDumpThread* dumper = nullptr;
// Marks a variable as intentionally unused to silence compiler warnings.
#define UNUSED(x) (void)(x)
/*
 * Initializes a read-write lock with default attributes.
 * Returns inspectorSuccess, or inspectorLockError if pthread_rwlock_init fails.
 */
inspectorResult_t inspectorLockInit(pthread_rwlock_t* lockRef) {
  const int rc = pthread_rwlock_init(lockRef, nullptr);
  return (rc == 0) ? inspectorSuccess : inspectorLockError;
}
/*
 * Destroys a read-write lock.
 * Returns inspectorSuccess, or inspectorLockError if pthread_rwlock_destroy
 * fails.
 */
inspectorResult_t inspectorLockDestroy(pthread_rwlock_t* lockRef) {
  const int rc = pthread_rwlock_destroy(lockRef);
  return (rc == 0) ? inspectorSuccess : inspectorLockError;
}
/*
 * Acquires a read-write lock for reading (shared).
 * Returns inspectorSuccess, or inspectorLockError if pthread_rwlock_rdlock
 * fails.
 */
inspectorResult_t inspectorLockRd(pthread_rwlock_t* lockRef) {
  const int rc = pthread_rwlock_rdlock(lockRef);
  return (rc == 0) ? inspectorSuccess : inspectorLockError;
}
/*
 * Acquires a read-write lock for writing (exclusive).
 * Returns inspectorSuccess, or inspectorLockError if pthread_rwlock_wrlock
 * fails.
 */
inspectorResult_t inspectorLockWr(pthread_rwlock_t* lockRef) {
  const int rc = pthread_rwlock_wrlock(lockRef);
  return (rc == 0) ? inspectorSuccess : inspectorLockError;
}
/*
 * Releases a read-write lock held by the caller (read or write).
 * Returns inspectorSuccess, or inspectorLockError if pthread_rwlock_unlock
 * fails.
 */
inspectorResult_t inspectorUnlockRWLock(pthread_rwlock_t* lockRef) {
  const int rc = pthread_rwlock_unlock(lockRef);
  return (rc == 0) ? inspectorSuccess : inspectorLockError;
}
// TODO inspect these retvals
//
// The three macros below pair a lock operation with a caller-owned bool that
// records whether the lock is currently held, so a function can lock/unlock
// idempotently along several exit paths.
//
// Notes for users:
//  - They expand INS_CHK, so a failing lock call RETURNS from the enclosing
//    function before lockFlag is updated.
//  - The `debug` argument is not referenced by the expansion.
//  - Like INS_CHK, each expansion already ends in a semicolon.

// Takes the read lock unless lockFlag says it is already held; sets lockFlag.
#define INSPECTOR_LOCK_RD_FLAG(lockRef, lockFlag, debug) \
do { \
if (!lockFlag) { \
INS_CHK(inspectorLockRd(lockRef)); \
} \
lockFlag = true; \
} while (0);
// Takes the write lock unless lockFlag says it is already held; sets lockFlag.
#define INSPECTOR_LOCK_WR_FLAG(lockRef, lockFlag, debug) \
do { \
if (!lockFlag) { \
INS_CHK(inspectorLockWr(lockRef)); \
} \
lockFlag = true; \
} while (0);
// Releases the lock if lockFlag says it is held; clears lockFlag.
#define INSPECTOR_UNLOCK_RW_LOCK_FLAG(lockRef, lockFlag, debug) \
do { \
if (lockFlag) { \
INS_CHK(inspectorUnlockRWLock(lockRef)); \
} \
lockFlag = false; \
} while (0);
// A list of tracked communicators protected by a read-write lock.
struct inspectorCommInfoList {
// Head of a singly linked list (entries are chained through their `next`
// member); nullptr when the list is empty.
struct inspectorCommInfo* comms;
// Number of entries currently linked into `comms`.
uint32_t ncomms;
// Guards `comms` and `ncomms`.
pthread_rwlock_t guard;
};
// Process-wide inspector state: communicators still in use and communicators
// that have been deleted but are kept around for a later dump.
struct inspectorState {
struct inspectorCommInfoList liveComms;
struct inspectorCommInfoList deletedComms;
};
// The single global state instance; (re)initialized by
// inspectorGlobalStateInit().
static inspectorState g_state;
/*
 * Resets a communicator list to the empty state and initializes its lock.
 *
 * Return:
 *   inspectorGlobalInitError if the list already has entries (comms is
 *   non-null), the lock-init error if the guard cannot be created,
 *   inspectorSuccess otherwise.
 */
static inspectorResult_t inspectorCommInfoListInit(struct inspectorCommInfoList* commList) {
  const bool alreadyPopulated = (commList->comms != nullptr);
  if (alreadyPopulated) {
    return inspectorGlobalInitError;
  }

  commList->ncomms = 0;
  commList->comms = nullptr;
  INS_CHK(inspectorLockInit(&commList->guard));
  return inspectorSuccess;
}
/*
 * Zeroes the global inspector state and initializes the live and deleted
 * communicator lists (live first, then deleted).
 *
 * Return:
 *   inspectorSuccess, or the first error reported by
 *   inspectorCommInfoListInit.
 */
static inspectorResult_t inspectorGlobalStateInit() {
  memset(&g_state, 0, sizeof(g_state));

  struct inspectorCommInfoList* const lists[] = {&g_state.liveComms,
                                                 &g_state.deletedComms};
  for (size_t idx = 0; idx < sizeof(lists) / sizeof(lists[0]); ++idx) {
    INS_CHK(inspectorCommInfoListInit(lists[idx]));
  }
  return inspectorSuccess;
}
/*
 * Description:
 *
 *   Returns the JSON label used for an inspectorTimingSource_t value.
 *
 * Thread Safety:
 *   Thread-safe (pure function returning string literals).
 *
 * Input:
 *   inspectorTimingSource_t timingSource - timing source enum value.
 *
 * Output:
 *   None.
 *
 * Return:
 *   const char* - "kernel_gpu", "kernel_cpu" or "collective_cpu" for the
 *   three known sources, "unknown" for anything else.
 */
static const char* inspectorTimingSourceToString(inspectorTimingSource_t timingSource) {
  if (timingSource == inspectorTimingSourceKernelGpu) {
    return "kernel_gpu";
  }
  if (timingSource == inspectorTimingSourceKernelCpu) {
    return "kernel_cpu";
  }
  if (timingSource == inspectorTimingSourceCollectiveCpu) {
    return "collective_cpu";
  }
  return "unknown";
}
/*
 * Description:
 *
 *   Emits the per-communicator header object:
 *   {"id", "rank", "n_ranks", "nnodes"}.
 *
 * Thread Safety:
 *   Not thread-safe (the caller is expected to hold the output lock).
 *
 * Input:
 *   jsonFileOutput* jfo - JSON output handle.
 *   struct inspectorCommInfo* commInfo - communicator whose identity is
 *   written.
 *
 * Output:
 *   Header object is appended to the JSON output.
 *
 * Return:
 *   inspectorResult_t - inspectorSuccess, or inspectorJsonError as soon as
 *   any JSON call fails.
 *
 */
static inspectorResult_t inspectorCommInfoHeader(jsonFileOutput* jfo,
                                                 struct inspectorCommInfo* commInfo) {
  JSON_CHK(jsonStartObject(jfo));

  JSON_CHK(jsonKey(jfo, "id"));
  JSON_CHK(jsonStr(jfo, commInfo->commHashStr));

  JSON_CHK(jsonKey(jfo, "rank"));
  JSON_CHK(jsonInt(jfo, commInfo->rank));

  JSON_CHK(jsonKey(jfo, "n_ranks"));
  JSON_CHK(jsonInt(jfo, commInfo->nranks));

  JSON_CHK(jsonKey(jfo, "nnodes"));
  JSON_CHK(jsonUint64(jfo, commInfo->nnodes));

  JSON_CHK(jsonFinishObject(jfo));
  return inspectorSuccess;
}
/*
 * Description:
 *
 *   Writes the metadata object to the JSON output: output format version,
 *   git revision, recording mechanism, dump timestamp, hostname and pid.
 *
 * Thread Safety:
 *   Not thread-safe (should be called with proper locking).
 *
 * Input:
 *   jsonFileOutput* jfo - JSON output handle.
 *
 * Output:
 *   Metadata header is written to JSON output.
 *
 * Return:
 *   inspectorResult_t - inspectorSuccess, or inspectorJsonError as soon as
 *   any JSON call fails.
 *
 */
static inspectorResult_t inspectorCommInfoMetaHeader(jsonFileOutput* jfo) {
  // Zero-fill so the buffer is always NUL-terminated: gethostname() is not
  // required to terminate a truncated name, and leaves the buffer untouched
  // on failure.
  char hostname[256] = {0};
  if (gethostname(hostname, sizeof(hostname) - 1) != 0) {
    snprintf(hostname, sizeof(hostname), "unknown");
  }

  JSON_CHK(jsonStartObject(jfo));
  {
    JSON_CHK(jsonKey(jfo, "inspector_output_format_version")); JSON_CHK(jsonStr(jfo, "v4.0"));
    JSON_CHK(jsonKey(jfo, "git_rev")); JSON_CHK(jsonStr(jfo, get_git_version_info()));
    JSON_CHK(jsonKey(jfo, "rec_mechanism")); JSON_CHK(jsonStr(jfo, "nccl_profiler_interface"));
    JSON_CHK(jsonKey(jfo, "dump_timestamp_us")); JSON_CHK(jsonUint64(jfo, inspectorGetTime()));
    JSON_CHK(jsonKey(jfo, "hostname")); JSON_CHK(jsonStr(jfo, hostname));
    JSON_CHK(jsonKey(jfo, "pid")); JSON_CHK(jsonUint64(jfo, getpid()));
  }
  JSON_CHK(jsonFinishObject(jfo));
  return inspectorSuccess;
}
/*
 * Description:
 *
 *   Emits the verbose event trace of a completed collective as two sibling
 *   objects: "event_trace_sn" (sequence numbers) and "event_trace_ts"
 *   (timestamps). Each holds the collective start/stop entry plus a
 *   "kernel_events" list with one element per channel.
 *
 * Thread Safety:
 *   Not thread-safe (should be called with proper locking).
 *
 * Input:
 *   jsonFileOutput* jfo - JSON output handle.
 *   struct inspectorCompletedCollInfo* collInfo - completed collective info
 *   (only read).
 *
 * Output:
 *   Verbose collective info is written to JSON output.
 *
 * Return:
 *   inspectorResult_t - inspectorSuccess, or inspectorJsonError as soon as
 *   any JSON call fails.
 *
 */
static inline inspectorResult_t inspectorCompletedCollVerbose(jsonFileOutput* jfo,
                                                              struct inspectorCompletedCollInfo* collInfo) {
  const auto& tracker = collInfo->collEvtTrk;

  // ---- Sequence numbers -------------------------------------------------
  JSON_CHK(jsonKey(jfo, "event_trace_sn"));
  JSON_CHK(jsonStartObject(jfo));
  {
    JSON_CHK(jsonKey(jfo, "coll_start_sn"));
    JSON_CHK(jsonUint64(jfo, tracker.evntTrace[NCCL_INSP_EVT_TRK_COLL_START].sn));
    JSON_CHK(jsonKey(jfo, "coll_stop_sn"));
    JSON_CHK(jsonUint64(jfo, tracker.evntTrace[NCCL_INSP_EVT_TRK_COLL_STOP].sn));

    JSON_CHK(jsonKey(jfo, "kernel_events"));
    JSON_CHK(jsonStartList(jfo));
    for (uint32_t chan = 0; chan < tracker.nChannels; ++chan) {
      const auto& kernel = tracker.kernelCh[chan];
      JSON_CHK(jsonStartObject(jfo));
      JSON_CHK(jsonKey(jfo, "channel_id"));
      JSON_CHK(jsonInt(jfo, chan));
      JSON_CHK(jsonKey(jfo, "kernel_start_sn"));
      JSON_CHK(jsonUint64(jfo, kernel.evntTrace[NCCL_INSP_EVT_TRK_KERNEL_START].sn));
      JSON_CHK(jsonKey(jfo, "kernel_stop_sn"));
      JSON_CHK(jsonUint64(jfo, kernel.evntTrace[NCCL_INSP_EVT_TRK_KERNEL_STOP].sn));
      JSON_CHK(jsonKey(jfo, "kernel_record_sn"));
      JSON_CHK(jsonUint64(jfo, kernel.evntTrace[NCCL_INSP_EVT_TRK_KERNEL_RECORD].sn));
      JSON_CHK(jsonFinishObject(jfo));
    }
    JSON_CHK(jsonFinishList(jfo));
  }
  JSON_CHK(jsonFinishObject(jfo));

  // ---- Timestamps -------------------------------------------------------
  JSON_CHK(jsonKey(jfo, "event_trace_ts"));
  JSON_CHK(jsonStartObject(jfo));
  {
    JSON_CHK(jsonKey(jfo, "coll_start_ts"));
    JSON_CHK(jsonUint64(jfo, tracker.evntTrace[NCCL_INSP_EVT_TRK_COLL_START].ts));
    JSON_CHK(jsonKey(jfo, "coll_stop_ts"));
    JSON_CHK(jsonUint64(jfo, tracker.evntTrace[NCCL_INSP_EVT_TRK_COLL_STOP].ts));

    JSON_CHK(jsonKey(jfo, "kernel_events"));
    JSON_CHK(jsonStartList(jfo));
    for (uint32_t chan = 0; chan < tracker.nChannels; ++chan) {
      const auto& kernel = tracker.kernelCh[chan];
      JSON_CHK(jsonStartObject(jfo));
      JSON_CHK(jsonKey(jfo, "channel_id"));
      JSON_CHK(jsonInt(jfo, chan));
      JSON_CHK(jsonKey(jfo, "kernel_start_ts"));
      JSON_CHK(jsonUint64(jfo, kernel.evntTrace[NCCL_INSP_EVT_TRK_KERNEL_START].ts));
      JSON_CHK(jsonKey(jfo, "kernel_stop_ts"));
      JSON_CHK(jsonUint64(jfo, kernel.evntTrace[NCCL_INSP_EVT_TRK_KERNEL_STOP].ts));
      JSON_CHK(jsonKey(jfo, "kernel_record_ts"));
      JSON_CHK(jsonUint64(jfo, kernel.evntTrace[NCCL_INSP_EVT_TRK_KERNEL_RECORD].ts));
      JSON_CHK(jsonFinishObject(jfo));
    }
    JSON_CHK(jsonFinishList(jfo));
  }
  JSON_CHK(jsonFinishObject(jfo));

  return inspectorSuccess;
}
/*
 * Description:
 *
 *   Emits one JSON object describing a completed collective: name, sequence
 *   number, message size, execution time, timing source and the two
 *   bandwidth figures. When verbose dumping is enabled the event trace is
 *   appended inside the same object.
 *
 * Thread Safety:
 *   Not thread-safe (should be called with proper locking).
 *
 * Input:
 *   jsonFileOutput* jfo - JSON output handle.
 *   struct inspectorCompletedCollInfo* collInfo - completed collective info.
 *
 * Output:
 *   Collective info is written to JSON output.
 *
 * Return:
 *   inspectorResult_t - inspectorSuccess, inspectorJsonError on a JSON
 *   failure, or the error returned by inspectorCompletedCollVerbose.
 *
 */
static inline inspectorResult_t inspectorCompletedColl(jsonFileOutput* jfo,
                                                       struct inspectorCompletedCollInfo* collInfo) {
  JSON_CHK(jsonStartObject(jfo));

  JSON_CHK(jsonKey(jfo, "coll"));
  JSON_CHK(jsonStr(jfo, ncclFuncToString(collInfo->func)));

  JSON_CHK(jsonKey(jfo, "coll_sn"));
  JSON_CHK(jsonUint64(jfo, collInfo->sn));

  JSON_CHK(jsonKey(jfo, "coll_msg_size_bytes"));
  JSON_CHK(jsonUint64(jfo, collInfo->msgSizeBytes));

  JSON_CHK(jsonKey(jfo, "coll_exec_time_us"));
  JSON_CHK(jsonUint64(jfo, collInfo->execTimeUsecs));

  JSON_CHK(jsonKey(jfo, "coll_timing_source"));
  JSON_CHK(jsonStr(jfo, inspectorTimingSourceToString(collInfo->timingSource)));

  JSON_CHK(jsonKey(jfo, "coll_algobw_gbs"));
  JSON_CHK(jsonDouble(jfo, collInfo->algoBwGbs));

  JSON_CHK(jsonKey(jfo, "coll_busbw_gbs"));
  JSON_CHK(jsonDouble(jfo, collInfo->busBwGbs));

  // Optional event trace, controlled by NCCL_INSPECTOR_DUMP_VERBOSE.
  if (enableNcclInspectorDumpVerbose) {
    INS_CHK(inspectorCompletedCollVerbose(jfo, collInfo));
  }

  JSON_CHK(jsonFinishObject(jfo));
  return inspectorSuccess;
}
/*
 * Description:
 *
 *   Dumps the latest completed collective of a communicator to the JSON
 *   output, if the communicator has one pending (commInfo->dump is set).
 *   The pending record is copied out and the dump flag cleared under the
 *   communicator's lock; the JSON is then written from the private copy.
 *
 * Thread Safety:
 *   Takes commInfo->guard (write) and the JSON output lock internally.
 *   The caller must keep commInfo alive for the duration of the call.
 *
 * Input:
 *   jsonFileOutput* jfo - JSON output handle.
 *   inspectorCommInfo* commInfo - communicator info (NULL is a no-op).
 *   bool* needs_writing - set to true if a record was pending.
 *
 * Output:
 *   One JSON line {"header", "metadata", "coll_perf"} is written if a
 *   record was pending.
 *
 * Return:
 *   inspectorResult_t - inspectorSuccess, a lock error, or the first
 *   JSON/inspector error hit while writing. The JSON output lock is always
 *   released before returning, including on error paths.
 *
 */
static inspectorResult_t inspectorCommInfoDump(jsonFileOutput* jfo,
                                               inspectorCommInfo* commInfo,
                                               bool* needs_writing) {
  // Declared up front so the goto-based cleanup below never jumps over an
  // initialization.
  inspectorResult_t res = inspectorSuccess;
  struct inspectorCompletedCollInfo collInfo;

  *needs_writing = false;
  if (commInfo == nullptr) {
    return inspectorSuccess;
  }

  memset(&collInfo, 0, sizeof(collInfo));

  // Snapshot the pending record so the JSON writing happens without holding
  // the communicator lock.
  INS_CHK(inspectorLockWr(&commInfo->guard));
  if (commInfo->dump) {
    *needs_writing = true;
    memcpy(&collInfo, &commInfo->completedCollInfo, sizeof(collInfo));
    commInfo->dump = false;
  }
  INS_CHK(inspectorUnlockRWLock(&commInfo->guard));

  if (!*needs_writing) {
    return inspectorSuccess;
  }

  JSON_CHK(jsonLockOutput(jfo));
  // From here on every failure must go through unlock_output, otherwise the
  // output lock would stay held and block every later dump.
  JSON_CHK_GOTO(jsonStartObject(jfo), res, unlock_output);

  JSON_CHK_GOTO(jsonKey(jfo, "header"), res, unlock_output);
  res = inspectorCommInfoHeader(jfo, commInfo);
  if (res != inspectorSuccess) goto unlock_output;

  JSON_CHK_GOTO(jsonKey(jfo, "metadata"), res, unlock_output);
  res = inspectorCommInfoMetaHeader(jfo);
  if (res != inspectorSuccess) goto unlock_output;

  JSON_CHK_GOTO(jsonKey(jfo, "coll_perf"), res, unlock_output);
  res = inspectorCompletedColl(jfo, &collInfo);
  if (res != inspectorSuccess) goto unlock_output;

  JSON_CHK_GOTO(jsonFinishObject(jfo), res, unlock_output);
  JSON_CHK_GOTO(jsonNewline(jfo), res, unlock_output);

unlock_output:
  // Report an unlock failure only if nothing went wrong earlier.
  if (jsonUnlockOutput(jfo) != jsonSuccess && res == inspectorSuccess) {
    res = inspectorJsonError;
  }
  return res;
}
/*
 * Description:
 *
 *   Dumps the state of all communicators in a commList to the JSON
 *   output, then flushes the output once if anything was written.
 *
 * Thread Safety:
 *   Thread-safe - assumes no locks are taken and acquires all necessary
 *   locks to iterate through all communicator objects and dump their state.
 *   The list is held under its read lock for the whole iteration.
 *
 * Input:
 *   jsonFileOutput* jfo - JSON output handle (must not be NULL).
 *   struct inspectorCommInfoList* commList - list of communicators (must not be NULL).
 *
 * Output:
 *   State of all communicators is dumped to JSON output.
 *
 * Return:
 *   inspectorResult_t - inspectorSuccess, or the first error encountered.
 *   Iteration stops at the first communicator that fails to dump. Both the
 *   list lock and the JSON output lock are released on every path.
 *
 */
static inspectorResult_t inspectorCommInfoListDump(jsonFileOutput* jfo,
                                                   struct inspectorCommInfoList* commList) {
  bool flush = false;
  inspectorResult_t res = inspectorSuccess;
  INS_CHK(inspectorLockRd(&commList->guard));
  if (commList->ncomms > 0) {
    for (struct inspectorCommInfo* itr = commList->comms;
         itr != nullptr;
         itr = itr->next) {
      bool needs_writing = false;
      INS_CHK_GOTO(inspectorCommInfoDump(jfo, itr, &needs_writing), res, finalize);
      if (needs_writing) {
        flush = true;
      }
    }
    if (flush) {
      JSON_CHK_GOTO(jsonLockOutput(jfo), res, finalize);
      // Always pair the lock with an unlock: run both calls first, then
      // inspect the results, so a failed flush cannot leave the output
      // locked.
      const jsonResult_t flushRes = jsonFlushOutput(jfo);
      const jsonResult_t unlockRes = jsonUnlockOutput(jfo);
      JSON_CHK_GOTO(flushRes, res, finalize);
      JSON_CHK_GOTO(unlockRes, res, finalize);
    }
  }
finalize:
  INS_CHK(inspectorUnlockRWLock(&commList->guard));
  return res;
}
/*
 * Description:
 *   Finalizes and cleans up a commList: every communicator still linked in
 *   the list has its lock destroyed and its memory freed.
 *
 * Thread Safety:
 *   Takes the list's write lock for the duration of the cleanup. Callers
 *   must ensure no other thread still uses the individual communicators.
 *
 * Input:
 *   struct inspectorCommInfoList* commList - list of communicators.
 *
 * Output:
 *   Communicators are unlinked and freed; ncomms is decremented per entry.
 *
 * Return:
 *   inspectorResult_t - inspectorSuccess, or a lock error. If destroying a
 *   communicator's lock fails, that communicator is left in the list (it
 *   may still be in use), cleanup stops, and the list lock is released
 *   before the error is returned.
 *
 */
static inspectorResult_t inspectorCommInfoListFinalize(struct inspectorCommInfoList* commList) {
  inspectorResult_t res = inspectorSuccess;
  INS_CHK(inspectorLockWr(&commList->guard));
  while (commList->comms != nullptr && commList->ncomms != 0) {
    struct inspectorCommInfo* head = commList->comms;
    INFO(NCCL_INSPECTOR, "NCCL Inspector: comm %lu still in tracker",
         head->commHash);
    res = inspectorLockDestroy(&head->guard);
    if (res != inspectorSuccess) {
      // Do not free an entry whose lock could not be destroyed, and do not
      // return here: the list lock below must still be released.
      INFO(NCCL_INSPECTOR, "%s:%d -> error %d: %s", __FILE__, __LINE__, res,
           inspectorErrorString(res));
      break;
    }
    commList->comms = head->next;
    commList->ncomms--;
    free(head);
  }
  const inspectorResult_t unlockRes = inspectorUnlockRWLock(&commList->guard);
  if (res == inspectorSuccess) {
    res = unlockRes;
  }
  return res;
}
/*
 * Description:
 *
 *   Ensures the given directory exists and is writable, creating it
 *   (mode 0777, subject to umask) if necessary.
 *
 * Thread Safety:
 *   Not thread-safe (should be called during initialization). It does
 *   tolerate another process creating the same directory concurrently,
 *   which happens when several ranks on a node share one dump directory.
 *
 * Input:
 *   char* workdir - directory path (must not be NULL).
 *
 * Output:
 *   Directory is created if needed.
 *
 * Return:
 *
 *   bool - true if the directory was created by this call, or already
 *   exists and is writable; false otherwise (the reason is logged).
 *
 */
static bool ensureDir(char* workdir) {
  struct stat st;

  if (stat(workdir, &st) != 0) {
    // Directory doesn't exist (or can't be inspected), try to create it.
    const mode_t mode = 0777;
    if (mkdir(workdir, mode) == 0) {
      return true;  // Directory created successfully
    }
    // Save errno before any other call can overwrite it.
    const int mkdirErrno = errno;
    // EEXIST means somebody else won the race between our stat() and
    // mkdir(); that is fine as long as what exists passes the checks below.
    if (mkdirErrno != EEXIST || stat(workdir, &st) != 0) {
      INFO(NCCL_INSPECTOR,
           "NCCL Inspector: failed to create dump directory %s: %s", workdir,
           strerror(mkdirErrno));
      return false;
    }
  }

  if (!S_ISDIR(st.st_mode)) {
    INFO(NCCL_INSPECTOR,
         "NCCL Inspector: dump location %s exists, but is not a "
         "directory",
         workdir);
    return false;
  }

  if (access(workdir, W_OK) != 0) {
    INFO(NCCL_INSPECTOR,
         "NCCL Inspector: dump directory %s exists, but is not "
         "writable",
         workdir);
    return false;
  }

  return true;  // Directory exists and is writable
}
/*
 * Description:
 *
 *   Generates the output dump directory path based on environment
 *   variables, in this order of preference:
 *     1. NCCL_INSPECTOR_DUMP_DIR, used verbatim;
 *     2. "nccl-inspector-<id>" where <id> is the leading decimal number of
 *        SLURM_JOBID;
 *     3. "nccl-inspector-unknown-jobid" when SLURM_JOBID is unset, does not
 *        start with a number, or is out of range.
 *
 * Thread Safety:
 *   Not thread-safe (should be called during initialization).
 *
 * Input:
 *   char** workdir - pointer to output directory string.
 *
 * Output:
 *   *workdir is set to a newly allocated string that the caller must
 *   free(), or to NULL if the allocation failed.
 *
 * Return:
 *   None.
 */
static void genDumpDir(char** workdir) {
  const char* dumpdir = getenv("NCCL_INSPECTOR_DUMP_DIR");
  if (dumpdir != NULL) {
    *workdir = strdup(dumpdir);
    return;
  }

  const char* jobid = getenv("SLURM_JOBID");
  if (jobid != NULL) {
    char* end = NULL;
    errno = 0;
    const long id = strtol(jobid, &end, 10);
    // end == jobid means no digits were consumed; strtol() returns 0 in
    // that case without setting errno, so errno alone cannot detect it.
    if (errno == 0 && end != jobid) {
      char tmp[64];
      snprintf(tmp, sizeof(tmp), "nccl-inspector-%ld", id);
      *workdir = strdup(tmp);
      return;
    }
  }

  *workdir = strdup("nccl-inspector-unknown-jobid");
}
/*
 * Background dumper: owns the JSON output file and a thread that
 * periodically writes the state of all tracked communicators to it.
 *
 * Members:
 *   run                 - loop flag for the dump thread; written under guard.
 *   jfo                 - JSON output handle, opened lazily on the first dump.
 *   outputRoot          - private copy of the output directory path.
 *   sampleIntervalUsecs - pause between two dumps, in microseconds.
 *   pthread             - handle of the dump thread (valid after a
 *                         successful startThread()).
 *   guard               - serializes dumps against stopThread().
 */
struct inspectorDumpThread {
  bool run{false};
  jsonFileOutput* jfo;
  char* outputRoot;
  uint64_t sampleIntervalUsecs;
  pthread_t pthread;
  pthread_rwlock_t guard;

  // Copies outputRoot and initializes the lock; does not start the thread.
  inspectorDumpThread(const char* outputRoot, uint64_t sampleIntervalUsecs)
    : jfo(nullptr), outputRoot(strdup(outputRoot)), sampleIntervalUsecs(sampleIntervalUsecs) {
    if (inspectorLockInit(&guard) != inspectorSuccess) {
      INFO(NCCL_INSPECTOR, "NCCL Inspector inspectorDumpThread: couldn't init lock");
    }
  }

  // Closes the output file (if open), frees the path copy and destroys the
  // lock. Does not stop or join the thread.
  ~inspectorDumpThread() {
    if (jfo != nullptr) {
      jsonFinalizeFileOutput(jfo);
      jfo = nullptr;
    }
    if (outputRoot != nullptr) {
      free(outputRoot);
      outputRoot = nullptr;
    }
    if (inspectorLockDestroy(&guard) != inspectorSuccess) {
      INFO(NCCL_INSPECTOR, "NCCL Inspector inspectorDumpThread: couldn't destroy lock");
    }
  }

  // Sets the run flag and launches dumpMain on a new thread. If the thread
  // cannot be created the run flag is cleared again so the object does not
  // claim to be running.
  void startThread() {
    inspectorLockWr(&guard);
    run = true;
    inspectorUnlockRWLock(&guard);
    if (pthread_create(&pthread, NULL, dumpMain, this) != 0) {
      inspectorLockWr(&guard);
      run = false;
      inspectorUnlockRWLock(&guard);
      INFO(NCCL_INSPECTOR,
           "NCCL Inspector inspectorDumpThread: couldn't create dump thread!");
      return;
    }
    INFO(NCCL_INSPECTOR, "NCCL Inspector inspectorDumpThread: created");
  }

  // Clears the run flag and waits 1 ms.
  // NOTE(review): the thread is never joined. It only notices the cleared
  // flag after its current sleep, which can be far longer than 1 ms, so
  // deleting this object right after stopThread() can race with the thread.
  // Confirm against the caller whether a pthread_join is needed here.
  void stopThread() {
    INFO(NCCL_ENV, "NCCL Inspector Stopping Dump thread");
    inspectorLockWr(&guard);
    run = false;
    inspectorUnlockRWLock(&guard);
    struct timespec ts;
    ts.tv_sec = 0;
    ts.tv_nsec = 1000000;  // 1ms
    nanosleep(&ts, NULL);
    INFO(NCCL_INSPECTOR, "NCCL Inspector inspectorDumpThread: stopped");
  }

  // Performs one dump cycle: opens "<output_root>/<hostname>-pid<pid>.log"
  // on first use, dumps the live and deleted communicator lists, then frees
  // the deleted communicators.
  //
  // Returns inspectorUninitializedError / inspectorDisabledError when the
  // inspector is not initialized / not enabled, inspectorFileOpenError if
  // the output file cannot be opened, inspectorSuccess otherwise (errors
  // from the individual list dumps are not propagated).
  inspectorResult_t inspectorStateDump(const char* output_root) {
    if (!ncclInspectorInit) {
      return inspectorUninitializedError;
    }
    if (!enableNcclInspector) {
      INFO(NCCL_INSPECTOR, "NCCL Inspector is not enabled, will not do ncclAllCommTallyDump");
      return inspectorDisabledError;
    }
    if (jfo == nullptr) {
      // Zero-fill so the name is NUL-terminated even when gethostname()
      // truncates; fall back to a fixed name when it fails outright.
      char hostname[256] = {0};
      if (gethostname(hostname, sizeof(hostname) - 1) != 0) {
        snprintf(hostname, sizeof(hostname), "unknown");
      }
      char tmp[2048];
      snprintf(tmp, sizeof(tmp), "%s/%s-pid%d.log", output_root, hostname, getpid());
      jsonResult_t result = jsonInitFileOutput(&jfo, tmp);
      if (jsonSuccess != result) {
        INFO(NCCL_INSPECTOR, "Cannot open %s for writing: %s", tmp, jsonErrorString(result));
        return inspectorFileOpenError;
      }
      chmod(tmp, 0666);
    }
    if (jfo != nullptr) {
      inspectorCommInfoListDump(jfo, &g_state.liveComms);
      inspectorCommInfoListDump(jfo, &g_state.deletedComms);
    }
    if (g_state.deletedComms.ncomms > 0) {
      inspectorCommInfoListFinalize(&g_state.deletedComms);
    }
    return inspectorSuccess;
  }

  // Thread entry point: dumps, then sleeps for sampleIntervalUsecs, until
  // the run flag is cleared or the dump reports that the output file cannot
  // be opened or the inspector is disabled.
  static void* dumpMain(void* arg) {
    inspectorDumpThread* dumper = (inspectorDumpThread*)arg;
    inspectorResult_t res = inspectorSuccess;
    struct timespec ts;
    ts.tv_sec = dumper->sampleIntervalUsecs / 1000000;
    // tv_nsec is in NANOseconds: convert the sub-second remainder from
    // microseconds, otherwise sub-second intervals sleep 1000x too short.
    ts.tv_nsec = (dumper->sampleIntervalUsecs % 1000000) * 1000;
    while (dumper->run) {
      inspectorLockWr(&dumper->guard);
      // Re-check under the lock: stopThread() may have run in between.
      if (!dumper->run) {
        inspectorUnlockRWLock(&dumper->guard);
        break;
      }
      res = dumper->inspectorStateDump(dumper->outputRoot);
      if (res == inspectorFileOpenError || res == inspectorDisabledError) {
        inspectorUnlockRWLock(&dumper->guard);
        break;
      }
      inspectorUnlockRWLock(&dumper->guard);
      nanosleep(&ts, NULL);
    }
    return 0;
  }
};
/*
 * Description:
 *
 *   Logs the NCCL Inspector plugin version (the git version string) in a
 *   format similar to NCCL's showVersion function.
 *
 * Thread Safety:
 *   Thread-safe (only reads the version string and logs it).
 *
 * Input:
 *   None.
 *
 * Output:
 *   One version line is written to the debug output.
 *
 * Return:
 *   None.
 */
static void showInspectorVersion() {
  const char* const gitVersion = get_git_version_info();
  VERSION("NCCL Inspector Plugin - Version: %s", gitVersion);
}
/*
 * Description:
 *
 *   Logs every NCCL Inspector environment variable. A variable that is set
 *   is shown with its value; one that is not set is shown as "(not set)"
 *   followed by its default.
 *
 * Thread Safety:
 *   Thread-safe (read-only environment variable access).
 *
 * Input:
 *   None.
 *
 * Output:
 *   Logs environment variables to debug output.
 *
 * Return:
 *   None.
 */
static void showInspectorEnvVars() {
  struct EnvVarInfo {
    const char* name;
    const char* defaultVal;
    const char* description;
  };
  static const EnvVarInfo kEnvVars[] = {
    {"NCCL_INSPECTOR_ENABLE", "0", "Enable/disable inspector plugin"},
    {"NCCL_INSPECTOR_DUMP_THREAD_ENABLE", "1", "Enable/disable dump thread"},
    {"NCCL_INSPECTOR_DUMP_THREAD_INTERVAL_MICROSECONDS", "0", "Dump thread interval in microseconds"},
    {"NCCL_INSPECTOR_DUMP_DIR", "(auto-generated)", "Output directory for inspector logs"},
    {"NCCL_INSPECTOR_DUMP_VERBOSE", "0", "Enable/disable verbose dumping (event_trace)"},
  };

  VERSION("NCCL Inspector Environment Variables:");
  for (const EnvVarInfo& var : kEnvVars) {
    const char* current = getenv(var.name);
    if (current != nullptr) {
      VERSION(" %s = %s%s%s", var.name, current, "", "");
    } else {
      VERSION(" %s = %s%s%s", var.name, "(not set)", ", default=", var.defaultVal);
    }
  }
}
/*
 * Description:
 *
 *   Reads the inspector configuration from the environment, initializes
 *   the global inspector state and, when enabled and configured with a
 *   non-zero interval, starts the internal dump thread.
 *
 * Thread Safety:
 *
 *   Not thread-safe (should be called during initialization).
 *
 * Input:
 *   int rank - rank of the caller; only rank 0 logs the version and the
 *   environment variable summary.
 *
 * Output:
 *   Global flags and state are initialized and the dump thread may be
 *   started.
 *
 * Return:
 *   inspectorResult_t - inspectorDisabledError when NCCL_INSPECTOR_ENABLE
 *   is unset or 0, an error from the global state initialization, or
 *   inspectorSuccess. Failing to start the dump thread is logged but still
 *   reported as success.
 */
inspectorResult_t inspectorGlobalInit(int rank) {
  const char* envVal = getenv("NCCL_INSPECTOR_ENABLE");
  enableNcclInspector = (envVal != nullptr) && (atoi(envVal) != 0);  // default disable
  ncclInspectorInit = true;

  // Show version and environment configuration (similar to NCCL's showVersion)
  if (rank == 0) {
    showInspectorVersion();
    showInspectorEnvVars();
  }

  if (!enableNcclInspector) {
    VERSION("NCCL Inspector Plugin DISABLED (NCCL_INSPECTOR_ENABLE=%s)",
            envVal ? envVal : "0");
    return inspectorDisabledError;
  }

  INS_CHK(inspectorGlobalStateInit());

  envVal = getenv("NCCL_INSPECTOR_DUMP_THREAD_ENABLE");
  enableNcclInspectorDumpThread = (envVal == nullptr) || (atoi(envVal) != 0);  // default enable

  envVal = getenv("NCCL_INSPECTOR_DUMP_VERBOSE");
  enableNcclInspectorDumpVerbose = (envVal != nullptr) && (atoi(envVal) != 0);  // default disable

  if (!enableNcclInspectorDumpThread) {
    INFO(NCCL_INSPECTOR,
         "NCCL Inspector: NCCL_INSPECTOR_DUMP_THREAD_ENABLE set to 0; not "
         "starting internal dump "
         "thread.");
    return inspectorSuccess;
  }

  envVal = getenv("NCCL_INSPECTOR_DUMP_THREAD_INTERVAL_MICROSECONDS");
  const uint64_t interval = envVal ? strtoull(envVal, 0, 0) : 0;
  if (interval == 0) {
    INFO(NCCL_INSPECTOR, "NCCL Inspector: dump thread enabled but "
         "NCCL_INSPECTOR_DUMP_THREAD_INTERVAL_MICROSECONDS is 0; not "
         "starting internal dump "
         "thread.");
    return inspectorSuccess;
  }

  char* dumpdir = nullptr;
  genDumpDir(&dumpdir);
  if (dumpdir == nullptr) {
    INFO(NCCL_INSPECTOR, "NCCL Inspector: failed to generate a dump "
         "dir; not starting internal dump thread.");
    return inspectorSuccess;
  }

  if (!ensureDir(dumpdir)) {
    free(dumpdir);
    INFO(NCCL_INSPECTOR, "NCCL Inspector: failed to generate a dump dir; not "
         "starting internal dump thread.");
    return inspectorSuccess;
  }

  // The dumper keeps its own copy of the path, so dumpdir is freed below.
  dumper = new inspectorDumpThread(dumpdir, interval);
  dumper->startThread();
  INFO(NCCL_INSPECTOR,
       "NCCL Inspector enabled with polling interval %lu us and "
       "output directory %s",
       interval, dumpdir);
  free(dumpdir);
  return inspectorSuccess;
}
/*
 * Description:
 *
 *   Translates an inspectorResult_t code into a human-readable message.
 *
 * Thread Safety:
 *   Thread-safe (returns pointers to string literals).
 *
 * Input:
 *   inspectorResult_t result - result code.
 *
 * Output:
 *   None.
 *
 * Return:
 *   const char* - message for the code, or "Unknown error" for a code that
 *   has no entry.
 */
const char* inspectorErrorString(inspectorResult_t result) {
  const char* text = "Unknown error";
  switch (result) {
    case inspectorSuccess:
      text = "Success";
      break;
    case inspectorUninitializedError:
      text = "Inspector is not initialized";
      break;
    case inspectorMemoryError:
      text = "Inspector encountered issue allocating memory";
      break;
    case inspectorFileOpenError:
      text = "Inspector could not open file";
      break;
    case inspectorDisabledError:
      text = "Inspector is disabled";
      break;
    case inspectorLockError:
      text = "Inspector encountered error with lock";
      break;
    case inspectorPthreadError:
      text = "Inspector encountered error with pthreads";
      break;
    case inspectorJsonError:
      text = "Inspector encountered error while emitting JSON";
      break;
    case inspectorCudaError:
      text = "Inspector encountered CUDA error";
      break;
    case inspectorBadHash:
      text = "Inspector encountered bad communicator hash";
      break;
    case inspectorDeleteUnknownCommError:
      text = "Inspector was asked to delete a communicator that it is not "
             "tracking";
      break;
    case inspectorAddDuplicateCommError:
      text = "Inspector was asked to add a communicator it was already "
             "tracking";
      break;
    case inspectorNop:
      text = "Inspector NOP";
      break;
    case inspectorNullTally:
      text = "Inspector encountered a null OpTally";
      break;
    case inspectorGlobalInitError:
      text = "Inspector encountered a repeated global init";
      break;
    case inspectorReturn:
      text = "Inspector Unconditional Return";
      break;
    default:
      break;
  }
  return text;
}
/*
 * Description:
 *   Converts a communicator hash to its "0x<hex>" string form.
 *
 * Thread Safety:
 *   Thread-safe (writes only to the provided buffer).
 *
 * Input:
 *   uint64_t commHash - communicator hash.
 *   char hashStr[NCCL_COMM_HASH_LENGTH] - output buffer.
 *
 * Output:
 *   hashStr is set to the string representation of commHash, truncated
 *   (and still NUL-terminated) if it does not fit in
 *   NCCL_COMM_HASH_LENGTH bytes.
 *
 * Return:
 *   inspectorResult_t - always inspectorSuccess.
 */
inspectorResult_t inspectorCommGetHashStr(uint64_t commHash,
                                          char hashStr[NCCL_COMM_HASH_LENGTH]) {
  // uint64_t is not "unsigned long" on every platform; cast to a type with
  // a fixed printf specifier so the format always matches the argument.
  snprintf(hashStr, NCCL_COMM_HASH_LENGTH, "0x%llx",
           (unsigned long long)commHash);
  return inspectorSuccess;
}
/*
 * Description:
 *   Tells whether two (hash, rank) pairs identify the same communicator.
 *
 * Thread Safety:
 *   Thread-safe (pure comparison of its arguments).
 *
 * Input:
 *   uint64_t lCommHash - left communicator hash.
 *   uint64_t rCommHash - right communicator hash.
 *   int lRank - left rank.
 *   int rRank - right rank.
 *
 * Output:
 *   None.
 *
 * Return:
 *   bool - true only if both the hashes and the ranks match.
 */
static bool comm_eq(uint64_t lCommHash, uint64_t rCommHash,
                    int lRank, int rRank) {
  if (lCommHash != rCommHash) {
    return false;
  }
  return lRank == rRank;
}
/*
 * Description:
 *   Populates a communicator info structure: identity (name, hash, hash
 *   string), topology (rank, nranks, nnodes), a cleared dump flag, a
 *   freshly initialized lock and a null next pointer.
 *
 * Thread Safety:
 *   Not thread-safe - should be called during communicator initialization.
 *
 * Input:
 *   struct inspectorCommInfo* commInfo - structure to initialize (must not be NULL).
 *   const char* commName - communicator name (can be NULL); the pointer is
 *   stored as-is, not copied.
 *   uint64_t commHash - communicator hash.
 *   int nnodes - number of nodes (must be > 0).
 *   int nranks - number of ranks (must be > 0).
 *   int rank - rank (must be >= 0 and < nranks).
 *
 * Output:
 *   commInfo is initialized with the provided parameters.
 *
 * Return:
 *   inspectorResult_t - inspectorSuccess, or the lock-init error.
 *
 * Preconditions:
 *   - commInfo must not be NULL
 *   - nnodes must be positive
 *   - nranks must be positive
 *   - rank must be non-negative and less than nranks
 *   (none of these are checked here)
 */
static inspectorResult_t inspectorFillCommInfo(struct inspectorCommInfo* commInfo,
                                               const char* commName, uint64_t commHash,
                                               int nnodes, int nranks, int rank) {
  // Topology.
  commInfo->rank = rank;
  commInfo->nranks = nranks;
  commInfo->nnodes = nnodes;

  // Identity.
  commInfo->commName = commName;
  commInfo->commHash = commHash;
  (void)inspectorCommGetHashStr(commHash, commInfo->commHashStr);

  // Nothing to dump yet.
  commInfo->dump = false;

  INS_CHK(inspectorLockInit(&commInfo->guard));
  commInfo->next = nullptr;
  return inspectorSuccess;
}
/*
 * Description:
 *   Creates a tracking record for a communicator and adds it to the head
 *   of the global live list. A communicator is identified by its
 *   (commHash, rank) pair; adding a pair that is already tracked fails.
 *
 * Thread Safety:
 *   Thread-safe. The duplicate check and the insertion happen under one
 *   acquisition of the live list's write lock, so two concurrent calls for
 *   the same communicator cannot both succeed.
 *
 * Input:
 *   struct inspectorCommInfo **commInfo - pointer to output struct (must not be NULL).
 *   const char* commName - communicator name (can be NULL).
 *   uint64_t commHash - communicator hash.
 *   int nNodes - number of nodes (must be > 0).
 *   int nranks - number of ranks (must be > 0).
 *   int rank - rank (must be >= 0 and < nranks).
 *
 * Output:
 *   *commInfo is set to the new communicator struct once it has been
 *   inserted, and to NULL on every path where nothing was inserted.
 *
 * Return:
 *   inspectorResult_t - inspectorSuccess, inspectorMemoryError,
 *   inspectorAddDuplicateCommError, or a lock error.
 *
 * Preconditions:
 *   - commInfo must not be NULL
 *   - nNodes must be positive
 *   - nranks must be positive
 *   - rank must be non-negative and less than nranks
 */
inspectorResult_t inspectorAddComm(struct inspectorCommInfo **commInfo,
                                   const char* commName, uint64_t commHash,
                                   int nNodes, int nranks, int rank) {
  struct inspectorCommInfoList* liveCommInfoList = &g_state.liveComms;
  inspectorResult_t res = inspectorSuccess;

  // Give the caller a defined value on every early exit.
  *commInfo = nullptr;

  // Build the record before taking the list lock so the lock is held only
  // for the scan and the pointer update.
  struct inspectorCommInfo* commInfoPtr
    = (struct inspectorCommInfo*)calloc(1, sizeof(struct inspectorCommInfo));
  if (commInfoPtr == nullptr) {
    return inspectorMemoryError;
  }
  res = inspectorFillCommInfo(commInfoPtr, commName, commHash,
                              nNodes, nranks, rank);
  if (res != inspectorSuccess) {
    free(commInfoPtr);
    return res;
  }

  res = inspectorLockWr(&liveCommInfoList->guard);
  if (res != inspectorSuccess) {
    inspectorLockDestroy(&commInfoPtr->guard);
    free(commInfoPtr);
    return res;
  }

  for (struct inspectorCommInfo* itr = liveCommInfoList->comms;
       itr != nullptr;
       itr = itr->next) {
    if (comm_eq(commHash, itr->commHash, rank, itr->rank)) {
      INFO(NCCL_INSPECTOR, "NCCL Inspector: comm 0x%lx already in tracker",
           commHash);
      res = inspectorAddDuplicateCommError;
      break;
    }
  }

  if (res == inspectorSuccess) {
    ++liveCommInfoList->ncomms;
    commInfoPtr->next = liveCommInfoList->comms;
    liveCommInfoList->comms = commInfoPtr;
  }

  const inspectorResult_t unlockRes = inspectorUnlockRWLock(&liveCommInfoList->guard);

  if (res != inspectorSuccess) {
    // Not inserted: release everything that was built for it.
    inspectorLockDestroy(&commInfoPtr->guard);
    free(commInfoPtr);
    return res;
  }

  // The record is in the list, so hand it out even if the unlock reported
  // an error; that error is still returned to the caller.
  *commInfo = commInfoPtr;
  return unlockRes;
}
/*
 * Description:
 *
 *   Unlinks a communicator from the global live list and pushes it
 *   onto the head of the global deleted list. The communicator is
 *   located by comparing hash and rank (comm_eq) against each live
 *   entry; the struct itself is not freed here.
 *
 * Thread Safety:
 *   Takes the live list guard in write mode while unlinking, the
 *   communicator's own guard in write mode while clearing its dump
 *   flag, and the deleted list guard in write mode while inserting.
 *   The three locks are taken one after another, never nested.
 *
 * Input:
 *   struct inspectorCommInfo *commInfo - communicator to remove
 *                                        (dereferenced unconditionally).
 *
 * Output:
 *   The matching entry is removed from the live list, its dump flag is
 *   cleared, and it is added to the deleted list.
 *
 * Return:
 *   inspectorSuccess                 - entry was moved.
 *   inspectorDeleteUnknownCommError  - no matching entry in the live list.
 */
inspectorResult_t inspectorDelComm(struct inspectorCommInfo *commInfo) {
  struct inspectorCommInfoList* liveList = &g_state.liveComms;
  struct inspectorCommInfoList* deletedList = &g_state.deletedComms;
  struct inspectorCommInfo* removed = nullptr;
  bool locked = false;

  INFO(NCCL_INSPECTOR, "NCCL Inspector: DelComm removing 0x%lx",
       commInfo->commHash);

  INSPECTOR_LOCK_WR_FLAG(&liveList->guard, locked,
                         "inspectorDelComm: liveCommInfoList::guard -wr");
  // Walk the list via the address of each link so that unlinking the head
  // and unlinking an interior node are the same operation.
  struct inspectorCommInfo** link = &liveList->comms;
  while (*link != nullptr) {
    struct inspectorCommInfo* node = *link;
    if (comm_eq(commInfo->commHash, node->commHash, commInfo->rank, node->rank)) {
      *link = node->next;
      liveList->ncomms--;
      removed = node;
      break;
    }
    link = &node->next;
  }
  INSPECTOR_UNLOCK_RW_LOCK_FLAG(&liveList->guard, locked,
                                "inspectorDelComm: liveCommInfoList::guard -unlock");

  if (removed == nullptr) {
    INFO(NCCL_INSPECTOR, "NCCL Inspector: DelComm can't remove 0x%lx, not present",
         commInfo->commHash);
    return inspectorDeleteUnknownCommError;
  }

  // Clear the dump flag under the communicator's own guard.
  inspectorLockWr(&removed->guard);
  removed->dump = false;
  inspectorUnlockRWLock(&removed->guard);

  // Head-insert into the deleted list.
  INSPECTOR_LOCK_WR_FLAG(&deletedList->guard, locked,
                         "inspectorDelComm: deletedCommInfoList::guard -wr");
  removed->next = deletedList->comms;
  deletedList->comms = removed;
  deletedList->ncomms++;
  INSPECTOR_UNLOCK_RW_LOCK_FLAG(&deletedList->guard, locked,
                                "inspectorDelComm: deletedCommInfoList::guard -unlock");

  return inspectorSuccess;
}
/*
 * Description:
 *
 *   Derives the algorithmic and bus bandwidth (in GB/s) of a completed
 *   collective from its message size, its execution time and the
 *   number of ranks in the communicator.
 *
 *   Algorithmic bandwidth is traffic bytes / 1e9 / elapsed seconds.
 *   Bus bandwidth is algorithmic bandwidth scaled by a per-collective
 *   factor, following the conventions in the NCCL performance notes:
 *   https://github.com/NVIDIA/nccl-tests/blob/master/doc/PERFORMANCE.md
 *
 *     Reduce, Broadcast, SendRecv, Send, Recv:
 *         traffic = msgSizeBytes,           factor = 1
 *     AllReduce:
 *         traffic = msgSizeBytes,           factor = 2 * (nranks - 1) / nranks
 *     ReduceScatter, AllGather:
 *         traffic = msgSizeBytes * nranks,  factor = (nranks - 1) / nranks
 *     anything else:
 *         traffic = 0,                      factor = 0
 *
 * Thread Safety:
 *
 *   No locking is performed here; the caller is responsible for any
 *   synchronisation that is needed.
 *
 * Input:
 *
 *   commInfo      - communicator details; only nranks is read, and
 *                   only for AllReduce, ReduceScatter and AllGather.
 *
 *   completedColl - completed collective; execTimeUsecs and
 *                   msgSizeBytes are read.
 *
 *   collType      - the collective operation (ncclFunc_t).
 *
 * Output:
 *   completedColl->algoBwGbs and completedColl->busBwGbs are written.
 *   Both are 0 when the execution time is 0.
 *
 * Return:
 *   N.A. (void function)
 */
void inspectorComputeCollBw(struct inspectorCommInfo *commInfo,
                            struct inspectorCompletedCollInfo *completedColl,
                            ncclFunc_t collType) {
  const double elapsedSec = completedColl->execTimeUsecs / 1000000.0;
  double trafficBytes = 0.0;
  double busFactor = 0.0;

  switch (collType) {
  // Rooted collectives and point-to-point: bus bandwidth equals
  // algorithmic bandwidth.
  case ncclFuncReduce:
  case ncclFuncBroadcast:
  case ncclFuncSendRecv:
  case ncclFuncSend:
  case ncclFuncRecv:
    trafficBytes = (double)completedColl->msgSizeBytes;
    busFactor = 1;
    break;

  case ncclFuncAllReduce:
    trafficBytes = (double)completedColl->msgSizeBytes;
    busFactor = ((double)(2 * (commInfo->nranks - 1))) / ((double)commInfo->nranks);
    break;

  // Here the traffic is the message size multiplied by the number of ranks.
  case ncclFuncReduceScatter:
  case ncclFuncAllGather:
    trafficBytes = (double)(completedColl->msgSizeBytes * commInfo->nranks);
    busFactor = ((double)(commInfo->nranks - 1)) / ((double)commInfo->nranks);
    break;

  default:
    // Unrecognised collective: both bandwidths come out as 0.
    break;
  }

  if (elapsedSec != 0) {
    completedColl->algoBwGbs = trafficBytes / 1.0E9 / elapsedSec;
  } else {
    completedColl->algoBwGbs = 0;
  }
  completedColl->busBwGbs = completedColl->algoBwGbs * busFactor;
}
/*
 * Description:
 *
 *   Computes the execution time of one kernel channel from its GPU
 *   start/stop clock samples. The difference is divided by 1000 to
 *   produce microseconds, so the samples are presumably nanosecond
 *   timestamps (NOTE(review): confirm the clock source and unit).
 *
 * Thread Safety:
 *   Only reads kernelCh; performs no locking.
 *
 * Input:
 *   struct inspectorKernelChInfo *kernelCh - kernel channel info
 *   holding startGpuClk and stopGpuClk.
 *
 * Output:
 *   None.
 *
 * Return:
 *   uint64_t - (stopGpuClk - startGpuClk) / 1000, or 0 when either
 *   sample is 0 or the stop sample is not strictly greater than the
 *   start sample.
 */
static uint64_t calculateKernelGpuExecTimeUsecs(struct inspectorKernelChInfo *kernelCh) {
  // A zero sample means that clock was never recorded.
  if (kernelCh->startGpuClk == 0 || kernelCh->stopGpuClk == 0) {
    return 0;
  }
  // Reject empty or reversed intervals.
  if (!(kernelCh->stopGpuClk > kernelCh->startGpuClk)) {
    return 0;
  }

  const uint64_t elapsedTicks = kernelCh->stopGpuClk - kernelCh->startGpuClk;
  return elapsedTicks / 1000;
}
/*
 * Description:
 *
 *   Determines the execution time of a collective as the maximum
 *   execution time over its kernel channels. For each channel the GPU
 *   clock based duration is preferred; when that is unavailable
 *   (reported as 0) the channel's CPU timestamps are used instead,
 *   provided the completion timestamp is later than the start
 *   timestamp. If no channel yields a positive duration, the
 *   collective-level CPU timestamps are used.
 *
 * Thread Safety:
 *   Only reads collInfo; performs no locking.
 *
 * Input:
 *   struct inspectorCollInfo *collInfo - collective operation info
 *   containing nChannels kernel channels.
 *   inspectorTimingSource_t *timingSource - where to store the timing
 *   source that produced the returned value.
 *
 * Output:
 *   *timingSource is set to inspectorTimingSourceKernelGpu or
 *   inspectorTimingSourceKernelCpu (whichever produced the maximum),
 *   or to inspectorTimingSourceCollectiveCpu when falling back to the
 *   collective-level timestamps.
 *
 * Return:
 *
 *   uint64_t - maximum execution time in microseconds across all
 *   kernel channels; otherwise the collective-level
 *   duration; 0 if that duration is not positive (for
 *   example, the completion timestamp was never set).
 *
 */
static uint64_t calculateMaxKernelExecTimeUsecs(struct inspectorCollInfo *collInfo,
                                                inspectorTimingSource_t *timingSource) {
  uint64_t maxKernelExecTimeUsecs = 0;
  inspectorTimingSource_t bestTimingSource = inspectorTimingSourceCollectiveCpu;

  for (uint32_t i = 0; i < collInfo->nChannels; i++) {
    struct inspectorKernelChInfo *kernelCh = &collInfo->kernelCh[i];
    uint64_t gpuExecTimeUsecs = calculateKernelGpuExecTimeUsecs(kernelCh);
    if (gpuExecTimeUsecs > 0) {
      if (gpuExecTimeUsecs > maxKernelExecTimeUsecs) {
        maxKernelExecTimeUsecs = gpuExecTimeUsecs;
        bestTimingSource = inspectorTimingSourceKernelGpu;
      }
    } else if (kernelCh->tsCompletedUsec > kernelCh->tsStartUsec) {
      // No usable GPU clocks for this channel: fall back to CPU timestamps.
      uint64_t cpuExecTimeUsecs = kernelCh->tsCompletedUsec - kernelCh->tsStartUsec;
      if (cpuExecTimeUsecs > maxKernelExecTimeUsecs) {
        maxKernelExecTimeUsecs = cpuExecTimeUsecs;
        bestTimingSource = inspectorTimingSourceKernelCpu;
      }
    }
  }

  if (maxKernelExecTimeUsecs > 0) {
    *timingSource = bestTimingSource;
    return maxKernelExecTimeUsecs;
  }

  *timingSource = inspectorTimingSourceCollectiveCpu;
  // Apply the same ordering guard as the per-channel CPU path: subtracting
  // without it would wrap to a huge value when the completion timestamp is
  // missing or precedes the start timestamp.
  if (collInfo->tsCompletedUsec > collInfo->tsStartUsec) {
    return collInfo->tsCompletedUsec - collInfo->tsStartUsec;
  }
  return 0;
}
/*
 * Description:
 *
 *   Populates a completed-collective record from the live collective
 *   info: function, sequence number, message size, execution time
 *   (with the timing source that produced it) and the event tracker.
 *
 * Thread Safety:
 *   Performs no locking itself; the caller must provide any
 *   synchronisation required for completedColl and collInfo.
 *
 * Input:
 *   struct inspectorCompletedCollInfo *completedColl - record to fill.
 *   struct inspectorCollInfo *collInfo - collective info to read from.
 *
 * Output:
 *   completedColl->func, sn, msgSizeBytes, execTimeUsecs, timingSource
 *   and collEvtTrk are written.
 *
 * Return:
 *   None.
 *
 */
void inspectorUpdateCollPerf(struct inspectorCompletedCollInfo *completedColl,
                             struct inspectorCollInfo *collInfo) {
  // Fields carried over unchanged.
  completedColl->sn = collInfo->sn;
  completedColl->msgSizeBytes = collInfo->msgSizeBytes;
  completedColl->collEvtTrk = collInfo->collEvtTrk;

  // The function is stored on collInfo in string form and converted here.
  completedColl->func = ncclStringToFunc(collInfo->func);

  // The helper writes timingSource through the pointer and returns the
  // duration in microseconds.
  const uint64_t execUsecs =
    calculateMaxKernelExecTimeUsecs(collInfo, &completedColl->timingSource);
  completedColl->execTimeUsecs = execUsecs;
}
/*
 * Description:
 *
 *   Tears down the dump thread object, if one exists: stops its
 *   thread, deletes it and clears the global pointer.
 *
 * Thread Safety:
 *   Performs no locking around the global dumper pointer; intended to
 *   be called during teardown.
 *
 * Input:
 *   None.
 *
 * Output:
 *   The global dumper pointer is nullptr on return.
 *
 * Return:
 *   inspectorResult_t - always inspectorSuccess.
 *
 */
inspectorResult_t inspectorGlobalFinalize() {
  // Nothing to stop if no dumper exists.
  if (!dumper) {
    return inspectorSuccess;
  }

  dumper->stopThread();
  delete dumper;
  dumper = nullptr;
  return inspectorSuccess;
}