From acdafac49f8b87c8704b3447a3ed3fa0c429a34e Mon Sep 17 00:00:00 2001 From: "Kapil S. Pawar" Date: Mon, 10 Nov 2025 15:38:48 -0600 Subject: [PATCH] [RcclReplayer] Compile without the need for RCCL to be compiled (#2039) --- src/include/recorder.h | 1 - src/misc/recorder.cc | 1 + tools/RcclReplayer/Makefile | 2 +- tools/RcclReplayer/README.md | 2 +- tools/RcclReplayer/rcclReplayer.hpp | 33 +++++++++++++++++++++++++++-- 5 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/include/recorder.h b/src/include/recorder.h index a3440a5a57..4cd30f2be3 100644 --- a/src/include/recorder.h +++ b/src/include/recorder.h @@ -5,7 +5,6 @@ #include #include #include -#include "debug.h" namespace rccl { diff --git a/src/misc/recorder.cc b/src/misc/recorder.cc index 3d390225c7..051cedbbe9 100644 --- a/src/misc/recorder.cc +++ b/src/misc/recorder.cc @@ -7,6 +7,7 @@ #include #include #include +#include "debug.h" using namespace std::chrono; diff --git a/tools/RcclReplayer/Makefile b/tools/RcclReplayer/Makefile index 45197ecdf5..0555a6fddd 100644 --- a/tools/RcclReplayer/Makefile +++ b/tools/RcclReplayer/Makefile @@ -2,7 +2,7 @@ ROCM_DIR ?= /opt/rocm RCCL_DIR ?= ../../build/release MPI_DIR ?= /opt/ompi -INCLUDES = -I$(MPI_DIR)/include -I$(RCCL_DIR)/include -I$(RCCL_DIR)/hipify/src/include +INCLUDES = -I$(MPI_DIR)/include -I$(RCCL_DIR)/include -I../../src/include LDFLAGS = -L$(MPI_DIR)/lib -L$(RCCL_DIR) -lmpi -lrccl main: rcclReplayer.cpp diff --git a/tools/RcclReplayer/README.md b/tools/RcclReplayer/README.md index 981b68b450..a6f48bacc5 100644 --- a/tools/RcclReplayer/README.md +++ b/tools/RcclReplayer/README.md @@ -73,7 +73,7 @@ Replayer is a separate tool which aims to re-run the same set of RCCL calls as r ## Installation * Replayer relies on MPI for out of band communication. * Under `rccl/tools/RcclReplayer`, run `MPI_DIR=${MPI_PATH} make` -* Replayer has to be built from RCCL source. 
Furthermore, it requires RCCL library to be built from the same source in `../../build/release`. For compatibility reason, it is recommended that the logs are collected using same RCCL library as well. +* Replayer can be built from RCCL source using internal headers directly. It links against a RCCL library specified by `RCCL_DIR` (defaults to `../../build/release`). For compatibility, it is recommended that logs are collected using the same RCCL library version. ## Running * Replayer requires the exact same number of processes and processes per node as the recorded job. And all log files must be accessible by all processes in Replayer, either through shared filesystem or copies. * To run Replayer, simply call `mpirun -np ${np} ./rcclReplayer ${filename}.${extension}` diff --git a/tools/RcclReplayer/rcclReplayer.hpp b/tools/RcclReplayer/rcclReplayer.hpp index 86e9e8aec7..c01c709a64 100644 --- a/tools/RcclReplayer/rcclReplayer.hpp +++ b/tools/RcclReplayer/rcclReplayer.hpp @@ -5,13 +5,17 @@ #include #include #include +#include #include #include #include "hip/hip_fp16.h" -#include "rccl_float8.h" -#include "info.h" +// Forward declaration for ncclInfo +// - recorder.h declares functions that take 'const ncclInfo&' as parameter +// - These functions are only used during recording (by recorder.cc), not during replay +// - RcclReplayer only uses rcclApiCall struct +struct ncclInfo; #include "recorder.h" // NOTE: Parsing is based on this line logging collective information in enqueue.cc @@ -63,6 +67,31 @@ struct DeviceGraphInfo int counter = 0; }; +// ncclTypeSize() - extracted from collectives.h to avoid deep dependencies; it is the only function needed from info.h/collectives.h +// Returns the size in bytes of one element of `type` (1, 2, 4 or 8), or -1 when `type` is not one of the cases listed below +static inline int ncclTypeSize(ncclDataType_t type) { + switch (type) { + case ncclInt8: + case ncclUint8: + case ncclFloat8e4m3: + case ncclFloat8e5m2: + return 1; + case ncclFloat16: + case ncclBfloat16: + return 2; + case ncclInt32: + case ncclUint32: + case ncclFloat32: + return 4; + 
case ncclInt64: + case ncclUint64: + case ncclFloat64: + return 8; + default: + return -1; + } +} + class Replayer { private: