From bf6660ee4e130a8af65ba2cda5e8e833a78971cd Mon Sep 17 00:00:00 2001 From: Nilesh M Negi Date: Sat, 23 Aug 2025 00:15:38 -0500 Subject: [PATCH] [BUILD] Populate host_table entries only for 1 unroll (#1871) --- src/device/generate.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/device/generate.py b/src/device/generate.py index 4f5ca038c8..53e1cdeffc 100755 --- a/src/device/generate.py +++ b/src/device/generate.py @@ -267,6 +267,7 @@ def equivalent_primary(coll, algo, proto, redop, ty, acc, unroll): return (coll, algo, proto, redop, ty, acc, unroll) # Order rows are enumerated must match formula of `ncclDevFuncId()`: +# the outermost loop must be over the unroll factor; the host_table section slices func_rows by unroll def enumerate_func_rows(): for unroll in all_unroll: for acc in use_acc: @@ -474,7 +475,11 @@ with open(os.path.join(gensrc, "host_table.cpp"), "w") as f: out("// bits 16-19: ty index\n") out("#include \n") out("extern std::unordered_map ncclDevFuncNameToId = {\n") - for fn in func_rows: + + # host_table entries map device functions based on collective, algorithm, protocol, redop, and datatype + # For GPU targets that support multiple unrolls, e.g., gfx950 + # or for non-local builds, only a single set of functions is needed in the host_table. + for fn in func_rows[:len(func_rows)//len(all_unroll)]: fn_id = -1 if fn is not None: fn_id = primary_to_index[equivalent_primary(*fn)]