Initializing all ranks to the same value to avoid failure of UT AllR… (#1459)

* Initializing all ranks to the same value to avoid failure of UT AllReduce for FP8 type

Co-authored-by: Marzieh Berenjkoub <mberenjk@amd.com>
2025-01-02 11:39:02 -06:00
@@ -13,7 +13,7 @@ namespace RcclUnitTesting

    // Configuration
    std::vector<ncclFunc_t>     const funcTypes       = {ncclCollAllReduce};
-    std::vector<ncclDataType_t> const dataTypes       = {ncclFloat32};
+    std::vector<ncclDataType_t> const dataTypes       = {ncclFloat32, ncclFp8E4M3, ncclFp8E5M2};
    std::vector<ncclRedOp_t>    const redOps          = {ncclSum};
    std::vector<int>            const roots           = {0};
    std::vector<int>            const numElements     = {393216, 384};
@@ -148,7 +148,9 @@ namespace RcclUnitTesting

    for (int i = 0; i < numElements; i++)
    {
-      int    valueI = (globalRank + i) % 256;
+      // Floating-point addition is not associative, so the order in which ranks are summed affects the result.
+      // For lower-precision (FP8) data types, we initialize all ranks to the same values to avoid this.
+      int    valueI = (dataType == ncclFp8E4M3 || dataType == ncclFp8E5M2)? (i % 16) :(globalRank + i) % 256;
      double valueF = 1.0L/((double)valueI+1.0L);
      temp.Set(dataType, i, valueI, valueF);
    }