Using hip_bf16.h instead of hip_bfloat16.h for the __bf16 intrinsic (#2037)

* Using hip_bf16.h instead of hip_bfloat16.h for the __bf16 intrinsic
* Switching to hip_bf16.h from ROCm 6.0.0

[ROCm/rccl commit: fb67e5b467]
2025-11-13 15:56:18 -05:00
parent 7f7c8d14f6
commit 3d2fc04b45
@@ -414,7 +414,7 @@ SPECIALIZE_REDUCE(FuncMinMax, half, 1, half, fn.isMinNotMax ? __hmin(x, y) : __h
  SPECIALIZE_REDUCE(FuncMinMax, __nv_bfloat16, 1, __nv_bfloat16, fn.isMinNotMax ? __hmin(x, y) : __hmax(x, y))
  // coverity[copy_constructor_call]
  SPECIALIZE_REDUCE(FuncMinMax, __nv_bfloat16, 2, __nv_bfloat162, fn.isMinNotMax ? __hmin2(x, y) : __hmax2(x, y))
-#else
+#elif ROCM_VERSION < 60000
  SPECIALIZE_REDUCE(FuncSum, hip_bfloat16, 1, hip_bfloat16, (hip_bfloat16)((float)(x) + (float)(y)))
  SPECIALIZE_REDUCE(FuncProd, hip_bfloat16, 1, hip_bfloat16, (hip_bfloat16)((float)(x) * (float)(y)))
  SPECIALIZE_REDUCE(FuncMinMax, hip_bfloat16, 1, hip_bfloat16, (hip_bfloat16)(fn.isMinNotMax ? fminf((float)(x), (float)(y)) : fmaxf((float)(x), (float)(y))))
@@ -11,7 +11,13 @@

 #include "nccl.h"
 #include "rccl_float8.h"
-#include <hip/hip_bfloat16.h>
+#if ROCM_VERSION >= 60000
+  // From ROCm 6.0 onward, use hip_bf16.h and alias its __hip_bfloat16 type to hip_bfloat16
+  #include <hip/hip_bf16.h>
+  typedef __hip_bfloat16 hip_bfloat16;
+#else
+  #include <hip/hip_bfloat16.h>
+#endif
 #include "nccl_common.h"
 #include "bitops.h"
 #include "symmetric.h"