device: change unroll factor
The default value of the unroll factor is 2. Changing the unroll
factor to 4 provides better performance for most collectives.
[ROCm/rccl commit: 4d1cfb17c8]
Tento commit je obsažen v:
@@ -11,7 +11,7 @@
|
||||
#include "collectives.h"
|
||||
#include "devcomm.h"
|
||||
|
||||
-#define COLL_UNROLL 2
|
||||
+#define COLL_UNROLL 4
|
||||
#define NCCL_MAX_DEV_ARITY (NCCL_MAX_TREE_ARITY-1) // Using balanced tree instead of split tree
|
||||
|
||||
#define __syncwarp()
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele