@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllGather};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllGather};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllGather};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllGather};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllGather};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllGather};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,15 +15,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
|
||||
std::vector<ncclDataType_t> const dataTypes = testBed.GetAllSupportedDataTypes();
|
||||
std::vector<ncclRedOp_t> const redOps = testBed.GetAllSupportedRedOps();
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false, true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
|
||||
std::vector<ncclDataType_t> const dataTypes = testBed.GetAllSupportedDataTypes();
|
||||
std::vector<ncclRedOp_t> const redOps = testBed.GetAllSupportedRedOps();
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false, true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
|
||||
unsetenv("RCCL_ENABLE_CLIQUE");
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclFloat32};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclProd};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclFloat32};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclProd};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclUint8, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclMax};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclUint8, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclMax};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat16, ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclMin};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat16, ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclMin};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllToAll};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllToAll};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllToAll};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllToAll};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,7 +71,7 @@ namespace RcclUnitTesting
|
||||
std::string name = testBed.GetTestCaseName(totalRanks, isMultiProcess,
|
||||
ncclCollAllToAllv, dataTypes[dataIdx],
|
||||
ncclSum, -1,
|
||||
inPlace, useManagedMem);
|
||||
inPlace, useManagedMem, false);
|
||||
INFO("%s\n", name.c_str());
|
||||
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollBroadcast};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollBroadcast};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollBroadcast};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollBroadcast};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollBroadcast};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {1};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollBroadcast};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {1};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
+10
-8
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollGather};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64, ncclFloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollGather};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64, ncclFloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollGather};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {1};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollGather};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {1};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollGather};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {1};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollGather};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {1};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduceScatter};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64, ncclFloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclProd};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 5461, 1024};
|
||||
std::vector<bool> const inPlaceList = {true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduceScatter};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64, ncclFloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclProd};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 5461, 1024};
|
||||
std::vector<bool> const inPlaceList = {true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduceScatter};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduceScatter};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduceScatter};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat16, ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclMin, ncclMax, ncclAvg};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 5461, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduceScatter};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat16, ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclMin, ncclMax, ncclAvg};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 5461, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
+10
-8
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduce};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclProd};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduce};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclProd};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduce};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduce};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduce};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclMin, ncclMax, ncclAvg};
|
||||
std::vector<int> const roots = {1};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduce};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclMin, ncclMax, ncclAvg};
|
||||
std::vector<int> const roots = {1};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollScatter};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollScatter};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {true};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollScatter};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollScatter};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {0};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {true};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
|
||||
TestBed testBed;
|
||||
|
||||
// Configuration
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollScatter};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {1};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<ncclFunc_t> const funcTypes = {ncclCollScatter};
|
||||
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
|
||||
std::vector<ncclRedOp_t> const redOps = {ncclSum};
|
||||
std::vector<int> const roots = {1};
|
||||
std::vector<int> const numElements = {1048576, 53327, 1024};
|
||||
std::vector<bool> const inPlaceList = {false};
|
||||
std::vector<bool> const managedMemList = {false};
|
||||
std::vector<bool> const useHipGraphList = {false, true};
|
||||
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
|
||||
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
|
||||
inPlaceList, managedMemList, useHipGraphList);
|
||||
testBed.Finalize();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,9 +76,10 @@ namespace RcclUnitTesting
|
||||
struct OptionalColArgs
|
||||
{
|
||||
ncclRedOp_t redOp = ncclSum;
|
||||
int root = 0; // Used as "peer" for Send/Recv
|
||||
ScalarTransport scalarTransport; // Used for custom reduction operators
|
||||
int scalarMode = -1; // -1 if scalar not used
|
||||
int root = 0; // Used as "peer" for Send/Recv
|
||||
ScalarTransport scalarTransport; // Used for custom reduction operators
|
||||
int scalarMode = -1; // -1 if scalar not used
|
||||
|
||||
// allToAllv args
|
||||
size_t sendcounts[MAX_RANKS*MAX_RANKS];
|
||||
size_t sdispls[MAX_RANKS*MAX_RANKS];
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
* See LICENSE.txt for license information
|
||||
************************************************************************/
|
||||
#pragma once
|
||||
#include <cstring>
|
||||
|
||||
namespace RcclUnitTesting
|
||||
{
|
||||
@@ -31,7 +32,8 @@ namespace RcclUnitTesting
|
||||
hipError_t error = (func); \
|
||||
if (error != hipSuccess) \
|
||||
{ \
|
||||
fprintf(stderr, "\033[0;33" "[ ERROR ] HIP error: %s\n" "\033[m", hipGetErrorString(error)); \
|
||||
fprintf(stderr, "\033[0;31m" "[ ERROR ] HIP error: %s File:%s Line:%d\n" "\033[m", \
|
||||
hipGetErrorString(error), strrchr("/" __FILE__, '/') + 1, __LINE__); \
|
||||
return TEST_FAIL; \
|
||||
} \
|
||||
}
|
||||
|
||||
+38
-33
@@ -15,7 +15,7 @@
|
||||
{ \
|
||||
if (ev.verbose) INFO("Calling PIPE_READ to Child %d\n", childId); \
|
||||
ssize_t retval = read(childList[childId]->parentReadFd, &val, sizeof(val)); \
|
||||
if (ev.verbose) INFO("Got PIPE_READ %ld\n", retval); \
|
||||
if (ev.verbose) INFO("Got PIPE_READ %ld from Child %d\n", retval, childId); \
|
||||
if (retval == -1) \
|
||||
{ \
|
||||
ERROR("Unable to read from child %d: Error %s\n", childId, strerror(errno)); \
|
||||
@@ -104,7 +104,7 @@ namespace RcclUnitTesting
|
||||
}
|
||||
}
|
||||
|
||||
//Determine number of unique GPUs being used.
|
||||
// Determine number of unique GPUs being used.
|
||||
std::set<int> unique_devices;
|
||||
for (auto a: this->rankToDeviceMap)
|
||||
unique_devices.insert(a);
|
||||
@@ -240,7 +240,7 @@ namespace RcclUnitTesting
|
||||
}
|
||||
}
|
||||
|
||||
void TestBed::ExecuteCollectives(std::vector<int> const &currentRanks)
|
||||
void TestBed::ExecuteCollectives(std::vector<int> const &currentRanks, bool const useHipGraph)
|
||||
{
|
||||
int const cmd = TestBedChild::CHILD_EXECUTE_COLL;
|
||||
++TestBed::NumTestsRun();
|
||||
@@ -257,6 +257,7 @@ namespace RcclUnitTesting
|
||||
if ((currentRanks.size() == 0) || (ranksPerChild[childId].size() > 0))
|
||||
{
|
||||
PIPE_WRITE(childId, cmd);
|
||||
PIPE_WRITE(childId, useHipGraph);
|
||||
int tempCurrentRanks = currentRanks.size();
|
||||
PIPE_WRITE(childId, tempCurrentRanks);
|
||||
for (int rank = 0; rank < currentRanks.size(); ++rank){
|
||||
@@ -372,16 +373,16 @@ namespace RcclUnitTesting
|
||||
}
|
||||
|
||||
std::vector<std::vector<int>> TestBed::GetDeviceIdsList(int const numProcesses,
|
||||
int const numGpus,
|
||||
int const ranksPerGpu)
|
||||
int const numGpus,
|
||||
int const ranksPerGpu)
|
||||
{
|
||||
std::vector<std::vector<int>> result(numProcesses);
|
||||
int ntasks = numProcesses == 1 ? numGpus : 1;
|
||||
int k=0;
|
||||
for (int i = 0; i < numProcesses; i++)
|
||||
for (int j = 0; j < ntasks * ranksPerGpu; j++) {
|
||||
result[i].push_back(k%numGpus);
|
||||
k++;
|
||||
result[i].push_back(k%numGpus);
|
||||
k++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@@ -394,7 +395,8 @@ namespace RcclUnitTesting
|
||||
int const root,
|
||||
bool const inPlace,
|
||||
bool const managedMem,
|
||||
int const ranksPerProc)
|
||||
bool const useHipGraph,
|
||||
int const ranksPerProc)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << (isMultiProcess ? "MP" : "SP") << " ";
|
||||
@@ -405,7 +407,9 @@ namespace RcclUnitTesting
|
||||
ss << " ";
|
||||
ss << "ranks ";
|
||||
ss << ncclFuncNames[funcType] << " ";
|
||||
ss << "(" << (inPlace ? "IP" : "OP") << "," << (managedMem ? "MM" : "GM") << ") ";
|
||||
ss << "(" << (inPlace ? "IP" : "OP") << ","
|
||||
<< (managedMem ? "MM" : "GM") << ","
|
||||
<< (useHipGraph ? "GL" : "NL") <<") ";
|
||||
ss << ncclDataTypeNames[dataType] << " ";
|
||||
if (CollectiveArgs::UsesReduce(funcType)) ss << ncclRedOpNames[redOp] << " ";
|
||||
if (CollectiveArgs::UsesRoot(funcType)) ss << "Root " << root << " ";
|
||||
@@ -418,7 +422,8 @@ namespace RcclUnitTesting
|
||||
std::vector<int> const& roots,
|
||||
std::vector<int> const& numElements,
|
||||
std::vector<bool> const& inPlaceList,
|
||||
std::vector<bool> const& managedMemList)
|
||||
std::vector<bool> const& managedMemList,
|
||||
std::vector<bool> const& useHipGraphList)
|
||||
{
|
||||
// Sort numElements in descending order to cut down on # of allocations
|
||||
std::vector<int> sortedN = numElements;
|
||||
@@ -475,16 +480,6 @@ namespace RcclUnitTesting
|
||||
for (int ipIdx = 0; ipIdx < inPlaceList.size() && isCorrect; ++ipIdx)
|
||||
for (int mmIdx = 0; mmIdx < managedMemList.size() && isCorrect; ++mmIdx)
|
||||
{
|
||||
if (ev.showNames)
|
||||
{
|
||||
std::string name = this->GetTestCaseName(numGpus, isMultiProcess,
|
||||
funcTypes[ftIdx], dataTypes[dtIdx],
|
||||
redOps[rdIdx], roots[rtIdx],
|
||||
inPlaceList[ipIdx], managedMemList[mmIdx],
|
||||
ranksPerGpu);
|
||||
INFO("%s\n", name.c_str());
|
||||
}
|
||||
|
||||
for (int neIdx = 0; neIdx < numElements.size() && isCorrect; ++neIdx)
|
||||
{
|
||||
int numInputElements, numOutputElements;
|
||||
@@ -504,24 +499,34 @@ namespace RcclUnitTesting
|
||||
// Only allocate once for largest size
|
||||
if (neIdx == 0) this->AllocateMem(inPlaceList[ipIdx], managedMemList[mmIdx]);
|
||||
|
||||
// There are some cases when data does not need to be re-prepared
|
||||
// e.g. AllReduce subarray expected results are still valid
|
||||
bool canSkip = (neIdx != 0 && !inPlaceList[ipIdx] &&
|
||||
(funcTypes[ftIdx] == ncclCollBroadcast ||
|
||||
funcTypes[ftIdx] == ncclCollReduce ||
|
||||
funcTypes[ftIdx] == ncclCollAllReduce));
|
||||
if (!canSkip) this->PrepareData();
|
||||
|
||||
this->ExecuteCollectives();
|
||||
this->ValidateResults(isCorrect);
|
||||
if (!isCorrect)
|
||||
for (int hgIdx = 0; hgIdx < useHipGraphList.size() && isCorrect; ++hgIdx)
|
||||
{
|
||||
// There are some cases when data does not need to be re-prepared
|
||||
// e.g. AllReduce subarray expected results are still valid
|
||||
bool canSkip = (neIdx != 0 && !inPlaceList[ipIdx] &&
|
||||
(funcTypes[ftIdx] == ncclCollBroadcast ||
|
||||
funcTypes[ftIdx] == ncclCollReduce ||
|
||||
funcTypes[ftIdx] == ncclCollAllReduce));
|
||||
if (!canSkip) this->PrepareData();
|
||||
|
||||
std::string name = this->GetTestCaseName(numGpus, isMultiProcess,
|
||||
funcTypes[ftIdx], dataTypes[dtIdx],
|
||||
redOps[rdIdx], roots[rtIdx],
|
||||
inPlaceList[ipIdx], managedMemList[mmIdx],
|
||||
ranksPerGpu);
|
||||
ERROR("Incorrect output for %s\n", name.c_str());
|
||||
useHipGraphList[hgIdx], ranksPerGpu);
|
||||
|
||||
if (ev.showNames)
|
||||
{
|
||||
INFO("%s [%d elements]\n", name.c_str(), numInputElements);
|
||||
}
|
||||
|
||||
std::vector<int> currentRanksEmpty = {};
|
||||
this->ExecuteCollectives(currentRanksEmpty, useHipGraphList[hgIdx]);
|
||||
this->ValidateResults(isCorrect);
|
||||
if (!isCorrect)
|
||||
{
|
||||
ERROR("Incorrect output for %s\n", name.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
this->DeallocateMem();
|
||||
|
||||
+10
-5
@@ -32,9 +32,12 @@ namespace RcclUnitTesting
|
||||
TestBed();
|
||||
|
||||
// Prepare TestBed for use with GPUs across multiple child processes
|
||||
void InitComms(std::vector<std::vector<int>> const& deviceIdsPerChild, int const numCollectivesInGroup = 1);
|
||||
void InitComms(std::vector<std::vector<int>> const& deviceIdsPerChild,
|
||||
int const numCollectivesInGroup = 1);
|
||||
|
||||
// Prepare TestBed for use with GPUs on a single child process
|
||||
void InitComms(int const numGpus, int const numCollectivesInGroup = 1);
|
||||
void InitComms(int const numGpus,
|
||||
int const numCollectivesInGroup = 1);
|
||||
|
||||
// Set collectives arguments for specified collective / rank
|
||||
// Setting scalarsPerRank to non-null will create custom reduction operator
|
||||
@@ -69,7 +72,7 @@ namespace RcclUnitTesting
|
||||
|
||||
// Execute all collectives on all test children
|
||||
// Blocks until collective is completed
|
||||
void ExecuteCollectives(std::vector<int> const &currentRanks = {});
|
||||
void ExecuteCollectives(std::vector<int> const &currentRanks = {}, bool const useHipGraph = false);
|
||||
|
||||
// Perform results validation - compare output to expected
|
||||
void ValidateResults(bool& isCorrect, int collId = -1, int const rank = -1);
|
||||
@@ -96,7 +99,7 @@ namespace RcclUnitTesting
|
||||
// Helper function that splits up GPUs to the given number of processes
|
||||
static std::vector<std::vector<int>> GetDeviceIdsList(int const numProcesses,
|
||||
int const numGpus,
|
||||
int const ranksPerGpu);
|
||||
int const ranksPerGpu);
|
||||
static std::vector<std::vector<int>> GetDeviceIdsList(int const numProcesses,
|
||||
int const numGpus);
|
||||
|
||||
@@ -109,6 +112,7 @@ namespace RcclUnitTesting
|
||||
int const root,
|
||||
bool const inPlace,
|
||||
bool const managedMem,
|
||||
bool const useHipGraph,
|
||||
int const ranksPerProc=1);
|
||||
|
||||
// Run a simple sweep
|
||||
@@ -118,7 +122,8 @@ namespace RcclUnitTesting
|
||||
std::vector<int> const& roots,
|
||||
std::vector<int> const& numElements,
|
||||
std::vector<bool> const& inPlaceList,
|
||||
std::vector<bool> const& managedMemList);
|
||||
std::vector<bool> const& managedMemList,
|
||||
std::vector<bool> const& useHipGraphList);
|
||||
|
||||
// Used to track total number of calls to ExecuteCollectives()
|
||||
static int& NumTestsRun();
|
||||
|
||||
@@ -170,16 +170,16 @@ namespace RcclUnitTesting
|
||||
|
||||
if (useMultiRankPerGpu)
|
||||
{
|
||||
if (ncclCommInitRankMulti(&this->comms[localRank], this->totalRanks, id, globalRank, globalRank) != ncclSuccess)
|
||||
if (ncclCommInitRankMulti(&this->comms[localRank], this->totalRanks, id, globalRank, globalRank) != ncclSuccess)
|
||||
{
|
||||
ERROR("Rank %d on child %d unable to call ncclCommInitRankMulti\n", globalRank, this->childId);
|
||||
status = TEST_FAIL;
|
||||
break;
|
||||
}
|
||||
ERROR("Rank %d on child %d unable to call ncclCommInitRankMulti\n", globalRank, this->childId);
|
||||
status = TEST_FAIL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ncclCommInitRank(&this->comms[localRank], this->totalRanks, id, globalRank) != ncclSuccess)
|
||||
if (ncclCommInitRank(&this->comms[localRank], this->totalRanks, id, globalRank) != ncclSuccess)
|
||||
{
|
||||
ERROR("Rank %d on child %d unable to call ncclCommInitRank\n", globalRank, this->childId);
|
||||
status = TEST_FAIL;
|
||||
@@ -337,6 +337,9 @@ namespace RcclUnitTesting
|
||||
|
||||
ErrCode TestBedChild::ExecuteCollectives()
|
||||
{
|
||||
bool useHipGraph = false;
|
||||
PIPE_READ(useHipGraph);
|
||||
|
||||
int numRanksToExecute, tempRank;
|
||||
std::vector<int> ranksToExecute = {};
|
||||
PIPE_READ(numRanksToExecute);
|
||||
@@ -345,7 +348,31 @@ namespace RcclUnitTesting
|
||||
PIPE_READ(tempRank);
|
||||
ranksToExecute.push_back(tempRank - this->rankOffset);
|
||||
}
|
||||
if (this->verbose) INFO("Child %d begins ExecuteCollectives()\n", this->childId);
|
||||
if (this->verbose) INFO("Child %d begins ExecuteCollectives() %s\n", this->childId, useHipGraph ? "(using hipGraphs)" : "");
|
||||
|
||||
// Determine which local ranks to execute on
|
||||
std::vector<int> localRanksToExecute;
|
||||
for (int localRank = 0; localRank < this->deviceIds.size(); ++localRank)
|
||||
{
|
||||
// If ranksToExecute is empty, execute all local ranks belonging to this child
|
||||
if (!ranksToExecute.empty() &&
|
||||
(std::count(ranksToExecute.begin(), ranksToExecute.end(), localRank) == 0)) continue;
|
||||
localRanksToExecute.push_back(localRank);
|
||||
}
|
||||
|
||||
numRanksToExecute = (int)localRanksToExecute.size();
|
||||
hipGraph_t graphs[numRanksToExecute];
|
||||
hipGraphExec_t graphExec[numRanksToExecute];
|
||||
|
||||
// Start HIP graph stream capture if requested
|
||||
if (useHipGraph)
|
||||
{
|
||||
for (int localRank : localRanksToExecute)
|
||||
{
|
||||
if (this->verbose) INFO("Capturing stream for rank %d\n", localRank);
|
||||
CHECK_HIP(hipStreamBeginCapture(this->streams[localRank], hipStreamCaptureModeGlobal));
|
||||
}
|
||||
}
|
||||
|
||||
// Start group call
|
||||
CHILD_NCCL_CALL(ncclGroupStart(), "ncclGroupStart");
|
||||
@@ -354,16 +381,13 @@ namespace RcclUnitTesting
|
||||
for (int collId = 0; collId < this->numCollectivesInGroup; ++collId)
|
||||
{
|
||||
// Loop over all local ranks
|
||||
for (int localRank = 0; localRank < this->deviceIds.size(); ++localRank)
|
||||
for (int localRank : localRanksToExecute)
|
||||
{
|
||||
// If ranks to execute is empty, execute all ranks belonging to child
|
||||
if (!ranksToExecute.empty() && (std::count(ranksToExecute.begin(), ranksToExecute.end(), localRank) == 0)) continue;
|
||||
|
||||
CHECK_HIP(hipSetDevice(this->deviceIds[localRank]));
|
||||
|
||||
CollectiveArgs const& collArg = this->collArgs[localRank][collId];
|
||||
|
||||
if (this->printValues)
|
||||
if (this->printValues && !useHipGraph)
|
||||
{
|
||||
int const numInputElementsToPrint = (this->printValues < 0 ? collArg.numInputElements : this->printValues);
|
||||
PtrUnion inputCpu;
|
||||
@@ -502,17 +526,60 @@ namespace RcclUnitTesting
|
||||
// End group call
|
||||
CHILD_NCCL_CALL(ncclGroupEnd(), "ncclGroupEnd");
|
||||
|
||||
// Synchronize
|
||||
if (this->verbose) INFO("Child %d submits group call. Waiting for completion\n", this->childId);
|
||||
for (int localRank = 0; localRank < this->streams.size(); ++localRank)
|
||||
// Instantiate and launch HIP graph if requested
|
||||
if (useHipGraph)
|
||||
{
|
||||
for (int localRank : localRanksToExecute)
|
||||
{
|
||||
if (this->verbose) INFO("Ending stream capture for rank %d\n", localRank);
|
||||
|
||||
CHECK_HIP(hipStreamEndCapture(this->streams[localRank], &graphs[localRank]));
|
||||
if (this->verbose)
|
||||
{
|
||||
size_t numNodes;
|
||||
hipGraphNode_t* nodes;
|
||||
CHECK_HIP(hipGraphGetNodes(graphs[localRank], nodes, &numNodes));
|
||||
INFO("Graph for rank %d has %lu nodes\n", localRank, numNodes);
|
||||
}
|
||||
|
||||
if (this->verbose) INFO("Instantiating executable graph for rank %d\n", localRank);
|
||||
CHECK_HIP(hipGraphInstantiate(&graphExec[localRank], graphs[localRank], NULL, NULL, 0));
|
||||
}
|
||||
|
||||
for (int localRank : localRanksToExecute)
|
||||
{
|
||||
if (this->verbose) INFO("Launch graph for rank %d\n", localRank);
|
||||
CHECK_HIP(hipGraphLaunch(graphExec[localRank], this->streams[localRank]));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (this->verbose)
|
||||
INFO("Child %d submits group call. Waiting for completion\n", this->childId);
|
||||
}
|
||||
|
||||
// Synchronize
|
||||
for (int localRank : localRanksToExecute)
|
||||
{
|
||||
if (this->verbose) INFO("Starting synchronization for rank %d\n", localRank);
|
||||
CHECK_HIP(hipStreamSynchronize(this->streams[localRank]));
|
||||
}
|
||||
|
||||
// Destroy graphs
|
||||
if (useHipGraph)
|
||||
{
|
||||
for (int localRank : localRanksToExecute)
|
||||
{
|
||||
if (this->verbose) INFO("Destroying graphs for rank %d\n", localRank);
|
||||
CHECK_HIP(hipGraphDestroy(graphs[localRank]));
|
||||
CHECK_HIP(hipGraphExecDestroy(graphExec[localRank]));
|
||||
}
|
||||
}
|
||||
|
||||
if (this->printValues)
|
||||
{
|
||||
for (int collId = 0; collId < this->numCollectivesInGroup; ++collId)
|
||||
for (int localRank = 0; localRank < this->deviceIds.size(); ++localRank)
|
||||
for (int localRank : localRanksToExecute)
|
||||
{
|
||||
CollectiveArgs const& collArg = this->collArgs[localRank][collId];
|
||||
|
||||
|
||||
@@ -121,7 +121,10 @@ int main(int argc, char **argv)
|
||||
if (usingGraphs)
|
||||
{
|
||||
for (int r = 0; r < nranks; ++r)
|
||||
{
|
||||
HIP_CALL(hipSetDevice(r));
|
||||
HIP_CALL(hipStreamBeginCapture(stream[r], hipStreamCaptureModeThreadLocal));
|
||||
}
|
||||
|
||||
NCCL_CALL(ncclGroupStart());
|
||||
for (int r = 0; r < nranks; ++r)
|
||||
@@ -132,11 +135,17 @@ int main(int argc, char **argv)
|
||||
NCCL_CALL(ncclGroupEnd());
|
||||
|
||||
for (int r = 0; r < nranks; ++r)
|
||||
{
|
||||
//HIP_CALL(hipSetDevice(r));
|
||||
HIP_CALL(hipStreamEndCapture(stream[r], &graphs[r]));
|
||||
}
|
||||
|
||||
// Instantiating graphs
|
||||
for (int r = 0; r < nranks; ++r)
|
||||
{
|
||||
HIP_CALL(hipSetDevice(r));
|
||||
HIP_CALL(hipGraphInstantiate(&graphExec[r], graphs[r], NULL, NULL, 0));
|
||||
}
|
||||
}
|
||||
auto setupDelta = std::chrono::high_resolution_clock::now() - setupStart;
|
||||
double setupTime = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(setupDelta).count();
|
||||
@@ -150,7 +159,10 @@ int main(int argc, char **argv)
|
||||
if (usingGraphs)
|
||||
{
|
||||
for (int r = 0; r < nranks; r++)
|
||||
{
|
||||
HIP_CALL(hipSetDevice(r));
|
||||
HIP_CALL(hipGraphLaunch(graphExec[r], stream[r]));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -162,6 +174,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
NCCL_CALL(ncclGroupEnd());
|
||||
}
|
||||
|
||||
for (int r = 0; r < nranks; r++)
|
||||
HIP_CALL(hipStreamSynchronize(stream[r]));
|
||||
|
||||
@@ -194,6 +207,13 @@ int main(int argc, char **argv)
|
||||
}
|
||||
average[usingGraphs] /= numIterations;
|
||||
printf("%12.3f", average[usingGraphs]);
|
||||
|
||||
for (int r = 0; r < nranks; r++)
|
||||
{
|
||||
HIP_CALL(hipSetDevice(r));
|
||||
HIP_CALL(hipGraphDestroy(graphs[r]));
|
||||
HIP_CALL(hipGraphExecDestroy(graphExec[r]));
|
||||
}
|
||||
}
|
||||
printf("%12.3f\n", average[0] / average[1]);
|
||||
fflush(stdout);
|
||||
|
||||
Reference in New Issue
Block a user