Graph unit tests (#656)

* Adding hipGraph unit tests
This commit is contained in:
gilbertlee-amd
2022-12-01 10:28:42 -07:00
committed by GitHub
parent aebed537a5
commit faed69f9fc
31 changed files with 399 additions and 251 deletions
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllGather};
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {true};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllGather};
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {true};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllGather};
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllGather};
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllGather};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllGather};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -15,15 +15,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
std::vector<ncclDataType_t> const dataTypes = testBed.GetAllSupportedDataTypes();
std::vector<ncclRedOp_t> const redOps = testBed.GetAllSupportedRedOps();
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false, true};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
std::vector<ncclDataType_t> const dataTypes = testBed.GetAllSupportedDataTypes();
std::vector<ncclRedOp_t> const redOps = testBed.GetAllSupportedRedOps();
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false, true};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
unsetenv("RCCL_ENABLE_CLIQUE");
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclFloat32};
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclProd};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {true};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclFloat32};
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclProd};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {true};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclUint8, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclMax};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclUint8, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclMax};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat16, ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclMin};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllReduce};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat16, ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclMin};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllToAll};
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllToAll};
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllToAll};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollAllToAll};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+1 -1
View File
@@ -71,7 +71,7 @@ namespace RcclUnitTesting
std::string name = testBed.GetTestCaseName(totalRanks, isMultiProcess,
ncclCollAllToAllv, dataTypes[dataIdx],
ncclSum, -1,
inPlace, useManagedMem);
inPlace, useManagedMem, false);
INFO("%s\n", name.c_str());
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollBroadcast};
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {true};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollBroadcast};
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {true};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollBroadcast};
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<ncclFunc_t> const funcTypes = {ncclCollBroadcast};
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollBroadcast};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {1};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollBroadcast};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {1};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollGather};
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64, ncclFloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {true};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollGather};
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64, ncclFloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {true};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollGather};
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {1};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<ncclFunc_t> const funcTypes = {ncclCollGather};
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {1};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollGather};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {1};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollGather};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {1};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduceScatter};
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64, ncclFloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclProd};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 5461, 1024};
std::vector<bool> const inPlaceList = {true};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduceScatter};
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64, ncclFloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclProd};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 5461, 1024};
std::vector<bool> const inPlaceList = {true};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduceScatter};
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduceScatter};
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduceScatter};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat16, ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclMin, ncclMax, ncclAvg};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 5461, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduceScatter};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat16, ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclMin, ncclMax, ncclAvg};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 5461, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduce};
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclProd};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {true};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduce};
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
std::vector<ncclRedOp_t> const redOps = {ncclSum, ncclProd};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {true};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduce};
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduce};
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduce};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclMin, ncclMax, ncclAvg};
std::vector<int> const roots = {1};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollReduce};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclMin, ncclMax, ncclAvg};
std::vector<int> const roots = {1};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollScatter};
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {true};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollScatter};
std::vector<ncclDataType_t> const dataTypes = {ncclInt8, ncclInt32, ncclInt64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {true};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollScatter};
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<ncclFunc_t> const funcTypes = {ncclCollScatter};
std::vector<ncclDataType_t> const dataTypes = {ncclUint8, ncclUint32, ncclUint64};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {0};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {true};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+10 -8
View File
@@ -12,15 +12,17 @@ namespace RcclUnitTesting
TestBed testBed;
// Configuration
std::vector<ncclFunc_t> const funcTypes = {ncclCollScatter};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {1};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<ncclFunc_t> const funcTypes = {ncclCollScatter};
std::vector<ncclDataType_t> const dataTypes = {ncclFloat32, ncclFloat64, ncclBfloat16};
std::vector<ncclRedOp_t> const redOps = {ncclSum};
std::vector<int> const roots = {1};
std::vector<int> const numElements = {1048576, 53327, 1024};
std::vector<bool> const inPlaceList = {false};
std::vector<bool> const managedMemList = {false};
std::vector<bool> const useHipGraphList = {false, true};
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements, inPlaceList, managedMemList);
testBed.RunSimpleSweep(funcTypes, dataTypes, redOps, roots, numElements,
inPlaceList, managedMemList, useHipGraphList);
testBed.Finalize();
}
}
+4 -3
View File
@@ -76,9 +76,10 @@ namespace RcclUnitTesting
struct OptionalColArgs
{
ncclRedOp_t redOp = ncclSum;
int root = 0; // Used as "peer" for Send/Recv
ScalarTransport scalarTransport; // Used for custom reduction operators
int scalarMode = -1; // -1 if scalar not used
int root = 0; // Used as "peer" for Send/Recv
ScalarTransport scalarTransport; // Used for custom reduction operators
int scalarMode = -1; // -1 if scalar not used
// allToAllv args
size_t sendcounts[MAX_RANKS*MAX_RANKS];
size_t sdispls[MAX_RANKS*MAX_RANKS];
+3 -1
View File
@@ -4,6 +4,7 @@
* See LICENSE.txt for license information
************************************************************************/
#pragma once
#include <cstring>
namespace RcclUnitTesting
{
@@ -31,7 +32,8 @@ namespace RcclUnitTesting
hipError_t error = (func); \
if (error != hipSuccess) \
{ \
fprintf(stderr, "\033[0;33" "[ ERROR ] HIP error: %s\n" "\033[m", hipGetErrorString(error)); \
fprintf(stderr, "\033[0;31m" "[ ERROR ] HIP error: %s File:%s Line:%d\n" "\033[m", \
hipGetErrorString(error), strrchr("/" __FILE__, '/') + 1, __LINE__); \
return TEST_FAIL; \
} \
}
+38 -33
View File
@@ -15,7 +15,7 @@
{ \
if (ev.verbose) INFO("Calling PIPE_READ to Child %d\n", childId); \
ssize_t retval = read(childList[childId]->parentReadFd, &val, sizeof(val)); \
if (ev.verbose) INFO("Got PIPE_READ %ld\n", retval); \
if (ev.verbose) INFO("Got PIPE_READ %ld from Child %d\n", retval, childId); \
if (retval == -1) \
{ \
ERROR("Unable to read from child %d: Error %s\n", childId, strerror(errno)); \
@@ -104,7 +104,7 @@ namespace RcclUnitTesting
}
}
//Determine number of unique GPUs being used.
// Determine number of unique GPUs being used.
std::set<int> unique_devices;
for (auto a: this->rankToDeviceMap)
unique_devices.insert(a);
@@ -240,7 +240,7 @@ namespace RcclUnitTesting
}
}
void TestBed::ExecuteCollectives(std::vector<int> const &currentRanks)
void TestBed::ExecuteCollectives(std::vector<int> const &currentRanks, bool const useHipGraph)
{
int const cmd = TestBedChild::CHILD_EXECUTE_COLL;
++TestBed::NumTestsRun();
@@ -257,6 +257,7 @@ namespace RcclUnitTesting
if ((currentRanks.size() == 0) || (ranksPerChild[childId].size() > 0))
{
PIPE_WRITE(childId, cmd);
PIPE_WRITE(childId, useHipGraph);
int tempCurrentRanks = currentRanks.size();
PIPE_WRITE(childId, tempCurrentRanks);
for (int rank = 0; rank < currentRanks.size(); ++rank){
@@ -372,16 +373,16 @@ namespace RcclUnitTesting
}
// Builds, for each process, the list of GPU device ids that its ranks use.
//
// Device ids are handed out round-robin (0, 1, ..., numGpus-1, 0, ...) using a
// single counter that keeps running across processes.
//
// numProcesses - number of per-process lists to produce
// numGpus      - number of GPUs to cycle through
// ranksPerGpu  - multiplier on the number of entries per process
//
// With a single process the list holds numGpus * ranksPerGpu entries;
// otherwise each process gets ranksPerGpu entries.
//
// Returns a vector of numProcesses device-id lists.  When numGpus is not
// positive the lists are left empty, because no valid device id exists.
std::vector<std::vector<int>> TestBed::GetDeviceIdsList(int const numProcesses,
                                                        int const numGpus,
                                                        int const ranksPerGpu)
{
  std::vector<std::vector<int>> result(numProcesses);

  // Guard the modulo below: "k % numGpus" is undefined for numGpus == 0
  if (numGpus <= 0) return result;

  // A single process covers every GPU; otherwise one GPU slot per process
  int const ntasks = (numProcesses == 1) ? numGpus : 1;

  // Running counter shared by all processes so that ids continue to cycle
  int k = 0;
  for (int i = 0; i < numProcesses; i++)
  {
    for (int j = 0; j < ntasks * ranksPerGpu; j++)
    {
      result[i].push_back(k % numGpus);
      k++;
    }
  }
  return result;
}
@@ -394,7 +395,8 @@ namespace RcclUnitTesting
int const root,
bool const inPlace,
bool const managedMem,
int const ranksPerProc)
bool const useHipGraph,
int const ranksPerProc)
{
std::stringstream ss;
ss << (isMultiProcess ? "MP" : "SP") << " ";
@@ -405,7 +407,9 @@ namespace RcclUnitTesting
ss << " ";
ss << "ranks ";
ss << ncclFuncNames[funcType] << " ";
ss << "(" << (inPlace ? "IP" : "OP") << "," << (managedMem ? "MM" : "GM") << ") ";
ss << "(" << (inPlace ? "IP" : "OP") << ","
<< (managedMem ? "MM" : "GM") << ","
<< (useHipGraph ? "GL" : "NL") <<") ";
ss << ncclDataTypeNames[dataType] << " ";
if (CollectiveArgs::UsesReduce(funcType)) ss << ncclRedOpNames[redOp] << " ";
if (CollectiveArgs::UsesRoot(funcType)) ss << "Root " << root << " ";
@@ -418,7 +422,8 @@ namespace RcclUnitTesting
std::vector<int> const& roots,
std::vector<int> const& numElements,
std::vector<bool> const& inPlaceList,
std::vector<bool> const& managedMemList)
std::vector<bool> const& managedMemList,
std::vector<bool> const& useHipGraphList)
{
// Sort numElements in descending order to cut down on # of allocations
std::vector<int> sortedN = numElements;
@@ -475,16 +480,6 @@ namespace RcclUnitTesting
for (int ipIdx = 0; ipIdx < inPlaceList.size() && isCorrect; ++ipIdx)
for (int mmIdx = 0; mmIdx < managedMemList.size() && isCorrect; ++mmIdx)
{
if (ev.showNames)
{
std::string name = this->GetTestCaseName(numGpus, isMultiProcess,
funcTypes[ftIdx], dataTypes[dtIdx],
redOps[rdIdx], roots[rtIdx],
inPlaceList[ipIdx], managedMemList[mmIdx],
ranksPerGpu);
INFO("%s\n", name.c_str());
}
for (int neIdx = 0; neIdx < numElements.size() && isCorrect; ++neIdx)
{
int numInputElements, numOutputElements;
@@ -504,24 +499,34 @@ namespace RcclUnitTesting
// Only allocate once for largest size
if (neIdx == 0) this->AllocateMem(inPlaceList[ipIdx], managedMemList[mmIdx]);
// There are some cases when data does not need to be re-prepared
// e.g. AllReduce subarray expected results are still valid
bool canSkip = (neIdx != 0 && !inPlaceList[ipIdx] &&
(funcTypes[ftIdx] == ncclCollBroadcast ||
funcTypes[ftIdx] == ncclCollReduce ||
funcTypes[ftIdx] == ncclCollAllReduce));
if (!canSkip) this->PrepareData();
this->ExecuteCollectives();
this->ValidateResults(isCorrect);
if (!isCorrect)
for (int hgIdx = 0; hgIdx < useHipGraphList.size() && isCorrect; ++hgIdx)
{
// There are some cases when data does not need to be re-prepared
// e.g. AllReduce subarray expected results are still valid
bool canSkip = (neIdx != 0 && !inPlaceList[ipIdx] &&
(funcTypes[ftIdx] == ncclCollBroadcast ||
funcTypes[ftIdx] == ncclCollReduce ||
funcTypes[ftIdx] == ncclCollAllReduce));
if (!canSkip) this->PrepareData();
std::string name = this->GetTestCaseName(numGpus, isMultiProcess,
funcTypes[ftIdx], dataTypes[dtIdx],
redOps[rdIdx], roots[rtIdx],
inPlaceList[ipIdx], managedMemList[mmIdx],
ranksPerGpu);
ERROR("Incorrect output for %s\n", name.c_str());
useHipGraphList[hgIdx], ranksPerGpu);
if (ev.showNames)
{
INFO("%s [%d elements]\n", name.c_str(), numInputElements);
}
std::vector<int> currentRanksEmpty = {};
this->ExecuteCollectives(currentRanksEmpty, useHipGraphList[hgIdx]);
this->ValidateResults(isCorrect);
if (!isCorrect)
{
ERROR("Incorrect output for %s\n", name.c_str());
}
}
}
this->DeallocateMem();
+10 -5
View File
@@ -32,9 +32,12 @@ namespace RcclUnitTesting
TestBed();
// Prepare TestBed for use with GPUs across multiple child processes
void InitComms(std::vector<std::vector<int>> const& deviceIdsPerChild, int const numCollectivesInGroup = 1);
void InitComms(std::vector<std::vector<int>> const& deviceIdsPerChild,
int const numCollectivesInGroup = 1);
// Prepare TestBed for use with GPUs on a single child process
void InitComms(int const numGpus, int const numCollectivesInGroup = 1);
void InitComms(int const numGpus,
int const numCollectivesInGroup = 1);
// Set collectives arguments for specified collective / rank
// Setting scalarsPerRank to non-null will create custom reduction operator
@@ -69,7 +72,7 @@ namespace RcclUnitTesting
// Execute all collectives on all test children
// Blocks until collective is completed
void ExecuteCollectives(std::vector<int> const &currentRanks = {});
void ExecuteCollectives(std::vector<int> const &currentRanks = {}, bool const useHipGraph = false);
// Perform results validation - compare output to expected
void ValidateResults(bool& isCorrect, int collId = -1, int const rank = -1);
@@ -96,7 +99,7 @@ namespace RcclUnitTesting
// Helper function that splits up GPUs to the given number of processes
static std::vector<std::vector<int>> GetDeviceIdsList(int const numProcesses,
int const numGpus,
int const ranksPerGpu);
int const ranksPerGpu);
static std::vector<std::vector<int>> GetDeviceIdsList(int const numProcesses,
int const numGpus);
@@ -109,6 +112,7 @@ namespace RcclUnitTesting
int const root,
bool const inPlace,
bool const managedMem,
bool const useHipGraph,
int const ranksPerProc=1);
// Run a simple sweep
@@ -118,7 +122,8 @@ namespace RcclUnitTesting
std::vector<int> const& roots,
std::vector<int> const& numElements,
std::vector<bool> const& inPlaceList,
std::vector<bool> const& managedMemList);
std::vector<bool> const& managedMemList,
std::vector<bool> const& useHipGraphList);
// Used to track total number of calls to ExecuteCollectives()
static int& NumTestsRun();
+83 -16
View File
@@ -170,16 +170,16 @@ namespace RcclUnitTesting
if (useMultiRankPerGpu)
{
if (ncclCommInitRankMulti(&this->comms[localRank], this->totalRanks, id, globalRank, globalRank) != ncclSuccess)
if (ncclCommInitRankMulti(&this->comms[localRank], this->totalRanks, id, globalRank, globalRank) != ncclSuccess)
{
ERROR("Rank %d on child %d unable to call ncclCommInitRankMulti\n", globalRank, this->childId);
status = TEST_FAIL;
break;
}
ERROR("Rank %d on child %d unable to call ncclCommInitRankMulti\n", globalRank, this->childId);
status = TEST_FAIL;
break;
}
}
else
{
if (ncclCommInitRank(&this->comms[localRank], this->totalRanks, id, globalRank) != ncclSuccess)
if (ncclCommInitRank(&this->comms[localRank], this->totalRanks, id, globalRank) != ncclSuccess)
{
ERROR("Rank %d on child %d unable to call ncclCommInitRank\n", globalRank, this->childId);
status = TEST_FAIL;
@@ -337,6 +337,9 @@ namespace RcclUnitTesting
ErrCode TestBedChild::ExecuteCollectives()
{
bool useHipGraph = false;
PIPE_READ(useHipGraph);
int numRanksToExecute, tempRank;
std::vector<int> ranksToExecute = {};
PIPE_READ(numRanksToExecute);
@@ -345,7 +348,31 @@ namespace RcclUnitTesting
PIPE_READ(tempRank);
ranksToExecute.push_back(tempRank - this->rankOffset);
}
if (this->verbose) INFO("Child %d begins ExecuteCollectives()\n", this->childId);
if (this->verbose) INFO("Child %d begins ExecuteCollectives() %s\n", this->childId, useHipGraph ? "(using hipGraphs)" : "");
// Determine which local ranks to execute on
std::vector<int> localRanksToExecute;
for (int localRank = 0; localRank < this->deviceIds.size(); ++localRank)
{
// If ranksToExecute is empty, execute all local ranks belonging to this child
if (!ranksToExecute.empty() &&
(std::count(ranksToExecute.begin(), ranksToExecute.end(), localRank) == 0)) continue;
localRanksToExecute.push_back(localRank);
}
numRanksToExecute = (int)localRanksToExecute.size();
hipGraph_t graphs[numRanksToExecute];
hipGraphExec_t graphExec[numRanksToExecute];
// Start HIP graph stream capture if requested
if (useHipGraph)
{
for (int localRank : localRanksToExecute)
{
if (this->verbose) INFO("Capturing stream for rank %d\n", localRank);
CHECK_HIP(hipStreamBeginCapture(this->streams[localRank], hipStreamCaptureModeGlobal));
}
}
// Start group call
CHILD_NCCL_CALL(ncclGroupStart(), "ncclGroupStart");
@@ -354,16 +381,13 @@ namespace RcclUnitTesting
for (int collId = 0; collId < this->numCollectivesInGroup; ++collId)
{
// Loop over all local ranks
for (int localRank = 0; localRank < this->deviceIds.size(); ++localRank)
for (int localRank : localRanksToExecute)
{
// If ranks to execute is empty, execute all ranks belonging to child
if (!ranksToExecute.empty() && (std::count(ranksToExecute.begin(), ranksToExecute.end(), localRank) == 0)) continue;
CHECK_HIP(hipSetDevice(this->deviceIds[localRank]));
CollectiveArgs const& collArg = this->collArgs[localRank][collId];
if (this->printValues)
if (this->printValues && !useHipGraph)
{
int const numInputElementsToPrint = (this->printValues < 0 ? collArg.numInputElements : this->printValues);
PtrUnion inputCpu;
@@ -502,17 +526,60 @@ namespace RcclUnitTesting
// End group call
CHILD_NCCL_CALL(ncclGroupEnd(), "ncclGroupEnd");
// Synchronize
if (this->verbose) INFO("Child %d submits group call. Waiting for completion\n", this->childId);
for (int localRank = 0; localRank < this->streams.size(); ++localRank)
// Instantiate and launch HIP graph if requested
if (useHipGraph)
{
for (int localRank : localRanksToExecute)
{
if (this->verbose) INFO("Ending stream capture for rank %d\n", localRank);
CHECK_HIP(hipStreamEndCapture(this->streams[localRank], &graphs[localRank]));
if (this->verbose)
{
  // Only the node count is needed for the log message. Passing a null node
  // array asks the runtime to report the count instead of copying node
  // handles through an uninitialized pointer.
  size_t numNodes = 0;
  CHECK_HIP(hipGraphGetNodes(graphs[localRank], nullptr, &numNodes));
  INFO("Graph for rank %d has %lu nodes\n", localRank, numNodes);
}
if (this->verbose) INFO("Instantiating executable graph for rank %d\n", localRank);
CHECK_HIP(hipGraphInstantiate(&graphExec[localRank], graphs[localRank], NULL, NULL, 0));
}
for (int localRank : localRanksToExecute)
{
if (this->verbose) INFO("Launch graph for rank %d\n", localRank);
CHECK_HIP(hipGraphLaunch(graphExec[localRank], this->streams[localRank]));
}
}
else
{
if (this->verbose)
INFO("Child %d submits group call. Waiting for completion\n", this->childId);
}
// Synchronize
for (int localRank : localRanksToExecute)
{
if (this->verbose) INFO("Starting synchronization for rank %d\n", localRank);
CHECK_HIP(hipStreamSynchronize(this->streams[localRank]));
}
// Destroy graphs
if (useHipGraph)
{
for (int localRank : localRanksToExecute)
{
if (this->verbose) INFO("Destroying graphs for rank %d\n", localRank);
CHECK_HIP(hipGraphDestroy(graphs[localRank]));
CHECK_HIP(hipGraphExecDestroy(graphExec[localRank]));
}
}
if (this->printValues)
{
for (int collId = 0; collId < this->numCollectivesInGroup; ++collId)
for (int localRank = 0; localRank < this->deviceIds.size(); ++localRank)
for (int localRank : localRanksToExecute)
{
CollectiveArgs const& collArg = this->collArgs[localRank][collId];
+20
View File
@@ -121,7 +121,10 @@ int main(int argc, char **argv)
if (usingGraphs)
{
for (int r = 0; r < nranks; ++r)
{
HIP_CALL(hipSetDevice(r));
HIP_CALL(hipStreamBeginCapture(stream[r], hipStreamCaptureModeThreadLocal));
}
NCCL_CALL(ncclGroupStart());
for (int r = 0; r < nranks; ++r)
@@ -132,11 +135,17 @@ int main(int argc, char **argv)
NCCL_CALL(ncclGroupEnd());
for (int r = 0; r < nranks; ++r)
{
//HIP_CALL(hipSetDevice(r));
HIP_CALL(hipStreamEndCapture(stream[r], &graphs[r]));
}
// Instantiating graphs
for (int r = 0; r < nranks; ++r)
{
HIP_CALL(hipSetDevice(r));
HIP_CALL(hipGraphInstantiate(&graphExec[r], graphs[r], NULL, NULL, 0));
}
}
auto setupDelta = std::chrono::high_resolution_clock::now() - setupStart;
double setupTime = std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(setupDelta).count();
@@ -150,7 +159,10 @@ int main(int argc, char **argv)
if (usingGraphs)
{
for (int r = 0; r < nranks; r++)
{
HIP_CALL(hipSetDevice(r));
HIP_CALL(hipGraphLaunch(graphExec[r], stream[r]));
}
}
else
{
@@ -162,6 +174,7 @@ int main(int argc, char **argv)
}
NCCL_CALL(ncclGroupEnd());
}
for (int r = 0; r < nranks; r++)
HIP_CALL(hipStreamSynchronize(stream[r]));
@@ -194,6 +207,13 @@ int main(int argc, char **argv)
}
average[usingGraphs] /= numIterations;
printf("%12.3f", average[usingGraphs]);
for (int r = 0; r < nranks; r++)
{
HIP_CALL(hipSetDevice(r));
HIP_CALL(hipGraphDestroy(graphs[r]));
HIP_CALL(hipGraphExecDestroy(graphExec[r]));
}
}
printf("%12.3f\n", average[0] / average[1]);
fflush(stdout);