Standardize output formats (#140)

* Remove the spaces after comma separators in CSV output
* Consistently report redop as "none" when no reduction op applies
* Write the output file once, after the test finishes
This commit is contained in:
Kajsa Arnold
2025-07-30 17:28:04 -05:00
committed by GitHub
parent 645be0eb45
commit a7809b3243
4 changed files with 48 additions and 25 deletions
+1 -1
View file
@@ -181,7 +181,7 @@ testResult_t AlltoAllvRunTest(struct threadArgs* args, int root, ncclDataType_t
}
for (int i=0; i<type_count; i++) {
TESTCHECK(TimeTest(args, run_types[i], run_typenames[i], (ncclRedOp_t)0, "", -1));
TESTCHECK(TimeTest(args, run_types[i], run_typenames[i], (ncclRedOp_t)0, "none", -1));
}
return testSuccess;
}
+42 -23
View file
@@ -133,16 +133,6 @@ Reporter::Reporter(std::string fileName, std::string outputFormat) : _outputForm
if (isMainThread()) {
_out = std::ofstream(fileName, std::ios_base::out);
_outputValid = true;
if (_outputFormat == "csv") {
_out << "numCycle, ";
_out << "collective, ";
#ifdef MPI_SUPPORT
_out << "ranks, rankspernode, gpusperrank, ";
#else
_out << "gpus, ";
#endif
_out << "size, type, redop, inplace, time, algbw, busbw, #wrong\n";
}
}
}
}
@@ -184,27 +174,54 @@ void Reporter::addResult(int gpusPerRank, int ranksPerNode, int totalRanks, size
outputValuesKeys.push_back(makeValueKeyPair(busBw, "busBw"));
outputValuesKeys.push_back(makeValueKeyPair(wrongEltsStr, "wrong"));
for (auto iter = outputValuesKeys.begin(); iter != outputValuesKeys.end(); ++iter) {
if (_outputFormat == "csv") {
_out << iter->first;
if (std::next(iter) != outputValuesKeys.end()) {
_out << ", ";
_outputData.push_back(outputValuesKeys);
}
void Reporter::writeFile() {
if (!isMainThread() || !_outputValid)
return;
if (_outputFormat == "csv") {
_out << "numCycle,";
_out << "collective,";
#ifdef MPI_SUPPORT
_out << "ranks,rankspernode,gpusperrank,";
#else
_out << "gpus,";
#endif
_out << "size,type,redop,inplace,time,algbw,busbw,#wrong\n";
for (auto iterEntries = _outputData.begin(); iterEntries != _outputData.end(); ++iterEntries) {
for (auto iterVals = (*iterEntries).begin(); iterVals != (*iterEntries).end(); ++iterVals) {
_out << iterVals->first;
if (std::next(iterVals) != (*iterEntries).end()) {
_out << ",";
}
}
} else { //json
if (iter == outputValuesKeys.begin()) {
_out << "{";
_out << std::endl;
}
} else { //json
_out << "[" << std::endl;
for (auto iterEntries = _outputData.begin(); iterEntries != _outputData.end(); ++iterEntries) {
for (auto iterVals = (*iterEntries).begin(); iterVals != (*iterEntries).end(); ++iterVals) {
if (iterVals == (*iterEntries).begin()) {
_out << "{";
}
_out << "\"" << iterVals->second << "\":" << iterVals->first;
if (std::next(iterVals) != (*iterEntries).end()) {
_out << ", ";
}
}
_out << "\"" << iter->second << "\":" << iter->first;
if (std::next(iter) != outputValuesKeys.end()) {
_out << ", ";
if (std::next(iterEntries) != _outputData.end()) {
_out << "}," << std::endl;
} else {
_out << "}";
_out << "}" << std::endl;
}
}
_out << "]" << std::endl;
}
_out << std::endl;
}
// Returns true when the is_main_thread flag (declared elsewhere) equals 1.
bool Reporter::isMainThread() {
  const bool onMainThread = (is_main_thread == 1);
  return onMainThread;
}
#define NUM_BLOCKS 32
@@ -1711,6 +1728,8 @@ testResult_t run() {
MPI_Finalize();
#endif
reporter.writeFile();
// 'cuda-memcheck --leak-check full' requires this
PRINT("%s\n", ncclGetLastError(NULL));
cudaDeviceReset();
+4
View file
@@ -22,6 +22,8 @@
#include <string>
#include <fstream>
#include <iostream>
#include <utility>
#include <vector>
// Ensures backward compatibility for FP8 datatypes
#if NCCL_VERSION_CODE < NCCL_VERSION(2,24,3)
@@ -119,6 +121,7 @@ class Reporter {
// Destructor: closes the output stream, but only when _outputValid is set.
~Reporter() {
  if (!_outputValid) {
    return;
  }
  _out.close();
};
void setParameters(const size_t numCycle, const char* name, const char* typeName, const char* opName);
void addResult(int gpusPerRank, int ranksPerNode, int totalRanks, size_t numBytes, int inPlace, double timeUsec, double algBw, double busBw, int64_t wrongElts = -1);
void writeFile();
private:
bool isMainThread();
@@ -132,6 +135,7 @@ class Reporter {
std::string _collectiveName;
std::string _typeName;
std::string _opName;
std::vector<std::vector<std::pair<std::string, std::string>>> _outputData;
};
struct testEngine {
+1 -1
View file
@@ -101,7 +101,7 @@ testResult_t HyperCubeRunTest(struct threadArgs* args, int root, ncclDataType_t
int nRanks = args->nProcs*args->nThreads*args->nGpus;
if (nRanks && !(nRanks & (nRanks - 1))) {
for (int i=0; i<type_count; i++) {
TESTCHECK(TimeTest(args, run_types[i], run_typenames[i], (ncclRedOp_t)0, "", -1));
TESTCHECK(TimeTest(args, run_types[i], run_typenames[i], (ncclRedOp_t)0, "none", -1));
}
} else {
printf("nRanks %d is not a power of 2, skipping\n", nRanks);