From 1093821c335437b399035f3ebf3b67a3e960de8f Mon Sep 17 00:00:00 2001 From: Sylvain Jeaugey Date: Thu, 1 Dec 2016 15:16:35 -0800 Subject: [PATCH] Replace min BW by average BW in tests --- test/single/all_gather_test.cu | 14 +++++++++----- test/single/all_reduce_test.cu | 16 ++++++++++------ test/single/broadcast_test.cu | 14 +++++++++----- test/single/reduce_scatter_test.cu | 16 ++++++++++------ test/single/reduce_test.cu | 16 ++++++++++------ 5 files changed, 48 insertions(+), 28 deletions(-) diff --git a/test/single/all_gather_test.cu b/test/single/all_gather_test.cu index ba3841fb14..40d2f31fb2 100644 --- a/test/single/all_gather_test.cu +++ b/test/single/all_gather_test.cu @@ -14,7 +14,8 @@ #include "test_utilities.h" int errors = 0; -double min_bw = 10000.0; +double avg_bw = 0.0; +int avg_count = 0; bool is_reduction = false; template @@ -89,7 +90,9 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, maxDelta); if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; - if (busbw < min_bw) min_bw = busbw; + avg_bw += busbw; + avg_count++; + } for (int i = 0; i < nDev; ++i) { @@ -218,12 +221,13 @@ int main(int argc, char* argv[]) { free(comms); char* str = getenv("NCCL_TESTS_MIN_BW"); - double check_min_bw = str ? atof(str) : -1; + double check_avg_bw = str ? atof(str) : -1; + avg_bw /= avg_count; printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK"); - printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK")); + printf(" Avg bus bandwidth : %g %s\n", avg_bw, check_avg_bw == -1 ? "" : (avg_bw < check_avg_bw ? "FAILED" : "OK")); printf("\n"); - if (errors || min_bw < check_min_bw) + if (errors || avg_bw < check_avg_bw) exit(EXIT_FAILURE); else exit(EXIT_SUCCESS); diff --git a/test/single/all_reduce_test.cu b/test/single/all_reduce_test.cu index 642be80658..1935a38fa7 100644 --- a/test/single/all_reduce_test.cu +++ b/test/single/all_reduce_test.cu @@ -16,7 +16,8 @@ int csv = false; int errors = 0; -double min_bw = 10000.0; +double avg_bw = 0.0; +int avg_count = 0; bool is_reduction = true; template @@ -99,7 +100,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, elapsedSec * 1.0E3, algbw, busbw, maxDelta); if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; - if (busbw < min_bw) min_bw = busbw; + avg_bw += busbw; + avg_count++; nvtxRangePop(); } @@ -145,7 +147,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, elapsedSec * 1.0E3, algbw, busbw, maxDelta); if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; - if (busbw < min_bw) min_bw = busbw; + avg_bw += busbw; + avg_count++; nvtxRangePop(); } @@ -284,12 +287,13 @@ int main(int argc, char* argv[]) { free(comms); char* str = getenv("NCCL_TESTS_MIN_BW"); - double check_min_bw = str ? atof(str) : -1; + double check_avg_bw = str ? atof(str) : -1; + avg_bw /= avg_count; printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK"); - printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK")); + printf(" Avg bus bandwidth : %g %s\n", avg_bw, check_avg_bw == -1 ? "" : (avg_bw < check_avg_bw ? "FAILED" : "OK")); printf("\n"); - if (errors || min_bw < check_min_bw) + if (errors || avg_bw < check_avg_bw) exit(EXIT_FAILURE); else exit(EXIT_SUCCESS); diff --git a/test/single/broadcast_test.cu b/test/single/broadcast_test.cu index 30afebd227..6b1e04fb9d 100644 --- a/test/single/broadcast_test.cu +++ b/test/single/broadcast_test.cu @@ -14,7 +14,8 @@ #include "test_utilities.h" int errors = 0; -double min_bw = 10000.0; +double avg_bw = 0.0; +int avg_count = 0; bool is_reduction = false; template @@ -91,7 +92,9 @@ void RunTest(T** buff, const int N, const ncclDataType_t type, const int root, maxDelta); if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; - if (busbw < min_bw) min_bw = busbw; + avg_bw += busbw; + avg_count++; + } for(int i=0; i < nDev; ++i) { @@ -218,12 +221,13 @@ int main(int argc, char* argv[]) { free(comms); char* str = getenv("NCCL_TESTS_MIN_BW"); - double check_min_bw = str ? atof(str) : -1; + double check_avg_bw = str ? atof(str) : -1; + avg_bw /= avg_count; printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK"); - printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK")); + printf(" Avg bus bandwidth : %g %s\n", avg_bw, check_avg_bw == -1 ? "" : (avg_bw < check_avg_bw ? "FAILED" : "OK")); printf("\n"); - if (errors || min_bw < check_min_bw) + if (errors || avg_bw < check_avg_bw) exit(EXIT_FAILURE); else exit(EXIT_SUCCESS); diff --git a/test/single/reduce_scatter_test.cu b/test/single/reduce_scatter_test.cu index 81f30042d6..b702800925 100644 --- a/test/single/reduce_scatter_test.cu +++ b/test/single/reduce_scatter_test.cu @@ -14,7 +14,8 @@ #include "test_utilities.h" int errors = 0; -double min_bw = 10000.0; +double avg_bw = 0.0; +int avg_count = 0; bool is_reduction = true; template @@ -95,7 +96,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, maxDelta); if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; - if (busbw < min_bw) min_bw = busbw; + avg_bw += busbw; + avg_count++; } { @@ -134,7 +136,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, maxDelta); if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; - if (busbw < min_bw) min_bw = busbw; + avg_bw += busbw; + avg_count++; } for (int i = 0; i < nDev; ++i) { @@ -268,12 +271,13 @@ int main(int argc, char* argv[]) { free(comms); char* str = getenv("NCCL_TESTS_MIN_BW"); - double check_min_bw = str ? atof(str) : -1; + double check_avg_bw = str ? atof(str) : -1; + avg_bw /= avg_count; printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK"); - printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK")); + printf(" Avg bus bandwidth : %g %s\n", avg_bw, check_avg_bw == -1 ? "" : (avg_bw < check_avg_bw ? "FAILED" : "OK")); printf("\n"); - if (errors || min_bw < check_min_bw) + if (errors || avg_bw < check_avg_bw) exit(EXIT_FAILURE); else exit(EXIT_SUCCESS); diff --git a/test/single/reduce_test.cu b/test/single/reduce_test.cu index aa0d20fe7b..6abb49c45f 100644 --- a/test/single/reduce_test.cu +++ b/test/single/reduce_test.cu @@ -16,7 +16,8 @@ int csv = false; int errors = 0; -double min_bw = 10000.0; +double avg_bw = 0.0; +int avg_count = 0; bool is_reduction = true; template @@ -98,7 +99,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, elapsedSec * 1.0E3, algbw, busbw, maxDelta); if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; - if (busbw < min_bw) min_bw = busbw; + avg_bw += busbw; + avg_count++; nvtxRangePop(); } @@ -140,7 +142,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type, elapsedSec * 1.0E3, algbw, busbw, maxDelta); if (maxDelta > deltaMaxValue(type, is_reduction)) errors++; - if (busbw < min_bw) min_bw = busbw; + avg_bw += busbw; + avg_count++; nvtxRangePop(); } @@ -282,12 +285,13 @@ int main(int argc, char* argv[]) { free(comms); char* str = getenv("NCCL_TESTS_MIN_BW"); - double check_min_bw = str ? atof(str) : -1; + double check_avg_bw = str ? atof(str) : -1; + avg_bw /= avg_count; printf(" Out of bounds values : %d %s\n", errors, errors ? "FAILED" : "OK"); - printf(" Min bus bandwidth : %g %s\n", min_bw, check_min_bw == -1 ? "" : (min_bw < check_min_bw ? "FAILED" : "OK")); + printf(" Avg bus bandwidth : %g %s\n", avg_bw, check_avg_bw == -1 ? "" : (avg_bw < check_avg_bw ? "FAILED" : "OK")); printf("\n"); - if (errors || min_bw < check_min_bw) + if (errors || avg_bw < check_avg_bw) exit(EXIT_FAILURE); else exit(EXIT_SUCCESS);