Files
rocm-systems/test/AllGatherTests.cpp
T
isaki001 3398fa78fe non-hipGraph MSCCL++ tests for allReduce and allGather (#1503)
* working tests for a single message size

* move call_RCCL routine to StandaloneUtils; create .cpp file for StandaloneUtils so that it can be included in several tests

* simplify test invocation

* remove unnecessary logs and exit from ncclCommRegister

* set expected results for allGather

* skip test if nranks doesn't match number of gpus, call getAndDistributeNCCLid only from parent process

* fix improper size of expected-results vector

* Removing unused changes.

* Refactored to create a new file for the forked collectives call, as StandaloneUtils is for the Standalone tests. Renamed the functions to be slightly more accurate and follow existing naming conventions.

* Apply suggestions from code review

Co-authored-by: corey-derochie-amd <161367113+corey-derochie-amd@users.noreply.github.com>

---------

Co-authored-by: isaki001 <isakioti@banff-pla-r27-38.pla.dcgpu>
Co-authored-by: corey-derochie-amd <161367113+corey-derochie-amd@users.noreply.github.com>
Co-authored-by: Corey Derochie <corey.derochie@amd.com>
2025-02-04 09:11:32 -06:00

163 строки
6.2 KiB
C++

/*************************************************************************
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#include "TestBed.hpp"
#include "CallCollectiveForked.hpp"
namespace RcclUnitTesting
{
TEST(AllGather, OutOfPlace)
{
  // AllGather sweep over fp16/fp32 at two element counts with the
  // in-place, managed-memory and hipGraph flags all set to false.
  TestBed testBed;

  // Sweep parameters
  const std::vector<ncclFunc_t>     collectives     = {ncclCollAllGather};
  const std::vector<ncclDataType_t> elementTypes    = {ncclFloat16, ncclFloat32};
  const std::vector<ncclRedOp_t>    reductions      = {ncclSum};
  const std::vector<int>            rootRanks       = {0};
  const std::vector<int>            elementCounts   = {1048576, 500};
  const std::vector<bool>           inPlaceModes    = {false};
  const std::vector<bool>           managedMemModes = {false};
  const std::vector<bool>           hipGraphModes   = {false};

  testBed.RunSimpleSweep(collectives,
                         elementTypes,
                         reductions,
                         rootRanks,
                         elementCounts,
                         inPlaceModes,
                         managedMemModes,
                         hipGraphModes);

  testBed.Finalize();
}
TEST(AllGather, OutOfPlaceGraph)
{
  // AllGather sweep over bf16/fp64/fp8 types at a single element count with
  // the hipGraph flag set to true; in-place and managed-memory flags are false.
  TestBed testBed;

  // Sweep parameters
  const std::vector<ncclFunc_t>     collectives     = {ncclCollAllGather};
  const std::vector<ncclDataType_t> elementTypes    = {ncclBfloat16, ncclFloat64, ncclFp8E4M3, ncclFp8E5M2};
  const std::vector<ncclRedOp_t>    reductions      = {ncclSum};
  const std::vector<int>            rootRanks       = {0};
  const std::vector<int>            elementCounts   = {586};
  const std::vector<bool>           inPlaceModes    = {false};
  const std::vector<bool>           managedMemModes = {false};
  const std::vector<bool>           hipGraphModes   = {true};

  testBed.RunSimpleSweep(collectives,
                         elementTypes,
                         reductions,
                         rootRanks,
                         elementCounts,
                         inPlaceModes,
                         managedMemModes,
                         hipGraphModes);

  testBed.Finalize();
}
TEST(AllGather, InPlace)
{
  // AllGather sweep over int32 at two element counts with the in-place flag
  // set to true; managed-memory and hipGraph flags are false.
  TestBed testBed;

  // Sweep parameters
  const std::vector<ncclFunc_t>     collectives     = {ncclCollAllGather};
  const std::vector<ncclDataType_t> elementTypes    = {ncclInt32};
  const std::vector<ncclRedOp_t>    reductions      = {ncclSum};
  const std::vector<int>            rootRanks       = {0};
  const std::vector<int>            elementCounts   = {104857, 264};
  const std::vector<bool>           inPlaceModes    = {true};
  const std::vector<bool>           managedMemModes = {false};
  const std::vector<bool>           hipGraphModes   = {false};

  testBed.RunSimpleSweep(collectives,
                         elementTypes,
                         reductions,
                         rootRanks,
                         elementCounts,
                         inPlaceModes,
                         managedMemModes,
                         hipGraphModes);

  testBed.Finalize();
}
TEST(AllGather, InPlaceGraph)
{
  // AllGather sweep over int8/int64 at a single element count with both the
  // in-place and hipGraph flags set to true; managed-memory flag is false.
  TestBed testBed;

  // Sweep parameters
  const std::vector<ncclFunc_t>     collectives     = {ncclCollAllGather};
  const std::vector<ncclDataType_t> elementTypes    = {ncclInt8, ncclInt64};
  const std::vector<ncclRedOp_t>    reductions      = {ncclSum};
  const std::vector<int>            rootRanks       = {0};
  const std::vector<int>            elementCounts   = {958};
  const std::vector<bool>           inPlaceModes    = {true};
  const std::vector<bool>           managedMemModes = {false};
  const std::vector<bool>           hipGraphModes   = {true};

  testBed.RunSimpleSweep(collectives,
                         elementTypes,
                         reductions,
                         rootRanks,
                         elementCounts,
                         inPlaceModes,
                         managedMemModes,
                         hipGraphModes);

  testBed.Finalize();
}
TEST(AllGather, ManagedMem)
{
  // AllGather sweep over uint8 at two element counts with the managed-memory
  // flag set to true; in-place and hipGraph flags are false.
  TestBed testBed;

  // Sweep parameters
  const std::vector<ncclFunc_t>     collectives     = {ncclCollAllGather};
  const std::vector<ncclDataType_t> elementTypes    = {ncclUint8};
  const std::vector<ncclRedOp_t>    reductions      = {ncclSum};
  const std::vector<int>            rootRanks       = {0};
  const std::vector<int>            elementCounts   = {1039203, 2500};
  const std::vector<bool>           inPlaceModes    = {false};
  const std::vector<bool>           managedMemModes = {true};
  const std::vector<bool>           hipGraphModes   = {false};

  testBed.RunSimpleSweep(collectives,
                         elementTypes,
                         reductions,
                         rootRanks,
                         elementCounts,
                         inPlaceModes,
                         managedMemModes,
                         hipGraphModes);

  testBed.Finalize();
}
TEST(AllGather, ManagedMemGraph)
{
  // AllGather sweep over uint32/uint64 at a single element count with both
  // the managed-memory and hipGraph flags set to true; in-place flag is false.
  TestBed testBed;

  // Sweep parameters
  const std::vector<ncclFunc_t>     collectives     = {ncclCollAllGather};
  const std::vector<ncclDataType_t> elementTypes    = {ncclUint32, ncclUint64};
  const std::vector<ncclRedOp_t>    reductions      = {ncclSum};
  const std::vector<int>            rootRanks       = {0};
  const std::vector<int>            elementCounts   = {896};
  const std::vector<bool>           inPlaceModes    = {false};
  const std::vector<bool>           managedMemModes = {true};
  const std::vector<bool>           hipGraphModes   = {true};

  testBed.RunSimpleSweep(collectives,
                         elementTypes,
                         reductions,
                         rootRanks,
                         elementCounts,
                         inPlaceModes,
                         managedMemModes,
                         hipGraphModes);

  testBed.Finalize();
}
TEST(AllGather, UserBufferRegistration)
{
  // Builds a send pattern and the matching expected result, then hands both
  // to callCollectiveForked for an ncclCollAllGather run with nranks ranks.
  const int    nranks = 8;
  const size_t count  = 2048;  // number of elements in sendBuff

  std::vector<int> sendBuff(count, 0);
  // NOTE(review): recvBuff holds `count` elements while `expected` holds
  // nranks*count; confirm callCollectiveForked does not need recvBuff to be
  // large enough for the full gathered result.
  std::vector<int> recvBuff(count, 0);
  std::vector<int> expected(nranks * count, 0);

  // Send pattern is 0, 1, ..., count-1. The index is size_t to match `count`
  // (no signed/unsigned comparison); the narrowing to int is made explicit.
  for (size_t i = 0; i < count; ++i) {
    sendBuff[i] = static_cast<int>(i);
  }

  // expected is nranks back-to-back copies of sendBuff.
  for (int r = 0; r < nranks; ++r) {
    for (size_t i = 0; i < count; ++i) {
      expected[r * count + i] = sendBuff[i];
    }
  }

  callCollectiveForked(nranks, ncclCollAllGather, sendBuff, recvBuff, expected);
}
TEST(AllGather, ManagedMemUserBufferRegistration)
{
  // Builds a send pattern and the matching expected result, then hands both
  // to callCollectiveForked for an ncclCollAllGather run with nranks ranks,
  // passing use_managed_mem = true as the final argument.
  const int    nranks          = 8;
  const size_t count           = 2048;  // number of elements in sendBuff
  const bool   use_managed_mem = true;

  std::vector<int> sendBuff(count, 0);
  // NOTE(review): recvBuff holds `count` elements while `expected` holds
  // nranks*count; confirm callCollectiveForked does not need recvBuff to be
  // large enough for the full gathered result.
  std::vector<int> recvBuff(count, 0);
  std::vector<int> expected(nranks * count, 0);

  // Send pattern is 0, 1, ..., count-1. The index is size_t to match `count`
  // (no signed/unsigned comparison); the narrowing to int is made explicit.
  for (size_t i = 0; i < count; ++i) {
    sendBuff[i] = static_cast<int>(i);
  }

  // expected is nranks back-to-back copies of sendBuff.
  for (int r = 0; r < nranks; ++r) {
    for (size_t i = 0; i < count; ++i) {
      expected[r * count + i] = sendBuff[i];
    }
  }

  callCollectiveForked(nranks, ncclCollAllGather, sendBuff, recvBuff, expected, use_managed_mem);
}
}