* SyncAll test case would run Sync

* Despecialized name for argument reader

* Rename sync-test to team-sync-test as it uses teams

* Another stab at probing NUM_GPUS

[ROCm/rocshmem commit: 054bc33dc4]
This commit is contained in:
Aurelien Bouteiller
2025-10-23 13:40:41 -04:00
committed by GitHub
szülő 3eadf8cc62
commit bdb30e2984
10 fájl változott, egészen pontosan 89 új sor hozzáadva és 88 régi sor törölve
@@ -49,7 +49,7 @@ declare -A TEST_NUMBERS=(
["randomaccess"]="13"
["barrierall"]="14"
["syncall"]="15"
["sync"]="16"
["teamsync"]="16"
["collect"]="17"
["fcollect"]="18"
["alltoall"]="19"
@@ -104,8 +104,8 @@ declare -A TEST_NUMBERS=(
["wgsyncall"]="68"
["teambarrier"]="69"
["teamwavebarrier"]="70"
["wavesync"]="71"
["wgsync"]="72"
["teamwavesync"]="71"
["teamwgsync"]="72"
["teamctxsingleinfra"]="73"
["teamctxblockinfra"]="74"
["teamctxoddeveninfra"]="75"
@@ -117,18 +117,17 @@ ExecTest() {
NUM_WG=$3
NUM_THREADS=$4
MAX_MSG_SIZE=$5
if command -v amd-smi >/dev/null
then
NUM_GPUS=$(amd-smi list | grep GPU | wc -l)
elif command -v rocm-smi >/dev/null
then
NUM_GPUS=$(rocm-smi --showserial | grep GPU | wc -l)
else
NUM_GPUS=64
fi
TIMEOUT=$((5 * 60)) # Timeout in seconds
if command -v amd-smi >/dev/null && amd-smi version 2>&1 >/dev/null
then
NUM_GPUS=${NUM_GPUS:-$(amd-smi list | grep GPU | wc -l)}
elif command -v rocm-smi >/dev/null && rocm-smi --version 2>&1 >/dev/null
then
NUM_GPUS=${NUM_GPUS:-$(rocm-smi --showserial | grep GPU | wc -l)}
fi
NUM_GPUS=$(($NUM_GPUS > 0? $NUM_GPUS: 8))
TEST_NUM=${TEST_NUMBERS[$TEST_NAME]}
if [[ "" == "$TEST_NUM" ]]
@@ -168,12 +167,12 @@ ExecTest() {
CMD+=" >> $LOG_DIR/$TEST_LOG_NAME.log 2>&1"
# Run Test
if [ $NUM_RANKS -le $NUM_GPUS ] && [[ "" == "$HOSTFILE" ]]; then
if [ $NUM_GPUS -ge $NUM_RANKS ] || [[ "" != "$HOSTFILE" ]]; then
echo $TEST_LOG_NAME
echo "# $CMD" >"$LOG_DIR/$TEST_LOG_NAME.log"
eval $CMD
else
echo "Skipping test $TEST_LOG_NAME"
echo "Skipping test $TEST_LOG_NAME ($NUM_RANKS greater than $NUM_GPUS)"
fi
# Validate Test
@@ -400,20 +399,20 @@ TestColl() {
ExecTest "teamwgbarrier" 2 32 256
ExecTest "teamwgbarrier" 2 39 1024
ExecTest "sync" 2 1 1
ExecTest "sync" 2 16 64
ExecTest "sync" 2 32 256
ExecTest "sync" 2 39 1024
ExecTest "teamsync" 2 1 1
ExecTest "teamsync" 2 16 64
ExecTest "teamsync" 2 32 256
ExecTest "teamsync" 2 39 1024
ExecTest "wavesync" 2 1 1
ExecTest "wavesync" 2 16 64
ExecTest "wavesync" 2 32 256
ExecTest "wavesync" 2 39 1024
ExecTest "teamwavesync" 2 1 1
ExecTest "teamwavesync" 2 16 64
ExecTest "teamwavesync" 2 32 256
ExecTest "teamwavesync" 2 39 1024
ExecTest "wgsync" 2 1 1
ExecTest "wgsync" 2 16 64
ExecTest "wgsync" 2 32 256
ExecTest "wgsync" 2 39 1024
ExecTest "teamwgsync" 2 1 1
ExecTest "teamwgsync" 2 16 64
ExecTest "teamwgsync" 2 32 256
ExecTest "teamwgsync" 2 39 1024
ExecTest "syncall" 2 1 1
@@ -607,7 +606,7 @@ TestGDA() {
ExecTest "barrierall" 2 1 1
ExecTest "teambarrier" 2 1 1
ExecTest "sync" 2 1 1
ExecTest "teamsync" 2 1 1
ExecTest "syncall" 2 1 1
# ExecTest "alltoall" 2 1 1 512
@@ -45,7 +45,7 @@ target_sources(
${PROJECT_NAME}
PRIVATE
barrier_all_tester.cpp
sync_tester.cpp
sync_all_tester.cpp
test_driver.cpp
tester.cpp
tester_arguments.cpp
@@ -22,8 +22,8 @@
* IN THE SOFTWARE.
*****************************************************************************/
#ifndef _BARRIER_ALL_TESTER_HPP_
#define _BARRIER_ALL_TESTER_HPP_
#ifndef _SYNC_ALL_TESTER_HPP_
#define _SYNC_ALL_TESTER_HPP_
#include "tester.hpp"
@@ -22,39 +22,37 @@
* IN THE SOFTWARE.
*****************************************************************************/
#include "sync_tester.hpp"
/******************************************************************************
* DEVICE TEST KERNEL
*****************************************************************************/
__global__ void SyncTest(int loop, int skip, long long int *start_time,
long long int *end_time, TestType type,
ShmemContextType ctx_type, int wf_size,
rocshmem_team_t *teams) {
__global__ void TeamSyncTest(int loop, int skip, long long int *start_time,
long long int *end_time,
ShmemContextType ctx_type, TestType type,
int wf_size, rocshmem_team_t *teams) {
__shared__ rocshmem_ctx_t ctx;
int t_id = get_flat_block_id();
int wg_id = get_flat_grid_id();
int wf_id = t_id / wf_size;
rocshmem_wg_ctx_create(ctx_type, &ctx);
rocshmem_wg_team_create_ctx(teams[wg_id], ctx_type, &ctx);
for (int i = 0; i < loop + skip; i++) {
if (hipThreadIdx_x == 0 && i == skip) {
if (i == skip && hipThreadIdx_x == 0) {
start_time[wg_id] = wall_clock64();
}
switch (type) {
case SyncTestType:
case TeamSyncTestType:
if(t_id == 0) {
rocshmem_ctx_sync(ctx, teams[wg_id]);
}
break;
case WAVESyncTestType:
case TeamWAVESyncTestType:
if(wf_id == 0) {
rocshmem_ctx_sync_wave(ctx, teams[wg_id]);
}
break;
case WGSyncTestType:
case TeamWGSyncTestType:
rocshmem_ctx_sync_wg(ctx, teams[wg_id]);
break;
default:
@@ -73,7 +71,9 @@ __global__ void SyncTest(int loop, int skip, long long int *start_time,
/******************************************************************************
* HOST TESTER CLASS METHODS
*****************************************************************************/
SyncTester::SyncTester(TesterArguments args) : Tester(args) {
TeamSyncTester::TeamSyncTester(TesterArguments args) : Tester(args) {
my_pe = rocshmem_team_my_pe(ROCSHMEM_TEAM_WORLD);
n_pes = rocshmem_team_n_pes(ROCSHMEM_TEAM_WORLD);
char* value{nullptr};
if ((value = getenv("ROCSHMEM_MAX_NUM_TEAMS"))) {
@@ -84,15 +84,11 @@ SyncTester::SyncTester(TesterArguments args) : Tester(args) {
sizeof(rocshmem_team_t) * num_teams));
}
SyncTester::~SyncTester() {
TeamSyncTester::~TeamSyncTester() {
CHECK_HIP(hipFree(team_sync_world_dup));
}
void SyncTester::resetBuffers(size_t size) {}
void SyncTester::preLaunchKernel() {
int n_pes = rocshmem_team_n_pes(ROCSHMEM_TEAM_WORLD);
void TeamSyncTester::preLaunchKernel() {
for (int team_i = 0; team_i < num_teams; team_i++) {
team_sync_world_dup[team_i] = ROCSHMEM_TEAM_INVALID;
rocshmem_team_split_strided(ROCSHMEM_TEAM_WORLD, 0, 1, n_pes, nullptr, 0,
@@ -104,24 +100,25 @@ void SyncTester::preLaunchKernel() {
}
}
void SyncTester::launchKernel(dim3 gridSize, dim3 blockSize, int loop,
size_t size) {
void TeamSyncTester::launchKernel(dim3 gridSize, dim3 blockSize,
int loop, size_t size) {
size_t shared_bytes = 0;
int n_pes = rocshmem_team_n_pes(ROCSHMEM_TEAM_WORLD);
hipLaunchKernelGGL(SyncTest, gridSize, blockSize, shared_bytes, stream, loop,
args.skip, start_time, end_time, _type, _shmem_context,
wf_size, team_sync_world_dup);
hipLaunchKernelGGL(TeamSyncTest, gridSize, blockSize, shared_bytes,
stream, loop, args.skip, start_time, end_time,
_shmem_context, _type, wf_size,
team_sync_world_dup);
num_msgs = (loop + args.skip) * gridSize.x;
num_timed_msgs = loop * gridSize.x;
}
void SyncTester::postLaunchKernel() {
void TeamSyncTester::postLaunchKernel() {
for (int team_i = 0; team_i < num_teams; team_i++) {
rocshmem_team_destroy(team_sync_world_dup[team_i]);
}
}
void SyncTester::verifyResults(size_t size) {}
void TeamSyncTester::resetBuffers(size_t size) {}
void TeamSyncTester::verifyResults(size_t size) {}
@@ -22,10 +22,11 @@
* IN THE SOFTWARE.
*****************************************************************************/
#ifndef _SYNC_TESTER_HPP_
#define _SYNC_TESTER_HPP_
#ifndef _TEAM_SYNC_TESTER_HPP_
#define _TEAM_SYNC_TESTER_HPP_
#include <rocshmem/rocshmem.hpp>
#include <functional>
#include <utility>
#include "tester.hpp"
@@ -34,10 +35,10 @@ using namespace rocshmem;
/******************************************************************************
* HOST TESTER CLASS
*****************************************************************************/
class SyncTester : public Tester {
class TeamSyncTester : public Tester {
public:
explicit SyncTester(TesterArguments args);
virtual ~SyncTester();
explicit TeamSyncTester(TesterArguments args);
virtual ~TeamSyncTester();
protected:
virtual void resetBuffers(size_t size) override;
@@ -52,6 +53,8 @@ class SyncTester : public Tester {
virtual void verifyResults(size_t size) override;
private:
int my_pe = 0;
int n_pes = 0;
/**
* This constant should equal ROCSHMEM_MAX_NUM_TEAMS - 1.
* The default value for the maximum number of teams is 40.
@@ -60,4 +63,6 @@ class SyncTester : public Tester {
rocshmem_team_t *team_sync_world_dup;
};
#include "team_sync_tester.cpp"
#endif
@@ -197,7 +197,7 @@ int main(int argc, char *argv[]) {
/**
* Now grab the arguments from rocshmem.
*/
args.get_rocshmem_arguments();
args.get_arguments();
/**
* Using the arguments we just constructed, call the tester factory
@@ -45,7 +45,7 @@
#include "shmem_ptr_tester.hpp"
#include "signaling_operations_tester.hpp"
#include "sync_all_tester.hpp"
#include "sync_tester.hpp"
#include "team_sync_tester.hpp"
#include "team_alltoall_tester.hpp"
#include "team_barrier_tester.hpp"
#include "team_broadcast_tester.hpp"
@@ -359,27 +359,27 @@ std::vector<Tester*> Tester::create(TesterArguments args) {
return testers;
case SyncAllTestType:
if (rank == 0) std::cout << "SyncAll ###" << std::endl;
testers.push_back(new SyncTester(args));
testers.push_back(new SyncAllTester(args));
return testers;
case WAVESyncAllTestType:
if (rank == 0) std::cout << "WAVE SyncAll ###" << std::endl;
testers.push_back(new SyncTester(args));
testers.push_back(new SyncAllTester(args));
return testers;
case WGSyncAllTestType:
if (rank == 0) std::cout << "WG SyncAll ###" << std::endl;
testers.push_back(new SyncTester(args));
testers.push_back(new SyncAllTester(args));
return testers;
case SyncTestType:
if (rank == 0) std::cout << "Sync ###" << std::endl;
testers.push_back(new SyncTester(args));
case TeamSyncTestType:
if (rank == 0) std::cout << "Team Sync ###" << std::endl;
testers.push_back(new TeamSyncTester(args));
return testers;
case WAVESyncTestType:
if (rank == 0) std::cout << "WAVE Sync ###" << std::endl;
testers.push_back(new SyncTester(args));
case TeamWAVESyncTestType:
if (rank == 0) std::cout << "Team WAVE Sync ###" << std::endl;
testers.push_back(new TeamSyncTester(args));
return testers;
case WGSyncTestType:
if (rank == 0) std::cout << "WG Sync ###" << std::endl;
testers.push_back(new SyncTester(args));
case TeamWGSyncTestType:
if (rank == 0) std::cout << "Team WG Sync ###" << std::endl;
testers.push_back(new TeamSyncTester(args));
return testers;
case RandomAccessTestType:
if (rank == 0) std::cout << "Random_Access ###" << std::endl;
@@ -568,8 +568,8 @@ bool Tester::peLaunchesKernel() {
(_type == TeamAllToAllTestType) || (_type == TeamFCollectTestType) ||
(_type == PingPongTestType) || (_type == BarrierAllTestType) ||
(_type == WAVEBarrierAllTestType) || (_type == WGBarrierAllTestType) ||
(_type == SyncTestType) || (_type == WAVESyncTestType) ||
(_type == WGSyncTestType) || (_type == SyncAllTestType) ||
(_type == TeamSyncTestType) || (_type == TeamWAVESyncTestType) ||
(_type == TeamWGSyncTestType) || (_type == SyncAllTestType) ||
(_type == WAVESyncAllTestType) || (_type == WGSyncAllTestType) ||
(_type == RandomAccessTestType) || (_type == PingAllTestType) ||
(_type == TeamBarrierTestType) || (_type == TeamWAVEBarrierTestType) ||
@@ -53,7 +53,7 @@ enum TestType {
RandomAccessTestType = 13,
BarrierAllTestType = 14,
SyncAllTestType = 15,
SyncTestType = 16,
TeamSyncTestType = 16,
CollectTestType = 17,
TeamFCollectTestType = 18,
TeamAllToAllTestType = 19,
@@ -108,8 +108,8 @@ enum TestType {
WGSyncAllTestType = 68,
TeamBarrierTestType = 69,
TeamWAVEBarrierTestType = 70,
WAVESyncTestType = 71,
WGSyncTestType = 72,
TeamWAVESyncTestType = 71,
TeamWGSyncTestType = 72,
TeamCtxInfraTestSingleType = 73,
TeamCtxInfraTestBlockType = 74,
TeamCtxInfraTestOddEvenType = 75,
@@ -102,7 +102,7 @@ TesterArguments::TesterArguments(int argc, char *argv[]) {
case SyncAllTestType:
case WAVESyncAllTestType:
case WGSyncAllTestType:
case SyncTestType:
case TeamSyncTestType:
min_msg_size = 8;
max_msg_size = 8;
break;
@@ -151,7 +151,7 @@ void TesterArguments::show_usage(std::string executable_name) {
std::cout << "\t-m Atomics Address mode\n";
}
void TesterArguments::get_rocshmem_arguments() {
void TesterArguments::get_arguments() {
numprocs = rocshmem_n_pes();
myid = rocshmem_my_pe();
@@ -159,8 +159,8 @@ void TesterArguments::get_rocshmem_arguments() {
if ((type != BarrierAllTestType) && (type != WAVEBarrierAllTestType) &&
(type != WGBarrierAllTestType) && (type != SyncAllTestType) &&
(type != WAVESyncAllTestType) && (type != WGSyncAllTestType) &&
(type != SyncTestType) && (type != WAVESyncTestType) &&
(type != WGSyncTestType) && (type != TeamAllToAllTestType) &&
(type != TeamSyncTestType) && (type != TeamWAVESyncTestType) &&
(type != TeamWGSyncTestType) && (type != TeamAllToAllTestType) &&
(type != TeamFCollectTestType) && (type != TeamReductionTestType) &&
(type != TeamBroadcastTestType) && (type != PingAllTestType) &&
(type != TeamBarrierTestType) && (type != TeamWAVEBarrierTestType) &&
@@ -55,7 +55,7 @@ class TesterArguments {
* Initialize rocshmem members
* Valid after rocshmem_init function called.
*/
void get_rocshmem_arguments();
void get_arguments();
private:
/**