Tests/syncall (#291)
* SyncAll test case would run Sync
* Despecialized name for argument reader
* Rename sync-test to team-sync-test as it uses teams
* Another stab at probing NUM_GPUS
[ROCm/rocshmem commit: 054bc33dc4]
This commit is contained in:
committed by
GitHub
szülő
3eadf8cc62
commit
bdb30e2984
@@ -49,7 +49,7 @@ declare -A TEST_NUMBERS=(
|
||||
["randomaccess"]="13"
|
||||
["barrierall"]="14"
|
||||
["syncall"]="15"
|
||||
["sync"]="16"
|
||||
["teamsync"]="16"
|
||||
["collect"]="17"
|
||||
["fcollect"]="18"
|
||||
["alltoall"]="19"
|
||||
@@ -104,8 +104,8 @@ declare -A TEST_NUMBERS=(
|
||||
["wgsyncall"]="68"
|
||||
["teambarrier"]="69"
|
||||
["teamwavebarrier"]="70"
|
||||
["wavesync"]="71"
|
||||
["wgsync"]="72"
|
||||
["teamwavesync"]="71"
|
||||
["teamwgsync"]="72"
|
||||
["teamctxsingleinfra"]="73"
|
||||
["teamctxblockinfra"]="74"
|
||||
["teamctxoddeveninfra"]="75"
|
||||
@@ -117,18 +117,17 @@ ExecTest() {
|
||||
NUM_WG=$3
|
||||
NUM_THREADS=$4
|
||||
MAX_MSG_SIZE=$5
|
||||
|
||||
if command -v amd-smi >/dev/null
|
||||
then
|
||||
NUM_GPUS=$(amd-smi list | grep GPU | wc -l)
|
||||
elif command -v rocm-smi >/dev/null
|
||||
then
|
||||
NUM_GPUS=$(rocm-smi --showserial | grep GPU | wc -l)
|
||||
else
|
||||
NUM_GPUS=64
|
||||
fi
|
||||
TIMEOUT=$((5 * 60)) # Timeout in seconds
|
||||
|
||||
if command -v amd-smi >/dev/null && amd-smi version 2>&1 >/dev/null
|
||||
then
|
||||
NUM_GPUS=${NUM_GPUS:-$(amd-smi list | grep GPU | wc -l)}
|
||||
elif command -v rocm-smi >/dev/null && rocm-smi --version 2>&1 >/dev/null
|
||||
then
|
||||
NUM_GPUS=${NUM_GPUS:-$(rocm-smi --showserial | grep GPU | wc -l)}
|
||||
fi
|
||||
NUM_GPUS=$(($NUM_GPUS > 0? $NUM_GPUS: 8))
|
||||
|
||||
TEST_NUM=${TEST_NUMBERS[$TEST_NAME]}
|
||||
|
||||
if [[ "" == "$TEST_NUM" ]]
|
||||
@@ -168,12 +167,12 @@ ExecTest() {
|
||||
CMD+=" >> $LOG_DIR/$TEST_LOG_NAME.log 2>&1"
|
||||
|
||||
# Run Test
|
||||
if [ $NUM_RANKS -le $NUM_GPUS ] && [[ "" == "$HOSTFILE" ]]; then
|
||||
if [ $NUM_GPUS -ge $NUM_RANKS ] || [[ "" != "$HOSTFILE" ]]; then
|
||||
echo $TEST_LOG_NAME
|
||||
echo "# $CMD" >"$LOG_DIR/$TEST_LOG_NAME.log"
|
||||
eval $CMD
|
||||
else
|
||||
echo "Skipping test $TEST_LOG_NAME"
|
||||
echo "Skipping test $TEST_LOG_NAME ($NUM_RANKS greater than $NUM_GPUS)"
|
||||
fi
|
||||
|
||||
# Validate Test
|
||||
@@ -400,20 +399,20 @@ TestColl() {
|
||||
ExecTest "teamwgbarrier" 2 32 256
|
||||
ExecTest "teamwgbarrier" 2 39 1024
|
||||
|
||||
ExecTest "sync" 2 1 1
|
||||
ExecTest "sync" 2 16 64
|
||||
ExecTest "sync" 2 32 256
|
||||
ExecTest "sync" 2 39 1024
|
||||
ExecTest "teamsync" 2 1 1
|
||||
ExecTest "teamsync" 2 16 64
|
||||
ExecTest "teamsync" 2 32 256
|
||||
ExecTest "teamsync" 2 39 1024
|
||||
|
||||
ExecTest "wavesync" 2 1 1
|
||||
ExecTest "wavesync" 2 16 64
|
||||
ExecTest "wavesync" 2 32 256
|
||||
ExecTest "wavesync" 2 39 1024
|
||||
ExecTest "teamwavesync" 2 1 1
|
||||
ExecTest "teamwavesync" 2 16 64
|
||||
ExecTest "teamwavesync" 2 32 256
|
||||
ExecTest "teamwavesync" 2 39 1024
|
||||
|
||||
ExecTest "wgsync" 2 1 1
|
||||
ExecTest "wgsync" 2 16 64
|
||||
ExecTest "wgsync" 2 32 256
|
||||
ExecTest "wgsync" 2 39 1024
|
||||
ExecTest "teamwgsync" 2 1 1
|
||||
ExecTest "teamwgsync" 2 16 64
|
||||
ExecTest "teamwgsync" 2 32 256
|
||||
ExecTest "teamwgsync" 2 39 1024
|
||||
|
||||
ExecTest "syncall" 2 1 1
|
||||
|
||||
@@ -607,7 +606,7 @@ TestGDA() {
|
||||
ExecTest "barrierall" 2 1 1
|
||||
ExecTest "teambarrier" 2 1 1
|
||||
|
||||
ExecTest "sync" 2 1 1
|
||||
ExecTest "teamsync" 2 1 1
|
||||
ExecTest "syncall" 2 1 1
|
||||
|
||||
# ExecTest "alltoall" 2 1 1 512
|
||||
|
||||
@@ -45,7 +45,7 @@ target_sources(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
barrier_all_tester.cpp
|
||||
sync_tester.cpp
|
||||
sync_all_tester.cpp
|
||||
test_driver.cpp
|
||||
tester.cpp
|
||||
tester_arguments.cpp
|
||||
|
||||
@@ -22,8 +22,8 @@
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _BARRIER_ALL_TESTER_HPP_
|
||||
#define _BARRIER_ALL_TESTER_HPP_
|
||||
#ifndef _SYNC_ALL_TESTER_HPP_
|
||||
#define _SYNC_ALL_TESTER_HPP_
|
||||
|
||||
#include "tester.hpp"
|
||||
|
||||
|
||||
+24
-27
@@ -22,39 +22,37 @@
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "sync_tester.hpp"
|
||||
|
||||
/******************************************************************************
|
||||
* DEVICE TEST KERNEL
|
||||
*****************************************************************************/
|
||||
__global__ void SyncTest(int loop, int skip, long long int *start_time,
|
||||
long long int *end_time, TestType type,
|
||||
ShmemContextType ctx_type, int wf_size,
|
||||
rocshmem_team_t *teams) {
|
||||
__global__ void TeamSyncTest(int loop, int skip, long long int *start_time,
|
||||
long long int *end_time,
|
||||
ShmemContextType ctx_type, TestType type,
|
||||
int wf_size, rocshmem_team_t *teams) {
|
||||
__shared__ rocshmem_ctx_t ctx;
|
||||
int t_id = get_flat_block_id();
|
||||
int wg_id = get_flat_grid_id();
|
||||
int wf_id = t_id / wf_size;
|
||||
|
||||
rocshmem_wg_ctx_create(ctx_type, &ctx);
|
||||
rocshmem_wg_team_create_ctx(teams[wg_id], ctx_type, &ctx);
|
||||
|
||||
for (int i = 0; i < loop + skip; i++) {
|
||||
if (hipThreadIdx_x == 0 && i == skip) {
|
||||
if (i == skip && hipThreadIdx_x == 0) {
|
||||
start_time[wg_id] = wall_clock64();
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case SyncTestType:
|
||||
case TeamSyncTestType:
|
||||
if(t_id == 0) {
|
||||
rocshmem_ctx_sync(ctx, teams[wg_id]);
|
||||
}
|
||||
break;
|
||||
case WAVESyncTestType:
|
||||
case TeamWAVESyncTestType:
|
||||
if(wf_id == 0) {
|
||||
rocshmem_ctx_sync_wave(ctx, teams[wg_id]);
|
||||
}
|
||||
break;
|
||||
case WGSyncTestType:
|
||||
case TeamWGSyncTestType:
|
||||
rocshmem_ctx_sync_wg(ctx, teams[wg_id]);
|
||||
break;
|
||||
default:
|
||||
@@ -73,7 +71,9 @@ __global__ void SyncTest(int loop, int skip, long long int *start_time,
|
||||
/******************************************************************************
|
||||
* HOST TESTER CLASS METHODS
|
||||
*****************************************************************************/
|
||||
SyncTester::SyncTester(TesterArguments args) : Tester(args) {
|
||||
TeamSyncTester::TeamSyncTester(TesterArguments args) : Tester(args) {
|
||||
my_pe = rocshmem_team_my_pe(ROCSHMEM_TEAM_WORLD);
|
||||
n_pes = rocshmem_team_n_pes(ROCSHMEM_TEAM_WORLD);
|
||||
|
||||
char* value{nullptr};
|
||||
if ((value = getenv("ROCSHMEM_MAX_NUM_TEAMS"))) {
|
||||
@@ -84,15 +84,11 @@ SyncTester::SyncTester(TesterArguments args) : Tester(args) {
|
||||
sizeof(rocshmem_team_t) * num_teams));
|
||||
}
|
||||
|
||||
SyncTester::~SyncTester() {
|
||||
TeamSyncTester::~TeamSyncTester() {
|
||||
CHECK_HIP(hipFree(team_sync_world_dup));
|
||||
}
|
||||
|
||||
void SyncTester::resetBuffers(size_t size) {}
|
||||
|
||||
void SyncTester::preLaunchKernel() {
|
||||
int n_pes = rocshmem_team_n_pes(ROCSHMEM_TEAM_WORLD);
|
||||
|
||||
void TeamSyncTester::preLaunchKernel() {
|
||||
for (int team_i = 0; team_i < num_teams; team_i++) {
|
||||
team_sync_world_dup[team_i] = ROCSHMEM_TEAM_INVALID;
|
||||
rocshmem_team_split_strided(ROCSHMEM_TEAM_WORLD, 0, 1, n_pes, nullptr, 0,
|
||||
@@ -104,24 +100,25 @@ void SyncTester::preLaunchKernel() {
|
||||
}
|
||||
}
|
||||
|
||||
void SyncTester::launchKernel(dim3 gridSize, dim3 blockSize, int loop,
|
||||
size_t size) {
|
||||
void TeamSyncTester::launchKernel(dim3 gridSize, dim3 blockSize,
|
||||
int loop, size_t size) {
|
||||
size_t shared_bytes = 0;
|
||||
|
||||
int n_pes = rocshmem_team_n_pes(ROCSHMEM_TEAM_WORLD);
|
||||
|
||||
hipLaunchKernelGGL(SyncTest, gridSize, blockSize, shared_bytes, stream, loop,
|
||||
args.skip, start_time, end_time, _type, _shmem_context,
|
||||
wf_size, team_sync_world_dup);
|
||||
hipLaunchKernelGGL(TeamSyncTest, gridSize, blockSize, shared_bytes,
|
||||
stream, loop, args.skip, start_time, end_time,
|
||||
_shmem_context, _type, wf_size,
|
||||
team_sync_world_dup);
|
||||
|
||||
num_msgs = (loop + args.skip) * gridSize.x;
|
||||
num_timed_msgs = loop * gridSize.x;
|
||||
}
|
||||
|
||||
void SyncTester::postLaunchKernel() {
|
||||
void TeamSyncTester::postLaunchKernel() {
|
||||
for (int team_i = 0; team_i < num_teams; team_i++) {
|
||||
rocshmem_team_destroy(team_sync_world_dup[team_i]);
|
||||
}
|
||||
}
|
||||
|
||||
void SyncTester::verifyResults(size_t size) {}
|
||||
void TeamSyncTester::resetBuffers(size_t size) {}
|
||||
|
||||
void TeamSyncTester::verifyResults(size_t size) {}
|
||||
+11
-6
@@ -22,10 +22,11 @@
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _SYNC_TESTER_HPP_
|
||||
#define _SYNC_TESTER_HPP_
|
||||
#ifndef _TEAM_SYNC_TESTER_HPP_
|
||||
#define _TEAM_SYNC_TESTER_HPP_
|
||||
|
||||
#include <rocshmem/rocshmem.hpp>
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
|
||||
#include "tester.hpp"
|
||||
|
||||
@@ -34,10 +35,10 @@ using namespace rocshmem;
|
||||
/******************************************************************************
|
||||
* HOST TESTER CLASS
|
||||
*****************************************************************************/
|
||||
class SyncTester : public Tester {
|
||||
class TeamSyncTester : public Tester {
|
||||
public:
|
||||
explicit SyncTester(TesterArguments args);
|
||||
virtual ~SyncTester();
|
||||
explicit TeamSyncTester(TesterArguments args);
|
||||
virtual ~TeamSyncTester();
|
||||
|
||||
protected:
|
||||
virtual void resetBuffers(size_t size) override;
|
||||
@@ -52,6 +53,8 @@ class SyncTester : public Tester {
|
||||
virtual void verifyResults(size_t size) override;
|
||||
|
||||
private:
|
||||
int my_pe = 0;
|
||||
int n_pes = 0;
|
||||
/**
|
||||
* This constant should equal ROCSHMEM_MAX_NUM_TEAMS - 1.
|
||||
* The default value for the maximum number of teams is 40.
|
||||
@@ -60,4 +63,6 @@ class SyncTester : public Tester {
|
||||
rocshmem_team_t *team_sync_world_dup;
|
||||
};
|
||||
|
||||
#include "team_sync_tester.cpp"
|
||||
|
||||
#endif
|
||||
@@ -197,7 +197,7 @@ int main(int argc, char *argv[]) {
|
||||
/**
|
||||
* Now grab the arguments from rocshmem.
|
||||
*/
|
||||
args.get_rocshmem_arguments();
|
||||
args.get_arguments();
|
||||
|
||||
/**
|
||||
* Using the arguments we just constructed, call the tester factory
|
||||
|
||||
@@ -45,7 +45,7 @@
|
||||
#include "shmem_ptr_tester.hpp"
|
||||
#include "signaling_operations_tester.hpp"
|
||||
#include "sync_all_tester.hpp"
|
||||
#include "sync_tester.hpp"
|
||||
#include "team_sync_tester.hpp"
|
||||
#include "team_alltoall_tester.hpp"
|
||||
#include "team_barrier_tester.hpp"
|
||||
#include "team_broadcast_tester.hpp"
|
||||
@@ -359,27 +359,27 @@ std::vector<Tester*> Tester::create(TesterArguments args) {
|
||||
return testers;
|
||||
case SyncAllTestType:
|
||||
if (rank == 0) std::cout << "SyncAll ###" << std::endl;
|
||||
testers.push_back(new SyncTester(args));
|
||||
testers.push_back(new SyncAllTester(args));
|
||||
return testers;
|
||||
case WAVESyncAllTestType:
|
||||
if (rank == 0) std::cout << "WAVE SyncAll ###" << std::endl;
|
||||
testers.push_back(new SyncTester(args));
|
||||
testers.push_back(new SyncAllTester(args));
|
||||
return testers;
|
||||
case WGSyncAllTestType:
|
||||
if (rank == 0) std::cout << "WG SyncAll ###" << std::endl;
|
||||
testers.push_back(new SyncTester(args));
|
||||
testers.push_back(new SyncAllTester(args));
|
||||
return testers;
|
||||
case SyncTestType:
|
||||
if (rank == 0) std::cout << "Sync ###" << std::endl;
|
||||
testers.push_back(new SyncTester(args));
|
||||
case TeamSyncTestType:
|
||||
if (rank == 0) std::cout << "Team Sync ###" << std::endl;
|
||||
testers.push_back(new TeamSyncTester(args));
|
||||
return testers;
|
||||
case WAVESyncTestType:
|
||||
if (rank == 0) std::cout << "WAVE Sync ###" << std::endl;
|
||||
testers.push_back(new SyncTester(args));
|
||||
case TeamWAVESyncTestType:
|
||||
if (rank == 0) std::cout << "Team WAVE Sync ###" << std::endl;
|
||||
testers.push_back(new TeamSyncTester(args));
|
||||
return testers;
|
||||
case WGSyncTestType:
|
||||
if (rank == 0) std::cout << "WG Sync ###" << std::endl;
|
||||
testers.push_back(new SyncTester(args));
|
||||
case TeamWGSyncTestType:
|
||||
if (rank == 0) std::cout << "Team WG Sync ###" << std::endl;
|
||||
testers.push_back(new TeamSyncTester(args));
|
||||
return testers;
|
||||
case RandomAccessTestType:
|
||||
if (rank == 0) std::cout << "Random_Access ###" << std::endl;
|
||||
@@ -568,8 +568,8 @@ bool Tester::peLaunchesKernel() {
|
||||
(_type == TeamAllToAllTestType) || (_type == TeamFCollectTestType) ||
|
||||
(_type == PingPongTestType) || (_type == BarrierAllTestType) ||
|
||||
(_type == WAVEBarrierAllTestType) || (_type == WGBarrierAllTestType) ||
|
||||
(_type == SyncTestType) || (_type == WAVESyncTestType) ||
|
||||
(_type == WGSyncTestType) || (_type == SyncAllTestType) ||
|
||||
(_type == TeamSyncTestType) || (_type == TeamWAVESyncTestType) ||
|
||||
(_type == TeamWGSyncTestType) || (_type == SyncAllTestType) ||
|
||||
(_type == WAVESyncAllTestType) || (_type == WGSyncAllTestType) ||
|
||||
(_type == RandomAccessTestType) || (_type == PingAllTestType) ||
|
||||
(_type == TeamBarrierTestType) || (_type == TeamWAVEBarrierTestType) ||
|
||||
|
||||
@@ -53,7 +53,7 @@ enum TestType {
|
||||
RandomAccessTestType = 13,
|
||||
BarrierAllTestType = 14,
|
||||
SyncAllTestType = 15,
|
||||
SyncTestType = 16,
|
||||
TeamSyncTestType = 16,
|
||||
CollectTestType = 17,
|
||||
TeamFCollectTestType = 18,
|
||||
TeamAllToAllTestType = 19,
|
||||
@@ -108,8 +108,8 @@ enum TestType {
|
||||
WGSyncAllTestType = 68,
|
||||
TeamBarrierTestType = 69,
|
||||
TeamWAVEBarrierTestType = 70,
|
||||
WAVESyncTestType = 71,
|
||||
WGSyncTestType = 72,
|
||||
TeamWAVESyncTestType = 71,
|
||||
TeamWGSyncTestType = 72,
|
||||
TeamCtxInfraTestSingleType = 73,
|
||||
TeamCtxInfraTestBlockType = 74,
|
||||
TeamCtxInfraTestOddEvenType = 75,
|
||||
|
||||
@@ -102,7 +102,7 @@ TesterArguments::TesterArguments(int argc, char *argv[]) {
|
||||
case SyncAllTestType:
|
||||
case WAVESyncAllTestType:
|
||||
case WGSyncAllTestType:
|
||||
case SyncTestType:
|
||||
case TeamSyncTestType:
|
||||
min_msg_size = 8;
|
||||
max_msg_size = 8;
|
||||
break;
|
||||
@@ -151,7 +151,7 @@ void TesterArguments::show_usage(std::string executable_name) {
|
||||
std::cout << "\t-m Atomics Address mode\n";
|
||||
}
|
||||
|
||||
void TesterArguments::get_rocshmem_arguments() {
|
||||
void TesterArguments::get_arguments() {
|
||||
numprocs = rocshmem_n_pes();
|
||||
myid = rocshmem_my_pe();
|
||||
|
||||
@@ -159,8 +159,8 @@ void TesterArguments::get_rocshmem_arguments() {
|
||||
if ((type != BarrierAllTestType) && (type != WAVEBarrierAllTestType) &&
|
||||
(type != WGBarrierAllTestType) && (type != SyncAllTestType) &&
|
||||
(type != WAVESyncAllTestType) && (type != WGSyncAllTestType) &&
|
||||
(type != SyncTestType) && (type != WAVESyncTestType) &&
|
||||
(type != WGSyncTestType) && (type != TeamAllToAllTestType) &&
|
||||
(type != TeamSyncTestType) && (type != TeamWAVESyncTestType) &&
|
||||
(type != TeamWGSyncTestType) && (type != TeamAllToAllTestType) &&
|
||||
(type != TeamFCollectTestType) && (type != TeamReductionTestType) &&
|
||||
(type != TeamBroadcastTestType) && (type != PingAllTestType) &&
|
||||
(type != TeamBarrierTestType) && (type != TeamWAVEBarrierTestType) &&
|
||||
|
||||
@@ -55,7 +55,7 @@ class TesterArguments {
|
||||
* Initialize rocshmem members
|
||||
* Valid after rocshmem_init function called.
|
||||
*/
|
||||
void get_rocshmem_arguments();
|
||||
void get_arguments();
|
||||
|
||||
private:
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user