Files
rocm-systems/tests/functional_tests/tester.cpp
T

661 строка
24 KiB
C++
Исходник Обычный вид История

2024-07-01 09:57:08 -05:00
/******************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#include "tester.hpp"
#include <hip/hip_runtime.h>
#include <mpi.h>
#include <functional>
#include <iostream>
2024-11-25 14:12:15 -06:00
#include <rocshmem/rocshmem.hpp>
2024-07-01 09:57:08 -05:00
#include <vector>
#include "alltoall_tester.hpp"
#include "amo_bitwise_tester.hpp"
#include "amo_extended_tester.hpp"
#include "amo_standard_tester.hpp"
#include "barrier_all_tester.hpp"
#include "empty_tester.hpp"
#include "extended_primitives.hpp"
#include "fcollect_tester.hpp"
#include "ping_all_tester.hpp"
#include "ping_pong_tester.hpp"
#include "primitive_mr_tester.hpp"
#include "primitive_tester.hpp"
#include "random_access_tester.hpp"
#include "shmem_ptr_tester.hpp"
2024-11-22 15:24:50 -06:00
#include "signaling_operations_tester.hpp"
2024-07-01 09:57:08 -05:00
#include "swarm_tester.hpp"
#include "sync_tester.hpp"
#include "team_broadcast_tester.hpp"
#include "team_ctx_infra_tester.hpp"
#include "team_ctx_primitive_tester.hpp"
#include "team_reduction_tester.hpp"
#include "wave_level_primitives.hpp"
2024-07-01 09:57:08 -05:00
Tester::Tester(TesterArguments args) : args(args) {
_type = (TestType)args.algorithm;
_shmem_context = args.shmem_context;
CHECK_HIP(hipGetDevice(&device_id));
CHECK_HIP(hipGetDeviceProperties(&deviceProps, device_id));
num_warps = (args.wg_size - 1) / deviceProps.warpSize + 1;
2024-07-02 10:07:43 -07:00
CHECK_HIP(hipStreamCreate(&stream));
CHECK_HIP(hipEventCreate(&start_event));
CHECK_HIP(hipEventCreate(&stop_event));
CHECK_HIP(hipMalloc((void**)&timer, sizeof(uint64_t) * args.num_wgs));
2024-07-01 09:57:08 -05:00
}
Tester::~Tester() {
2024-07-02 10:07:43 -07:00
CHECK_HIP(hipFree(timer));
CHECK_HIP(hipEventDestroy(stop_event));
CHECK_HIP(hipEventDestroy(start_event));
CHECK_HIP(hipStreamDestroy(stream));
2024-07-01 09:57:08 -05:00
}
std::vector<Tester*> Tester::create(TesterArguments args) {
int rank = args.myid;
std::vector<Tester*> testers;
hipDeviceProp_t deviceProps;
int device_id, numWarps;
CHECK_HIP(hipGetDevice(&device_id));
CHECK_HIP(hipGetDeviceProperties(&deviceProps, device_id));
numWarps = (args.wg_size - 1) / deviceProps.warpSize + 1;
2024-07-01 09:57:08 -05:00
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "### Creating Test: ";
2024-07-01 09:57:08 -05:00
TestType type = (TestType)args.algorithm;
switch (type) {
case InitTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "Init ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new EmptyTester(args));
return testers;
case GetTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "Blocking Gets ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new PrimitiveTester(args));
return testers;
case GetNBITestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "Non-Blocking Gets ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new PrimitiveTester(args));
return testers;
case PutTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "Blocking Puts ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new PrimitiveTester(args));
return testers;
case PutNBITestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "Non-Blocking Puts ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new PrimitiveTester(args));
return testers;
case TeamCtxInfraTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "Team Ctx Infra test ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new TeamCtxInfraTester(args));
return testers;
case TeamCtxGetTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "Blocking Team Ctx Gets ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new TeamCtxPrimitiveTester(args));
return testers;
case TeamCtxGetNBITestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "Non-Blocking Team Ctx Gets ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new TeamCtxPrimitiveTester(args));
return testers;
case TeamCtxPutTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "Blocking Team Ctx Puts ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new TeamCtxPrimitiveTester(args));
return testers;
case TeamCtxPutNBITestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "Non-Blocking Team Ctx Puts ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new TeamCtxPrimitiveTester(args));
return testers;
case PTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "P Test ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new PrimitiveTester(args));
return testers;
case GTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "G Test ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new PrimitiveTester(args));
return testers;
case GetSwarmTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "Get Swarm ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new GetSwarmTester(args));
return testers;
case TeamReductionTestType:
if (rank == 0)
2024-10-28 13:37:33 -05:00
std::cout << "All-to-All Team-based Reduction ###" << std::endl;
2024-11-25 14:12:15 -06:00
testers.push_back(new TeamReductionTester<float, ROCSHMEM_SUM>(
2024-07-01 09:57:08 -05:00
args,
[](float& f1, float& f2) {
f1 = 1;
f2 = 1;
},
[](float v, float n_pes) {
return (v == n_pes)
? std::make_pair(true, "")
: std::make_pair(false, "Got " + std::to_string(v) +
", Expect " +
std::to_string(n_pes));
}));
return testers;
case TeamBroadcastTestType:
if (rank == 0) {
2024-10-28 13:37:33 -05:00
std::cout << "Team Broadcast Test ###" << std::endl;
2024-07-01 09:57:08 -05:00
}
testers.push_back(new TeamBroadcastTester<long>(
args,
[](long& f1, long& f2) {
f1 = 1;
f2 = 2;
},
[rank](long v) {
long expected_val;
/**
* The verification routine here requires that the
* PE_root value is 0 which denotes that the
* sending processing element is rank 0.
*
* The difference in expected values arises from
* the specification for broadcast where the
* PE_root processing element does not copy the
* contents from its own source to dest during
* the broadcast.
*/
if (rank == 0) {
expected_val = 2;
} else {
expected_val = 1;
}
return (v == expected_val)
? std::make_pair(true, "")
: std::make_pair(
false, "Rank " + std::to_string(rank) + ", Got " +
std::to_string(v) + ", Expect " +
std::to_string(expected_val));
}));
return testers;
case AllToAllTestType:
if (rank == 0) {
2024-10-28 13:37:33 -05:00
std::cout << "Alltoall Test ###" << std::endl;
2024-07-01 09:57:08 -05:00
}
testers.push_back(new AlltoallTester<int64_t>(
args,
[rank](int64_t& f1, int64_t& f2, int64_t dest_pe) {
const long SRC_SHIFT = 16;
// Make value for each src, dst pair unique
// by shifting src by SRC_SHIFT bits
f1 = (rank << SRC_SHIFT) + dest_pe;
f2 = -1;
},
[rank](int64_t v, int64_t src_pe) {
const long SRC_SHIFT = 16;
// See if we obtained unique value
long expected_val = (src_pe << SRC_SHIFT) + rank;
return (v == expected_val)
? std::make_pair(true, "")
: std::make_pair(
false, "Rank " + std::to_string(rank) + ", Got " +
std::to_string(v) + ", Expect " +
std::to_string(expected_val));
}));
return testers;
case FCollectTestType:
if (rank == 0) {
2024-10-28 13:37:33 -05:00
std::cout << "Fcollect Test ###" << std::endl;
2024-07-01 09:57:08 -05:00
}
testers.push_back(new FcollectTester<int64_t>(
args,
[rank](int64_t& f1, int64_t& f2) {
f1 = rank;
f2 = -1;
},
[rank](int64_t v, int64_t src_pe) {
int64_t expected_val = src_pe;
return (v == expected_val)
? std::make_pair(true, "")
: std::make_pair(
false, "Rank " + std::to_string(rank) + ", Got " +
std::to_string(v) + ", Expect " +
std::to_string(expected_val));
}));
return testers;
case AMO_FAddTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "AMO Fetch_Add ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new AMOStandardTester<long long>(args));
testers.push_back(new AMOStandardTester<long>(args));
testers.push_back(new AMOStandardTester<int>(args));
return testers;
case AMO_FIncTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "AMO Fetch_Inc ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new AMOStandardTester<long long>(args));
testers.push_back(new AMOStandardTester<long>(args));
testers.push_back(new AMOStandardTester<int>(args));
return testers;
case AMO_FetchTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "AMO Fetch ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new AMOExtendedTester<long long>(args));
testers.push_back(new AMOExtendedTester<long>(args));
testers.push_back(new AMOExtendedTester<int>(args));
return testers;
case AMO_FCswapTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "AMO Fetch_CSWAP ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new AMOStandardTester<long long>(args));
testers.push_back(new AMOStandardTester<long>(args));
testers.push_back(new AMOStandardTester<int>(args));
return testers;
case AMO_AddTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "AMO Add ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new AMOStandardTester<long long>(args));
testers.push_back(new AMOStandardTester<long>(args));
testers.push_back(new AMOStandardTester<int>(args));
return testers;
case AMO_SetTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "AMO Set ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new AMOExtendedTester<long long>(args));
testers.push_back(new AMOExtendedTester<long>(args));
testers.push_back(new AMOExtendedTester<int>(args));
return testers;
case AMO_SwapTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "AMO Swap ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new AMOExtendedTester<long long>(args));
testers.push_back(new AMOExtendedTester<long>(args));
testers.push_back(new AMOExtendedTester<int>(args));
return testers;
case AMO_FetchAndTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "AMO Fetch And ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new AMOBitwiseTester<unsigned long long>(args));
testers.push_back(new AMOBitwiseTester<unsigned long>(args));
testers.push_back(new AMOBitwiseTester<unsigned int>(args));
return testers;
case AMO_AndTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "AMO And ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new AMOBitwiseTester<unsigned long long>(args));
testers.push_back(new AMOBitwiseTester<unsigned long>(args));
testers.push_back(new AMOBitwiseTester<unsigned int>(args));
return testers;
case AMO_FetchOrTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "AMO Fetch Or ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new AMOBitwiseTester<unsigned long long>(args));
testers.push_back(new AMOBitwiseTester<unsigned long>(args));
testers.push_back(new AMOBitwiseTester<unsigned int>(args));
return testers;
case AMO_OrTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "AMO Or ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new AMOBitwiseTester<unsigned long long>(args));
testers.push_back(new AMOBitwiseTester<unsigned long>(args));
testers.push_back(new AMOBitwiseTester<unsigned int>(args));
return testers;
case AMO_FetchXorTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "AMO Fetch Xor ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new AMOBitwiseTester<unsigned long long>(args));
testers.push_back(new AMOBitwiseTester<unsigned long>(args));
testers.push_back(new AMOBitwiseTester<unsigned int>(args));
return testers;
case AMO_XorTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "AMO Xor ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new AMOBitwiseTester<unsigned long long>(args));
testers.push_back(new AMOBitwiseTester<unsigned long>(args));
testers.push_back(new AMOBitwiseTester<unsigned int>(args));
return testers;
case AMO_IncTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "AMO Inc ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new AMOStandardTester<long long>(args));
testers.push_back(new AMOStandardTester<long>(args));
testers.push_back(new AMOStandardTester<int>(args));
return testers;
case PingPongTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "PingPong ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new PingPongTester(args));
return testers;
case PingAllTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "PingAll ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new PingAllTester(args));
return testers;
case BarrierAllTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "Barrier_All ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new BarrierAllTester(args));
return testers;
case SyncAllTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "SyncAll ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new SyncTester(args));
return testers;
case SyncTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "Sync ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new SyncTester(args));
return testers;
case RandomAccessTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "Random_Access ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new RandomAccessTester(args));
return testers;
case ShmemPtrTestType:
2024-10-28 13:37:33 -05:00
if (rank == 0) std::cout << "Shmem_Ptr ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new ShmemPtrTester(args));
return testers;
case WGGetTestType:
if (rank == 0) {
if (args.num_wgs > 1)
2024-10-28 13:37:33 -05:00
std::cout << "Tiled Blocking WG level Gets ###" << std::endl;
else std::cout << "Blocking WG level Gets ###" << std::endl;
}
2024-07-01 09:57:08 -05:00
testers.push_back(new ExtendedPrimitiveTester(args));
return testers;
case WGGetNBITestType:
if (rank == 0) {
if (args.num_wgs > 1)
2024-10-28 13:37:33 -05:00
std::cout << "Tiled Non-Blocking WG level Gets ###" << std::endl;
else std::cout << "Non-Blocking WG level Gets ###" << std::endl;
}
2024-07-01 09:57:08 -05:00
testers.push_back(new ExtendedPrimitiveTester(args));
return testers;
case WGPutTestType:
if (rank == 0) {
if (args.num_wgs > 1)
2024-10-28 13:37:33 -05:00
std::cout << "Tiled Blocking WG level Puts ###" << std::endl;
else std::cout << "Blocking WG level Puts ###" << std::endl;
}
2024-07-01 09:57:08 -05:00
testers.push_back(new ExtendedPrimitiveTester(args));
return testers;
case WGPutNBITestType:
if (rank == 0) {
if (args.num_wgs > 1)
2024-10-28 13:37:33 -05:00
std::cout << "Tiled Non-Blocking WG level Puts ###" << std::endl;
else std::cout << "Non-Blocking WG level Puts ###" << std::endl;
}
2024-07-01 09:57:08 -05:00
testers.push_back(new ExtendedPrimitiveTester(args));
return testers;
case PutNBIMRTestType:
if (rank == 0)
2024-10-28 13:37:33 -05:00
std::cout << "Non-Blocking Put message rate ###" << std::endl;
2024-07-01 09:57:08 -05:00
testers.push_back(new PrimitiveMRTester(args));
return testers;
case WAVEGetTestType:
if (rank == 0) {
if (args.num_wgs > 1 || numWarps > 1)
2024-10-28 13:37:33 -05:00
std::cout << "Tiled Blocking WAVE level Gets ###" << std::endl;
else std::cout << "Blocking WAVE level Gets ###" << std::endl;
}
testers.push_back(new WaveLevelPrimitiveTester(args));
return testers;
case WAVEGetNBITestType:
if (rank == 0) {
if (args.num_wgs > 1 || numWarps > 1)
2024-10-28 13:37:33 -05:00
std::cout << "Tiled Non-Blocking WAVE level Gets ###" << std::endl;
else std::cout << "Non-Blocking WAVE level Gets ###" << std::endl;
}
testers.push_back(new WaveLevelPrimitiveTester(args));
return testers;
case WAVEPutTestType:
if (rank == 0) {
if (args.num_wgs > 1 || numWarps > 1)
2024-10-28 13:37:33 -05:00
std::cout << "Tiled Blocking WAVE level Puts ###" << std::endl;
else std::cout << "Blocking WAVE level Puts ###" << std::endl;
}
testers.push_back(new WaveLevelPrimitiveTester(args));
return testers;
case WAVEPutNBITestType:
if (rank == 0) {
if (args.num_wgs > 1 || numWarps > 1)
2024-10-28 13:37:33 -05:00
std::cout << "Tiled Non-Blocking WAVE level Puts ###" << std::endl;
else std::cout << "Non-Blocking WAVE level Puts ###" << std::endl;
}
testers.push_back(new WaveLevelPrimitiveTester(args));
return testers;
2024-11-22 15:24:50 -06:00
case PutSignalTestType:
if (rank == 0) std::cout << "Putmem Signal ###" << std::endl;
testers.push_back(new SignalingOperationsTester(args));
return testers;
case WGPutSignalTestType:
if (rank == 0) std::cout << "WG Putmem Signal ###" << std::endl;
testers.push_back(new SignalingOperationsTester(args));
return testers;
case WAVEPutSignalTestType:
if (rank == 0) std::cout << "Wave Putmem Signal ###" << std::endl;
testers.push_back(new SignalingOperationsTester(args));
return testers;
case PutSignalNBITestType:
if (rank == 0) std::cout << "Non-Blocking Putmem Signal ###" << std::endl;
testers.push_back(new SignalingOperationsTester(args));
return testers;
case WGPutSignalNBITestType:
if (rank == 0) std::cout << "Non-Blocking WG Putmem Signal ###" << std::endl;
testers.push_back(new SignalingOperationsTester(args));
return testers;
case WAVEPutSignalNBITestType:
if (rank == 0) std::cout << "Non-Blocking Wave Putmem Signal ###" << std::endl;
testers.push_back(new SignalingOperationsTester(args));
return testers;
case SignalFetchTestType:
if (rank == 0) std::cout << "Signal Fetch ###" << std::endl;
testers.push_back(new SignalingOperationsTester(args));
return testers;
case WGSignalFetchTestType:
if (rank == 0) std::cout << "WG Signal Fetch ###" << std::endl;
testers.push_back(new SignalingOperationsTester(args));
return testers;
case WAVESignalFetchTestType:
if (rank == 0) std::cout << "Wave Signal Fetch ###" << std::endl;
testers.push_back(new SignalingOperationsTester(args));
return testers;
2024-07-01 09:57:08 -05:00
default:
if (rank == 0) std::cout << "Empty Test ###" << std::endl;
2024-07-01 09:57:08 -05:00
return testers;
}
return testers;
}
void Tester::execute() {
if (_type == InitTestType) return;
int num_loops = args.loop;
/**
* Some tests loop through data sizes in powers of 2 and report the
* results for those ranges.
*/
for (uint64_t size = args.min_msg_size; size <= args.max_msg_size;
size <<= 1) {
resetBuffers(size);
/**
* Restricts the number of iterations of really large messages.
*/
if (size > args.large_message_size) num_loops = args.loop_large;
barrier();
preLaunchKernel();
/**
* This conditional launches the HIP kernel.
*
* Some tests may only launch a single kernel. These kernels will
* be kicked off by the initiator (denoted by the args.myid check).
*
* Other tests will initiate of both sides and launch from both
* rocshmem pes.
*/
if (peLaunchesKernel()) {
/**
* TODO:
* Verify that this timer type is actually uint64_t on the
* device side.
*/
memset(timer, 0, sizeof(uint64_t) * args.num_wgs);
const dim3 blockSize(args.wg_size, 1, 1);
const dim3 gridSize(args.num_wgs, 1, 1);
2024-07-02 10:07:43 -07:00
CHECK_HIP(hipEventRecord(start_event, stream));
2024-07-01 09:57:08 -05:00
launchKernel(gridSize, blockSize, num_loops, size);
2024-07-02 10:07:43 -07:00
CHECK_HIP(hipEventRecord(stop_event, stream));
2024-07-01 09:57:08 -05:00
hipError_t err = hipStreamSynchronize(stream);
if (err != hipSuccess) {
printf("error = %d \n", err);
}
2024-11-25 14:12:15 -06:00
// rocshmem_dump_stats();
// rocshmem_reset_stats();
2024-07-01 09:57:08 -05:00
}
barrier();
postLaunchKernel();
// data validation
verifyResults(size);
/**
* Adjust size for *_wg and *_wave functions
*/
uint64_t size_ = size;
TestType type = (TestType)args.algorithm;
switch (type) {
case WAVEGetTestType:
case WAVEGetNBITestType:
case WAVEPutTestType:
case WAVEPutNBITestType:
size_ *= (args.num_wgs * num_warps);
break;
case WGGetTestType:
case WGGetNBITestType:
case WGPutTestType:
case WGPutNBITestType:
size_ *= args.num_wgs;
break;
default:
break;
}
2024-07-01 09:57:08 -05:00
barrier();
if (_type != TeamCtxInfraTestType) {
print(size_);
2024-07-01 09:57:08 -05:00
}
}
}
bool Tester::peLaunchesKernel() {
bool is_launcher;
/**
* The PE assigned 0 is always active in these tests.
*/
is_launcher = args.myid == 0;
/**
* Some test types are active on both sides.
*/
is_launcher = is_launcher || (_type == TeamReductionTestType) ||
2024-12-21 18:16:42 +00:00
(_type == TeamBroadcastTestType) || (_type == TeamCtxInfraTestType) ||
2024-07-01 09:57:08 -05:00
(_type == AllToAllTestType) || (_type == FCollectTestType) ||
(_type == PingPongTestType) || (_type == BarrierAllTestType) ||
(_type == SyncTestType) || (_type == SyncAllTestType) ||
(_type == RandomAccessTestType) || (_type == PingAllTestType);
return is_launcher;
}
void Tester::print(uint64_t size) {
if (args.myid != 0) {
return;
}
uint64_t timer_avg = timerAvgInMicroseconds();
double latency_avg = static_cast<double>(timer_avg) / num_timed_msgs;
double avg_msg_rate = num_timed_msgs / (timer_avg / 1e6);
float total_kern_time_ms;
2024-07-02 10:07:43 -07:00
CHECK_HIP(hipEventElapsedTime(&total_kern_time_ms, start_event, stop_event));
2024-07-01 09:57:08 -05:00
float total_kern_time_s = total_kern_time_ms / 1000;
double bandwidth_avg_gbs =
num_msgs * size * bw_factor / total_kern_time_s / pow(2, 30);
int field_width = 20;
int float_precision = 2;
2024-10-28 13:37:33 -05:00
if (_print_header) {
printf("%-*s%*s%*s%*s",
10, "# Size (B)",
field_width, "Latency (us)",
field_width, "Bandwidth (GB/s)",
field_width + 1, "Msg Rate (Msg/s)\n");
_print_header = 0;
}
2024-07-01 09:57:08 -05:00
2024-10-28 13:37:33 -05:00
printf("%-*lu%*.*f%*.*f%*.*f\n",
10, size,
field_width, float_precision, latency_avg,
field_width, float_precision, bandwidth_avg_gbs,
field_width, float_precision, avg_msg_rate);
2024-07-01 09:57:08 -05:00
fflush(stdout);
}
void flush_hdp() {
int hip_dev_id{};
unsigned int* hdp_flush_ptr_{nullptr};
2024-07-02 10:07:43 -07:00
CHECK_HIP(hipGetDevice(&hip_dev_id));
CHECK_HIP(hipDeviceGetAttribute(reinterpret_cast<int*>(&hdp_flush_ptr_),
hipDeviceAttributeHdpMemFlushCntl, hip_dev_id));
2024-07-01 09:57:08 -05:00
__atomic_store_n(hdp_flush_ptr_, 0x1, __ATOMIC_SEQ_CST);
}
void Tester::barrier() {
MPI_Barrier(MPI_COMM_WORLD);
flush_hdp();
}
uint64_t Tester::gpuCyclesToMicroseconds(uint64_t cycles) {
/**
* The dGPU asm core timer runs at 27MHz. This is different from the
* core clock returned by HIP. For an APU, this is different and might
* need adjusting.
*/
uint64_t gpu_frequency_MHz = 27;
/**
* hipDeviceGetAttribute(&gpu_frequency_khz,
* hipDeviceAttributeClockRate,
* 0);
*/
return cycles / gpu_frequency_MHz;
}
uint64_t Tester::timerAvgInMicroseconds() {
uint64_t sum = 0;
/**
* TODO: (bpotter/avinash) Modify the calcuation for the Tiled version of
* puts and gets at wavefront level
*/
for (uint64_t i = 0; i < args.num_wgs; i++) {
2024-07-01 09:57:08 -05:00
sum += gpuCyclesToMicroseconds(timer[i]);
}
return sum / args.num_wgs;
}