[hip-tests] Tag multigpu tests with Catch2 tags (#1315)

Šī revīzija ir iekļauta:
amilanov-amd
2025-11-14 13:00:30 +01:00
revīziju iesūtīja GitHub
vecāks f7249e092b
revīzija 738bf16008
146 mainīti faili ar 425 papildinājumiem un 316 dzēšanām
@@ -54,8 +54,9 @@ THE SOFTWARE.
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Peer_GPUs", "", int, unsigned int, unsigned long,
unsigned long long, float, double) {
TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Peer_GPUs", "[multigpu]",
int, unsigned int, unsigned long, unsigned long long, float,
double) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
const auto cache_line_size = 128u;
@@ -103,8 +104,9 @@ TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Peer_GPUs", "", int, unsigned
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_GPU", "", int, unsigned int,
unsigned long, unsigned long long, float, double) {
TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_GPU", "[multigpu]",
int, unsigned int, unsigned long, unsigned long long, float,
double) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
const auto cache_line_size = 128u;
@@ -152,8 +154,9 @@ TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_GPU", "", int, unsig
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
unsigned long, unsigned long long, float, double) {
TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs",
"[multigpu]", int, unsigned int, unsigned long,
unsigned long long, float, double) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
const auto cache_line_size = 128u;
@@ -45,8 +45,9 @@ THE SOFTWARE.
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
unsigned long, unsigned long long) {
TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Same_Address",
"[multigpu]", int, unsigned int, unsigned long,
unsigned long long) {
for (auto current = 0; current < 1; ++current) {
DYNAMIC_SECTION("Same address " << current) {
Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kAndSystem>(
@@ -68,8 +69,9 @@ TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Same_Address", "",
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
unsigned int, unsigned long, unsigned long long) {
TEMPLATE_TEST_CASE(
"Unit_atomicAnd_system_Positive_Peer_GPUs_Adjacent_Addresses", "[multigpu]",
int, unsigned int, unsigned long, unsigned long long) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
@@ -94,8 +96,9 @@ TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Adjacent_Addresses"
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
unsigned int, unsigned long, unsigned long long) {
TEMPLATE_TEST_CASE(
"Unit_atomicAnd_system_Positive_Peer_GPUs_Scattered_Addresses",
"[multigpu]", int, unsigned int, unsigned long, unsigned long long) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
const auto cache_line_size = 128u;
@@ -60,8 +60,9 @@ THE SOFTWARE.
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Peer_GPUs", "", int, unsigned int,
unsigned long long, unsigned short int TYPES) {
TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Peer_GPUs", "[multigpu]",
int, unsigned int, unsigned long long,
unsigned short int TYPES) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
const auto cache_line_size = 128u;
@@ -105,8 +106,9 @@ TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Peer_GPUs", "", int, unsigned
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_GPU", "", int, unsigned int,
unsigned long long, unsigned short int TYPES) {
TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_GPU", "[multigpu]",
int, unsigned int, unsigned long long,
unsigned short int TYPES) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
const auto cache_line_size = 128u;
@@ -155,8 +157,9 @@ TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_GPU", "", int, unsig
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
unsigned long long, unsigned short int TYPES) {
TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs",
"[multigpu]", int, unsigned int, unsigned long long,
unsigned short int TYPES) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
const auto cache_line_size = 128u;
@@ -55,11 +55,12 @@ THE SOFTWARE.
* - HIP_VERSION >= 5.2
*/
#if HT_NVIDIA
TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Peer_GPUs", "", int, unsigned int,
unsigned long long, float) {
TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Peer_GPUs", "[multigpu]",
int, unsigned int, unsigned long long, float) {
#else
TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Peer_GPUs", "", int, unsigned int,
unsigned long, unsigned long long, float, double) {
TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Peer_GPUs", "[multigpu]",
int, unsigned int, unsigned long, unsigned long long, float,
double) {
#endif // HT_NVIDIA
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
@@ -109,12 +110,13 @@ TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Peer_GPUs", "", int, unsigne
* - HIP_VERSION >= 5.2
*/
#if HT_NVIDIA
TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_GPU", "", int, unsigned int,
unsigned long long, float) {
TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_GPU", "[multigpu]",
int, unsigned int, unsigned long long, float) {
#else
TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_GPU", "", int, unsigned int,
unsigned long, unsigned long long, float, double) {
#endif // HT_NVIDIA
TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_GPU", "[multigpu]",
int, unsigned int, unsigned long, unsigned long long, float,
double) {
#endif // HT_NVIDIA
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
const auto cache_line_size = 128u;
@@ -164,11 +166,12 @@ TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_GPU", "", int, unsi
* - HIP_VERSION >= 5.2
*/
#if HT_NVIDIA
TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
unsigned long long, float) {
TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_Peer_GPUs",
"[multigpu]", int, unsigned int, unsigned long long, float) {
#else
TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
unsigned long, unsigned long long, float, double) {
TEMPLATE_TEST_CASE("Unit_atomicExch_system_Positive_Host_And_Peer_GPUs",
"[multigpu]", int, unsigned int, unsigned long,
unsigned long long, float, double) {
#endif // HT_NVIDIA
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
@@ -46,11 +46,13 @@ THE SOFTWARE.
* - HIP_VERSION >= 5.2
*/
#if HT_AMD
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
unsigned long, unsigned long long, float, double) {
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address",
"[multigpu]", int, unsigned int, unsigned long,
unsigned long long, float, double) {
#else
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
unsigned long, unsigned long long) {
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address",
"[multigpu]", int, unsigned int, unsigned long,
unsigned long long) {
#endif
for (auto current = 0; current < 1; ++current) {
DYNAMIC_SECTION("Same address " << current) {
@@ -74,11 +76,13 @@ TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "",
* - HIP_VERSION >= 5.2
*/
#if HT_AMD
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
unsigned int, unsigned long, unsigned long long, float, double) {
TEMPLATE_TEST_CASE(
"Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "[multigpu]",
int, unsigned int, unsigned long, unsigned long long, float, double) {
#else
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
unsigned int, unsigned long, unsigned long long) {
TEMPLATE_TEST_CASE(
"Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "[multigpu]",
int, unsigned int, unsigned long, unsigned long long) {
#endif
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
@@ -105,11 +109,14 @@ TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses"
* - HIP_VERSION >= 5.2
*/
#if HT_AMD
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
unsigned int, unsigned long, unsigned long long, float, double) {
TEMPLATE_TEST_CASE(
"Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses",
"[multigpu]", int, unsigned int, unsigned long, unsigned long long, float,
double) {
#else
TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
unsigned int, unsigned long, unsigned long long) {
TEMPLATE_TEST_CASE(
"Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses",
"[multigpu]", int, unsigned int, unsigned long, unsigned long long) {
#endif
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
@@ -46,11 +46,13 @@ THE SOFTWARE.
* - HIP_VERSION >= 5.2
*/
#if HT_AMD
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
unsigned long, unsigned long long, float, double) {
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address",
"[multigpu]", int, unsigned int, unsigned long,
unsigned long long, float, double) {
#else
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
unsigned long, unsigned long long) {
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address",
"[multigpu]", int, unsigned int, unsigned long,
unsigned long long) {
#endif
for (auto current = 0; current < 1; ++current) {
DYNAMIC_SECTION("Same address " << current) {
@@ -74,11 +76,13 @@ TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "",
* - HIP_VERSION >= 5.2
*/
#if HT_AMD
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
unsigned int, unsigned long, unsigned long long, float, double) {
TEMPLATE_TEST_CASE(
"Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "[multigpu]",
int, unsigned int, unsigned long, unsigned long long, float, double) {
#else
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
unsigned int, unsigned long, unsigned long long) {
TEMPLATE_TEST_CASE(
"Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "[multigpu]",
int, unsigned int, unsigned long, unsigned long long) {
#endif
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
@@ -105,11 +109,14 @@ TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses"
* - HIP_VERSION >= 5.2
*/
#if HT_AMD
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
unsigned int, unsigned long, unsigned long long, float, double) {
TEMPLATE_TEST_CASE(
"Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses",
"[multigpu]", int, unsigned int, unsigned long, unsigned long long, float,
double) {
#else
TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
unsigned int, unsigned long, unsigned long long) {
TEMPLATE_TEST_CASE(
"Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses",
"[multigpu]", int, unsigned int, unsigned long, unsigned long long) {
#endif
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
@@ -45,8 +45,9 @@ THE SOFTWARE.
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
unsigned long, unsigned long long) {
TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Same_Address",
"[multigpu]", int, unsigned int, unsigned long,
unsigned long long) {
for (auto current = 0; current < 1; ++current) {
DYNAMIC_SECTION("Same address " << current) {
Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kOrSystem>(
@@ -68,8 +69,9 @@ TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Same_Address", "", i
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
unsigned int, unsigned long, unsigned long long) {
TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Adjacent_Addresses",
"[multigpu]", int, unsigned int, unsigned long,
unsigned long long) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
@@ -94,8 +96,9 @@ TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Adjacent_Addresses",
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
unsigned int, unsigned long, unsigned long long) {
TEMPLATE_TEST_CASE(
"Unit_atomicOr_system_Positive_Peer_GPUs_Scattered_Addresses", "[multigpu]",
int, unsigned int, unsigned long, unsigned long long) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
const auto cache_line_size = 128u;
@@ -54,8 +54,9 @@ THE SOFTWARE.
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Peer_GPUs", "", int, unsigned int, unsigned long,
unsigned long long, float, double) {
TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Peer_GPUs", "[multigpu]",
int, unsigned int, unsigned long, unsigned long long, float,
double) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
const auto cache_line_size = 128u;
@@ -103,8 +104,9 @@ TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Peer_GPUs", "", int, unsigned
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_GPU", "", int, unsigned int,
unsigned long, unsigned long long, float, double) {
TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_GPU", "[multigpu]",
int, unsigned int, unsigned long, unsigned long long, float,
double) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
const auto cache_line_size = 128u;
@@ -152,8 +154,9 @@ TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_GPU", "", int, unsig
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
unsigned long, unsigned long long, float, double) {
TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_Peer_GPUs",
"[multigpu]", int, unsigned int, unsigned long,
unsigned long long, float, double) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
const auto cache_line_size = 128u;
@@ -45,8 +45,9 @@ THE SOFTWARE.
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int,
unsigned long, unsigned long long) {
TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Same_Address",
"[multigpu]", int, unsigned int, unsigned long,
unsigned long long) {
for (auto current = 0; current < 1; ++current) {
DYNAMIC_SECTION("Same address " << current) {
Bitwise::MultipleDeviceMultipleKernelTest<TestType, Bitwise::AtomicOperation::kXorSystem>(
@@ -68,8 +69,9 @@ TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Same_Address", "",
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int,
unsigned int, unsigned long, unsigned long long) {
TEMPLATE_TEST_CASE(
"Unit_atomicXor_system_Positive_Peer_GPUs_Adjacent_Addresses", "[multigpu]",
int, unsigned int, unsigned long, unsigned long long) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
@@ -94,8 +96,9 @@ TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Adjacent_Addresses"
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Scattered_Addresses", "", int,
unsigned int, unsigned long, unsigned long long) {
TEMPLATE_TEST_CASE(
"Unit_atomicXor_system_Positive_Peer_GPUs_Scattered_Addresses",
"[multigpu]", int, unsigned int, unsigned long, unsigned long long) {
int warp_size = 0;
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
const auto cache_line_size = 128u;
@@ -65,7 +65,8 @@ TEST_CASE("Unit_hipGetStreamDeviceId_Positive_Threaded_Basic") {
* - Platform specific (AMD)
* - Multithreaded GPU
*/
TEST_CASE("Unit_hipGetStreamDeviceId_Positive_Multithreaded_Basic") {
TEST_CASE("Unit_hipGetStreamDeviceId_Positive_Multithreaded_Basic",
"[multigpu]") {
const unsigned int max_threads = std::thread::hardware_concurrency();
const int device_count = HipTest::getDeviceCount();
@@ -378,7 +378,7 @@ template <typename F> static void test_cg_multi_grid_group_type(F kernel_func, i
}
}
TEST_CASE("Unit_hipCGMultiGridGroupType_Basic") {
TEST_CASE("Unit_hipCGMultiGridGroupType_Basic", "[multigpu]") {
int num_devices = 0;
HIP_CHECK(hipGetDeviceCount(&num_devices));
num_devices = min(num_devices, MaxGPUs);
@@ -425,7 +425,7 @@ TEST_CASE("Unit_hipCGMultiGridGroupType_Basic") {
}
}
TEST_CASE("Unit_hipCGMultiGridGroupType_Barrier") {
TEST_CASE("Unit_hipCGMultiGridGroupType_Barrier", "[multigpu]") {
int num_devices = 0;
uint32_t loops = GENERATE(1, 2, 3, 4);
uint32_t warps = GENERATE(4, 8, 16, 32);
@@ -130,7 +130,7 @@ __global__ void test_gws(uint* buf, uint buf_size, long* tmp_buf, long* result)
}
}
TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Basic") {
TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Basic", "[multigpu]") {
constexpr uint num_kernel_args = 4;
int device_num = 0;
@@ -154,7 +154,7 @@ static void get_multi_grid_dims(dim3& grid_dim, dim3& block_dim, unsigned int de
* - HIP_VERSION >= 5.2
* - Devices support cooperative multi device launch
*/
TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Basic") {
TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Basic", "[multigpu]") {
int num_devices = 0;
HIP_CHECK(hipGetDeviceCount(&num_devices));
num_devices = min(num_devices, kMaxGPUs);
@@ -302,7 +302,7 @@ TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Basic") {
* - HIP_VERSION >= 5.2
* - Devices support cooperative multi device launch
*/
TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Base_Type") {
TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Base_Type", "[multigpu]") {
int num_devices = 0;
HIP_CHECK(hipGetDeviceCount(&num_devices));
num_devices = min(num_devices, kMaxGPUs);
@@ -423,7 +423,8 @@ TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Base_Type") {
* - HIP_VERSION >= 5.2
* - Devices support cooperative multi device launch
*/
TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Non_Member_Functions") {
TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Non_Member_Functions",
"[multigpu]") {
int num_devices = 0;
HIP_CHECK(hipGetDeviceCount(&num_devices));
num_devices = min(num_devices, kMaxGPUs);
@@ -535,7 +536,7 @@ TEST_CASE("Unit_Multi_Grid_Group_Getters_Positive_Non_Member_Functions") {
* - HIP_VERSION >= 5.2
* - Devices support cooperative multi device launch
*/
TEST_CASE("Unit_Multi_Grid_Group_Positive_Sync") {
TEST_CASE("Unit_Multi_Grid_Group_Positive_Sync", "[multigpu]") {
CHECK_IMAGE_SUPPORT
int num_devices = 0;
HIP_CHECK(hipGetDeviceCount(&num_devices));
@@ -46,7 +46,7 @@ THE SOFTWARE.
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipDeviceEnableDisablePeerAccess_positive") {
TEST_CASE("Unit_hipDeviceEnableDisablePeerAccess_positive", "[multigpu]") {
int canAccessPeer = 0;
int deviceCount = HipTest::getGeviceCount();
if (deviceCount < 2) {
@@ -95,7 +95,7 @@ TEST_CASE("Unit_hipDeviceEnableDisablePeerAccess_positive") {
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipDeviceEnablePeerAccess_negative") {
TEST_CASE("Unit_hipDeviceEnablePeerAccess_negative", "[multigpu]") {
int deviceCount = HipTest::getGeviceCount();
if (deviceCount < 2) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
@@ -159,7 +159,7 @@ TEST_CASE("Unit_hipDeviceEnablePeerAccess_negative") {
* - Multi-device
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipDeviceDisablePeerAccess_negative") {
TEST_CASE("Unit_hipDeviceDisablePeerAccess_negative", "[multigpu]") {
int deviceCount = HipTest::getGeviceCount();
if (deviceCount < 2) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
@@ -210,7 +210,8 @@ static inline std::vector<int> parseVisibleDevices() {
* ------------------------
* - HIP_VERSION >= 5.7
*/
TEST_CASE("Unit_hipDeviceName_gcnArchName_And_rocm_agent_enumerator") {
TEST_CASE("Unit_hipDeviceName_gcnArchName_And_rocm_agent_enumerator",
"[multigpu]") {
int deviceCount = 0;
HIP_CHECK(hipGetDeviceCount(&deviceCount));
if (deviceCount <= 0) {
@@ -145,7 +145,7 @@ static inline std::vector<int> parseVisibleDevices() {
* ------------------------
* - HIP_VERSION >= 5.7
*/
TEST_CASE("Unit_hipDeviceGetUuid_From_RocmInfo") {
TEST_CASE("Unit_hipDeviceGetUuid_From_RocmInfo", "[multigpu]") {
int deviceCount = 0;
HIP_CHECK(hipGetDeviceCount(&deviceCount));
assert(deviceCount > 0);
@@ -219,7 +219,8 @@ TEST_CASE("Unit_hipDeviceGetUuid_From_RocmInfo") {
*/
// Guarded against NVIDIA because this test is failing on that platform.
#if HT_AMD
TEST_CASE("Unit_hipDeviceGetUuid_VerifyUuidFrm_hipGetDeviceProperties") {
TEST_CASE("Unit_hipDeviceGetUuid_VerifyUuidFrm_hipGetDeviceProperties",
"[multigpu]") {
int deviceCount = 0;
hipDevice_t device;
hipDeviceProp_t prop;
@@ -462,7 +462,7 @@ void getMinMaxCurrentAndSetCurrent() {
* ------------------------
* - HIP_VERSION >= 6.5
*/
TEST_CASE("Unit_hipDeviceGetSetLimit_Scratch_MultiDevice") {
TEST_CASE("Unit_hipDeviceGetSetLimit_Scratch_MultiDevice", "[multigpu]") {
int deviceCount = 0;
HIP_CHECK(hipGetDeviceCount(&deviceCount));
if (deviceCount < 2) {
@@ -65,7 +65,7 @@ static bool testSetLimitFunc(hipLimit_t limit_to_test) {
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipDeviceSetLimit_SetGet") {
TEST_CASE("Unit_hipDeviceSetLimit_SetGet", "[multigpu]") {
size_t value = 0;
// Scenario1
SECTION("Set Get Test hipLimitStackSize") {
@@ -115,7 +115,7 @@ TEST_CASE("Unit_hipDeviceTotalMem_ValidateTotalMem") {
* - Multi-device test
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipDeviceTotalMem_NonSelectedDevice") {
TEST_CASE("Unit_hipDeviceTotalMem_NonSelectedDevice", "[multigpu]") {
auto deviceCount = HipTest::getDeviceCount();
if (deviceCount < 2) {
HipTest::HIP_SKIP_TEST("Multi Device Test, will not run on single gpu systems. Skipping.");
@@ -148,7 +148,7 @@ static void validateDeviceMacro(int* archProp_h, hipDeviceProp_t* prop) {
* - Platform specific (AMD)
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGetDeviceProperties_ArchPropertiesTst") {
TEST_CASE("Unit_hipGetDeviceProperties_ArchPropertiesTst", "[multigpu]") {
int *archProp_h, *archProp_d;
archProp_h = new int[NUM_OF_ARCHPROP];
hipDeviceProp_t prop;
@@ -372,7 +372,7 @@ TEST_CASE("Unit_hipGetProcAddress_ValidateDeviceApis") {
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipGetProcAddress_PeerDeviceAccessAPIs") {
TEST_CASE("Unit_hipGetProcAddress_PeerDeviceAccessAPIs", "[multigpu]") {
void* hipDeviceCanAccessPeer_ptr = nullptr;
void* hipSetDevice_ptr = nullptr;
void* hipGetDevice_ptr = nullptr;
@@ -453,7 +453,7 @@ bool CheckMemPoolSupport(const int device) {
return true;
}
TEST_CASE("Unit_hipGetProcAddress_SetGetMemPoolAPIs") {
TEST_CASE("Unit_hipGetProcAddress_SetGetMemPoolAPIs", "[multigpu]") {
void* hipDeviceSetMemPool_ptr = nullptr;
void* hipDeviceGetMemPool_ptr = nullptr;
int currentHipVersion = 0;
@@ -44,7 +44,7 @@ THE SOFTWARE.
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipSetDevice_BasicSetGet") {
TEST_CASE("Unit_hipSetDevice_BasicSetGet", "[multigpu]") {
int numDevices = 0;
int device{};
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -73,7 +73,7 @@ TEST_CASE("Unit_hipSetDevice_BasicSetGet") {
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGetSetDevice_MultiThreaded") {
TEST_CASE("Unit_hipGetSetDevice_MultiThreaded", "[multigpu]") {
auto maxThreads = std::thread::hardware_concurrency();
auto deviceCount = HipTest::getDeviceCount();
@@ -126,7 +126,7 @@ TEST_CASE("Unit_hipGetSetDevice_MultiThreaded") {
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipSetGetDevice_Positive_Threaded_Basic") {
TEST_CASE("Unit_hipSetGetDevice_Positive_Threaded_Basic", "[multigpu]") {
class HipSetGetDeviceThreadedTest : public ThreadedZigZagTest<HipSetGetDeviceThreadedTest> {
public:
void TestPart1() { HIP_CHECK(hipSetDevice(0)); }
@@ -158,7 +158,7 @@ TEST_CASE("Unit_hipSetValidDevices_Negative_Length_Lessthan_DeviceArrSize") {
* ------------------------
* - HIP_VERSION >= 7.1
*/
TEST_CASE("Unit_hipSetValidDevices_Positive_Basic") {
TEST_CASE("Unit_hipSetValidDevices_Positive_Basic", "[multigpu]") {
int totalDevices = HipTest::getDeviceCount();
if (totalDevices < 2) {
HipTest::HIP_SKIP_TEST("This test requires 2 or more GPUs. Skipping.");
@@ -51,7 +51,7 @@ __global__ void gpu_round_robin(const int id, const int num_dev, const int num_i
round_robin(id, num_dev, num_iter, data, flag);
}
TEST_CASE("Unit_threadfence_system") {
TEST_CASE("Unit_threadfence_system", "[multigpu]") {
int num_gpus = 0;
HIP_CHECK(hipGetDeviceCount(&num_gpus));
REQUIRE(num_gpus > 0);
@@ -93,7 +93,7 @@ TEST_CASE("Unit_hipExtGetLastError_Positive_Threaded") {
*  - HIP_VERSION >= 6.4
*/
TEST_CASE("Unit_hipExtGetLastError_with_hipMemcpyPeerAsync") {
TEST_CASE("Unit_hipExtGetLastError_with_hipMemcpyPeerAsync", "[multigpu]") {
const auto device_count = HipTest::getDeviceCount();
if (device_count < 2) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
@@ -94,7 +94,7 @@ TEST_CASE("Unit_hipGetLastError_Positive_Threaded") {
*  - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipGetLastError_with_hipMemcpyPeerAsync") {
TEST_CASE("Unit_hipGetLastError_with_hipMemcpyPeerAsync", "[multigpu]") {
const auto device_count = HipTest::getDeviceCount();
if (device_count < 2) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
@@ -102,7 +102,7 @@ TEST_CASE("Unit_hipGetLastError_KernelFailure_ValidAndInvalidOperations") {
* ------------------------
* - HIP_VERSION >= 7.0
*/
TEST_CASE("Unit_hipGetLastError_KernelFailure_TwoDevices") {
TEST_CASE("Unit_hipGetLastError_KernelFailure_TwoDevices", "[multigpu]") {
int deviceCount = 0;
HIP_CHECK(hipGetDeviceCount(&deviceCount));
if (deviceCount < 2) {
@@ -108,7 +108,7 @@ TEST_CASE("Unit_hipEventElapsedTime_DisableTiming") {
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipEventElapsedTime_DifferentDevices") {
TEST_CASE("Unit_hipEventElapsedTime_DifferentDevices", "[multigpu]") {
int devCount = 0;
HIP_CHECK(hipGetDeviceCount(&devCount));
if (devCount > 1) {
@@ -217,7 +217,7 @@ TEST_CASE("Unit_hipEventMGpuMThreads_1") { testEventMGpuMThreads(1); }
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipEventMGpuMThreads_2") {
TEST_CASE("Unit_hipEventMGpuMThreads_2", "[multigpu]") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
@@ -238,7 +238,7 @@ TEST_CASE("Unit_hipEventMGpuMThreads_2") {
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipEventMGpuMThreads_3") {
TEST_CASE("Unit_hipEventMGpuMThreads_3", "[multigpu]") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices > 1) {
@@ -41,7 +41,7 @@ THE SOFTWARE.
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipEventQuery_DifferentDevice") {
TEST_CASE("Unit_hipEventQuery_DifferentDevice", "[multigpu]") {
hipEvent_t event1{}, event2{};
HIP_CHECK(hipEventCreate(&event1));
HIP_CHECK(hipEventCreate(&event2));
@@ -158,7 +158,7 @@ TEST_CASE("Unit_hipEventRecord") {
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipEventRecord_Negative") {
TEST_CASE("Unit_hipEventRecord_Negative", "[multigpu]") {
SECTION("Nullptr event") {
HIP_CHECK_ERROR(hipEventRecord(nullptr, nullptr), hipErrorInvalidResourceHandle);
}
@@ -27,7 +27,8 @@ THE SOFTWARE.
#include <resource_guards.hh>
#include <utils.hh>
TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Positive_Basic") {
TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Positive_Basic",
"[multigpu]") {
const auto device_count = HipTest::getDeviceCount();
std::vector<hipLaunchParams> params_list(device_count);
@@ -54,7 +55,8 @@ TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Positive_Basic") {
}
}
TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters") {
TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters",
"[multigpu]") {
const auto device_count = HipTest::getDeviceCount();
std::vector<hipLaunchParams> params_list(device_count);
@@ -27,7 +27,8 @@ THE SOFTWARE.
#include <resource_guards.hh>
#include <utils.hh>
TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Positive_Basic") {
TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Positive_Basic",
"[multigpu]") {
if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
return;
@@ -59,7 +60,8 @@ TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Positive_Basic") {
}
}
TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Negative_Parameters") {
TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Negative_Parameters",
"[multigpu]") {
if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
return;
@@ -309,7 +309,8 @@ TEST_CASE("Unit_hipDeviceGetGraphMemAttribute_Functional") {
Unit_hipDeviceGetGraphMemAttribute_Functional();
}
TEST_CASE("Unit_hipDeviceGetGraphMemAttribute_Functional_Multi_Device") {
TEST_CASE("Unit_hipDeviceGetGraphMemAttribute_Functional_Multi_Device",
"[multigpu]") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -368,7 +368,7 @@ TEST_CASE("Unit_hipDrvGraphAddMemcpyNode_test") {
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipDrvGraphAddMemcpyNode_MulitDevice") {
TEST_CASE("Unit_hipDrvGraphAddMemcpyNode_MulitDevice", "[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
@@ -1083,7 +1083,7 @@ TEST_CASE("Unit_hipGraphAddChildGraphNode_MultGraphsAsSingleGraph") {
in a multi-GPU environment. Create one nested graph per GPU context. Execute
all the created graphs on their respective GPUs and validate the output.
*/
TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_MultGPU") {
TEST_CASE("Unit_hipGraphAddChildGraphNode_CmplxNstGrph_MultGPU", "[multigpu]") {
int devcount = 0;
HIP_CHECK(hipGetDeviceCount(&devcount));
// If only single GPU is detected then return
@@ -584,7 +584,7 @@ TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_1") {
* ------------------------
*  - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_2") {
TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_2", "[multigpu]") {
int mem_pool_support = 0;
HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
if (!mem_pool_support) {
@@ -653,7 +653,7 @@ TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_2") {
* ------------------------
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_3") {
TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_3", "[multigpu]") {
int mem_pool_support = 0;
HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
if (!mem_pool_support) {
@@ -727,7 +727,7 @@ TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_3") {
* ------------------------
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_4") {
TEST_CASE("Unit_hipGraphAddMemAllocNode_Functional_4", "[multigpu]") {
int mem_pool_support = 0;
HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
if (!mem_pool_support) {
@@ -115,7 +115,7 @@ static void validateMemcpyNode1DArray(bool peerAccess,
* For Peer device test: Memory allocations happen on device(0) and memcpy operations
* are performed from device(1).
*/
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Functional") {
TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Functional", "[multigpu]") {
SECTION("Memcpy with 1D array on default device") { validateMemcpyNode1DArray(false); }
SECTION("Memcpy with 1D array using DeviceToDeviceNoCU") {
validateMemcpyNode1DArray(false, hipMemcpyDeviceToDeviceNoCU);
@@ -255,7 +255,8 @@ in GPU-0 and add the MemcpyNodeFromSymbol node to the graph and
verifying the result in GPU-1
*/
#if HT_NVIDIA
TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryPeerDevice") {
TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryPeerDevice",
"[multigpu]") {
int numDevices = 0;
int canAccessPeer = 0;
if (numDevices > 1) {
@@ -276,7 +277,8 @@ in GPU-0 and add the MemcpyNodeFromSymbol node to the graph and
verifying the result in GPU-1
*/
TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemoryPeerDevice") {
TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemoryPeerDevice",
"[multigpu]") {
int numDevices = 0;
int canAccessPeer = 0;
if (numDevices > 1) {
@@ -257,7 +257,8 @@ This testcase verifies allocating global const symbol memory and device variable
in GPU-0 and add the MemcpyNodeToSymbol node to the graph and
verifying the result in GPU-1
*/
TEST_CASE("Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemoryPeerDevice") {
TEST_CASE("Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemoryPeerDevice",
"[multigpu]") {
int numDevices = 0;
int canAccessPeer = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -278,7 +279,8 @@ This testcaser verifies allocating global memory,
Add MemcpyToSymbolNode,KernelNode and memcpynode and validating
the behaviour
*/
TEST_CASE("Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") {
TEST_CASE("Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel",
"[multigpu]") {
constexpr size_t Nbytes = SIZE * sizeof(int);
constexpr auto blocksPerCU = 6; // to hide latency
constexpr auto threadsPerBlock = 256;
@@ -484,7 +484,7 @@ TEST_CASE("Unit_hipGraphAddMemcpyNode_BasicFunctional") {
* are performed from device(1).
* Tests also verify memcpy node addition with 1D, 2D and 3D objects.
*/
TEST_CASE("Unit_hipGraphAddMemcpyNode_PeerAccessFunctional") {
TEST_CASE("Unit_hipGraphAddMemcpyNode_PeerAccessFunctional", "[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices{}, peerAccess{};
@@ -229,7 +229,7 @@ This testcase verifies following scenarios
validate the result of the cloned graph
3. Device context change for cloned graph
*/
TEST_CASE("Unit_hipGraphClone_Functional") {
TEST_CASE("Unit_hipGraphClone_Functional", "[multigpu]") {
SECTION("hipGraphClone Basic Functionality") { hipGraphClone_Func(); }
SECTION("hipGraphClone Modify Original graph") { hipGraphClone_Func(true); }
@@ -1490,7 +1490,7 @@ TEST_CASE("Unit_hipGraphClone_Test_hipGraphEventWaitNodeSetEvent_and_Exec") {
Execute both original graph and cloned graph in loop: with multiple device.
Loop: Update input data -> Launch Graph -> Validate output data -> Goto Loop */
TEST_CASE("Unit_hipGraphClone_address_change_in_loop") {
TEST_CASE("Unit_hipGraphClone_address_change_in_loop", "[multigpu]") {
constexpr size_t Nbytes = N * sizeof(int);
constexpr auto blocksPerCU = 6; // to hide latency
constexpr auto threadsPerBlock = 256;
@@ -1644,7 +1644,7 @@ static void hipGraphClone_address_change_in_thread(hipGraph_t* graph, hipGraphNo
memory addresses in each Node and create executable graphs.
Launch the graphs in their respective GPUs. Validate the outputs. */
TEST_CASE("Unit_hipGraphClone_address_change_in_thread") {
TEST_CASE("Unit_hipGraphClone_address_change_in_thread", "[multigpu]") {
constexpr size_t Nbytes = N * sizeof(int);
constexpr auto blocksPerCU = 6; // to hide latency
constexpr auto threadsPerBlock = 256;
@@ -1735,7 +1735,7 @@ static void hipGraphClone_Test_All_API(int dev) {
Create a graph with Memcpy and Kernel nodes. and its cloned graph.
Run all the above written test cases for multiple GPU scenarios */
TEST_CASE("Unit_hipGraphClone_multi_GPU_test") {
TEST_CASE("Unit_hipGraphClone_multi_GPU_test", "[multigpu]") {
// FIXME: This test tests 3D as well, decouple it
CHECK_IMAGE_SUPPORT
@@ -187,7 +187,8 @@ TEST_CASE("Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") {
* Scenario 3: This test verifies event in node of the executable graph can be changed to event on
* different device
*/
TEST_CASE("Unit_hipGraphExecEventRecordNodeSetEvent_Positive_DifferentDevices") {
TEST_CASE("Unit_hipGraphExecEventRecordNodeSetEvent_Positive_DifferentDevices",
"[multigpu]") {
const auto device_count = HipTest::getDeviceCount();
if (device_count < 2) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
@@ -142,7 +142,8 @@ TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Positive_Basic") {
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative_Parameters") {
TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative_Parameters",
"[multigpu]") {
using namespace std::placeholders;
hipGraph_t graph = nullptr;
HIP_CHECK(hipGraphCreate(&graph, 0));
@@ -129,7 +129,8 @@ TEMPLATE_TEST_CASE("Unit_hipGraphExecMemsetNodeSetParams_Positive_Basic", "", ui
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipGraphExecMemsetNodeSetParams_Negative_Parameters") {
TEST_CASE("Unit_hipGraphExecMemsetNodeSetParams_Negative_Parameters",
"[multigpu]") {
// FIXME: this test tests 1D/2D/3D stuff in one single go, need to decouple it so that it can run
// on devices with no image support
CHECK_IMAGE_SUPPORT
@@ -639,7 +639,8 @@ TEST_CASE("Unit_hipGraphExecUpdate_Negative_NodeType_Changed") {
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipGraphExecUpdate_Negative_MultiDevice_Context_Changed") {
TEST_CASE("Unit_hipGraphExecUpdate_Negative_MultiDevice_Context_Changed",
"[multigpu]") {
constexpr size_t N = 1024;
constexpr size_t Nbytes = N * sizeof(int);
constexpr auto blocksPerCU = 6; // to hide latency
@@ -269,7 +269,8 @@ This testcase verifies hipGraphInstantiateWithFlags API
by creating dependency graph on GPU-0 and instantiate, launching and verifying
the result on GPU-1
*/
TEST_CASE("Unit_hipGraphInstantiateWithFlags_DependencyGraphDeviceCtxtChg") {
TEST_CASE("Unit_hipGraphInstantiateWithFlags_DependencyGraphDeviceCtxtChg",
"[multigpu]") {
int numDevices = 0;
int canAccessPeer = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -311,7 +312,8 @@ This testcase verifies hipGraphInstantiateWithFlags API
by creating capture graph on GPU-0 and instantiate, launching and verifying
the result on GPU-1
*/
TEST_CASE("Unit_hipGraphInstantiateWithFlags_StreamCaptureDeviceContextChg") {
TEST_CASE("Unit_hipGraphInstantiateWithFlags_StreamCaptureDeviceContextChg",
"[multigpu]") {
int numDevices = 0;
int canAccessPeer = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -254,7 +254,7 @@ static void hipGraphLaunch_test() {
HIP_CHECK(hipStreamDestroy(streamForGraph));
}
TEST_CASE("Unit_hipGraphLaunch_Functional_multidevice_test") {
TEST_CASE("Unit_hipGraphLaunch_Functional_multidevice_test", "[multigpu]") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -135,7 +135,8 @@ TEST_CASE("Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional") {
hipGraphMemAllocNodeGetParams_Functional();
}
TEST_CASE("Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_MultiDevice") {
TEST_CASE("Unit_hipGraphMem_Alloc_Free_NodeGetParams_Functional_MultiDevice",
"[multigpu]") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -614,7 +614,8 @@ static void hipGraph_PerfCheck_hipGraphExecKernelNodeSetParams(const hipStream_t
* ------------------------
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecKernelNodeSetParams") {
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecKernelNodeSetParams",
"[multigpu]") {
if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
HipTest::HIP_SKIP_TEST(
"Unable to turn on "
@@ -732,7 +733,8 @@ static void hipGraph_PerfCheck_hipGraphExecKernelNodeSetParams_inLoop(const hipS
* ------------------------
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecKernelNodeSetParams_inLoop") {
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecKernelNodeSetParams_inLoop",
"[multigpu]") {
if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
HipTest::HIP_SKIP_TEST(
"Unable to turn on "
@@ -954,7 +956,8 @@ static void hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParams_inLoop(const hipS
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParams_inLoop") {
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParams_inLoop",
"[multigpu]") {
if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
HipTest::HIP_SKIP_TEST(
"Unable to turn on "
@@ -1070,7 +1073,8 @@ static void hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParams1D_inLoop(const hi
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParams1D_inLoop") {
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParams1D_inLoop",
"[multigpu]") {
if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
HipTest::HIP_SKIP_TEST(
"Unable to turn on "
@@ -1178,7 +1182,8 @@ static void hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParamsFrmSymbol(const hi
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParamsFrmSymbol") {
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParamsFrmSymbol",
"[multigpu]") {
if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
HipTest::HIP_SKIP_TEST(
"Unable to turn on "
@@ -1285,7 +1290,8 @@ static void hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParamsToSymbol(const hip
* ------------------------
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParamsToSymbol") {
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemcpyNodeSetParamsToSymbol",
"[multigpu]") {
if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
HipTest::HIP_SKIP_TEST(
"Unable to turn on "
@@ -1438,7 +1444,8 @@ static void hipGraph_PerfCheck_hipGraphExecMemsetNodeSetParams(const hipStream_t
* ------------------------
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemsetNodeSetParams") {
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecMemsetNodeSetParams",
"[multigpu]") {
if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
HipTest::HIP_SKIP_TEST(
"Unable to turn on "
@@ -1871,7 +1878,8 @@ static void hipGraph_PerfCheck_hipGraphExecChildGraphNodeSetParams_mKernel(
* ------------------------
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecChildGraphNodeSetParams") {
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecChildGraphNodeSetParams",
"[multigpu]") {
if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
HipTest::HIP_SKIP_TEST(
"Unable to turn on "
@@ -2018,7 +2026,8 @@ static void hipGraph_PerfCheck_hipGraphExecEventRecordNodeSetEvent(const hipStre
* ------------------------
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecEventRecordNodeSetEvent") {
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecEventRecordNodeSetEvent",
"[multigpu]") {
if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
HipTest::HIP_SKIP_TEST(
"Unable to turn on "
@@ -2205,7 +2214,8 @@ static void hipGraph_PerfCheck_hipGraphExecEventWaitNodeSetEvent(const hipStream
* ------------------------
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecEventWaitNodeSetEvent") {
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecEventWaitNodeSetEvent",
"[multigpu]") {
if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
HipTest::HIP_SKIP_TEST(
"Unable to turn on "
@@ -2359,7 +2369,8 @@ static void hipGraph_PerfCheck_hipGraphExecHostNodeSetParams(const hipStream_t&
* ------------------------
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecHostNodeSetParams") {
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecHostNodeSetParams",
"[multigpu]") {
if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
HipTest::HIP_SKIP_TEST(
"Unable to turn on "
@@ -2482,7 +2493,7 @@ static void hipGraph_PerfCheck_hipGraphExecUpdate(const hipStream_t& stream) {
* ------------------------
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecUpdate") {
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecUpdate", "[multigpu]") {
if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
HipTest::HIP_SKIP_TEST(
"Unable to turn on "
@@ -2626,7 +2637,8 @@ static void hipGraph_PerfCheck_hipGraphExecUpdate_kernel_inLoop(const hipStream_
* ------------------------
* - HIP_VERSION >= 6.1
*/
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecUpdate_kernel_inLoop") {
TEST_CASE("Unit_hipGraph_PerfCheck_hipGraphExecUpdate_kernel_inLoop",
"[multigpu]") {
if ((setenv("DEBUG_CLR_GRAPH_PACKET_CAPTURE", "true", 1)) != 0) {
HipTest::HIP_SKIP_TEST(
"Unable to turn on "
@@ -152,7 +152,7 @@ TEST_CASE("Unit_hipGraphUpload_Functional") {
}
}
TEST_CASE("Unit_hipGraphUpload_Functional_multidevice_test") {
TEST_CASE("Unit_hipGraphUpload_Functional_multidevice_test", "[multigpu]") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -1064,7 +1064,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_EndingCapwhenCapInProg") {
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU") {
TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU", "[multigpu]") {
int devcount = 0;
HIP_CHECK(hipGetDeviceCount(&devcount));
// If only single GPU is detected then return
@@ -931,7 +931,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_EndingCapturewhenCaptureInProgress") {
/* Test scenario 15
*/
TEST_CASE("Unit_hipStreamBeginCapture_MultiGPU") {
TEST_CASE("Unit_hipStreamBeginCapture_MultiGPU", "[multigpu]") {
int devcount = 0;
HIP_CHECK(hipGetDeviceCount(&devcount));
// If only single GPU is detected then return
@@ -45,7 +45,7 @@ __global__ void run_printf() { printf("Hello World\n"); }
* ------------------------
* - HIP_VERSION >= 5.6
*/
TEST_CASE("Unit_kernel_ChkPrintf") {
TEST_CASE("Unit_kernel_ChkPrintf", "[multigpu]") {
int device_count = 0;
CaptureStream capture(stdout);
HIP_CHECK(hipGetDeviceCount(&device_count));
@@ -85,7 +85,7 @@ This testcase verifies the hipArrayCreate API in multithreaded
scenario by launching threads in parallel on multiple GPUs
and verifies the hipArrayCreate API with small and big chunks data
*/
TEST_CASE("Unit_hipArrayCreate_MultiThread") {
TEST_CASE("Unit_hipArrayCreate_MultiThread", "[multigpu]") {
CHECK_IMAGE_SUPPORT
std::vector<std::thread> threadlist;
@@ -233,7 +233,7 @@ float* funcToChkArray(hipArray_t array) {
* ------------------------
* - HIP_VERSION >= 5.6
*/
TEST_CASE("Unit_hipArrayGetDescriptor_1D_2D_ArrayParameterChk") {
TEST_CASE("Unit_hipArrayGetDescriptor_1D_2D_ArrayParameterChk", "[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
@@ -318,7 +318,8 @@ TEST_CASE("Unit_hipArrayGetDescriptor_1D_2D_ArrayParameterChk") {
* ------------------------
* - HIP_VERSION >= 5.6
*/
TEST_CASE("Unit_hipArrayGetDescriptor_MultiThreadScenarioFor1D_2D_Array") {
TEST_CASE("Unit_hipArrayGetDescriptor_MultiThreadScenarioFor1D_2D_Array",
"[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
@@ -368,7 +369,7 @@ TEST_CASE("Unit_hipArrayGetDescriptor_MultiThreadScenarioFor1D_2D_Array") {
* ------------------------
* - HIP_VERSION >= 5.6
*/
TEST_CASE("Unit_hipArrayGetDescriptor_Host2Array_Array2Host") {
TEST_CASE("Unit_hipArrayGetDescriptor_Host2Array_Array2Host", "[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
@@ -137,7 +137,7 @@ TEST_CASE("Unit_hipDeviceGetMemPool_Functional") {
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipDeviceGetMemPool_Multidevice") {
TEST_CASE("Unit_hipDeviceGetMemPool_Multidevice", "[multigpu]") {
int num_devices;
HIP_CHECK(hipGetDeviceCount(&num_devices));
@@ -108,7 +108,7 @@ TEST_CASE("Unit_hipDeviceSetMemPool_Basic") {
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipDeviceSetMemPool_DestroyCurrentMempool") {
TEST_CASE("Unit_hipDeviceSetMemPool_DestroyCurrentMempool", "[multigpu]") {
int num_devices;
HIP_CHECK(hipGetDeviceCount(&num_devices));
for (int dev = 0; dev < num_devices; dev++) {
@@ -269,7 +269,7 @@ TEST_CASE("Unit_hipDrvMemcpy2DUnaligned_FuncTst") {
* ------------------------
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipDrvMemcpy2DUnaligned_Positive_Basic") {
TEST_CASE("Unit_hipDrvMemcpy2DUnaligned_Positive_Basic", "[multigpu]") {
CHECK_IMAGE_SUPPORT
SECTION("Device to Device") {
@@ -542,7 +542,7 @@ TEST_CASE("Unit_hipDrvMemcpy3DAsync_ExtentValidation") {
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipDrvMemcpy3DAsync_H2DDeviceContextChange") {
TEST_CASE("Unit_hipDrvMemcpy3DAsync_H2DDeviceContextChange", "[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -567,7 +567,8 @@ TEST_CASE("Unit_hipDrvMemcpy3DAsync_H2DDeviceContextChange") {
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipDrvMemcpy3DAsync_Host2ArrayDeviceContextChange") {
TEST_CASE("Unit_hipDrvMemcpy3DAsync_Host2ArrayDeviceContextChange",
"[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -595,7 +596,8 @@ TEST_CASE("Unit_hipDrvMemcpy3DAsync_Host2ArrayDeviceContextChange") {
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipDrvMemcpy3DAsync_multiDevice_Basic_Size_Test") {
TEST_CASE("Unit_hipDrvMemcpy3DAsync_multiDevice_Basic_Size_Test",
"[multigpu]") {
CHECK_IMAGE_SUPPORT
constexpr int size_128b = 128, size_256b = 256;
int numDevices = 0;
@@ -524,7 +524,7 @@ TEST_CASE("Unit_hipDrvMemcpy3D_ExtentValidation") {
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipDrvMemcpy3D_H2DDeviceContextChange") {
TEST_CASE("Unit_hipDrvMemcpy3D_H2DDeviceContextChange", "[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -549,7 +549,7 @@ TEST_CASE("Unit_hipDrvMemcpy3D_H2DDeviceContextChange") {
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipDrvMemcpy3D_Host2ArrayDeviceContextChange") {
TEST_CASE("Unit_hipDrvMemcpy3D_Host2ArrayDeviceContextChange", "[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -577,7 +577,7 @@ TEST_CASE("Unit_hipDrvMemcpy3D_Host2ArrayDeviceContextChange") {
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipDrvMemcpy3D_multiDevice_Basic_Size_Test") {
TEST_CASE("Unit_hipDrvMemcpy3D_multiDevice_Basic_Size_Test", "[multigpu]") {
CHECK_IMAGE_SUPPORT
constexpr int size_128b = 128, size_256b = 256;
int numDevices = 0;
@@ -2705,7 +2705,7 @@ TEST_CASE("Unit_hipGetProcAddress_MemoryApisGetMemInfoRelated") {
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipGetProcAddress_MemoryApisMemcpy2DRelated") {
TEST_CASE("Unit_hipGetProcAddress_MemoryApisMemcpy2DRelated", "[multigpu]") {
CHECK_IMAGE_SUPPORT
void* hipMemcpy2D_ptr = nullptr;
@@ -6008,7 +6008,7 @@ TEST_CASE("Unit_hipGetProcAddress_MemoryApisStreamOrderedMemory") {
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipGetProcAddress_MemoryApisPeerToPeer") {
TEST_CASE("Unit_hipGetProcAddress_MemoryApisPeerToPeer", "[multigpu]") {
int deviceCount = 0;
HIP_CHECK(hipGetDeviceCount(&deviceCount));
@@ -107,7 +107,8 @@ void doMemCopy(size_t numElements, int offset, T* A, T* Bh, T* Bd, bool internal
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_hipHostRegister_ReferenceFromKernelandhipMemset", "", int, float, double) {
TEMPLATE_TEST_CASE("Unit_hipHostRegister_ReferenceFromKernelandhipMemset",
"[multigpu]", int, float, double) {
size_t sizeBytes{LEN * sizeof(TestType)};
TestType *A, **Ad;
int num_devices = 0;
@@ -214,7 +215,8 @@ TEMPLATE_TEST_CASE("Unit_hipHostRegister_DirectReferenceFromKernel", "", int, fl
* ------------------------
* - HIP_VERSION >= 5.6
*/
TEMPLATE_TEST_CASE("Unit_hipHostRegister_DirectReferenceMultGpu", "", int, float, double) {
TEMPLATE_TEST_CASE("Unit_hipHostRegister_DirectReferenceMultGpu", "[multigpu]",
int, float, double) {
// 1 refers to doing hipHostRegister once for all devices
// 0 refers to doing hipHostRegister for each device
auto register_once = GENERATE(0, 1);
@@ -127,7 +127,7 @@ This testcase verifies the hipMalloc3D API in multithreaded
scenario by launching threads in parallel on multiple GPUs
and verifies the hipMalloc3D API with small and big chunks data
*/
TEST_CASE("Unit_hipMalloc3D_MultiThread") {
TEST_CASE("Unit_hipMalloc3D_MultiThread", "[multigpu]") {
CHECK_IMAGE_SUPPORT
std::vector<std::thread> threadlist;
@@ -78,7 +78,7 @@ This testcase verifies the hipMalloc3DArray API in multithreaded
scenario by launching threads in parallel on multiple GPUs
and verifies the hipMalloc3DArray API with small and big chunks data
*/
TEST_CASE("Unit_hipMalloc3DArray_MultiThread") {
TEST_CASE("Unit_hipMalloc3DArray_MultiThread", "[multigpu]") {
CHECK_IMAGE_SUPPORT
std::vector<std::thread> threadlist;
@@ -80,7 +80,7 @@ This testcase verifies the hipMallocArray API in multithreaded
scenario by launching threads in parallel on multiple GPUs
and verifies the hipMallocArray API with small and big chunks data
*/
TEST_CASE("Unit_hipMallocArray_MultiThread") {
TEST_CASE("Unit_hipMallocArray_MultiThread", "[multigpu]") {
CHECK_IMAGE_SUPPORT
std::vector<std::thread> threadlist;
@@ -290,7 +290,7 @@ TEST_CASE("Unit_hipMallocAsync_StreamEvent_CrissCross") {
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocAsync_Multidevice") {
TEST_CASE("Unit_hipMallocAsync_Multidevice", "[multigpu]") {
int num_devices;
HIP_CHECK(hipGetDeviceCount(&num_devices));
for (int i = 0; i < num_devices; i++) {
@@ -330,7 +330,7 @@ static void threadQAsyncCommands(streamMemAllocTest* testObj, hipStream_t strm,
testObj->freeDevBuf(strm);
}
TEST_CASE("Unit_hipMallocAsync_Multidevice_Concurrent") {
TEST_CASE("Unit_hipMallocAsync_Multidevice_Concurrent", "[multigpu]") {
int num_devices;
HIP_CHECK(hipGetDeviceCount(&num_devices));
checkIfMultiDev(num_devices) hipStream_t* stream_buf = new hipStream_t[num_devices];
@@ -379,7 +379,7 @@ TEST_CASE("Unit_hipMallocAsync_Multidevice_Concurrent") {
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocAsync_Multidevice_MultiStream") {
TEST_CASE("Unit_hipMallocAsync_Multidevice_MultiStream", "[multigpu]") {
int num_devices;
HIP_CHECK(hipGetDeviceCount(&num_devices));
checkIfMultiDev(num_devices)
@@ -303,7 +303,7 @@ TEST_CASE("Unit_hipMalloc_AllocateAndPoolBuffers") {
* Exercise hipMalloc() api in parallel on all gpus from
* multiple threads and regress the api.
*/
TEST_CASE("Unit_hipMalloc_Multithreaded_MultiGPU") {
TEST_CASE("Unit_hipMalloc_Multithreaded_MultiGPU", "[multigpu]") {
std::vector<std::thread> threadlist;
int devCnt;
@@ -340,7 +340,7 @@ TEST_CASE("Unit_hipMallocFromPoolAsync_hipStreamPerThread") {
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_ReleaseThreshold_Mgpu") {
TEST_CASE("Unit_hipMallocFromPoolAsync_ReleaseThreshold_Mgpu", "[multigpu]") {
constexpr int N = 1 << 20;
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -575,7 +575,7 @@ static bool checkReuseAllowOtherFlags(int N, hipMemPoolAttr attr, enum eTestValu
* - HIP_VERSION >= 6.2
*/
#if HT_AMD
TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_Concurrent") {
TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_Concurrent", "[multigpu]") {
auto testType = GENERATE(testdefault, testMaximum);
constexpr int N = 1 << 20;
int num_devices;
@@ -627,7 +627,7 @@ TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_Concurrent") {
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_MultiStream") {
TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_MultiStream", "[multigpu]") {
int num_devices;
auto testType = GENERATE(testdefault, testMaximum);
constexpr int N = 1 << 20;
@@ -31,7 +31,7 @@ __global__ void MallcMangdFlgTst(int n, float* x, float* y) {
}
// The following section tests working of hipMallocManaged with flag parameters
TEST_CASE("Unit_hipMallocManaged_FlgParam") {
TEST_CASE("Unit_hipMallocManaged_FlgParam", "[multigpu]") {
auto managed = HmmAttrPrint();
if (managed != 1) {
HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
@@ -119,7 +119,7 @@ TEST_CASE("Unit_hipMallocManaged_FlgParam") {
// The following function tests Memory access allocated using hipMallocManaged
// in multiple streams
TEST_CASE("Unit_hipMallocManaged_AccessMultiStream") {
TEST_CASE("Unit_hipMallocManaged_AccessMultiStream", "[multigpu]") {
auto managed = HmmAttrPrint();
if (managed != 1) {
HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
@@ -161,7 +161,7 @@ TEST_CASE("Unit_hipMallocManaged_MultiChunkSingleDevice") {
// Equal parts of Hmm is accessed on available gpus and
// kernel is launched on accessed chunk of hmm memory
// and checks if there are any inconsistencies or access issues
TEST_CASE("Unit_hipMallocManaged_MultiChunkMultiDevice") {
TEST_CASE("Unit_hipMallocManaged_MultiChunkMultiDevice", "[multigpu]") {
auto managed = HmmAttrPrint();
if (managed != 1) {
HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
@@ -304,7 +304,8 @@ TEST_CASE("Unit_hipMallocManaged_Negative") {
// Allocate two pointers using hipMallocManaged(), initialize,
// then launch kernel using these pointers directly and
// later validate the content without using any Memcpy.
TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "", int, float, double) {
TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "[multigpu]", int,
float, double) {
auto managed = HmmAttrPrint();
if (managed != 1) {
HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
@@ -344,8 +345,8 @@ TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "", int, float, double)
// to all other devices. This includes verification and Device to Device
// transfers and kernel launch to discover if there are any access issues.
TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "", unsigned char, int, float,
double) {
TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "[multigpu]",
unsigned char, int, float, double) {
auto managed = HmmAttrPrint();
if (managed != 1) {
HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
@@ -85,7 +85,7 @@ This testcase verifies the hipMallocMipmappedArray API in multithreaded
scenario by launching threads in parallel on multiple GPUs
and verifies the hipMallocMipmappedArray API with small and big chunks data
*/
TEST_CASE("Unit_hipMallocMipmappedArray_MultiThread") {
TEST_CASE("Unit_hipMallocMipmappedArray_MultiThread", "[multigpu]") {
std::vector<std::thread> threadlist;
int devCnt = 0;
devCnt = HipTest::getDeviceCount();
@@ -297,7 +297,7 @@ static void AllocateHmmMemory(int flag, int device) {
}
}
TEST_CASE("Unit_hipMallocManaged_MultiThread") {
TEST_CASE("Unit_hipMallocManaged_MultiThread", "[multigpu]") {
auto managed = HmmAttrPrint();
if (managed != 1) {
HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
@@ -351,7 +351,7 @@ TEST_CASE("Unit_hipMallocManaged_MultiThread") {
// The following test checks what happens when same Hmm memory is used to
// launch multiple threads over multiple gpus
TEST_CASE("Unit_hipMallocManaged_MGpuMThread") {
TEST_CASE("Unit_hipMallocManaged_MGpuMThread", "[multigpu]") {
auto managed = HmmAttrPrint();
if (managed != 1) {
HipTest::HIP_SKIP_TEST("GPU doesn't support managed memory so skipping test.");
@@ -459,7 +459,7 @@ scenario by launching threads in parallel on multiple GPUs
and verifies the hipMallocPitch API with small and big chunks data
*/
TEST_CASE("Unit_hipMallocPitch_MultiThread", "") {
TEST_CASE("Unit_hipMallocPitch_MultiThread", "[multigpu]") {
CHECK_IMAGE_SUPPORT
std::vector<std::thread> threadlist;
@@ -181,7 +181,7 @@ TEST_CASE("Unit_hipMemAdvise_Flags_Do_Not_Cause_Prefetch") {
#endif
}
TEST_CASE("Unit_hipMemAdvise_Read_Write_After_Advise") {
TEST_CASE("Unit_hipMemAdvise_Read_Write_After_Advise", "[multigpu]") {
auto supported_devices = GetDevicesWithAdviseSupport();
if (supported_devices.empty()) {
HipTest::HIP_SKIP_TEST("Test needs at least 1 device that supports managed memory");
@@ -243,7 +243,7 @@ TEST_CASE("Unit_hipMemAdvise_NegtveTsts") {
// The following function tests various scenarios around the flag
// 'hipMemAdviseSetPreferredLocation' using HMM memory and hipMemAdvise() api
TEST_CASE("Unit_hipMemAdvise_PrefrdLoc") {
TEST_CASE("Unit_hipMemAdvise_PrefrdLoc", "[multigpu]") {
int MangdMem = HmmAttrPrint();
if (MangdMem == 1) {
// Check that when a page fault occurs for the memory region set to devPtr,
@@ -428,7 +428,7 @@ TEST_CASE("Unit_hipMemAdvise_TstFlgOverrideEffect") {
// The following function tests if peers can set hipMemAdviseSetAccessedBy flag
// on HMM memory prefetched on each of the other gpus
#if HT_AMD
TEST_CASE("Unit_hipMemAdvise_TstAccessedByPeer") {
TEST_CASE("Unit_hipMemAdvise_TstAccessedByPeer", "[multigpu]") {
int MangdMem = HmmAttrPrint();
if (MangdMem == 1) {
bool IfTestPassed = true;
@@ -732,7 +732,7 @@ TEST_CASE("Unit_hipMemAdvise_TstMemAdvisePrefrdLoc") {
to device1, probe for hipMemRangeAttributeLastPrefetchLocation using
hipMemRangeGetAttribute(), we should get 1*/
TEST_CASE("Unit_hipMemAdvise_TstMemAdviseLstPreftchLoc") {
TEST_CASE("Unit_hipMemAdvise_TstMemAdviseLstPreftchLoc", "[multigpu]") {
int NumDevs = 0;
HIP_CHECK(hipGetDeviceCount(&NumDevs));
if (NumDevs >= 2) {
@@ -802,7 +802,7 @@ TEST_CASE("Unit_hipMemAdvise_TstMemAdviseMultiFlag") {
access denial case arising due to setting ReadMostly only to a particular
gpu*/
TEST_CASE("Unit_hipMemAdvise_ReadMosltyMgpuTst") {
TEST_CASE("Unit_hipMemAdvise_ReadMosltyMgpuTst", "[multigpu]") {
int managed = HmmAttrPrint();
if (managed == 1) {
int Ngpus = 0;
@@ -70,7 +70,7 @@ static std::vector<int> getSupportedDevices() {
* ------------------------
* - HIP_VERSION >= 7.1
*/
TEST_CASE("Unit_hipMemAdvise_v2_Device_Host") {
TEST_CASE("Unit_hipMemAdvise_v2_Device_Host", "[multigpu]") {
auto supportedDevices = getSupportedDevices();
if (supportedDevices.empty()) {
HipTest::HIP_SKIP_TEST(
@@ -93,7 +93,7 @@ TEST_CASE("Unit_hipMemAllocHost_Negative") {
/*
* Verify that a device can read/write to the memory of another device
*/
TEST_CASE("Unit_hipMemAllocHost_VerifyAccess") {
TEST_CASE("Unit_hipMemAllocHost_VerifyAccess", "[multigpu]") {
int devices_number = 0;
HIP_CHECK(hipGetDeviceCount(&devices_number));
std::vector<int*> devices_memories(devices_number);
@@ -157,7 +157,7 @@ static __global__ void setKer(int* devptr) {
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolCreate_DeviceTest") {
TEST_CASE("Unit_hipMemPoolCreate_DeviceTest", "[multigpu]") {
checkMempoolSupported(0) int num_devices = 0;
HIP_CHECK(hipGetDeviceCount(&num_devices));
checkIfMultiDev(num_devices)
@@ -98,7 +98,7 @@ int CheckP2PMemPoolSupport(int src_device, int dst_device) {
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU") {
TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU", "[multigpu]") {
const auto device_count = HipTest::getDeviceCount();
if (device_count < 2) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
@@ -212,7 +212,7 @@ void MemPoolSetGetAccess_P2P(const MemPools mempool_type) {
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_P2P") {
TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_P2P", "[multigpu]") {
const auto device_count = HipTest::getDeviceCount();
if (device_count < 2) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
@@ -406,7 +406,7 @@ static void getDevicePairs(std::vector<std::pair<int, int>>* p2p_pairs, int numD
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolSetAccess_SetAccess") {
TEST_CASE("Unit_hipMemPoolSetAccess_SetAccess", "[multigpu]") {
constexpr int N = 1 << 14;
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -47,7 +47,7 @@ __global__ void MemPrefetchAsyncKernel(int* C_d, const int* A_d, size_t N) {
}
}
TEST_CASE("Unit_hipMemPrefetchAsync_Basic") {
TEST_CASE("Unit_hipMemPrefetchAsync_Basic", "[multigpu]") {
const auto supported_devices = GetDevicesWithPrefetchSupport();
if (supported_devices.empty()) {
HipTest::HIP_SKIP_TEST("Test need at least one device with managed memory support");
@@ -70,7 +70,7 @@ static std::vector<int> getSupportedDevices() {
* ------------------------
* - HIP_VERSION >= 7.1
*/
TEST_CASE("Unit_hipMemPrefetchAsync_v2_Device_Host") {
TEST_CASE("Unit_hipMemPrefetchAsync_v2_Device_Host", "[multigpu]") {
auto supportedDevices = getSupportedDevices();
if (supportedDevices.empty()) {
HipTest::HIP_SKIP_TEST(
@@ -27,7 +27,7 @@ THE SOFTWARE.
#include <resource_guards.hh>
#include <utils.hh>
TEST_CASE("Unit_hipMemcpy2D_Positive_Basic") {
TEST_CASE("Unit_hipMemcpy2D_Positive_Basic", "[multigpu]") {
CHECK_IMAGE_SUPPORT
constexpr bool async = false;
@@ -27,7 +27,7 @@ THE SOFTWARE.
#include <resource_guards.hh>
#include <utils.hh>
TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Basic") {
TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Basic", "[multigpu]") {
using namespace std::placeholders;
constexpr bool async = true;
@@ -172,7 +172,8 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_Host&PinnedMem", "", int, float, doubl
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-Host&PinnedMem", "", int, float, double) {
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-Host&PinnedMem",
"[multigpu]", int, float, double) {
CHECK_IMAGE_SUPPORT
auto mem_type = GENERATE(0, 1);
int numDevices = 0;
@@ -264,7 +265,8 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-Host&PinnedMem", "", int,
* - HIP_VERSION >= 5.2
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-StreamOnDiffDevice", "", int, float, double) {
TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-StreamOnDiffDevice",
"[multigpu]", int, float, double) {
CHECK_IMAGE_SUPPORT
auto mem_type = GENERATE(0, 1);
int numDevices = 0;
@@ -506,7 +508,7 @@ static void hipMemcpy2DAsync_Basic_Size_Test(size_t inc) {
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice_Basic_Size_Test") {
TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice_Basic_Size_Test", "[multigpu]") {
CHECK_IMAGE_SUPPORT
size_t input = 1 << 20;
int numDevices = 0;
@@ -34,8 +34,7 @@ invalid
#include <resource_guards.hh>
#include <utils.hh>
TEST_CASE("Unit_hipMemcpy2DFromArray_Positive_Default") {
TEST_CASE("Unit_hipMemcpy2DFromArray_Positive_Default", "[multigpu]") {
CHECK_IMAGE_SUPPORT
using namespace std::placeholders;
@@ -34,7 +34,7 @@ of hipMemcpy2DFromArrayAsync api when parameters are invalid
#include <resource_guards.hh>
#include <utils.hh>
TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_Positive_Default") {
TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_Positive_Default", "[multigpu]") {
CHECK_IMAGE_SUPPORT
using namespace std::placeholders;
@@ -195,7 +195,8 @@ TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_PinnedHostMemSameGpu") {
then A_d-->E_h in GPU1
* OUTPUT: validating the result by comparing A_h and E_h
*/
TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_multiDevicePinnedHostMem") {
TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_multiDevicePinnedHostMem",
"[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
@@ -254,7 +255,8 @@ TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_multiDevicePinnedHostMem") {
* --> A_h host variable
* and verifying A_h with Phi
* */
TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_multiDeviceContextChange") {
TEST_CASE("Unit_hipMemcpy2DFromArrayAsync_multiDeviceContextChange",
"[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
@@ -165,7 +165,8 @@ TEST_CASE("Unit_hipMemcpy2DFromArray_PinnedMemSameGPU") {
* --> E_h host variable
* and verifying A_h with E_h
*/
TEST_CASE("Unit_hipMemcpy2DFromArray_multiDevicePinnedMemPeerGpu") {
TEST_CASE("Unit_hipMemcpy2DFromArray_multiDevicePinnedMemPeerGpu",
"[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
@@ -218,7 +219,7 @@ TEST_CASE("Unit_hipMemcpy2DFromArray_multiDevicePinnedMemPeerGpu") {
* --> A_h host variable
* and verifying A_h with Phi
* */
TEST_CASE("Unit_hipMemcpy2DFromArray_multiDeviceContextChange") {
TEST_CASE("Unit_hipMemcpy2DFromArray_multiDeviceContextChange", "[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
@@ -33,8 +33,7 @@ unsuccessful execution of hipMemcpy2DToArray api when parameters are invalid
#include <resource_guards.hh>
#include <utils.hh>
TEST_CASE("Unit_hipMemcpy2DToArray_Positive_Default") {
TEST_CASE("Unit_hipMemcpy2DToArray_Positive_Default", "[multigpu]") {
CHECK_IMAGE_SUPPORT
using namespace std::placeholders;
@@ -34,8 +34,7 @@ of hipMemcpy2DToArrayAsync api when parameters are invalid
#include <resource_guards.hh>
#include <utils.hh>
TEST_CASE("Unit_hipMemcpy2DToArrayAsync_Positive_Default") {
TEST_CASE("Unit_hipMemcpy2DToArrayAsync_Positive_Default", "[multigpu]") {
CHECK_IMAGE_SUPPORT
using namespace std::placeholders;
@@ -194,7 +194,8 @@ TEST_CASE("Unit_hipMemcpy2DToArrayAsync_PinnedHostMemSameGpu") {
* --> A_h host variable
* and verifying A_h with E_h[0]+i(i.e., 10+i)
*/
TEST_CASE("Unit_hipMemcpy2DToArrayAsync_multiDevicePinnedHostMem") {
TEST_CASE("Unit_hipMemcpy2DToArrayAsync_multiDevicePinnedHostMem",
"[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
@@ -254,7 +255,8 @@ TEST_CASE("Unit_hipMemcpy2DToArrayAsync_multiDevicePinnedHostMem") {
* --> A_h host variable
* and verifying A_h with Phi
* */
TEST_CASE("Unit_hipMemcpy2DToArrayAsync_multiDeviceDeviceContextChange") {
TEST_CASE("Unit_hipMemcpy2DToArrayAsync_multiDeviceDeviceContextChange",
"[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
@@ -164,7 +164,7 @@ TEST_CASE("Unit_hipMemcpy2DToArray_PinnedMemSameGPU") {
* --> A_h host variable
* and verifying A_h with E_h[0]+i(i.e., 10+i)
*/
TEST_CASE("Unit_hipMemcpy2DToArray_multiDevicePinnedMemPeerGpu") {
TEST_CASE("Unit_hipMemcpy2DToArray_multiDevicePinnedMemPeerGpu", "[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
@@ -218,7 +218,8 @@ TEST_CASE("Unit_hipMemcpy2DToArray_multiDevicePinnedMemPeerGpu") {
* --> A_h host variable
* and verifying A_h with Phi
* */
TEST_CASE("Unit_hipMemcpy2DToArray_multiDeviceDeviceContextChange") {
TEST_CASE("Unit_hipMemcpy2DToArray_multiDeviceDeviceContextChange",
"[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
@@ -309,7 +309,8 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H_Managed_WithOffset", "", int, f
* - HIP_VERSION >= 6.0
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_multiDevice-D2D", "", int, float, double) {
TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_multiDevice-D2D", "[multigpu]", int, float,
double) {
CHECK_IMAGE_SUPPORT
auto mem_type = GENERATE(0, 1);
int numDevices = 0;
@@ -524,7 +525,7 @@ static void hipMemcpy2D_Basic_Size_Test(size_t inc) {
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy2D_multiDevice_Basic_Size_Test") {
TEST_CASE("Unit_hipMemcpy2D_multiDevice_Basic_Size_Test", "[multigpu]") {
CHECK_IMAGE_SUPPORT
size_t input = 1 << 20;
int numDevices = 0;
@@ -715,7 +715,7 @@ TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-Negative") {
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-D2D") {
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-D2D", "[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -747,7 +747,7 @@ TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-D2D") {
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-DiffStream") {
TEST_CASE("Unit_hipMemcpy3DAsync_multiDevice-DiffStream", "[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -609,7 +609,7 @@ TEST_CASE("Unit_hipMemcpy3D_multiDevice-Negative") {
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipMemcpy3D_multiDevice-OnPeerDevice") {
TEST_CASE("Unit_hipMemcpy3D_multiDevice-OnPeerDevice", "[multigpu]") {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
@@ -644,7 +644,7 @@ TEST_CASE("Unit_hipMemcpy3D_multiDevice-OnPeerDevice") {
* - HIP_VERSION >= 6.0
*/
TEST_CASE("Unit_hipMemcpy3D_multiDevice_Basic_Size_Test") {
TEST_CASE("Unit_hipMemcpy3D_multiDevice_Basic_Size_Test", "[multigpu]") {
CHECK_IMAGE_SUPPORT
constexpr int size_128b = 128, size_256b = 256;
int numDevices = 0;
@@ -128,7 +128,8 @@ This testcase verifies the following scenarios
4. Device context change
5. H2D-D2D-D2H peer GPU
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_H2H-H2D-D2H-H2PinMem", "", char, int, float, double) {
TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_H2H-H2D-D2H-H2PinMem", "[multigpu]",
char, int, float, double) {
TestType *A_d{nullptr}, *B_d{nullptr};
TestType *A_h{nullptr}, *B_h{nullptr};
TestType *A_Ph{nullptr}, *B_Ph{nullptr};
@@ -288,7 +289,8 @@ This testcase verifies hipMemcpy API with pinnedMemory and hostRegister
along with kernel launches
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_PinnedRegMemWithKernelLaunch", "", int, float, double) {
TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_PinnedRegMemWithKernelLaunch",
"[multigpu]", int, float, double) {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices < 2) {
@@ -90,8 +90,8 @@ Output:"B_h" host variable output of hipMemcpyAtoH API
is then validated with "hData"
*/
#if HT_AMD
TEMPLATE_TEST_CASE("Unit_hipMemcpyAtoH_multiDevice-PeerDeviceContext", "[hipMemcpyAtoH]", char, int,
float) {
TEMPLATE_TEST_CASE("Unit_hipMemcpyAtoH_multiDevice-PeerDeviceContext",
"[hipMemcpyAtoH][multigpu]", char, int, float) {
CHECK_IMAGE_SUPPORT
int numDevices = 0;
@@ -38,7 +38,8 @@ This testcase verifies hipMemcpyDtoD API
6.Kernel Launch
7.DtoH copy and validating the result
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoD_Basic", "", int, float, double) {
TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoD_Basic", "[multigpu]", int, float,
double) {
size_t Nbytes = NUM_ELM * sizeof(TestType);
int numDevices = 0;
TestType *A_d{nullptr}, *B_d{nullptr}, *C_d{nullptr}, *X_d{nullptr}, *Y_d{nullptr}, *Z_d{nullptr};

Daži faili netika attēloti, jo izmaiņu fails ir pārāk liels Rādīt vairāk