Functional tests without MPI support (#343)

* Let functional tests build without external MPI

* Fix error conditions when using uuid startup with internal MPI

* Do not abort if libibverbs is not found but not using GDA

* Enabled RO functional test initialized with TEST_UUID

* Reduce load time for ro backend_can_run and prevent mpilib_dlclose
crashing

* Fix case TEST_UUID=1, ROCSHMEM_BACKEND='' (autoloading gda)
Bu işleme şunda yer alıyor:
Aurelien Bouteiller
2025-12-08 11:46:16 -05:00
işlemeyi yapan: GitHub
ebeveyn baaf8091b5
işleme c99bc21e10
10 değiştirilmiş dosya ile 81 ekleme ve 56 silme
+34 -35
Dosyayı Görüntüle
@@ -107,7 +107,7 @@ static BackendType select_backend_type() {
DPRINTF("GDABackend::backend_can_run returned success\n");
return BackendType::GDA_BACKEND;
}
if (MPIInstance::mpilib_dl_init() == ROCSHMEM_SUCCESS) {
if (ROBackend::backend_can_run() == ROCSHMEM_SUCCESS) {
DPRINTF("MPIInstance could dl_init MPI library\n");
return BackendType::RO_BACKEND;
}
@@ -130,6 +130,11 @@ static BackendType select_backend_type() {
int ret;
ret = MPIInstance::mpilib_dl_init();
if (ret != ROCSHMEM_SUCCESS) {
fprintf(stderr, "Could not initialize MPI library. This initialization method of "
"rocSHMEM requires MPI library to be loaded at runtime. Aborting.\n");
exit(1);
}
mpi_instance = new MPIInstance(comm);
#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC)
@@ -155,10 +160,6 @@ static BackendType select_backend_type() {
CHECK_HIP(hipHostMalloc(&backend, sizeof(GDABackend)));
backend = new (backend) GDABackend(comm);
#elif defined(USE_RO)
if (ret != ROCSHMEM_SUCCESS) {
printf("Could not initialize MPI library. RO conduit requires MPI library to be loaded at runtime. Aborting\n");
abort();
}
CHECK_HIP(hipHostMalloc(&backend, sizeof(ROBackend)));
backend = new (backend) ROBackend(comm);
#elif defined(USE_IPC)
@@ -167,7 +168,8 @@ static BackendType select_backend_type() {
#endif
if (!backend) {
abort();
printf("No Backend could be initialized! Aborting.\n");
exit(1);
}
}
@@ -177,14 +179,16 @@ static BackendType select_backend_type() {
int ret;
ret = MPIInstance::mpilib_dl_init();
if (ret == ROCSHMEM_SUCCESS) {
printf("Could not initialize MPI library. This initialization method of "
"rocSHMEM requires MPI library to be loaded at runtime. Aborting\n");
abort();
if (ret != ROCSHMEM_SUCCESS) {
fprintf(stderr, "Could not initialize MPI library. This initialization method of "
"rocSHMEM requires MPI library to be loaded at runtime. Aborting.\n");
exit(1);
}
mpilib_ftable_.Initialized(&initialized);
if (!initialized) {
if (initialized) {
mpilib_ftable_.Comm_size (MPI_COMM_WORLD, &world_size);
} else {
// This is an Open MPI specific solution to retrieve the number of
// processes that have been started, value can be checked before MPI_Init
char *value = getenv("OMPI_COMM_WORLD_SIZE");
@@ -194,13 +198,11 @@ static BackendType select_backend_type() {
if (world_size != nranks) {
// This solution will require MPI_Sessions. This is planned for the
// future, but is not supported in the current version.
fprintf (stderr, "Unsupported configuration to initialize rocSHMEM. Please "
"initialize the MPI library using MPI_Init first, if you want to "
"initialize rocSHMEM with a subset of the processes\n");
abort();
fprintf(stderr, "Unsupported configuration to initialize rocSHMEM. Please "
"initialize the MPI library using MPI_Init first, if you want to "
"initialize rocSHMEM with a subset of the processes\n");
exit(1);
}
} else {
mpilib_ftable_.Comm_size (MPI_COMM_WORLD, &world_size);
}
if (world_size == nranks) {
@@ -252,11 +254,8 @@ static BackendType select_backend_type() {
backend = new (backend) GDABackend(bootstrap);
break;
case BackendType::RO_BACKEND:
/* Not sure whether this is a valid configuration. Will leave it in for now */
DPRINTF("Initializing RO backend with TCP bootstrapping\n");
mpi_instance = new MPIInstance(MPI_COMM_WORLD);
CHECK_HIP(hipHostMalloc(&backend, sizeof(ROBackend)));
backend = new (backend) ROBackend(MPI_COMM_WORLD);
library_init_subcomm(bootstr, bootstr->getNranks(), bootstr->getRank());
break;
case BackendType::IPC_BACKEND:
DPRINTF("Initializing IPC backend with TCP bootstrapping\n");
@@ -268,23 +267,15 @@ static BackendType select_backend_type() {
CHECK_HIP(hipHostMalloc(&backend, sizeof(GDABackend)));
backend = new (backend) GDABackend(bootstrap);
#elif defined(USE_RO)
/* Not sure whether this is a valid configuration. Will leave it in for now */
int ret;
ret = MPIInstance::mpilib_dl_init();
if (ret != MPI_SUCCESS) {
printf("RO Backend requires MPI library to be initialized, even when using uniqueId initializations!\n");
abort();
}
mpi_instance = new MPIInstance(MPI_COMM_WORLD);
CHECK_HIP(hipHostMalloc(&backend, sizeof(ROBackend)));
backend = new (backend) ROBackend(MPI_COMM_WORLD);
library_init_subcomm(bootstr, bootstr->getNranks(), bootstr->getRank());
#elif defined(USE_IPC)
CHECK_HIP(hipHostMalloc(&backend, sizeof(IPCBackend)));
backend = new (backend) IPCBackend(bootstrap);
#endif
if (!backend) {
abort();
printf("No Backend could be initialized! Aborting.\n");
exit(1);
}
}
@@ -318,7 +309,7 @@ static BackendType select_backend_type() {
if (envvar::uniqueid_with_mpi) {
library_init_subcomm(bootstr, attr->nranks, attr->rank);
} else {
library_init (bootstr);
library_init(bootstr);
}
}
@@ -367,7 +358,12 @@ static BackendType select_backend_type() {
#endif
[[maybe_unused]] __host__ void rocshmem_init() {
MPIInstance::mpilib_dl_init();
auto ret = MPIInstance::mpilib_dl_init();
if (ret != ROCSHMEM_SUCCESS) {
fprintf(stderr, "Could not initialize MPI library. This initialization method of "
"rocSHMEM requires MPI library to be loaded at runtime. Aborting.\n");
exit(1);
}
library_init(MPI_COMM_WORLD);
}
@@ -458,11 +454,14 @@ __host__ void * rocshmem_ptr(const void * dest, int pe){
backend->~Backend();
CHECK_HIP(hipHostFree(backend));
if (bootstr == nullptr)
if (mpi_instance != nullptr)
delete mpi_instance;
if (bootstr != nullptr)
delete bootstr;
//TODO This crashes
//MPIInstance::mpilib_dl_close();
}
__host__ void rocshmem_query_thread(int *provided) {