Functional tests without MPI support (#343)
* Let functional tests build without external MPI * Fix error conditions when using uuid startup with internal MPI * Do not abort if libibverbs is not found but not using GDA * Enabled RO functional test initialized with TEST_UUID * Reduce load time for ro backend_can_run and prevent mpilib_dlclose crashing * Fix case TEST_UUID=1, ROCSHMEM_BACKEND='' (autoloading gda)
Bu işleme şunda yer alıyor:
işlemeyi yapan:
GitHub
ebeveyn
baaf8091b5
işleme
c99bc21e10
+34
-35
@@ -107,7 +107,7 @@ static BackendType select_backend_type() {
|
||||
DPRINTF("GDABackend::backend_can_run returned success\n");
|
||||
return BackendType::GDA_BACKEND;
|
||||
}
|
||||
if (MPIInstance::mpilib_dl_init() == ROCSHMEM_SUCCESS) {
|
||||
if (ROBackend::backend_can_run() == ROCSHMEM_SUCCESS) {
|
||||
DPRINTF("MPIInstance could dl_init MPI library\n");
|
||||
return BackendType::RO_BACKEND;
|
||||
}
|
||||
@@ -130,6 +130,11 @@ static BackendType select_backend_type() {
|
||||
|
||||
int ret;
|
||||
ret = MPIInstance::mpilib_dl_init();
|
||||
if (ret != ROCSHMEM_SUCCESS) {
|
||||
fprintf(stderr, "Could not initialize MPI library. This initialization method of "
|
||||
"rocSHMEM requires MPI library to be loaded at runtime. Aborting.\n");
|
||||
exit(1);
|
||||
}
|
||||
mpi_instance = new MPIInstance(comm);
|
||||
|
||||
#if defined(USE_GDA) && defined(USE_RO) && defined(USE_IPC)
|
||||
@@ -155,10 +160,6 @@ static BackendType select_backend_type() {
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(GDABackend)));
|
||||
backend = new (backend) GDABackend(comm);
|
||||
#elif defined(USE_RO)
|
||||
if (ret != ROCSHMEM_SUCCESS) {
|
||||
printf("Could not initialize MPI library. RO conduit requires MPI library to be loaded at runtime. Aborting\n");
|
||||
abort();
|
||||
}
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(ROBackend)));
|
||||
backend = new (backend) ROBackend(comm);
|
||||
#elif defined(USE_IPC)
|
||||
@@ -167,7 +168,8 @@ static BackendType select_backend_type() {
|
||||
#endif
|
||||
|
||||
if (!backend) {
|
||||
abort();
|
||||
printf("No Backend could be initialized! Aborting.\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -177,14 +179,16 @@ static BackendType select_backend_type() {
|
||||
|
||||
int ret;
|
||||
ret = MPIInstance::mpilib_dl_init();
|
||||
if (ret == ROCSHMEM_SUCCESS) {
|
||||
printf("Could not initialize MPI library. This initialization method of "
|
||||
"rocSHMEM requires MPI library to be loaded at runtime. Aborting\n");
|
||||
abort();
|
||||
if (ret != ROCSHMEM_SUCCESS) {
|
||||
fprintf(stderr, "Could not initialize MPI library. This initialization method of "
|
||||
"rocSHMEM requires MPI library to be loaded at runtime. Aborting.\n");
|
||||
exit(1);
|
||||
}
|
||||
mpilib_ftable_.Initialized(&initialized);
|
||||
|
||||
if (!initialized) {
|
||||
if (initialized) {
|
||||
mpilib_ftable_.Comm_size (MPI_COMM_WORLD, &world_size);
|
||||
} else {
|
||||
// This is an Open MPI specific solution to retrieve the number of
|
||||
// processes that have been started, value can be checked before MPI_Init
|
||||
char *value = getenv("OMPI_COMM_WORLD_SIZE");
|
||||
@@ -194,13 +198,11 @@ static BackendType select_backend_type() {
|
||||
if (world_size != nranks) {
|
||||
// This solution will require MPI_Sessions. This is planned for the
|
||||
// future, but is not supported in the current version.
|
||||
fprintf (stderr, "Unsupported configuration to initialize rocSHMEM. Please "
|
||||
"initialize the MPI library using MPI_Init first, if you want to "
|
||||
"initialize rocSHMEM with a subset of the processes\n");
|
||||
abort();
|
||||
fprintf(stderr, "Unsupported configuration to initialize rocSHMEM. Please "
|
||||
"initialize the MPI library using MPI_Init first, if you want to "
|
||||
"initialize rocSHMEM with a subset of the processes\n");
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
mpilib_ftable_.Comm_size (MPI_COMM_WORLD, &world_size);
|
||||
}
|
||||
|
||||
if (world_size == nranks) {
|
||||
@@ -252,11 +254,8 @@ static BackendType select_backend_type() {
|
||||
backend = new (backend) GDABackend(bootstrap);
|
||||
break;
|
||||
case BackendType::RO_BACKEND:
|
||||
/* Not sure whether this is a valid configuration. Will leave it in for now */
|
||||
DPRINTF("Initializing RO backend with TCP bootstrapping\n");
|
||||
mpi_instance = new MPIInstance(MPI_COMM_WORLD);
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(ROBackend)));
|
||||
backend = new (backend) ROBackend(MPI_COMM_WORLD);
|
||||
library_init_subcomm(bootstr, bootstr->getNranks(), bootstr->getRank());
|
||||
break;
|
||||
case BackendType::IPC_BACKEND:
|
||||
DPRINTF("Initializing IPC backend with TCP bootstrapping\n");
|
||||
@@ -268,23 +267,15 @@ static BackendType select_backend_type() {
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(GDABackend)));
|
||||
backend = new (backend) GDABackend(bootstrap);
|
||||
#elif defined(USE_RO)
|
||||
/* Not sure whether this is a valid configuration. Will leave it in for now */
|
||||
int ret;
|
||||
ret = MPIInstance::mpilib_dl_init();
|
||||
if (ret != MPI_SUCCESS) {
|
||||
printf("RO Backend requires MPI library to be initialized, even when using uniqueId initializations!\n");
|
||||
abort();
|
||||
}
|
||||
mpi_instance = new MPIInstance(MPI_COMM_WORLD);
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(ROBackend)));
|
||||
backend = new (backend) ROBackend(MPI_COMM_WORLD);
|
||||
library_init_subcomm(bootstr, bootstr->getNranks(), bootstr->getRank());
|
||||
#elif defined(USE_IPC)
|
||||
CHECK_HIP(hipHostMalloc(&backend, sizeof(IPCBackend)));
|
||||
backend = new (backend) IPCBackend(bootstrap);
|
||||
#endif
|
||||
|
||||
if (!backend) {
|
||||
abort();
|
||||
printf("No Backend could be initialized! Aborting.\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -318,7 +309,7 @@ static BackendType select_backend_type() {
|
||||
if (envvar::uniqueid_with_mpi) {
|
||||
library_init_subcomm(bootstr, attr->nranks, attr->rank);
|
||||
} else {
|
||||
library_init (bootstr);
|
||||
library_init(bootstr);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -367,7 +358,12 @@ static BackendType select_backend_type() {
|
||||
#endif
|
||||
|
||||
[[maybe_unused]] __host__ void rocshmem_init() {
|
||||
MPIInstance::mpilib_dl_init();
|
||||
auto ret = MPIInstance::mpilib_dl_init();
|
||||
if (ret != ROCSHMEM_SUCCESS) {
|
||||
fprintf(stderr, "Could not initialize MPI library. This initialization method of "
|
||||
"rocSHMEM requires MPI library to be loaded at runtime. Aborting.\n");
|
||||
exit(1);
|
||||
}
|
||||
library_init(MPI_COMM_WORLD);
|
||||
}
|
||||
|
||||
@@ -458,11 +454,14 @@ __host__ void * rocshmem_ptr(const void * dest, int pe){
|
||||
backend->~Backend();
|
||||
CHECK_HIP(hipHostFree(backend));
|
||||
|
||||
if (bootstr == nullptr)
|
||||
if (mpi_instance != nullptr)
|
||||
delete mpi_instance;
|
||||
|
||||
if (bootstr != nullptr)
|
||||
delete bootstr;
|
||||
|
||||
//TODO This crashes
|
||||
//MPIInstance::mpilib_dl_close();
|
||||
}
|
||||
|
||||
__host__ void rocshmem_query_thread(int *provided) {
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle