Size the single-process GPU sweep from the GPUs that are actually visible.

Each of the five collective test modules (AllGather, AllReduce, Broadcast,
Reduce, ReduceScatter) receives the same change:
  * ngpus is the number of comma-separated entries in ROCR_VISIBLE_DEVICES,
    else in HIP_VISIBLE_DEVICES, else the number of lines in `rocminfo`
    output matching "Device Type:.\s*.GPU".
  * ngpus_single becomes the powers of two up to ngpus, replacing the
    hard-coded ["1","2","4"].
  * A trailing newline is added at the end of each file.

Two corrections relative to the patch as first written:
  * The rocminfo pipeline is a raw string, so `\s` reaches grep without
    being an invalid Python escape sequence; the shell command is unchanged.
  * log2 is only taken when ngpus > 0; with no GPU detected, ngpus_single
    is empty instead of the module failing to import with
    "ValueError: math domain error".

NOTE(review): line breaks, blank context lines and leading whitespace were
reconstructed from a whitespace-collapsed copy of this patch; confirm the
context-line indentation against the tree (or apply with
--ignore-whitespace). The post-image blob ids on the index lines predate
the two corrections above.

diff --git a/test/test_AllGather.py b/test/test_AllGather.py
index 2d3d74bcef..1213de78c7 100644
--- a/test/test_AllGather.py
+++ b/test/test_AllGather.py
@@ -22,12 +22,22 @@
 import os
 import subprocess
 import itertools
+import math
 
 import pytest
 
+ngpus = 0
+if os.environ.get('ROCR_VISIBLE_DEVICES') is not None:
+    ngpus = len(os.environ['ROCR_VISIBLE_DEVICES'].split(","))
+elif os.environ.get('HIP_VISIBLE_DEVICES') is not None:
+    ngpus = len(os.environ['HIP_VISIBLE_DEVICES'].split(","))
+else:
+    ngpus = int(subprocess.check_output(r'rocminfo | grep "Device Type:.\s*.GPU" | wc -l', shell=True))  # raw string: \s is for grep
+log_ngpus = int(math.log2(ngpus)) if ngpus > 0 else -1  # -1 -> empty ngpus_single when no GPU is visible
+
 nthreads = ["1"]
 nprocs = ["2"]
-ngpus_single = ["1","2","4"]
+ngpus_single = [str(2**x) for x in range(log_ngpus+1)]
 ngpus_mpi = ["1","2"]
 byte_range = [("4", "128M")]
 op = ["sum", "prod", "min", "max"]
@@ -99,4 +109,4 @@ def test_AllGatherMPI(request, nthreads, nprocs, ngpus_mpi, byte_range, op, step
         print(rccl_test.stdout)
         pytest.fail("AllGather test error(s) detected.")
 
-    assert rccl_test.returncode == 0
\ No newline at end of file
+    assert rccl_test.returncode == 0
diff --git a/test/test_AllReduce.py b/test/test_AllReduce.py
index b3cb5f99ff..34d22493f4 100644
--- a/test/test_AllReduce.py
+++ b/test/test_AllReduce.py
@@ -22,12 +22,22 @@
 import os
 import subprocess
 import itertools
+import math
 
 import pytest
 
+ngpus = 0
+if os.environ.get('ROCR_VISIBLE_DEVICES') is not None:
+    ngpus = len(os.environ['ROCR_VISIBLE_DEVICES'].split(","))
+elif os.environ.get('HIP_VISIBLE_DEVICES') is not None:
+    ngpus = len(os.environ['HIP_VISIBLE_DEVICES'].split(","))
+else:
+    ngpus = int(subprocess.check_output(r'rocminfo | grep "Device Type:.\s*.GPU" | wc -l', shell=True))  # raw string: \s is for grep
+log_ngpus = int(math.log2(ngpus)) if ngpus > 0 else -1  # -1 -> empty ngpus_single when no GPU is visible
+
 nthreads = ["1"]
 nprocs = ["2"]
-ngpus_single = ["1","2","4"]
+ngpus_single = [str(2**x) for x in range(log_ngpus+1)]
 ngpus_mpi = ["1","2"]
 byte_range = [("4", "128M")]
 op = ["sum", "prod", "min", "max"]
@@ -99,4 +109,4 @@ def test_AllReduceMPI(request, nthreads, nprocs, ngpus_mpi, byte_range, op, step
         print(rccl_test.stdout)
         pytest.fail("AllReduce test error(s) detected.")
 
-    assert rccl_test.returncode == 0
\ No newline at end of file
+    assert rccl_test.returncode == 0
diff --git a/test/test_Broadcast.py b/test/test_Broadcast.py
index f4b8b38363..f6bd9003a9 100644
--- a/test/test_Broadcast.py
+++ b/test/test_Broadcast.py
@@ -22,12 +22,22 @@
 import os
 import subprocess
 import itertools
+import math
 
 import pytest
 
+ngpus = 0
+if os.environ.get('ROCR_VISIBLE_DEVICES') is not None:
+    ngpus = len(os.environ['ROCR_VISIBLE_DEVICES'].split(","))
+elif os.environ.get('HIP_VISIBLE_DEVICES') is not None:
+    ngpus = len(os.environ['HIP_VISIBLE_DEVICES'].split(","))
+else:
+    ngpus = int(subprocess.check_output(r'rocminfo | grep "Device Type:.\s*.GPU" | wc -l', shell=True))  # raw string: \s is for grep
+log_ngpus = int(math.log2(ngpus)) if ngpus > 0 else -1  # -1 -> empty ngpus_single when no GPU is visible
+
 nthreads = ["1"]
 nprocs = ["2"]
-ngpus_single = ["1","2","4"]
+ngpus_single = [str(2**x) for x in range(log_ngpus+1)]
 ngpus_mpi = ["1","2"]
 byte_range = [("4", "128M")]
 op = ["sum", "prod", "min", "max"]
@@ -99,4 +109,4 @@ def test_BroadcastMPI(request, nthreads, nprocs, ngpus_mpi, byte_range, op, step
         print(rccl_test.stdout)
         pytest.fail("Broadcast test error(s) detected.")
 
-    assert rccl_test.returncode == 0
\ No newline at end of file
+    assert rccl_test.returncode == 0
diff --git a/test/test_Reduce.py b/test/test_Reduce.py
index 5df694490d..0e6671e84f 100644
--- a/test/test_Reduce.py
+++ b/test/test_Reduce.py
@@ -22,12 +22,22 @@
 import os
 import subprocess
 import itertools
+import math
 
 import pytest
 
+ngpus = 0
+if os.environ.get('ROCR_VISIBLE_DEVICES') is not None:
+    ngpus = len(os.environ['ROCR_VISIBLE_DEVICES'].split(","))
+elif os.environ.get('HIP_VISIBLE_DEVICES') is not None:
+    ngpus = len(os.environ['HIP_VISIBLE_DEVICES'].split(","))
+else:
+    ngpus = int(subprocess.check_output(r'rocminfo | grep "Device Type:.\s*.GPU" | wc -l', shell=True))  # raw string: \s is for grep
+log_ngpus = int(math.log2(ngpus)) if ngpus > 0 else -1  # -1 -> empty ngpus_single when no GPU is visible
+
 nthreads = ["1"]
 nprocs = ["2"]
-ngpus_single = ["1","2","4"]
+ngpus_single = [str(2**x) for x in range(log_ngpus+1)]
 ngpus_mpi = ["1","2"]
 byte_range = [("4", "128M")]
 op = ["sum", "prod", "min", "max"]
@@ -99,4 +109,4 @@ def test_ReduceMPI(request, nthreads, nprocs, ngpus_mpi, byte_range, op, step_fa
         print(rccl_test.stdout)
         pytest.fail("Reduce test error(s) detected.")
 
-    assert rccl_test.returncode == 0
\ No newline at end of file
+    assert rccl_test.returncode == 0
diff --git a/test/test_ReduceScatter.py b/test/test_ReduceScatter.py
index 66b431b00a..58dd709abf 100644
--- a/test/test_ReduceScatter.py
+++ b/test/test_ReduceScatter.py
@@ -22,12 +22,22 @@
 import os
 import subprocess
 import itertools
+import math
 
 import pytest
 
+ngpus = 0
+if os.environ.get('ROCR_VISIBLE_DEVICES') is not None:
+    ngpus = len(os.environ['ROCR_VISIBLE_DEVICES'].split(","))
+elif os.environ.get('HIP_VISIBLE_DEVICES') is not None:
+    ngpus = len(os.environ['HIP_VISIBLE_DEVICES'].split(","))
+else:
+    ngpus = int(subprocess.check_output(r'rocminfo | grep "Device Type:.\s*.GPU" | wc -l', shell=True))  # raw string: \s is for grep
+log_ngpus = int(math.log2(ngpus)) if ngpus > 0 else -1  # -1 -> empty ngpus_single when no GPU is visible
+
 nthreads = ["1"]
 nprocs = ["2"]
-ngpus_single = ["1","2","4"]
+ngpus_single = [str(2**x) for x in range(log_ngpus+1)]
 ngpus_mpi = ["1","2"]
 byte_range = [("4", "128M")]
 op = ["sum", "prod", "min", "max"]
@@ -99,4 +109,4 @@ def test_ReduceScatterMPI(request, nthreads, nprocs, ngpus_mpi, byte_range, op,
         print(rccl_test.stdout)
         pytest.fail("ReduceScatter test error(s) detected.")
 
-    assert rccl_test.returncode == 0
\ No newline at end of file
+    assert rccl_test.returncode == 0