Scaling tests to #ngpus (#81)

* scaling tests to #ngpus

Signed-off-by: AtlantaPepsi <hyj1999110@gmail.com>

* switching to rocminfo

---------

Signed-off-by: AtlantaPepsi <hyj1999110@gmail.com>

[ROCm/rccl-tests commit: ae3e6357cb]
Šī revīzija ir iekļauta:
Tim
2024-09-10 19:05:22 -04:00
revīziju iesūtīja GitHub
vecāks ee4dd140bf
revīzija c5ab7dc5b5
5 mainīti faili ar 60 papildinājumiem un 10 dzēšanām
@@ -22,12 +22,22 @@
import os
import subprocess
import itertools
import math
import pytest
# Number of GPUs the tests may use. A device-visibility environment variable,
# when set, takes precedence over querying the hardware; ROCR_VISIBLE_DEVICES
# is consulted before HIP_VISIBLE_DEVICES.
_visible_devices = os.environ.get('ROCR_VISIBLE_DEVICES')
if _visible_devices is None:
    _visible_devices = os.environ.get('HIP_VISIBLE_DEVICES')

if _visible_devices is not None:
    # Count only non-blank entries: "".split(",") yields [""], which would
    # otherwise be reported as one device when the variable is set but empty.
    ngpus = len([dev for dev in _visible_devices.split(",") if dev.strip()])
else:
    # Count the rocminfo output lines that describe a GPU device. The command
    # is a raw string so that "\s" reaches grep unchanged instead of being an
    # invalid Python escape sequence.
    ngpus = int(subprocess.check_output(
        r'rocminfo | grep "Device Type:.\s*.GPU" | wc -l', shell=True))

# floor(log2(ngpus)) computed exactly on integers. With zero GPUs this is -1
# (math.log2(0) would raise ValueError), which leaves ngpus_single empty.
log_ngpus = ngpus.bit_length() - 1

nthreads = ["1"]
nprocs = ["2"]
# Every power of two that does not exceed the detected GPU count.
ngpus_single = [str(2**x) for x in range(log_ngpus + 1)]
ngpus_mpi = ["1", "2"]
byte_range = [("4", "128M")]
op = ["sum", "prod", "min", "max"]
@@ -22,12 +22,22 @@
import os
import subprocess
import itertools
import math
import pytest
# Number of GPUs the tests may use. A device-visibility environment variable,
# when set, takes precedence over querying the hardware; ROCR_VISIBLE_DEVICES
# is consulted before HIP_VISIBLE_DEVICES.
_visible_devices = os.environ.get('ROCR_VISIBLE_DEVICES')
if _visible_devices is None:
    _visible_devices = os.environ.get('HIP_VISIBLE_DEVICES')

if _visible_devices is not None:
    # Count only non-blank entries: "".split(",") yields [""], which would
    # otherwise be reported as one device when the variable is set but empty.
    ngpus = len([dev for dev in _visible_devices.split(",") if dev.strip()])
else:
    # Count the rocminfo output lines that describe a GPU device. The command
    # is a raw string so that "\s" reaches grep unchanged instead of being an
    # invalid Python escape sequence.
    ngpus = int(subprocess.check_output(
        r'rocminfo | grep "Device Type:.\s*.GPU" | wc -l', shell=True))

# floor(log2(ngpus)) computed exactly on integers. With zero GPUs this is -1
# (math.log2(0) would raise ValueError), which leaves ngpus_single empty.
log_ngpus = ngpus.bit_length() - 1

nthreads = ["1"]
nprocs = ["2"]
# Every power of two that does not exceed the detected GPU count.
ngpus_single = [str(2**x) for x in range(log_ngpus + 1)]
ngpus_mpi = ["1", "2"]
byte_range = [("4", "128M")]
op = ["sum", "prod", "min", "max"]
@@ -22,12 +22,22 @@
import os
import subprocess
import itertools
import math
import pytest
# Number of GPUs the tests may use. A device-visibility environment variable,
# when set, takes precedence over querying the hardware; ROCR_VISIBLE_DEVICES
# is consulted before HIP_VISIBLE_DEVICES.
_visible_devices = os.environ.get('ROCR_VISIBLE_DEVICES')
if _visible_devices is None:
    _visible_devices = os.environ.get('HIP_VISIBLE_DEVICES')

if _visible_devices is not None:
    # Count only non-blank entries: "".split(",") yields [""], which would
    # otherwise be reported as one device when the variable is set but empty.
    ngpus = len([dev for dev in _visible_devices.split(",") if dev.strip()])
else:
    # Count the rocminfo output lines that describe a GPU device. The command
    # is a raw string so that "\s" reaches grep unchanged instead of being an
    # invalid Python escape sequence.
    ngpus = int(subprocess.check_output(
        r'rocminfo | grep "Device Type:.\s*.GPU" | wc -l', shell=True))

# floor(log2(ngpus)) computed exactly on integers. With zero GPUs this is -1
# (math.log2(0) would raise ValueError), which leaves ngpus_single empty.
log_ngpus = ngpus.bit_length() - 1

nthreads = ["1"]
nprocs = ["2"]
# Every power of two that does not exceed the detected GPU count.
ngpus_single = [str(2**x) for x in range(log_ngpus + 1)]
ngpus_mpi = ["1", "2"]
byte_range = [("4", "128M")]
op = ["sum", "prod", "min", "max"]
+11 -1
Parādīt failu
@@ -22,12 +22,22 @@
import os
import subprocess
import itertools
import math
import pytest
# Number of GPUs the tests may use. A device-visibility environment variable,
# when set, takes precedence over querying the hardware; ROCR_VISIBLE_DEVICES
# is consulted before HIP_VISIBLE_DEVICES.
_visible_devices = os.environ.get('ROCR_VISIBLE_DEVICES')
if _visible_devices is None:
    _visible_devices = os.environ.get('HIP_VISIBLE_DEVICES')

if _visible_devices is not None:
    # Count only non-blank entries: "".split(",") yields [""], which would
    # otherwise be reported as one device when the variable is set but empty.
    ngpus = len([dev for dev in _visible_devices.split(",") if dev.strip()])
else:
    # Count the rocminfo output lines that describe a GPU device. The command
    # is a raw string so that "\s" reaches grep unchanged instead of being an
    # invalid Python escape sequence.
    ngpus = int(subprocess.check_output(
        r'rocminfo | grep "Device Type:.\s*.GPU" | wc -l', shell=True))

# floor(log2(ngpus)) computed exactly on integers. With zero GPUs this is -1
# (math.log2(0) would raise ValueError), which leaves ngpus_single empty.
log_ngpus = ngpus.bit_length() - 1

nthreads = ["1"]
nprocs = ["2"]
# Every power of two that does not exceed the detected GPU count.
ngpus_single = [str(2**x) for x in range(log_ngpus + 1)]
ngpus_mpi = ["1", "2"]
byte_range = [("4", "128M")]
op = ["sum", "prod", "min", "max"]
@@ -22,12 +22,22 @@
import os
import subprocess
import itertools
import math
import pytest
# Number of GPUs the tests may use. A device-visibility environment variable,
# when set, takes precedence over querying the hardware; ROCR_VISIBLE_DEVICES
# is consulted before HIP_VISIBLE_DEVICES.
_visible_devices = os.environ.get('ROCR_VISIBLE_DEVICES')
if _visible_devices is None:
    _visible_devices = os.environ.get('HIP_VISIBLE_DEVICES')

if _visible_devices is not None:
    # Count only non-blank entries: "".split(",") yields [""], which would
    # otherwise be reported as one device when the variable is set but empty.
    ngpus = len([dev for dev in _visible_devices.split(",") if dev.strip()])
else:
    # Count the rocminfo output lines that describe a GPU device. The command
    # is a raw string so that "\s" reaches grep unchanged instead of being an
    # invalid Python escape sequence.
    ngpus = int(subprocess.check_output(
        r'rocminfo | grep "Device Type:.\s*.GPU" | wc -l', shell=True))

# floor(log2(ngpus)) computed exactly on integers. With zero GPUs this is -1
# (math.log2(0) would raise ValueError), which leaves ngpus_single empty.
log_ngpus = ngpus.bit_length() - 1

nthreads = ["1"]
nprocs = ["2"]
# Every power of two that does not exceed the detected GPU count.
ngpus_single = [str(2**x) for x in range(log_ngpus + 1)]
ngpus_mpi = ["1", "2"]
byte_range = [("4", "128M")]
op = ["sum", "prod", "min", "max"]