Scaling tests to #ngpus (#81)
* scaling tests to #ngpus
Signed-off-by: AtlantaPepsi <hyj1999110@gmail.com>
* switching to rocminfo
---------
Signed-off-by: AtlantaPepsi <hyj1999110@gmail.com>
[ROCm/rccl-tests commit: ae3e6357cb]
Šī revīzija ir iekļauta:
@@ -22,12 +22,22 @@
|
||||
import os
|
||||
import subprocess
|
||||
import itertools
|
||||
import math
|
||||
|
||||
import pytest
|
||||
|
||||
# Shell pipeline that counts the GPU agents reported by rocminfo.  Kept as a
# raw string: the "\s" belongs to grep's pattern and is not a Python escape.
ROCMINFO_GPU_COUNT_CMD = r'rocminfo | grep "Device Type:.\s*.GPU" | wc -l'


def count_listed_devices(visible_devices):
    """Return how many non-empty, comma-separated entries *visible_devices* holds.

    Blank entries are ignored, so an empty string yields 0 rather than 1.
    NOTE(review): this treats an empty *_VISIBLE_DEVICES value as "no devices
    exposed" -- confirm that matches the ROCm runtime's interpretation.
    """
    return sum(1 for entry in visible_devices.split(",") if entry.strip())


def detect_ngpus(environ=None):
    """Return the number of GPUs the tests should scale to.

    ROCR_VISIBLE_DEVICES is consulted first, then HIP_VISIBLE_DEVICES; when
    neither is set, the GPU entries printed by ``rocminfo`` are counted.

    Args:
        environ: Mapping to read the variables from; defaults to os.environ.

    Raises:
        subprocess.CalledProcessError: if the rocminfo pipeline exits non-zero.
    """
    if environ is None:
        environ = os.environ
    for variable in ("ROCR_VISIBLE_DEVICES", "HIP_VISIBLE_DEVICES"):
        visible_devices = environ.get(variable)
        if visible_devices is not None:
            return count_listed_devices(visible_devices)
    return int(subprocess.check_output(ROCMINFO_GPU_COUNT_CMD, shell=True))


ngpus = detect_ngpus()
# floor(log2(ngpus)) in exact integer arithmetic.  Unlike math.log2 this does
# not raise when ngpus == 0; it yields -1, which leaves ngpus_single empty.
log_ngpus = ngpus.bit_length() - 1

nthreads = ["1"]
nprocs = ["2"]
# Every power of two that does not exceed the detected GPU count.
ngpus_single = [str(2**x) for x in range(log_ngpus + 1)]
ngpus_mpi = ["1", "2"]
byte_range = [("4", "128M")]
op = ["sum", "prod", "min", "max"]
|
||||
|
||||
@@ -22,12 +22,22 @@
|
||||
import os
|
||||
import subprocess
|
||||
import itertools
|
||||
import math
|
||||
|
||||
import pytest
|
||||
|
||||
# Shell pipeline that counts the GPU agents reported by rocminfo.  Kept as a
# raw string: the "\s" belongs to grep's pattern and is not a Python escape.
ROCMINFO_GPU_COUNT_CMD = r'rocminfo | grep "Device Type:.\s*.GPU" | wc -l'


def count_listed_devices(visible_devices):
    """Return how many non-empty, comma-separated entries *visible_devices* holds.

    Blank entries are ignored, so an empty string yields 0 rather than 1.
    NOTE(review): this treats an empty *_VISIBLE_DEVICES value as "no devices
    exposed" -- confirm that matches the ROCm runtime's interpretation.
    """
    return sum(1 for entry in visible_devices.split(",") if entry.strip())


def detect_ngpus(environ=None):
    """Return the number of GPUs the tests should scale to.

    ROCR_VISIBLE_DEVICES is consulted first, then HIP_VISIBLE_DEVICES; when
    neither is set, the GPU entries printed by ``rocminfo`` are counted.

    Args:
        environ: Mapping to read the variables from; defaults to os.environ.

    Raises:
        subprocess.CalledProcessError: if the rocminfo pipeline exits non-zero.
    """
    if environ is None:
        environ = os.environ
    for variable in ("ROCR_VISIBLE_DEVICES", "HIP_VISIBLE_DEVICES"):
        visible_devices = environ.get(variable)
        if visible_devices is not None:
            return count_listed_devices(visible_devices)
    return int(subprocess.check_output(ROCMINFO_GPU_COUNT_CMD, shell=True))


ngpus = detect_ngpus()
# floor(log2(ngpus)) in exact integer arithmetic.  Unlike math.log2 this does
# not raise when ngpus == 0; it yields -1, which leaves ngpus_single empty.
log_ngpus = ngpus.bit_length() - 1

nthreads = ["1"]
nprocs = ["2"]
# Every power of two that does not exceed the detected GPU count.
ngpus_single = [str(2**x) for x in range(log_ngpus + 1)]
ngpus_mpi = ["1", "2"]
byte_range = [("4", "128M")]
op = ["sum", "prod", "min", "max"]
|
||||
|
||||
@@ -22,12 +22,22 @@
|
||||
import os
|
||||
import subprocess
|
||||
import itertools
|
||||
import math
|
||||
|
||||
import pytest
|
||||
|
||||
# Shell pipeline that counts the GPU agents reported by rocminfo.  Kept as a
# raw string: the "\s" belongs to grep's pattern and is not a Python escape.
ROCMINFO_GPU_COUNT_CMD = r'rocminfo | grep "Device Type:.\s*.GPU" | wc -l'


def count_listed_devices(visible_devices):
    """Return how many non-empty, comma-separated entries *visible_devices* holds.

    Blank entries are ignored, so an empty string yields 0 rather than 1.
    NOTE(review): this treats an empty *_VISIBLE_DEVICES value as "no devices
    exposed" -- confirm that matches the ROCm runtime's interpretation.
    """
    return sum(1 for entry in visible_devices.split(",") if entry.strip())


def detect_ngpus(environ=None):
    """Return the number of GPUs the tests should scale to.

    ROCR_VISIBLE_DEVICES is consulted first, then HIP_VISIBLE_DEVICES; when
    neither is set, the GPU entries printed by ``rocminfo`` are counted.

    Args:
        environ: Mapping to read the variables from; defaults to os.environ.

    Raises:
        subprocess.CalledProcessError: if the rocminfo pipeline exits non-zero.
    """
    if environ is None:
        environ = os.environ
    for variable in ("ROCR_VISIBLE_DEVICES", "HIP_VISIBLE_DEVICES"):
        visible_devices = environ.get(variable)
        if visible_devices is not None:
            return count_listed_devices(visible_devices)
    return int(subprocess.check_output(ROCMINFO_GPU_COUNT_CMD, shell=True))


ngpus = detect_ngpus()
# floor(log2(ngpus)) in exact integer arithmetic.  Unlike math.log2 this does
# not raise when ngpus == 0; it yields -1, which leaves ngpus_single empty.
log_ngpus = ngpus.bit_length() - 1

nthreads = ["1"]
nprocs = ["2"]
# Every power of two that does not exceed the detected GPU count.
ngpus_single = [str(2**x) for x in range(log_ngpus + 1)]
ngpus_mpi = ["1", "2"]
byte_range = [("4", "128M")]
op = ["sum", "prod", "min", "max"]
|
||||
|
||||
@@ -22,12 +22,22 @@
|
||||
import os
|
||||
import subprocess
|
||||
import itertools
|
||||
import math
|
||||
|
||||
import pytest
|
||||
|
||||
# Shell pipeline that counts the GPU agents reported by rocminfo.  Kept as a
# raw string: the "\s" belongs to grep's pattern and is not a Python escape.
ROCMINFO_GPU_COUNT_CMD = r'rocminfo | grep "Device Type:.\s*.GPU" | wc -l'


def count_listed_devices(visible_devices):
    """Return how many non-empty, comma-separated entries *visible_devices* holds.

    Blank entries are ignored, so an empty string yields 0 rather than 1.
    NOTE(review): this treats an empty *_VISIBLE_DEVICES value as "no devices
    exposed" -- confirm that matches the ROCm runtime's interpretation.
    """
    return sum(1 for entry in visible_devices.split(",") if entry.strip())


def detect_ngpus(environ=None):
    """Return the number of GPUs the tests should scale to.

    ROCR_VISIBLE_DEVICES is consulted first, then HIP_VISIBLE_DEVICES; when
    neither is set, the GPU entries printed by ``rocminfo`` are counted.

    Args:
        environ: Mapping to read the variables from; defaults to os.environ.

    Raises:
        subprocess.CalledProcessError: if the rocminfo pipeline exits non-zero.
    """
    if environ is None:
        environ = os.environ
    for variable in ("ROCR_VISIBLE_DEVICES", "HIP_VISIBLE_DEVICES"):
        visible_devices = environ.get(variable)
        if visible_devices is not None:
            return count_listed_devices(visible_devices)
    return int(subprocess.check_output(ROCMINFO_GPU_COUNT_CMD, shell=True))


ngpus = detect_ngpus()
# floor(log2(ngpus)) in exact integer arithmetic.  Unlike math.log2 this does
# not raise when ngpus == 0; it yields -1, which leaves ngpus_single empty.
log_ngpus = ngpus.bit_length() - 1

nthreads = ["1"]
nprocs = ["2"]
# Every power of two that does not exceed the detected GPU count.
ngpus_single = [str(2**x) for x in range(log_ngpus + 1)]
ngpus_mpi = ["1", "2"]
byte_range = [("4", "128M")]
op = ["sum", "prod", "min", "max"]
|
||||
|
||||
@@ -22,12 +22,22 @@
|
||||
import os
|
||||
import subprocess
|
||||
import itertools
|
||||
import math
|
||||
|
||||
import pytest
|
||||
|
||||
# Shell pipeline that counts the GPU agents reported by rocminfo.  Kept as a
# raw string: the "\s" belongs to grep's pattern and is not a Python escape.
ROCMINFO_GPU_COUNT_CMD = r'rocminfo | grep "Device Type:.\s*.GPU" | wc -l'


def count_listed_devices(visible_devices):
    """Return how many non-empty, comma-separated entries *visible_devices* holds.

    Blank entries are ignored, so an empty string yields 0 rather than 1.
    NOTE(review): this treats an empty *_VISIBLE_DEVICES value as "no devices
    exposed" -- confirm that matches the ROCm runtime's interpretation.
    """
    return sum(1 for entry in visible_devices.split(",") if entry.strip())


def detect_ngpus(environ=None):
    """Return the number of GPUs the tests should scale to.

    ROCR_VISIBLE_DEVICES is consulted first, then HIP_VISIBLE_DEVICES; when
    neither is set, the GPU entries printed by ``rocminfo`` are counted.

    Args:
        environ: Mapping to read the variables from; defaults to os.environ.

    Raises:
        subprocess.CalledProcessError: if the rocminfo pipeline exits non-zero.
    """
    if environ is None:
        environ = os.environ
    for variable in ("ROCR_VISIBLE_DEVICES", "HIP_VISIBLE_DEVICES"):
        visible_devices = environ.get(variable)
        if visible_devices is not None:
            return count_listed_devices(visible_devices)
    return int(subprocess.check_output(ROCMINFO_GPU_COUNT_CMD, shell=True))


ngpus = detect_ngpus()
# floor(log2(ngpus)) in exact integer arithmetic.  Unlike math.log2 this does
# not raise when ngpus == 0; it yields -1, which leaves ngpus_single empty.
log_ngpus = ngpus.bit_length() - 1

nthreads = ["1"]
nprocs = ["2"]
# Every power of two that does not exceed the detected GPU count.
ngpus_single = [str(2**x) for x in range(log_ngpus + 1)]
ngpus_mpi = ["1", "2"]
byte_range = [("4", "128M")]
op = ["sum", "prod", "min", "max"]
|
||||
|
||||
Atsaukties uz šo jaunā problēmā
Block a user