replace rocm-smi with amd-smi cmd (#612)
[ROCm/rocprofiler-compute commit: 0c6cec5671]
This commit is contained in:
committed by
GitHub
parent
eba173de5e
commit
83edd97f78
@@ -16,6 +16,7 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
### Changed
|
||||
|
||||
* Change normal_unit default to per_kernel
|
||||
* Change dependency from rocm-smi to amd-smi
|
||||
|
||||
### Resolved issues
|
||||
|
||||
|
||||
@@ -164,8 +164,14 @@ class OmniSoC_Base:
|
||||
)
|
||||
|
||||
# we get the max mclk from rocm-smi --showmclkrange
|
||||
rocm_smi_mclk = run(["rocm-smi", "--showmclkrange"], exit_on_error=True)
|
||||
self._mspec.max_mclk = search(r"(\d+)Mhz\s*$", rocm_smi_mclk)
|
||||
# Regular expression to extract the max memory clock (third frequency level in MEM)
|
||||
memory_clock_pattern = (
|
||||
r"MEM:\s*[^:]*FREQUENCY_LEVELS:\s*(?:\d+: \d+ MHz\s*){2}(\d+)\s*MHz"
|
||||
)
|
||||
amd_smi_mclk = run(["amd-smi", "static"], exit_on_error=True)
|
||||
self._mspec.max_mclk = search(memory_clock_pattern, amd_smi_mclk)
|
||||
|
||||
console_debug("max mem clock is {}".format(self._mspec.max_mclk))
|
||||
|
||||
# these are just max's now, because the parsing was broken and this was inconsistent
|
||||
# with how we use the clocks elsewhere (all max, all the time)
|
||||
|
||||
@@ -39,6 +39,7 @@ import pandas as pd
|
||||
import config
|
||||
from utils.tty import get_table_string
|
||||
from utils.utils import (
|
||||
console_debug,
|
||||
console_error,
|
||||
console_log,
|
||||
console_warning,
|
||||
@@ -135,18 +136,25 @@ def generate_machine_specs(args, sysinfo: dict = None):
|
||||
linux_distro = ""
|
||||
rocm_version = get_rocm_ver().strip()
|
||||
# FIXME: use device
|
||||
vbios = search(r"VBIOS version: (.*?)$", run(["rocm-smi", "-v"], exit_on_error=True))
|
||||
compute_partition = search(
|
||||
r"Compute Partition:\s*(\w+)", run(["rocm-smi", "--showcomputepartition"])
|
||||
)
|
||||
|
||||
vbios_pattern = r"PART_NUMBER:\s*(\S+)"
|
||||
compute_partition_pattern = r"COMPUTE_PARTITION:\s*(\S+)"
|
||||
memory_partition_pattern = r"MEMORY_PARTITION:\s*(\S+)"
|
||||
|
||||
vbios = search(vbios_pattern, run(["amd-smi", "static"], exit_on_error=True))
|
||||
compute_partition = search(compute_partition_pattern, run(["amd-smi", "static"]))
|
||||
if compute_partition is None:
|
||||
compute_partition = "NA"
|
||||
memory_partition = search(
|
||||
r"Memory Partition:\s*(\w+)", run(["rocm-smi", "--showmemorypartition"])
|
||||
)
|
||||
memory_partition = search(memory_partition_pattern, run(["amd-smi", "static"]))
|
||||
if memory_partition is None:
|
||||
memory_partition = "NA"
|
||||
|
||||
console_debug(
|
||||
"vbios is {}, compute partition is {}, memory partition is {}".format(
|
||||
vbios, compute_partition, memory_partition
|
||||
)
|
||||
)
|
||||
|
||||
##########################################
|
||||
## B. SoC Specs
|
||||
##########################################
|
||||
@@ -628,9 +636,9 @@ def run(cmd, exit_on_error=False):
|
||||
)
|
||||
|
||||
if exit_on_error:
|
||||
if cmd[0] == "rocm-smi":
|
||||
if cmd[0] == "amd-smi":
|
||||
if p.returncode != 2 and p.returncode != 0:
|
||||
console_error("No GPU detected. Unable to load rocm-smi")
|
||||
console_error("No GPU detected. Unable to load amd-smi")
|
||||
elif p.returncode != 0:
|
||||
console_error("Command [%s] failed with non-zero exit code" % cmd)
|
||||
return p.stdout.decode("utf-8")
|
||||
|
||||
@@ -263,8 +263,8 @@ def counter_compare(test_name, errors_pd, baseline_df, run_df, threshold=5):
|
||||
|
||||
def run(cmd):
|
||||
p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
if cmd[0] == "rocm-smi" and p.returncode == 8:
|
||||
print("ERROR: No GPU detected. Unable to load rocm-smi")
|
||||
if cmd[0] == "amd-smi" and p.returncode == 8:
|
||||
print("ERROR: No GPU detected. Unable to load amd-smi")
|
||||
assert 0
|
||||
return p.stdout.decode("ascii")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user