[rocprofiler-compute] Improve native tool discovery and partition detection (#2630)

* Improve native tool discovery and partition detection

- Enhanced native tool path resolution to support CMAKE_INSTALL_LIBDIR variations
  (lib, lib64, lib32, etc.) using glob pattern matching
- Extracted path variables to avoid duplication in error messages
- Improved error message clarity by showing exact paths searched for .so and .cpp files
- Simplified code path construction using consistent Path.resolve().parents[x] syntax

- Fixed redundant partition warnings on pre-MI300 GPUs by adding architecture check
- Only query compute/memory partition on MI300+ series (gfx940+)
- Added proper type hints for gpu_arch parameter
- Moved gpu_info extraction after soc_info to ensure gpu_arch is available
- Improved code comments for MI300 series threshold

* Handle gpu arch like a hex string
Šī revīzija ir iekļauta:
vedithal-amd
2026-01-16 10:36:19 -05:00
revīziju iesūtīja GitHub
vecāks e6236417f7
revīzija f64d8e0f43
3 mainīti faili ar 50 papildinājumiem un 22 dzēšanām
@@ -49,6 +49,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
* Fix issue where counter collection data was empty when profiling workload which spawn multiple child processes * Fix issue where counter collection data was empty when profiling workload which spawn multiple child processes
* Fix redundant "compute/memory partition not found" warnings on GPUs older than the MI300 series by skipping partition checks on those GPUs
### Removed ### Removed
* Removed "VL1 Lat" metric for AMD Instinct MI300 series GPUs, due to MI300 series not supporting TCP_TCP_LATENCY_sum counter. * Removed "VL1 Lat" metric for AMD Instinct MI300 series GPUs, due to MI300 series not supporting TCP_TCP_LATENCY_sum counter.
@@ -531,12 +531,15 @@ class RocProfCompute_Base:
and not args.attach_pid and not args.attach_pid
): ):
# Use native counter collection tool # Use native counter collection tool
# Use lib* glob pattern to handle CMAKE_INSTALL_LIBDIR variations
# (lib, lib64, lib32, etc. depending on distribution)
native_tool_base_path = Path(sys.argv[0]).resolve().parents[2]
native_tool_glob_pattern = (
"lib*/rocprofiler-compute/librocprofiler-compute-tool.so"
)
try: try:
native_tool_path = str( native_tool_path = str(
Path(sys.argv[0]).resolve().parents[2] next(native_tool_base_path.glob(native_tool_glob_pattern))
/ "lib"
/ "rocprofiler-compute"
/ "librocprofiler-compute-tool.so"
) )
except Exception as e: except Exception as e:
console_debug( console_debug(
@@ -552,6 +555,7 @@ class RocProfCompute_Base:
) )
/ "librocprofiler-compute-tool.so" / "librocprofiler-compute-tool.so"
) )
native_tool_cpp_path = Path(__file__).resolve().parents[1] / "lib"
link_libraries = ("rocprofiler-sdk",) link_libraries = ("rocprofiler-sdk",)
build_command = ( build_command = (
# Create shared object # Create shared object
@@ -564,10 +568,10 @@ class RocProfCompute_Base:
# rocprofiler sdk library path # rocprofiler sdk library path
f"-L {str(Path(args.rocprofiler_sdk_tool_path).parent.parent)} " f"-L {str(Path(args.rocprofiler_sdk_tool_path).parent.parent)} "
# native tool source files (tool.cpp and helper.cpp) # native tool source files (tool.cpp and helper.cpp)
f"{str(Path(__file__).parent.parent)}/" f"{native_tool_cpp_path}/"
"lib/rocprofiler_compute_tool.cpp " "rocprofiler_compute_tool.cpp "
f"{str(Path(__file__).parent.parent)}/" f"{native_tool_cpp_path}/"
"lib/helper.cpp " "helper.cpp "
# temporary shared object for native tool # temporary shared object for native tool
f"-o {native_tool_path}" f"-o {native_tool_path}"
) )
@@ -575,7 +579,15 @@ class RocProfCompute_Base:
success, output = capture_subprocess_output(shlex.split(build_command)) success, output = capture_subprocess_output(shlex.split(build_command))
console_debug(f"Build output: {output}") console_debug(f"Build output: {output}")
if not success: if not success:
console_error("Failed to build native counter collection tool.") console_error(
"Failed to use native counter collection tool.\n"
"Could not find pre-built .so file at: "
f"{native_tool_base_path / native_tool_glob_pattern}\n"
"Could not find source .cpp files in folder: "
f"{native_tool_cpp_path}\n"
"Please ensure the native tool library is installed "
"or source files are present."
)
if self.__profiler == "rocprofiler-sdk": if self.__profiler == "rocprofiler-sdk":
options = self.get_profiler_options(native_tool_path=native_tool_path) options = self.get_profiler_options(native_tool_path=native_tool_path)
@@ -174,15 +174,15 @@ def generate_machine_specs(
########################################## ##########################################
machine_info = extract_machine_info() machine_info = extract_machine_info()
# FIXME: use device
# Load amd-smi data
gpu_info = extract_gpu_info()
########################################## ##########################################
## B. SoC Specs ## B. SoC Specs
########################################## ##########################################
soc_info = extract_soc_info() soc_info = extract_soc_info()
# FIXME: use device
# Load amd-smi data
gpu_info = extract_gpu_info(gpu_arch=soc_info["gpu_arch"])
# Combine all specifications # Combine all specifications
with amdsmi_ctx(): with amdsmi_ctx():
specs = MachineSpecs( specs = MachineSpecs(
@@ -269,7 +269,16 @@ def extract_machine_info() -> dict[str, Any]:
@demarcate @demarcate
def extract_gpu_info() -> dict[str, Any]: def extract_gpu_info(gpu_arch: Optional[str]) -> dict[str, Any]:
# Partitioning is only supported on the MI300 series and newer.
# The three characters after "gfx" are parsed as hex and must be >= 0x940 (gfx940+).
is_partition_supported = False
if gpu_arch and gpu_arch.startswith("gfx") and len(gpu_arch) >= 6:
try:
is_partition_supported = int(gpu_arch[3:6], 16) >= 0x940
except ValueError:
pass # Invalid hex string, keep is_partition_supported as False
result: dict[str, Optional[str]] = { result: dict[str, Optional[str]] = {
"vbios": None, "vbios": None,
"compute_partition": None, "compute_partition": None,
@@ -278,17 +287,22 @@ def extract_gpu_info() -> dict[str, Any]:
with amdsmi_ctx(): with amdsmi_ctx():
result["vbios"] = get_gpu_vbios_part_number() result["vbios"] = get_gpu_vbios_part_number()
result["compute_partition"] = get_gpu_compute_partition() if is_partition_supported:
result["memory_partition"] = get_gpu_memory_partition() result["compute_partition"] = get_gpu_compute_partition()
result["memory_partition"] = get_gpu_memory_partition()
else:
result["compute_partition"] = "N/A"
result["memory_partition"] = "N/A"
# Apply defaults and warnings # Apply defaults and warnings
if result["compute_partition"] == "N/A" or not result["compute_partition"]: if is_partition_supported:
console_warning("Cannot detect accelerator partition from amd-smi.") if result["compute_partition"] == "N/A" or not result["compute_partition"]:
console_warning("Applying default accelerator partition: SPX") console_warning("Cannot detect accelerator partition from amd-smi.")
result["compute_partition"] = "SPX" console_warning("Applying default accelerator partition: SPX")
result["compute_partition"] = "SPX"
if result["memory_partition"] == "N/A" or not result["memory_partition"]: if result["memory_partition"] == "N/A" or not result["memory_partition"]:
console_warning("Cannot detect memory partition from amd-smi.") console_warning("Cannot detect memory partition from amd-smi.")
console_debug( console_debug(
f"vbios is {result['vbios']}, compute partition is " f"vbios is {result['vbios']}, compute partition is "