[rocprofiler-compute] Support new attach/detach API (#2642)

* Removed attach tool library path

* Support new attach/detach API

* New attach/detach API was introduced in
  https://github.com/ROCm/rocm-systems/pull/1653

* Provide backward compatibility with old api

* Stabilize attach/detach tests by adding sleep to help workload get
  ready for attachment

* Fix typo in test name

---------

Co-authored-by: Vignesh Edithal <Vignesh.Edithal@amd.com>
Co-authored-by: Fei Zheng <44449748+feizheng10@users.noreply.github.com>
This commit is contained in:
abchoudh-amd
2026-01-20 02:30:14 +05:30
zatwierdzone przez GitHub
rodzic 1c5aa2d4e7
commit dd149d3957
3 zmienionych plików z 102 dodań i 46 usunięć
@@ -92,7 +92,6 @@ class rocprofiler_sdk_profiler(RocProfCompute_Base):
/ "librocprofiler-sdk-rocattach.so"
)
options.update({
"ROCPROF_ATTACH_TOOL_LIBRARY": rocprofiler_sdk_tool_path,
"ROCPROF_ATTACH_LIBRARY": rocprofiler_attach_library_path,
"ROCPROF_ATTACH_PID": args.attach_pid,
})
@@ -235,6 +235,101 @@ def detect_rocprof(args: argparse.Namespace) -> str:
return rocprof_cmd
def perform_attach_detach(new_env: dict[str, str], options: dict[str, Any]) -> None:
@contextmanager
def temporary_env(env_vars: dict[str, str]) -> Generator[None, None, None]:
"""
Temporarily change the environment variable of this application.
"""
original_env = os.environ.copy()
os.environ.update({k: str(v) for k, v in env_vars.items()})
try:
yield
finally:
os.environ.clear()
os.environ.update(original_env)
with temporary_env(new_env):
libname = options["ROCPROF_ATTACH_LIBRARY"]
try:
c_lib = ctypes.CDLL(libname)
if c_lib is None:
console_error(f"Error opening {libname}")
except Exception as e:
console_error(f"Error loading {libname}: {e}")
# Set argument and return types for attach/detach functions
try:
# old attach/detach API
c_lib.attach.argtypes = [ctypes.c_uint]
except Exception as e:
console_debug(
"Error setting old attach/detach API argument "
f"types: {e}, trying new API"
)
try:
# new attach/detach API
c_lib.rocattach_attach.restype = ctypes.c_int
c_lib.rocattach_attach.argtypes = [ctypes.c_int]
c_lib.rocattach_detach.restype = ctypes.c_int
c_lib.rocattach_detach.argtypes = [ctypes.c_int]
except Exception as e:
console_error(
f"Error setting attach/detach function argument types: {e}"
)
pid = options["ROCPROF_ATTACH_PID"]
if pid is None:
console_error("Mode of attach/detach must have setup for process ID")
try:
# old attach/detach API
c_lib.attach(int(pid))
except Exception as e:
console_debug(f"Error attaching with old API: {e}, trying new API")
try:
# new attach/detach API
attach_status = c_lib.rocattach_attach(int(pid))
if attach_status != 0:
console_error(
f"Error attaching to process {pid}, "
f"rocattach_attach returned {attach_status}"
)
except Exception as e:
console_error(f"Error attaching to process {pid}: {e}")
duration = os.environ.get("ROCPROF_ATTACH_DURATION", None)
if duration is None:
console_log(
f"\033[93mAttach to process with ID {pid} is successful, "
"Press Enter to detach...\033[0m"
)
input()
else:
console_log(
f"\033[93mAttach to process with ID {pid} is successful, "
f"detach will happen in {duration} milliseconds...\033[0m"
)
time.sleep(int(duration) / 1000)
try:
# old attach/detach API
c_lib.detach(int(pid))
except Exception as e:
console_debug(f"Error detaching with old API: {e}, trying new API")
try:
# new attach/detach API
detach_status = c_lib.rocattach_detach(int(pid))
if detach_status != 0:
console_error(
f"Error detaching from process {pid}, "
f"rocattach_detach returned {detach_status}"
)
except Exception as e:
console_error(f"Error detaching from process {pid}: {e}")
def capture_subprocess_output(
subprocess_args: list[str],
new_env: Optional[dict[str, str]] = None,
@@ -788,49 +883,7 @@ def run_prof(
console_debug(f"rocprof sdk env vars: {new_env}")
if is_mode_live_attach:
@contextmanager
def temporary_env(env_vars: dict[str, str]) -> Generator[None, None, None]:
"""
Temporarily change the environment variable of this application.
"""
original_env = os.environ.copy()
os.environ.update({k: str(v) for k, v in env_vars.items()})
try:
yield
finally:
os.environ.clear()
os.environ.update(original_env)
with temporary_env(new_env):
libname = options["ROCPROF_ATTACH_LIBRARY"]
c_lib = ctypes.CDLL(libname)
if c_lib is None:
console_error(f"Error opening {libname}")
c_lib.attach.argtypes = [ctypes.c_uint]
pid = options["ROCPROF_ATTACH_PID"]
if pid is None:
console_error(
"Mode of attach/detach must have setup for process ID"
)
c_lib.attach(int(pid))
duration = os.environ.get("ROCPROF_ATTACH_DURATION", None)
if duration is None:
console_log(
f"\033[93mAttach to process with ID {pid} is successful, "
"Press Enter to detach...\033[0m"
)
input()
else:
console_log(
f"\033[93mAttach to process with ID {pid} is successful, "
f"detach will happen in {duration} milliseconds...\033[0m"
)
time.sleep(int(duration) / 1000)
c_lib.detach(int(pid))
perform_attach_detach(new_env, options)
else:
if app_cmd is None:
console_error(
@@ -29,6 +29,7 @@ import re
import sqlite3
import subprocess
import sys
import time
from pathlib import Path
import numpy as np
@@ -2268,6 +2269,7 @@ def test_live_attach_detach_block(binary_handler_profile_rocprof_compute):
try:
# Start workload
process_workload = subprocess.Popen(config["app_hip_dynamic_shared"], env=env)
time.sleep(5) # Give workload time to start
attach_detach = {
"attach_pid": process_workload.pid,
@@ -2316,8 +2318,9 @@ def test_live_attach_detach_block_thread_sleep(binary_handler_profile_rocprof_co
try:
# Start workload with sleep mode enabled
process_workload = subprocess.Popen(
[config["app_hip_dynamic_shared"], "--enable-sleep"], env=env
[*config["app_hip_dynamic_shared"], "--enable-sleep"], env=env
)
time.sleep(5) # Give workload time to start
attach_detach = {
"attach_pid": process_workload.pid,
@@ -2358,7 +2361,7 @@ def test_live_attach_detach_block_thread_sleep(binary_handler_profile_rocprof_co
@pytest.mark.live_attach_detach
def test_live_attach_detach_singlepath_launch_stats(
def test_live_attach_detach_singlepass_launch_stats(
binary_handler_profile_rocprof_compute,
):
options = ["--set", "launch_stats"]
@@ -2374,6 +2377,7 @@ def test_live_attach_detach_singlepath_launch_stats(
try:
# Start workload
process_workload = subprocess.Popen(config["app_hip_dynamic_shared"], env=env)
time.sleep(5) # Give workload time to start
attach_detach = {
"attach_pid": process_workload.pid,