From 9d340983504736b983ae6b332c85f61e035c10a6 Mon Sep 17 00:00:00 2001 From: cfallows-amd Date: Wed, 10 Dec 2025 01:44:28 -0500 Subject: [PATCH] [rocprofiler-compute] Roofline runtime compilation patch (#2232) * Add install into CMakeLists.txt file- resolves 'no hip module' issues. * Readd printout line for peak VALU during benchmarking removed on accident in a different commit. * Add CHANGELOG entry for commit 2bfa9a4 ("Integrate roofline benchmark into rocprof-compute (#2015)") Signed-off-by: Carrie Fallows * Run formatter checks on rocprof-compute to clear PR checks Signed-off-by: Carrie Fallows * Update benchmark.py link in changelog Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Apply suggestions to CHANGELOG from code review Co-authored-by: Pratik Basyal --------- Signed-off-by: Carrie Fallows Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Pratik Basyal --- projects/rocprofiler-compute/CHANGELOG.md | 5 +++++ projects/rocprofiler-compute/CMakeLists.txt | 16 +++++++++++----- projects/rocprofiler-compute/src/hip/hip.py | 2 +- projects/rocprofiler-compute/src/hip/hiprtc.py | 2 +- .../rocprofiler-compute/src/utils/benchmark.py | 2 ++ projects/rocprofiler-compute/tools/run-ci.py | 6 ++++-- 6 files changed, 24 insertions(+), 9 deletions(-) diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md index 0a52be3fdf..89a960cdbd 100644 --- a/projects/rocprofiler-compute/CHANGELOG.md +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -19,6 +19,11 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * kernel: Counters are collected in a round robin fashion for unique kernels. * kernel_launch_params: Counters are collected in a round robin fashion for unique kernels having the exact same launch parameters. 
+* Runtime compilation of Roofline benchmarking: + * GPU kernels from the [rocm-amdgpu-bench](https://github.com/ROCm/rocm-amdgpu-bench) repository are moved into the ROCm Compute Profiler and are compiled at runtime using local HIP and HIPRTC Python wrappers. + * Roofline binaries compiled from the [rocm-amdgpu-bench](https://github.com/ROCm/rocm-amdgpu-bench) repository have been removed from the project, as Roofline runtime compilation performs the same work as the Roofline binaries. + * You can collect standalone Roofline empirical peaks without running the entire ROCm Compute Profiler's profile mode, through an entry point in [benchmark.py](https://github.com/ROCm/rocm-systems/blob/HEAD/projects/rocprofiler-compute/src/utils/benchmark.py). Running the `benchmark.py` Python file replaces calling the standalone Roofline binary. + ### Changed * Default output format for the underlying ROCprofiler-SDK tool has been changed from ``csv`` to ``rocpd``. diff --git a/projects/rocprofiler-compute/CMakeLists.txt b/projects/rocprofiler-compute/CMakeLists.txt index df97f4a212..28ccb2b484 100644 --- a/projects/rocprofiler-compute/CMakeLists.txt +++ b/projects/rocprofiler-compute/CMakeLists.txt @@ -532,6 +532,13 @@ install( DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME} COMPONENT main ) +# src/hip (local hip python wrapper) +install( + DIRECTORY src/hip + DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME} + COMPONENT main + PATTERN "__pycache__" EXCLUDE +) # src/rocprof_compute_analyze install( DIRECTORY src/rocprof_compute_analyze @@ -658,9 +665,8 @@ add_custom_target( --include-data-files=${PROJECT_SOURCE_DIR}/VERSION*=./ --enable-plugin=no-qt --include-data-files=src/lib/rocprofiler_compute_tool.cpp=lib/rocprofiler_compute_tool.cpp --include-data-files=src/lib/helper.cpp=lib/helper.cpp - --include-data-files=src/lib/helper.hpp=lib/helper.hpp - --include-package=dash_svg --include-package-data=dash_svg - --include-package=dash_bootstrap_components 
--include-data-files=src/lib/helper.hpp=lib/helper.hpp --include-package=dash_svg + --include-package-data=dash_svg --include-package=dash_bootstrap_components --include-package-data=dash_bootstrap_components --include-package=plotly --include-package-data=plotly --include-package=kaleido --include-package-data=kaleido --include-package=rocprof_compute_analyze @@ -669,8 +675,8 @@ add_custom_target( --include-package-data=rocprof_compute_profile --include-package=rocprof_compute_tui --include-package-data=rocprof_compute_tui --include-package=rocprof_compute_soc --include-package-data=rocprof_compute_soc - --include-package=utils --include-package-data=utils - --include-package=hip --include-package-data=hip src/rocprof-compute + --include-package=utils --include-package-data=utils --include-package=hip + --include-package-data=hip src/rocprof-compute # Remove library rpath from executable COMMAND patchelf --remove-rpath rocprof-compute.bin # Move to build directory diff --git a/projects/rocprofiler-compute/src/hip/hip.py b/projects/rocprofiler-compute/src/hip/hip.py index 823a4844cb..13f15d4879 100644 --- a/projects/rocprofiler-compute/src/hip/hip.py +++ b/projects/rocprofiler-compute/src/hip/hip.py @@ -24,6 +24,7 @@ ############################################################################## import ctypes +import os from ctypes import ( POINTER, Structure, @@ -37,7 +38,6 @@ from ctypes import ( c_uint8, c_void_p, ) -import os _lib = ctypes.CDLL(f"{os.getenv('ROCM_PATH', '/opt/rocm')}/lib/libamdhip64.so") diff --git a/projects/rocprofiler-compute/src/hip/hiprtc.py b/projects/rocprofiler-compute/src/hip/hiprtc.py index 823bd4f0f1..17d1ef6ade 100644 --- a/projects/rocprofiler-compute/src/hip/hiprtc.py +++ b/projects/rocprofiler-compute/src/hip/hiprtc.py @@ -24,6 +24,7 @@ ############################################################################## import ctypes +import os from ctypes import ( POINTER, byref, @@ -33,7 +34,6 @@ from ctypes import ( c_size_t, 
c_void_p, ) -import os _lib = ctypes.CDLL(f"{os.getenv('ROCM_PATH', '/opt/rocm')}/lib/libhiprtc.so") diff --git a/projects/rocprofiler-compute/src/utils/benchmark.py b/projects/rocprofiler-compute/src/utils/benchmark.py index 9349ddc9ab..f33e5005f7 100644 --- a/projects/rocprofiler-compute/src/utils/benchmark.py +++ b/projects/rocprofiler-compute/src/utils/benchmark.py @@ -638,6 +638,8 @@ def flops_bench(device: int, type: str, unit: str, rate: int) -> PerfMetrics: event_ms = total_flops / mean / 1e6 print( + f"Peak VALU {unit}s ({type}), GPU ID: {device}, " + f"workgroupSize:{workgroup_size}, " f"workgroups:{workgroups}, experiments:{num_experiments}, " f"{unit}:{total_flops}, duration:{event_ms:.1f} ms, " f"mean:{mean:.1f} {rate}, stdev={stdev:.1f} GFLOPS" diff --git a/projects/rocprofiler-compute/tools/run-ci.py b/projects/rocprofiler-compute/tools/run-ci.py index ae6a1ed83a..e1733a2b7f 100755 --- a/projects/rocprofiler-compute/tools/run-ci.py +++ b/projects/rocprofiler-compute/tools/run-ci.py @@ -88,8 +88,10 @@ def generate_custom(args, cmake_args, ctest_args): set(CTEST_BINARY_DIRECTORY {BINARY_DIR}) set(CTEST_UPDATE_COMMAND {GIT_CMD}) - set(CTEST_CONFIGURE_COMMAND "{CMAKE_CMD} -B {BINARY_DIR} {SOURCE_DIR} {CMAKE_ARGS}") - set(CTEST_BUILD_COMMAND "{CMAKE_CMD} --build {BINARY_DIR} --target all --parallel {BUILD_JOBS}") + set(CTEST_CONFIGURE_COMMAND "{CMAKE_CMD} -B {BINARY_DIR} {SOURCE_DIR} \ + {CMAKE_ARGS}") + set(CTEST_BUILD_COMMAND "{CMAKE_CMD} --build {BINARY_DIR} --target all \ + --parallel {BUILD_JOBS}") set(CTEST_COVERAGE_COMMAND {GCOV_CMD}) """