[rocprofiler-compute] Roofline runtime compilation patch (#2232)
* Add an install rule for src/hip to the CMakeLists.txt file; resolves 'no hip module' issues.
* Re-add the printout line for peak VALU during benchmarking that was accidentally removed in a different commit.
* Add CHANGELOG entry for commit 2bfa9a4 ("Integrate roofline benchmark into rocprof-compute (#2015)")
Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>
* Run formatter checks on rocprof-compute to clear PR checks
Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>
* Update benchmark.py link in changelog
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Apply suggestions to CHANGELOG from code review
Co-authored-by: Pratik Basyal <pratik.basyal@amd.com>
---------
Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Pratik Basyal <pratik.basyal@amd.com>
This commit is contained in:
committad av
GitHub
förälder
73778bf83c
incheckning
9d34098350
@@ -19,6 +19,11 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
* kernel: Counters are collected in a round robin fashion for unique kernels.
|
||||
* kernel_launch_params: Counters are collected in a round robin fashion for unique kernels having the exact same launch parameters.
|
||||
|
||||
* Runtime compilation of Roofline benchmarking:
|
||||
* GPU kernels from the [rocm-amdgpu-bench](https://github.com/ROCm/rocm-amdgpu-bench) repository are moved into the ROCm Compute Profiler and are compiled at runtime using local HIP and HIPRTC Python wrappers.
|
||||
* Roofline binaries compiled from the [rocm-amdgpu-bench](https://github.com/ROCm/rocm-amdgpu-bench) repository have been removed from the project, as Roofline runtime compilation performs the same work as the Roofline binaries.
|
||||
* You can collect standalone Roofline empirical peaks without running the entire ROCm Compute Profiler's profile mode, through an entry point in [benchmark.py](https://github.com/ROCm/rocm-systems/blob/HEAD/projects/rocprofiler-compute/src/utils/benchmark.py). Running the `benchmark.py` Python file replaces calling the standalone Roofline binary.
|
||||
|
||||
### Changed
|
||||
|
||||
* Default output format for the underlying ROCprofiler-SDK tool has been changed from ``csv`` to ``rocpd``.
|
||||
|
||||
@@ -532,6 +532,13 @@ install(
|
||||
DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}
|
||||
COMPONENT main
|
||||
)
|
||||
# src/hip (local hip python wrapper)
|
||||
install(
|
||||
DIRECTORY src/hip
|
||||
DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}
|
||||
COMPONENT main
|
||||
PATTERN "__pycache__" EXCLUDE
|
||||
)
|
||||
# src/rocprof_compute_analyze
|
||||
install(
|
||||
DIRECTORY src/rocprof_compute_analyze
|
||||
@@ -658,9 +665,8 @@ add_custom_target(
|
||||
--include-data-files=${PROJECT_SOURCE_DIR}/VERSION*=./ --enable-plugin=no-qt
|
||||
--include-data-files=src/lib/rocprofiler_compute_tool.cpp=lib/rocprofiler_compute_tool.cpp
|
||||
--include-data-files=src/lib/helper.cpp=lib/helper.cpp
|
||||
--include-data-files=src/lib/helper.hpp=lib/helper.hpp
|
||||
--include-package=dash_svg --include-package-data=dash_svg
|
||||
--include-package=dash_bootstrap_components
|
||||
--include-data-files=src/lib/helper.hpp=lib/helper.hpp --include-package=dash_svg
|
||||
--include-package-data=dash_svg --include-package=dash_bootstrap_components
|
||||
--include-package-data=dash_bootstrap_components --include-package=plotly
|
||||
--include-package-data=plotly --include-package=kaleido
|
||||
--include-package-data=kaleido --include-package=rocprof_compute_analyze
|
||||
@@ -669,8 +675,8 @@ add_custom_target(
|
||||
--include-package-data=rocprof_compute_profile
|
||||
--include-package=rocprof_compute_tui --include-package-data=rocprof_compute_tui
|
||||
--include-package=rocprof_compute_soc --include-package-data=rocprof_compute_soc
|
||||
--include-package=utils --include-package-data=utils
|
||||
--include-package=hip --include-package-data=hip src/rocprof-compute
|
||||
--include-package=utils --include-package-data=utils --include-package=hip
|
||||
--include-package-data=hip src/rocprof-compute
|
||||
# Remove library rpath from executable
|
||||
COMMAND patchelf --remove-rpath rocprof-compute.bin
|
||||
# Move to build directory
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
##############################################################################
|
||||
|
||||
import ctypes
|
||||
import os
|
||||
from ctypes import (
|
||||
POINTER,
|
||||
Structure,
|
||||
@@ -37,7 +38,6 @@ from ctypes import (
|
||||
c_uint8,
|
||||
c_void_p,
|
||||
)
|
||||
import os
|
||||
|
||||
_lib = ctypes.CDLL(f"{os.getenv('ROCM_PATH', '/opt/rocm')}/lib/libamdhip64.so")
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
##############################################################################
|
||||
|
||||
import ctypes
|
||||
import os
|
||||
from ctypes import (
|
||||
POINTER,
|
||||
byref,
|
||||
@@ -33,7 +34,6 @@ from ctypes import (
|
||||
c_size_t,
|
||||
c_void_p,
|
||||
)
|
||||
import os
|
||||
|
||||
_lib = ctypes.CDLL(f"{os.getenv('ROCM_PATH', '/opt/rocm')}/lib/libhiprtc.so")
|
||||
|
||||
|
||||
@@ -638,6 +638,8 @@ def flops_bench(device: int, type: str, unit: str, rate: int) -> PerfMetrics:
|
||||
event_ms = total_flops / mean / 1e6
|
||||
|
||||
print(
|
||||
f"Peak VALU {unit}s ({type}), GPU ID: {device}, "
|
||||
f"workgroupSize:{workgroup_size}, "
|
||||
f"workgroups:{workgroups}, experiments:{num_experiments}, "
|
||||
f"{unit}:{total_flops}, duration:{event_ms:.1f} ms, "
|
||||
f"mean:{mean:.1f} {rate}, stdev={stdev:.1f} GFLOPS"
|
||||
|
||||
@@ -88,8 +88,10 @@ def generate_custom(args, cmake_args, ctest_args):
|
||||
set(CTEST_BINARY_DIRECTORY {BINARY_DIR})
|
||||
|
||||
set(CTEST_UPDATE_COMMAND {GIT_CMD})
|
||||
set(CTEST_CONFIGURE_COMMAND "{CMAKE_CMD} -B {BINARY_DIR} {SOURCE_DIR} {CMAKE_ARGS}")
|
||||
set(CTEST_BUILD_COMMAND "{CMAKE_CMD} --build {BINARY_DIR} --target all --parallel {BUILD_JOBS}")
|
||||
set(CTEST_CONFIGURE_COMMAND "{CMAKE_CMD} -B {BINARY_DIR} {SOURCE_DIR} \
|
||||
{CMAKE_ARGS}")
|
||||
set(CTEST_BUILD_COMMAND "{CMAKE_CMD} --build {BINARY_DIR} --target all \
|
||||
--parallel {BUILD_JOBS}")
|
||||
set(CTEST_COVERAGE_COMMAND {GCOV_CMD})
|
||||
"""
|
||||
|
||||
|
||||
Referens i nytt ärende
Block a user