Standalone binary (#546)

* Add cmake function to create standalone binary * Mention licenses used by dependencies in the LICENSE file * Add test cases for standalone binary by adding --call-binary option for pytest * Docker compose file to create standalone binary in standardized RHEL 8 environment * Add README instructions on how to create and test standalone binary * Move docker files from utils to docker folder; Add standalone binary testing instructions * Add CHANGELOG statement * Use different service names in docker compose files * Use volume mounting in docker files [ROCm/rocprofiler-compute commit: 40f79c28b1]
2025-02-20 17:51:57 -05:00
parent 216fa7d5f6
commit d045ac8c74
14 changed files with 1499 additions and 2898 deletions
@@ -2,6 +2,10 @@

 Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/](https://rocm.docs.amd.com/projects/rocprofiler-compute/en/latest/).

+## Unreleased
+
+* Add Docker files to package the application and its dependencies into a single, portable, executable standalone binary
+
 ## (Unreleased) ROCm Compute Profiler 3.1.0 for ROCm 6.4.0

 ### Added
@@ -372,6 +372,28 @@ add_custom_target(
        "src/${PACKAGE_NAME},cmake/Dockerfile,cmake/rocm_install.sh,docker/docker-entrypoint.sh,src/rocprof_compute_analyze/convertor/mongodb/convert"
    )

+# Standalone binary creation
+#
+# `make standalonebinary` compiles src/rocprof-compute into a single-file
+# executable with Nuitka and writes it to ${CMAKE_BINARY_DIR}/rocprof-compute.bin.
+# The `nuitka` and `patchelf` pip packages must be installed for
+# ${Python3_EXECUTABLE}.
+add_custom_target(
+    standalonebinary
+    # Check nuitka and patchelf: `pip show` exits non-zero and names the missing
+    # package, so a missing tool is reported instead of failing silently
+    COMMAND ${Python3_EXECUTABLE} -m pip show --quiet nuitka
+    COMMAND ${Python3_EXECUTABLE} -m pip show --quiet patchelf
+    # Build standalone binary; --output-dir keeps the executable and Nuitka's
+    # intermediate folders in the build tree instead of the source tree
+    COMMAND
+        ${Python3_EXECUTABLE} -m nuitka --mode=onefile
+        --output-dir=${CMAKE_BINARY_DIR}
+        --include-data-files=${PROJECT_SOURCE_DIR}/VERSION*=./ --enable-plugin=no-qt
+        --include-package-data=dash_svg --include-package=dash_bootstrap_components
+        --include-package=plotly --include-package-data=kaleido
+        --include-package=rocprof_compute_soc --include-package-data=rocprof_compute_soc
+        --include-package-data=utils rocprof-compute
+    # Remove library rpath from executable
+    COMMAND patchelf --remove-rpath ${CMAKE_BINARY_DIR}/rocprof-compute.bin
+    # The `rocprof-compute` entry script above is given relative to src
+    WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/src
+    COMMENT "Building standalone rocprof-compute binary with Nuitka"
+    # No shell operators remain in the commands, so arguments can be escaped
+    # consistently across generators
+    VERBATIM)
+
 install(
    FILES ${PROJECT_SOURCE_DIR}/LICENSE
    DESTINATION ${CMAKE_INSTALL_DOCDIR}
@@ -19,3 +19,22 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
+
+This application uses the following dependencies, whose use is governed by their respective licenses:
+Python 3 standard library: PSFL
+Nuitka specific runtime code: Apache 2.0 license
+astunparse python library: PSFL
+colorlover python library: MIT
+dash python library: MIT
+matplotlib python library: PSFL
+numpy python library: BSD
+pandas python library: BSD
+pymongo python library: Apache 2.0 license
+pyyaml python library: MIT
+tabulate python library: MIT
+tqdm python library: MIT
+dash-svg python library: MIT
+dash-bootstrap-components python library: MIT
+kaleido python library: MIT
+setuptools python library: MIT
+plotille python library: MIT
@@ -37,8 +37,8 @@ Users may checkout `amd-staging` to preview upcoming features.
 ## Testing

 To quickly get the environment (bash shell) for building and testing, run the following commands:
-* `cd utils/docker_env`
-* `docker compose run app`
+* `cd docker`
+* `docker compose -f docker-compose.test.yml run test`

 Inside the docker container, clean, build and install the project with tests enabled:
 ```
@@ -56,6 +56,27 @@ For manual testing, you can find the executable at `install/bin/rocprof-compute`

 NOTE: This Dockerfile uses `rocm/dev-ubuntu-22.04` as the base image

+## Standalone binary
+
+To create a standalone binary, run the following commands:
+* `cd docker`
+* `docker compose -f docker-compose.standalone.yml run standalone`
+
+You should find the rocprof-compute.bin standalone binary inside the `build` folder in the root directory of the project.
+
+To build the binary we follow these steps:
+* Use the RHEL 8 image used to build ROCm as the base image
+* Install python3.8
+* Install dependencies for runtime and for making standalone binary
+* Call the make target which uses Nuitka to build the standalone binary
+
+NOTE: Since RHEL 8 ships with glibc version 2.28, this standalone binary can only be run in environments with glibc version 2.28 or newer.
+The glibc version can be checked using the `ldd --version` command.
+
+NOTE: The libnss3.so shared library is required when using the `--roof-only` option, which generates roofline data in PDF format.
+
+To test the standalone binary, provide the `--call-binary` option to pytest.
+
 ## How to Cite

 This software can be cited using a Zenodo
@@ -0,0 +1,19 @@
+# Build environment for the standalone rocprof-compute binary.
+# The project is expected to be mounted at /app when the container runs;
+# the default command installs the Python dependencies and builds the
+# `standalonebinary` make target.
+FROM redhat/ubi8:8.10-1184
+
+WORKDIR /app
+
+RUN yum install -y curl gcc cmake
+
+# Use the Python 3.8 specific pip installer: the generic get-pip.py only
+# supports newer Python versions. `curl -f` makes an HTTP error fail the
+# build instead of saving an error page as the installer script.
+RUN yum install -y python38 python38-devel && \
+    yum clean all && \
+    rm -rf /var/cache/yum && \
+    curl -fsSL https://bootstrap.pypa.io/pip/3.8/get-pip.py -o /tmp/get-pip.py && \
+    python3.8 /tmp/get-pip.py && \
+    rm -f /tmp/get-pip.py
+
+# Install runtime and packaging dependencies, then configure from scratch and
+# build the standalone binary
+CMD ["/bin/bash", "-c", "\
+    python3.8 -m pip install -r requirements.txt \
+    && python3.8 -m pip install nuitka patchelf \
+    && rm -rf build \
+    && cmake -B build -S . \
+    && make -C build standalonebinary \
+"]
@@ -19,11 +19,9 @@ RUN apt-get install -y python3.10 python3.10-venv python3.10-dev python3-pip
 # Set Python 3.10 as the default python3
 RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1

-# Copy your application code to the container
-COPY . .
-
 # Install any dependencies specified in requirements.txt
-RUN pip3 install --no-cache-dir -r requirements.txt -r requirements-test.txt
-
-# Command to run your application
-CMD ["/bin/bash"]
+# Run interactive bash shell
+CMD ["/bin/bash", "-c", "\
+    python3.10 -m pip install -r requirements.txt -r requirements-test.txt \
+    && exec /bin/bash \
+"]
@@ -0,0 +1,12 @@
+# Compose service that builds the standalone rocprof-compute binary.
+services:
+  standalone:
+    build:
+      # Build context is the repository root (one level above this file)
+      context: ../
+      dockerfile: docker/Dockerfile.standalone
+    # Host device nodes exposed to the container
+    # NOTE(review): presumably the AMD GPU compute/render devices - confirm
+    devices:
+      - /dev/kfd
+      - /dev/dri
+    security_opt:
+      - seccomp:unconfined
+    # Mount the repository root at /app inside the container
+    volumes:
+      - ../:/app
@@ -0,0 +1,12 @@
+# Compose service that provides the build-and-test environment.
+services:
+  test:
+    build:
+      # Build context is the repository root (one level above this file)
+      context: ../
+      dockerfile: docker/Dockerfile.test
+    # Host device nodes exposed to the container
+    # NOTE(review): presumably the AMD GPU compute/render devices - confirm
+    devices:
+      - /dev/kfd
+      - /dev/dri
+    security_opt:
+      - seccomp:unconfined
+    # Mount the repository root at /app inside the container
+    volumes:
+      - ../:/app
@@ -0,0 +1,10 @@
+import pytest
+
+
+def pytest_addoption(parser):
+    """Register the ``--call-binary`` command-line flag.
+
+    The flag is a boolean switch that defaults to False. When given, tests
+    are meant to call the standalone binary instead of the main function.
+
+    Args:
+        parser: option parser object supplied by pytest
+    """
+    parser.addoption(
+        "--call-binary",
+        action="store_true",
+        default=False,
+        help="Call standalone binary instead of main function during tests",
+    )
@@ -11,6 +11,7 @@ from unittest.mock import patch
 import pandas as pd
 import pytest
 import test_utils
+from test_utils import binary_handler_profile_rocprof_compute

 # Globals

@@ -35,9 +36,6 @@ MI300_CHIP_IDS = {
 # --

 config = {}
-config["rocprofiler-compute"] = SourceFileLoader(
-    "rocprofiler-compute", "src/rocprof-compute"
-).load_module()
 config["kernel_name_1"] = "vecCopy"
 config["app_1"] = ["./tests/vcopy", "-n", "1048576", "-b", "256", "-i", "3"]
 config["cleanup"] = True
@@ -45,13 +43,8 @@ config["COUNTER_LOGGING"] = False
 config["METRIC_COMPARE"] = False
 config["METRIC_LOGGING"] = False

-# default option is no roof to reduce test run time
-baseline_opts = ["rocprof-compute", "profile", "--no-roof", "-n", "app_1", "-VVV"]
-baseline_with_roof_opts = ["rocprof-compute", "profile", "-n", "app_1", "-VVV"]
-
 num_kernels = 3
 num_devices = 1
-dispatch_id = 0

 DEFAULT_ABS_DIFF = 15
 DEFAULT_REL_DIFF = 50
@@ -497,10 +490,9 @@ def validate(test_name, workload_dir, file_dict, args=[]):


@pytest.mark.misc
-def test_path():
-    options = baseline_opts
+def test_path(binary_handler_profile_rocprof_compute):
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)

@@ -520,22 +512,22 @@ def test_path():


@pytest.mark.misc
-def test_kernel_names():
-    options = baseline_with_roof_opts + ["--roof-only", "--kernel-names"]
+def test_kernel_names(binary_handler_profile_rocprof_compute):
+    options = ["--device", "0", "--roof-only", "--kernel-names"]
    workload_dir = test_utils.get_output_dir()
-    e = test_utils.launch_rocprof_compute(
-        config, options, workload_dir, check_success=False
+    returncode = binary_handler_profile_rocprof_compute(
+        config, workload_dir, options, check_success=False, roof=True
    )

    if soc == "MI100":
        # assert that it did not run
-        assert e.value.code >= 1
+        assert returncode >= 1
        # Do not continue testing
        return
    # assert successful run
-    assert e.value.code == 0
+    assert returncode == 0

-    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
+    file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
    if soc == "MI200" or "MI300" in soc:
        assert sorted(list(file_dict.keys())) == sorted(
            ROOF_ONLY_FILES + ["kernelName_legend.pdf"]
@@ -553,14 +545,10 @@ def test_kernel_names():


@pytest.mark.misc
-def test_device_filter():
-    device_id = "0"
-    # if "HIP_VISIBLE_DEVICES" in os.environ:
-    #     device_id = os.environ["HIP_VISIBLE_DEVICES"]
-
-    options = baseline_opts + ["--device", device_id]
+def test_device_filter(binary_handler_profile_rocprof_compute):
+    options = ["--device", "0"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
    if soc == "MI100":
@@ -585,10 +573,10 @@ def test_device_filter():


@pytest.mark.kernel_execution
-def test_kernel():
-    options = baseline_opts + ["--kernel", config["kernel_name_1"]]
+def test_kernel(binary_handler_profile_rocprof_compute):
+    options = ["--kernel", config["kernel_name_1"]]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    if soc == "MI100":
@@ -611,10 +599,10 @@ def test_kernel():


@pytest.mark.block
-def test_block_SQ():
-    options = baseline_opts + ["--block", "SQ"]
+def test_block_SQ(binary_handler_profile_rocprof_compute):
+    options = ["--block", "SQ"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    expected_csvs = [
@@ -666,10 +654,10 @@ def test_block_SQ():


@pytest.mark.block
-def test_block_SQC():
-    options = baseline_opts + ["--block", "SQC"]
+def test_block_SQC(binary_handler_profile_rocprof_compute):
+    options = ["--block", "SQC"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    expected_csvs = [
@@ -694,10 +682,10 @@ def test_block_SQC():


@pytest.mark.block
-def test_block_TA():
-    options = baseline_opts + ["--block", "TA"]
+def test_block_TA(binary_handler_profile_rocprof_compute):
+    options = ["--block", "TA"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    expected_csvs = [
@@ -726,10 +714,10 @@ def test_block_TA():


@pytest.mark.block
-def test_block_TD():
-    options = baseline_opts + ["--block", "TD"]
+def test_block_TD(binary_handler_profile_rocprof_compute):
+    options = ["--block", "TD"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    expected_csvs = [
@@ -763,10 +751,10 @@ def test_block_TD():


@pytest.mark.block
-def test_block_TCP():
-    options = baseline_opts + ["--block", "TCP"]
+def test_block_TCP(binary_handler_profile_rocprof_compute):
+    options = ["--block", "TCP"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    expected_csvs = [
@@ -797,10 +785,10 @@ def test_block_TCP():


@pytest.mark.block
-def test_block_TCC():
-    options = baseline_opts + ["--block", "TCC"]
+def test_block_TCC(binary_handler_profile_rocprof_compute):
+    options = ["--block", "TCC"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    expected_csvs = [
@@ -847,10 +835,10 @@ def test_block_TCC():


@pytest.mark.block
-def test_block_SPI():
-    options = baseline_opts + ["--block", "SPI"]
+def test_block_SPI(binary_handler_profile_rocprof_compute):
+    options = ["--block", "SPI"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    expected_csvs = [
@@ -880,10 +868,10 @@ def test_block_SPI():


@pytest.mark.block
-def test_block_CPC():
-    options = baseline_opts + ["--block", "CPC"]
+def test_block_CPC(binary_handler_profile_rocprof_compute):
+    options = ["--block", "CPC"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    expected_csvs = [
@@ -906,10 +894,10 @@ def test_block_CPC():


@pytest.mark.block
-def test_block_CPF():
-    options = baseline_opts + ["--block", "CPF"]
+def test_block_CPF(binary_handler_profile_rocprof_compute):
+    options = ["--block", "CPF"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    expected_csvs = [
@@ -933,10 +921,10 @@ def test_block_CPF():


@pytest.mark.block
-def test_block_SQ_CPC():
-    options = baseline_opts + ["--block", "SQ", "CPC"]
+def test_block_SQ_CPC(binary_handler_profile_rocprof_compute):
+    options = ["--block", "SQ", "CPC"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    expected_csvs = [
@@ -988,10 +976,10 @@ def test_block_SQ_CPC():


@pytest.mark.block
-def test_block_SQ_TA():
-    options = baseline_opts + ["--block", "SQ", "TA"]
+def test_block_SQ_TA(binary_handler_profile_rocprof_compute):
+    options = ["--block", "SQ", "TA"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    expected_csvs = [
@@ -1039,10 +1027,10 @@ def test_block_SQ_TA():


@pytest.mark.block
-def test_block_SQ_SPI():
-    options = baseline_opts + ["--block", "SQ", "SPI"]
+def test_block_SQ_SPI(binary_handler_profile_rocprof_compute):
+    options = ["--block", "SQ", "SPI"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    expected_csvs = [
@@ -1093,10 +1081,10 @@ def test_block_SQ_SPI():


@pytest.mark.block
-def test_block_SQ_SQC_TCP_CPC():
-    options = baseline_opts + ["--block", "SQ", "SQC", "TCP", "CPC"]
+def test_block_SQ_SQC_TCP_CPC(binary_handler_profile_rocprof_compute):
+    options = ["--block", "SQ", "SQC", "TCP", "CPC"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    expected_csvs = [
@@ -1144,10 +1132,10 @@ def test_block_SQ_SQC_TCP_CPC():


@pytest.mark.block
-def test_block_SQ_SPI_TA_TCC_CPF():
-    options = baseline_opts + ["--block", "SQ", "SPI", "TA", "TCC", "CPF"]
+def test_block_SQ_SPI_TA_TCC_CPF(binary_handler_profile_rocprof_compute):
+    options = ["--block", "SQ", "SPI", "TA", "TCC", "CPF"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    expected_csvs = [
@@ -1199,10 +1187,10 @@ def test_block_SQ_SPI_TA_TCC_CPF():


@pytest.mark.dispatch
-def test_dispatch_0():
-    options = baseline_opts + ["--dispatch", "0"]
+def test_dispatch_0(binary_handler_profile_rocprof_compute):
+    options = ["--dispatch", "0"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, 1)
    if soc == "MI100":
@@ -1229,10 +1217,10 @@ def test_dispatch_0():


@pytest.mark.dispatch
-def test_dispatch_0_1():
-    options = baseline_opts + ["--dispatch", "0:2"]
+def test_dispatch_0_1(binary_handler_profile_rocprof_compute):
+    options = ["--dispatch", "0:2"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, 2)
    if soc == "MI100":
@@ -1256,10 +1244,10 @@ def test_dispatch_0_1():


@pytest.mark.dispatch
-def test_dispatch_2():
-    options = baseline_opts + ["--dispatch", dispatch_id]
+def test_dispatch_2(binary_handler_profile_rocprof_compute):
+    options = ["--dispatch", "0"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, 1)
    if soc == "MI100":
@@ -1278,7 +1266,7 @@ def test_dispatch_2():
        file_dict,
        [
            "--dispatch",
-            str(dispatch_id),
+            "0",
        ],
    )

@@ -1286,10 +1274,10 @@ def test_dispatch_2():


@pytest.mark.join
-def test_join_type_grid():
-    options = baseline_opts + ["--join-type", "grid"]
+def test_join_type_grid(binary_handler_profile_rocprof_compute):
+    options = ["--join-type", "grid"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)
    if soc == "MI100":
@@ -1312,10 +1300,10 @@ def test_join_type_grid():


@pytest.mark.join
-def test_join_type_kernel():
-    options = baseline_opts + ["--join-type", "kernel"]
+def test_join_type_kernel(binary_handler_profile_rocprof_compute):
+    options = ["--join-type", "kernel"]
    workload_dir = test_utils.get_output_dir()
-    test_utils.launch_rocprof_compute(config, options, workload_dir)
+    binary_handler_profile_rocprof_compute(config, workload_dir, options)

    file_dict = test_utils.check_csv_files(workload_dir, num_devices, num_kernels)

@@ -1339,27 +1327,22 @@ def test_join_type_kernel():


@pytest.mark.sort
-def test_sort_dispatches():
+def test_sort_dispatches(binary_handler_profile_rocprof_compute):
    # only test 1 device for roofline
-    device_id = "0"
-    options = (
-        baseline_with_roof_opts
-        + ["--device", device_id]
-        + ["--roof-only", "--sort", "dispatches"]
-    )
+    options = ["--device", "0", "--roof-only", "--sort", "dispatches"]
    workload_dir = test_utils.get_output_dir()
-    e = test_utils.launch_rocprof_compute(
-        config, options, workload_dir, check_success=False
+    returncode = binary_handler_profile_rocprof_compute(
+        config, workload_dir, options, check_success=False, roof=True
    )

    if soc == "MI100":
        # assert that it did not run
-        assert e.value.code >= 1
+        assert returncode >= 1
        # Do not continue testing
        return

    # assert successful run
-    assert e.value.code == 0
+    assert returncode == 0

    file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)

@@ -1378,27 +1361,22 @@ def test_sort_dispatches():


@pytest.mark.sort
-def test_sort_kernels():
+def test_sort_kernels(binary_handler_profile_rocprof_compute):
    # only test 1 device for roofline
-    device_id = "0"
-    options = (
-        baseline_with_roof_opts
-        + ["--device", device_id]
-        + ["--roof-only", "--sort", "kernels"]
-    )
+    options = ["--device", "0", "--roof-only", "--sort", "kernels"]
    workload_dir = test_utils.get_output_dir()
-    e = test_utils.launch_rocprof_compute(
-        config, options, workload_dir, check_success=False
+    returncode = binary_handler_profile_rocprof_compute(
+        config, workload_dir, options, check_success=False, roof=True
    )

    if soc == "MI100":
        # assert that it did not run
-        assert e.value.code >= 1
+        assert returncode >= 1
        # Do not continue testing
        return

    # assert successful run
-    assert e.value.code == 0
+    assert returncode == 0
    file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)

    if soc == "MI200" or "MI300" in soc:
@@ -1416,27 +1394,22 @@ def test_sort_kernels():


@pytest.mark.mem
-def test_mem_levels_vL1D():
+def test_mem_levels_vL1D(binary_handler_profile_rocprof_compute):
    # only test 1 device for roofline
-    device_id = "0"
-    options = (
-        baseline_with_roof_opts
-        + ["--device", device_id]
-        + ["--roof-only", "--mem-level", "vL1D"]
-    )
+    options = ["--device", "0", "--roof-only", "--mem-level", "vL1D"]
    workload_dir = test_utils.get_output_dir()
-    e = test_utils.launch_rocprof_compute(
-        config, options, workload_dir, check_success=False
+    returncode = binary_handler_profile_rocprof_compute(
+        config, workload_dir, options, check_success=False, roof=True
    )

    if soc == "MI100":
        # assert that it did not run
-        assert e.value.code >= 1
+        assert returncode >= 1
        # Do not continue testing
        return

    # assert successful run
-    assert e.value.code == 0
+    assert returncode == 0
    file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)

    if soc == "MI200" or "MI300" in soc:
@@ -1454,27 +1427,22 @@ def test_mem_levels_vL1D():


@pytest.mark.mem
-def test_mem_levels_LDS():
+def test_mem_levels_LDS(binary_handler_profile_rocprof_compute):
    # only test 1 device for roofline
-    device_id = "0"
-    options = (
-        baseline_with_roof_opts
-        + ["--device", device_id]
-        + ["--roof-only", "--mem-level", "LDS"]
-    )
+    options = ["--device", "0", "--roof-only", "--mem-level", "LDS"]
    workload_dir = test_utils.get_output_dir()
-    e = test_utils.launch_rocprof_compute(
-        config, options, workload_dir, check_success=False
+    returncode = binary_handler_profile_rocprof_compute(
+        config, workload_dir, options, check_success=False, roof=True
    )

    if soc == "MI100":
        # assert that it did not run
-        assert e.value.code >= 1
+        assert returncode >= 1
        # Do not continue testing
        return

    # assert successful run
-    assert e.value.code == 0
+    assert returncode == 0
    file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)

    if soc == "MI200" or "MI300" in soc:
@@ -26,12 +26,16 @@
 import inspect
 import os
 import shutil
+import subprocess
+from importlib.machinery import SourceFileLoader
 from pathlib import Path
 from unittest.mock import patch

 import pandas as pd
 import pytest

+rocprof_compute = SourceFileLoader("rocprof-compute", "src/rocprof-compute").load_module()
+

 def check_resource_allocation():
    """Check if CTEST resource allocation is enabled for parallel testing and set
@@ -128,26 +132,67 @@ def check_csv_files(output_dir, num_devices, num_kernels):
    return file_dict


-def launch_rocprof_compute(config, options, workload_dir, check_success=True):
-    """Launch ROCm Compute Profiler with command-line optoins
+@pytest.fixture
+def binary_handler_profile_rocprof_compute(request):
+    """Fixture returning a helper that runs ``rocprof-compute profile`` on ``config["app_1"]``.
+
+    When pytest was started with ``--call-binary`` the helper launches
+    ``build/rocprof-compute.bin`` in a subprocess; otherwise it patches
+    ``sys.argv`` and calls ``rocprof_compute.main()`` in-process.
+    """
+    def _handler(config, workload_dir, options=[], check_success=True, roof=False):
+        """Run the profiler and return its exit code.
+
+        Args:
+            config (dict): test configuration; ``config["app_1"]`` is the workload command line
+            workload_dir (string): output directory passed via ``--path``
+            options (list, optional): extra options appended after the baseline ones
+            check_success (bool, optional): assert that the exit code is 0. Defaults to True.
+            roof (bool, optional): when False, ``--no-roof`` is added. Defaults to False.
+
+        Returns:
+            the subprocess return code, or the ``SystemExit`` code in in-process mode
+        """
+        if request.config.getoption("--call-binary"):
+            baseline_opts = [
+                "build/rocprof-compute.bin",
+                "profile",
+                "-n",
+                "app_1",
+                "-VVV",
+            ]
+            if not roof:
+                baseline_opts.append("--no-roof")
+            process = subprocess.run(
+                baseline_opts
+                + options
+                + ["--path", workload_dir, "--"]
+                + config["app_1"],
+                text=True,
+            )
+            # verify run status
+            if check_success:
+                assert process.returncode == 0
+            return process.returncode
+        else:
+            baseline_opts = ["rocprof-compute", "profile", "-n", "app_1", "-VVV"]
+            if not roof:
+                baseline_opts.append("--no-roof")
+            with pytest.raises(SystemExit) as e:
+                with patch(
+                    "sys.argv",
+                    baseline_opts
+                    + options
+                    + ["--path", workload_dir, "--"]
+                    + config["app_1"],
+                ):
+                    rocprof_compute.main()
+            # verify run status
+            if check_success:
+                assert e.value.code == 0
+            return e.value.code

-    Args:
-        config (list): runtime configuration settings
-        options (list): command line options to provide to rocprofiler-compute
-        workload_dir (string): desired output directory
-        check_success (bool, optional): Whether to verify successful exit condition. Defaults to True.
+    return _handler

-    Returns:
-       exception: SystemExit exception
-    """
-    with pytest.raises(SystemExit) as e:
-        with patch(
-            "sys.argv", options + ["--path", workload_dir, "--"] + config["app_1"]
-        ):
-            config["rocprofiler-compute"].main()

-    # verify run status
-    if check_success:
-        assert e.value.code == 0
+@pytest.fixture
+def binary_handler_analyze_rocprof_compute(request):
+    """Fixture returning a helper that runs rocprof-compute with the given arguments.
+
+    When pytest was started with ``--call-binary`` the helper launches
+    ``build/rocprof-compute.bin`` in a subprocess; otherwise it patches
+    ``sys.argv`` and calls ``rocprof_compute.main()`` in-process.
+    """
+    def _handler(arguments):
+        """Run rocprof-compute with ``arguments`` and return its exit code.
+
+        No assertion is made on the exit code; callers check the returned value.
+
+        Args:
+            arguments (list): command-line arguments placed after the program name
+
+        Returns:
+            the subprocess return code, or the ``SystemExit`` code in in-process mode
+        """
+        if request.config.getoption("--call-binary"):
+            process = subprocess.run(
+                ["build/rocprof-compute.bin", *arguments],
+                text=True,
+            )
+            return process.returncode
+        else:
+            with pytest.raises(SystemExit) as e:
+                with patch(
+                    "sys.argv",
+                    ["rocprof-compute", *arguments],
+                ):
+                    rocprof_compute.main()
+            return e.value.code

-    return e
+    return _handler
@@ -1,10 +0,0 @@
-services:
-  app:
-    build:
-      context: ../../
-      dockerfile: utils/docker_env/Dockerfile
-    devices:
-      - /dev/kfd
-      - /dev/dri
-    security_opt:
-      - seccomp:unconfined