From efbe4ea0a229a4e16a0f7e305201b8689923b3dd Mon Sep 17 00:00:00 2001 From: Mythreya Date: Thu, 12 Sep 2024 16:26:34 -0700 Subject: [PATCH] Enable queue interception with scratch reporting (#1069) * Enable queue interception with scratch reporting Scratch reporting reports agent ID in buffer and callback records, but HSA runtime provides only queue ID in the scratch callback. This change enables queue interception when scratch reporting is requested * Validation test for rocprofv3 + scratch-memory-trace * Simplify checks for whether context is tracing a domain * Update changelog --------- Co-authored-by: Jonathan R. Madsen --- CHANGELOG.md | 1 + .../lib/rocprofiler-sdk/context/context.cpp | 20 +++++ .../lib/rocprofiler-sdk/context/context.hpp | 3 + .../rocprofiler-sdk/hsa/queue_controller.cpp | 13 +-- tests/rocprofv3/CMakeLists.txt | 1 + tests/rocprofv3/scratch-memory/CMakeLists.txt | 41 ++++++++++ tests/rocprofv3/scratch-memory/conftest.py | 23 ++++++ tests/rocprofv3/scratch-memory/pytest.ini | 5 ++ tests/rocprofv3/scratch-memory/validate.py | 80 +++++++++++++++++++ 9 files changed, 181 insertions(+), 6 deletions(-) create mode 100644 tests/rocprofv3/scratch-memory/CMakeLists.txt create mode 100644 tests/rocprofv3/scratch-memory/conftest.py create mode 100644 tests/rocprofv3/scratch-memory/pytest.ini create mode 100755 tests/rocprofv3/scratch-memory/validate.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a81b32345..603c02a7de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -106,3 +106,4 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/ - Creation of subdirection when rocprofv3 `--output-file` contains a folder path - Fix misaligned stores (undefined behavior) for buffer records +- Fix crash when only scratch reporting is enabled diff --git a/source/lib/rocprofiler-sdk/context/context.cpp b/source/lib/rocprofiler-sdk/context/context.cpp index 976a7e7c97..1a6d429fba 100644 --- a/source/lib/rocprofiler-sdk/context/context.cpp 
+++ b/source/lib/rocprofiler-sdk/context/context.cpp @@ -427,5 +427,25 @@ deregister_client_contexts(rocprofiler_client_id_t client_id) } } } + +template <typename KindT> +bool +context::is_tracing(KindT _kind) const +{ + constexpr auto is_callback_tracing = + std::is_same<KindT, rocprofiler_callback_tracing_kind_t>::value; + constexpr auto is_buffered_tracing = + std::is_same<KindT, rocprofiler_buffer_tracing_kind_t>::value; + static_assert(is_callback_tracing || is_buffered_tracing, "Unsupported domain type"); + + if constexpr(is_callback_tracing) + return (callback_tracer && callback_tracer->domains(_kind)); + else if constexpr(is_buffered_tracing) + return (buffered_tracer && buffered_tracer->domains(_kind)); +} + +// explicitly instantiate +template bool context::is_tracing(rocprofiler_callback_tracing_kind_t) const; +template bool context::is_tracing(rocprofiler_buffer_tracing_kind_t) const; } // namespace context } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/context/context.hpp b/source/lib/rocprofiler-sdk/context/context.hpp index 449cb4db38..168d468aa5 100644 --- a/source/lib/rocprofiler-sdk/context/context.hpp +++ b/source/lib/rocprofiler-sdk/context/context.hpp @@ -130,6 +130,9 @@ struct context std::unique_ptr dispatch_thread_trace = {}; std::unique_ptr agent_thread_trace = {}; + + template <typename KindT> + bool is_tracing(KindT _kind) const; }; // set the client index needs to be called before allocate_context() diff --git a/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp b/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp index 49518d9e97..57501f5a7f 100644 --- a/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp +++ b/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp @@ -270,14 +270,15 @@ QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table) "If you added a new field to context struct, make sure there is a check here if it " "requires queue interception. 
Once you have done so, increment expected_context_size"); - bool has_kernel_tracing = - (itr->callback_tracer && - itr->callback_tracer->domains(ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH)) || - (itr->buffered_tracer && - itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH)); + bool has_kernel_tracing = itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH) || + itr->is_tracing(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH); + + bool has_scratch_reporting = itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY) || + itr->is_tracing(ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY); if(itr->counter_collection || itr->pc_sampler || has_kernel_tracing || - itr->agent_counter_collection || itr->agent_thread_trace || itr->dispatch_thread_trace) + has_scratch_reporting || itr->agent_counter_collection || itr->agent_thread_trace || + itr->dispatch_thread_trace) { enable_intercepter = true; break; diff --git a/tests/rocprofv3/CMakeLists.txt b/tests/rocprofv3/CMakeLists.txt index 49a76063ab..4b1200fde3 100644 --- a/tests/rocprofv3/CMakeLists.txt +++ b/tests/rocprofv3/CMakeLists.txt @@ -32,3 +32,4 @@ add_subdirectory(kernel-rename) add_subdirectory(aborted-app) add_subdirectory(summary) add_subdirectory(roctracer-roctx) +add_subdirectory(scratch-memory) diff --git a/tests/rocprofv3/scratch-memory/CMakeLists.txt b/tests/rocprofv3/scratch-memory/CMakeLists.txt new file mode 100644 index 0000000000..cc5a43aa3b --- /dev/null +++ b/tests/rocprofv3/scratch-memory/CMakeLists.txt @@ -0,0 +1,41 @@ +# +# +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +project( + rocprofiler-tests-rocprofv3-scratch-memory-tracing + LANGUAGES CXX + VERSION 0.0.0) + +find_package(rocprofiler-sdk REQUIRED) + +rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py) + +string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV + "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}") + +set(scratch-memory-tracing-env "${PRELOAD_ENV}") + +add_test( + NAME 
rocprofv3-test-scratch-memory-tracing-execute + COMMAND + $ --scratch-memory-trace -d + ${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json --log-level + env -- $) + +set_tests_properties( + rocprofv3-test-scratch-memory-tracing-execute + PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT + "${scratch-memory-tracing-env}" FAIL_REGULAR_EXPRESSION + "threw an exception") + +add_test(NAME rocprofv3-test-scratch-memory-tracing-validate + COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --input + ${CMAKE_CURRENT_BINARY_DIR}/scratch-memory-trace/out_results.json) + +set_tests_properties( + rocprofv3-test-scratch-memory-tracing-validate + PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS + rocprofv3-test-scratch-memory-tracing-execute FAIL_REGULAR_EXPRESSION + "AssertionError") diff --git a/tests/rocprofv3/scratch-memory/conftest.py b/tests/rocprofv3/scratch-memory/conftest.py new file mode 100644 index 0000000000..e099667cb6 --- /dev/null +++ b/tests/rocprofv3/scratch-memory/conftest.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 + +import json +import pytest + +from rocprofiler_sdk.pytest_utils.dotdict import dotdict +from rocprofiler_sdk.pytest_utils import collapse_dict_list + + +def pytest_addoption(parser): + parser.addoption( + "--input", + action="store", + default="scratch-memory-tracing/out_results.json", + help="Input JSON", + ) + + +@pytest.fixture +def input_data(request): + filename = request.config.getoption("--input") + with open(filename, "r") as inp: + return dotdict(collapse_dict_list(json.load(inp))) diff --git a/tests/rocprofv3/scratch-memory/pytest.ini b/tests/rocprofv3/scratch-memory/pytest.ini new file mode 100644 index 0000000000..5e1e1c14a0 --- /dev/null +++ b/tests/rocprofv3/scratch-memory/pytest.ini @@ -0,0 +1,5 @@ + +[pytest] +addopts = --durations=20 -rA -s -vv +testpaths = validate.py +pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages diff --git 
a/tests/rocprofv3/scratch-memory/validate.py b/tests/rocprofv3/scratch-memory/validate.py new file mode 100755 index 0000000000..72c3dec2f1 --- /dev/null +++ b/tests/rocprofv3/scratch-memory/validate.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 + +import sys +import pytest +import json + +from collections import defaultdict + + +# helper function +def node_exists(name, data, min_len=1): + assert name in data + assert data[name] is not None + if isinstance(data[name], (list, tuple, dict, set)): + assert len(data[name]) >= min_len + + +def get_operation(record, kind_name, op_name=None): + for idx, itr in enumerate(record["strings"]["buffer_records"]): + if kind_name == itr["kind"]: + if op_name is None: + return idx, itr["operations"] + else: + for oidx, oname in enumerate(itr["operations"]): + if op_name == oname: + return oidx + return None + + +def test_scratch_memory(input_data): + data = input_data["rocprofiler-sdk-tool"] + buffer_records = data["buffer_records"] + + scratch_memory_data = buffer_records["scratch_memory"] + + _, bf_op_names = get_operation(data, "SCRATCH_MEMORY") + + assert len(bf_op_names) == 4 + + scratch_reported_agent_ids = set() + detected_agents_ids = set( + agent["id"]["handle"] for agent in data["agents"] if agent["type"] == 2 + ) + # check buffering data + for node in scratch_memory_data: + assert "size" in node + assert "kind" in node + assert "flags" in node + assert "thread_id" in node + assert "end_timestamp" in node + assert "start_timestamp" in node + + assert "queue_id" in node + assert "agent_id" in node + assert "operation" in node + assert "handle" in node["queue_id"] + + assert node.size > 0 + assert node.thread_id > 0 + assert node.agent_id.handle > 0 + assert node.queue_id.handle > 0 + assert node.start_timestamp > 0 + assert node.end_timestamp > 0 + assert node.start_timestamp < node.end_timestamp + + assert data.strings.buffer_records[node.kind].kind == "SCRATCH_MEMORY" + assert ( + 
data.strings.buffer_records[node.kind].operations[node.operation] + in bf_op_names + ) + + scratch_reported_agent_ids.add(node["agent_id"]["handle"]) + + assert 2**64 - 1 not in scratch_reported_agent_ids + assert scratch_reported_agent_ids == detected_agents_ids + + +if __name__ == "__main__": + exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) + sys.exit(exit_code)