Enable queue interception with scratch reporting (#1069)

* Enable queue interception with scratch reporting

Scratch reporting reports agent ID in buffer and callback records, but
HSA runtime provides only queue ID in the scratch callback.

This change enables queue interception when scratch reporting is requested

* Validation test for rocprofv3 + scratch-memory-trace

* Simplify checks for whether context is tracing a domain

* Update changelog

---------

Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
Этот коммит содержится в:
Mythreya
2024-09-12 16:26:34 -07:00
коммит произвёл GitHub
родитель 6098d52335
Коммит efbe4ea0a2
9 изменённых файлов: 181 добавлений и 6 удалений
+1
Просмотреть файл
@@ -106,3 +106,4 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/
- Creation of subdirectory when rocprofv3 `--output-file` contains a folder path
- Fix misaligned stores (undefined behavior) for buffer records
- Fix crash when only scratch reporting is enabled
+20
Просмотреть файл
@@ -427,5 +427,25 @@ deregister_client_contexts(rocprofiler_client_id_t client_id)
}
}
}
template <typename KindT>
bool
context::is_tracing(KindT _kind) const
{
constexpr auto is_callback_tracing =
std::is_same<KindT, rocprofiler_callback_tracing_kind_t>::value;
constexpr auto is_buffered_tracing =
std::is_same<KindT, rocprofiler_buffer_tracing_kind_t>::value;
static_assert(is_callback_tracing || is_buffered_tracing, "Unsupported domain type");
if constexpr(is_callback_tracing)
return (callback_tracer && callback_tracer->domains(_kind));
else if constexpr(is_buffered_tracing)
return (buffered_tracer && buffered_tracer->domains(_kind));
}
// explicitly instantiate
template bool context::is_tracing(rocprofiler_callback_tracing_kind_t) const;
template bool context::is_tracing(rocprofiler_buffer_tracing_kind_t) const;
} // namespace context
} // namespace rocprofiler
+3
Просмотреть файл
@@ -130,6 +130,9 @@ struct context
std::unique_ptr<thread_trace::DispatchThreadTracer> dispatch_thread_trace = {};
std::unique_ptr<thread_trace::AgentThreadTracer> agent_thread_trace = {};
// Returns true when the tracer matching KindT (callback_tracer for
// rocprofiler_callback_tracing_kind_t, buffered_tracer for
// rocprofiler_buffer_tracing_kind_t) is set and its domains(_kind) check passes.
// Defined and explicitly instantiated for those two kind types in the source file.
template <typename KindT>
bool is_tracing(KindT _kind) const;
};
// set the client index needs to be called before allocate_context()
+7 -6
Просмотреть файл
@@ -270,14 +270,15 @@ QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table)
"If you added a new field to context struct, make sure there is a check here if it "
"requires queue interception. Once you have done so, increment expected_context_size");
bool has_kernel_tracing =
(itr->callback_tracer &&
itr->callback_tracer->domains(ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH)) ||
(itr->buffered_tracer &&
itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH));
bool has_kernel_tracing = itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH);
bool has_scratch_reporting = itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY);
if(itr->counter_collection || itr->pc_sampler || has_kernel_tracing ||
itr->agent_counter_collection || itr->agent_thread_trace || itr->dispatch_thread_trace)
has_scratch_reporting || itr->agent_counter_collection || itr->agent_thread_trace ||
itr->dispatch_thread_trace)
{
enable_intercepter = true;
break;
+1
Просмотреть файл
@@ -32,3 +32,4 @@ add_subdirectory(kernel-rename)
add_subdirectory(aborted-app)
add_subdirectory(summary)
add_subdirectory(roctracer-roctx)
add_subdirectory(scratch-memory)
+41
Просмотреть файл
@@ -0,0 +1,41 @@
#
#
#
# Integration test project for rocprofv3 --scratch-memory-trace: one test runs the
# profiler on the scratch-memory binary, a second test validates the JSON it produced.
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
project(
rocprofiler-tests-rocprofv3-scratch-memory-tracing
LANGUAGES CXX
VERSION 0.0.0)
find_package(rocprofiler-sdk REQUIRED)
# Configure pytest.ini and copy the pytest scripts used by the validate step.
# NOTE(review): presumably these land in the current binary dir (validate.py is
# invoked from there below) -- confirm against the helper's definition.
rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py)
# Rewrite the memcheck preload setting from LD_PRELOAD= to ROCPROF_PRELOAD=.
# NOTE(review): presumably so rocprofv3 applies the preload to the profiled
# application instead of to the launcher itself -- confirm.
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
set(scratch-memory-tracing-env "${PRELOAD_ENV}")
# Execute step: run the scratch-memory binary under rocprofv3 with scratch-memory
# tracing, writing JSON output named "out" into <binary dir>/%tag%-trace.
# NOTE(review): %tag% presumably expands to the profiled executable's name, giving
# the scratch-memory-trace directory that the validate step reads -- confirm.
add_test(
NAME rocprofv3-test-scratch-memory-tracing-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --scratch-memory-trace -d
${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json --log-level
env -- $<TARGET_FILE:scratch-memory>)
# The execute step also fails if its output mentions a thrown exception.
set_tests_properties(
rocprofv3-test-scratch-memory-tracing-execute
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
"${scratch-memory-tracing-env}" FAIL_REGULAR_EXPRESSION
"threw an exception")
# Validate step: run validate.py against the JSON produced by the execute step.
add_test(NAME rocprofv3-test-scratch-memory-tracing-validate
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --input
${CMAKE_CURRENT_BINARY_DIR}/scratch-memory-trace/out_results.json)
# Validation must run after the execute step; any AssertionError in the output fails it.
set_tests_properties(
rocprofv3-test-scratch-memory-tracing-validate
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS
rocprofv3-test-scratch-memory-tracing-execute FAIL_REGULAR_EXPRESSION
"AssertionError")
+23
Просмотреть файл
@@ -0,0 +1,23 @@
#!/usr/bin/env python3
import json
import pytest
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
from rocprofiler_sdk.pytest_utils import collapse_dict_list
def pytest_addoption(parser):
    """Register the ``--input`` command-line option for the validation tests.

    ``--input`` is the path of the JSON results file to validate. The default
    points at ``scratch-memory-trace/out_results.json``, the same relative
    location the CMake validate test passes explicitly (the previous default
    named a ``scratch-memory-tracing`` directory that the execute step never
    creates).

    Args:
        parser: pytest option parser supplied to this hook.
    """
    parser.addoption(
        "--input",
        action="store",
        default="scratch-memory-trace/out_results.json",
        help="Input JSON",
    )
@pytest.fixture
def input_data(request):
    """Load the JSON file named by ``--input`` for use by the tests.

    The parsed JSON is passed through ``collapse_dict_list`` and wrapped in a
    ``dotdict`` before being returned.
    """
    input_path = request.config.getoption("--input")
    with open(input_path, "r") as stream:
        parsed = json.load(stream)
    return dotdict(collapse_dict_list(parsed))
+5
Просмотреть файл
@@ -0,0 +1,5 @@
[pytest]
addopts = --durations=20 -rA -s -vv
testpaths = validate.py
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
Исполняемый файл
+80
Просмотреть файл
@@ -0,0 +1,80 @@
#!/usr/bin/env python3
import sys
import pytest
import json
from collections import defaultdict
# helper function
def node_exists(name, data, min_len=1):
    """Assert that ``data[name]`` is present, not None and, if sized, long enough.

    Args:
        name: key expected in ``data``.
        data: mapping to inspect.
        min_len: minimum length required when the value is a list, tuple,
            dict or set; other value types are not length-checked.

    Raises:
        AssertionError: if any of the checks fails.
    """
    assert name in data
    value = data[name]
    assert value is not None
    if not isinstance(value, (list, tuple, dict, set)):
        return
    assert len(value) >= min_len
def get_operation(record, kind_name, op_name=None):
    """Look up a buffer-record kind (and optionally an operation) by name.

    Searches ``record["strings"]["buffer_records"]`` for entries whose
    ``"kind"`` equals ``kind_name``.

    Returns:
        * ``(kind_index, operations)`` for the first matching kind when
          ``op_name`` is None;
        * the index of ``op_name`` within a matching kind's operations when
          ``op_name`` is given and found;
        * ``None`` when nothing matches.
    """
    for kind_index, entry in enumerate(record["strings"]["buffer_records"]):
        if kind_name != entry["kind"]:
            continue
        operations = entry["operations"]
        if op_name is None:
            return kind_index, operations
        for op_index, candidate in enumerate(operations):
            if op_name == candidate:
                return op_index
    return None
def test_scratch_memory(input_data):
    """Validate the scratch-memory buffer records in the rocprofv3 JSON output.

    Checks that every record carries the expected fields with positive,
    well-ordered values, that its kind/operation indices resolve to the
    SCRATCH_MEMORY strings, and that the set of agents reported by the records
    equals the set of type-2 agents listed in the output.
    """
    tool_data = input_data["rocprofiler-sdk-tool"]
    scratch_records = tool_data["buffer_records"]["scratch_memory"]

    _, scratch_op_names = get_operation(tool_data, "SCRATCH_MEMORY")
    assert len(scratch_op_names) == 4

    # agents of type 2 (presumably the GPU agent type -- confirm against the SDK enum)
    expected_agent_ids = {
        agent["id"]["handle"] for agent in tool_data["agents"] if agent["type"] == 2
    }
    reported_agent_ids = set()

    required_fields = (
        "size",
        "kind",
        "flags",
        "thread_id",
        "end_timestamp",
        "start_timestamp",
        "queue_id",
        "agent_id",
        "operation",
    )

    # check buffering data
    for record in scratch_records:
        for field in required_fields:
            assert field in record
        assert "handle" in record["queue_id"]

        assert record.size > 0
        assert record.thread_id > 0
        assert record.agent_id.handle > 0
        assert record.queue_id.handle > 0
        assert record.start_timestamp > 0
        assert record.end_timestamp > 0
        assert record.start_timestamp < record.end_timestamp

        kind_strings = tool_data.strings.buffer_records[record.kind]
        assert kind_strings.kind == "SCRATCH_MEMORY"
        assert kind_strings.operations[record.operation] in scratch_op_names

        reported_agent_ids.add(record["agent_id"]["handle"])

    # 2**64 - 1 is presumably the "unknown agent" sentinel handle -- confirm
    assert 2**64 - 1 not in reported_agent_ids
    assert reported_agent_ids == expected_agent_ids
if __name__ == "__main__":
    # Allow running this file directly: hand it to pytest with "-x" plus any
    # extra command-line arguments, and exit with pytest's return code.
    sys.exit(pytest.main(["-x", __file__, *sys.argv[1:]]))