Enable queue interception with scratch reporting (#1069)

* Enable queue interception with scratch reporting

Scratch reporting includes the agent ID in its buffer and callback records, but
the HSA runtime provides only the queue ID in the scratch callback.

This change enables queue interception when scratch reporting is requested, so
that the agent can be resolved from the queue ID.

* Validation test for rocprofv3 + scratch-memory-trace

* Simplify checks for whether context is tracing a domain

* Update changelog

---------

Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>

[ROCm/rocprofiler-sdk commit: efbe4ea0a2]
이 커밋은 다음에 포함됨:
Mythreya
2024-09-12 16:26:34 -07:00
커밋한 사람 GitHub
부모 db0f26f562
커밋 d008463d50
9개의 변경된 파일181개의 추가작업 그리고 6개의 파일을 삭제
+1
파일 보기
@@ -106,3 +106,4 @@ Full documentation for ROCprofiler-SDK is available at [Click Here](source/docs/
- Creation of subdirectory when rocprofv3 `--output-file` contains a folder path
- Fix misaligned stores (undefined behavior) for buffer records
- Fix crash when only scratch reporting is enabled
+20
파일 보기
@@ -427,5 +427,25 @@ deregister_client_contexts(rocprofiler_client_id_t client_id)
}
}
}
template <typename KindT>
bool
context::is_tracing(KindT _kind) const
{
constexpr auto is_callback_tracing =
std::is_same<KindT, rocprofiler_callback_tracing_kind_t>::value;
constexpr auto is_buffered_tracing =
std::is_same<KindT, rocprofiler_buffer_tracing_kind_t>::value;
static_assert(is_callback_tracing || is_buffered_tracing, "Unsupported domain type");
if constexpr(is_callback_tracing)
return (callback_tracer && callback_tracer->domains(_kind));
else if constexpr(is_buffered_tracing)
return (buffered_tracer && buffered_tracer->domains(_kind));
}
// explicitly instantiate
template bool context::is_tracing(rocprofiler_callback_tracing_kind_t) const;
template bool context::is_tracing(rocprofiler_buffer_tracing_kind_t) const;
} // namespace context
} // namespace rocprofiler
+3
파일 보기
@@ -130,6 +130,9 @@ struct context
std::unique_ptr<thread_trace::DispatchThreadTracer> dispatch_thread_trace = {};
std::unique_ptr<thread_trace::AgentThreadTracer> agent_thread_trace = {};
/// Check whether this context traces the domain @p _kind.
///
/// KindT is expected to be rocprofiler_callback_tracing_kind_t or
/// rocprofiler_buffer_tracing_kind_t; the definition static_asserts on any other
/// type. Returns true only when the corresponding tracer (callback or buffered)
/// is set and reports @p _kind among its domains.
template <typename KindT>
bool is_tracing(KindT _kind) const;
};
// set the client index needs to be called before allocate_context()
+7 -6
파일 보기
@@ -270,14 +270,15 @@ QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table)
"If you added a new field to context struct, make sure there is a check here if it "
"requires queue interception. Once you have done so, increment expected_context_size");
bool has_kernel_tracing =
(itr->callback_tracer &&
itr->callback_tracer->domains(ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH)) ||
(itr->buffered_tracer &&
itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH));
bool has_kernel_tracing = itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH);
bool has_scratch_reporting = itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY);
if(itr->counter_collection || itr->pc_sampler || has_kernel_tracing ||
itr->agent_counter_collection || itr->agent_thread_trace || itr->dispatch_thread_trace)
has_scratch_reporting || itr->agent_counter_collection || itr->agent_thread_trace ||
itr->dispatch_thread_trace)
{
enable_intercepter = true;
break;
+1
파일 보기
@@ -32,3 +32,4 @@ add_subdirectory(kernel-rename)
add_subdirectory(aborted-app)
add_subdirectory(summary)
add_subdirectory(roctracer-roctx)
add_subdirectory(scratch-memory)
+41
파일 보기
@@ -0,0 +1,41 @@
#
#
#
# Integration test for `rocprofv3 --scratch-memory-trace`: one CTest entry runs the
# scratch-memory application under rocprofv3, a second one validates the JSON output.
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
project(
rocprofiler-tests-rocprofv3-scratch-memory-tracing
LANGUAGES CXX
VERSION 0.0.0)
find_package(rocprofiler-sdk REQUIRED)
# Project helper; presumably configures pytest.ini and copies the listed Python files
# next to the build tree so the validate step can find them -- TODO confirm.
rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py)
# Re-spell any "LD_PRELOAD=" entry from the memcheck environment as "ROCPROF_PRELOAD="
# before handing it to the execute test below.
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
set(scratch-memory-tracing-env "${PRELOAD_ENV}")
# Step 1: run the scratch-memory target under rocprofv3 with scratch-memory tracing and
# JSON output. NOTE(review): the validate step reads from "scratch-memory-trace/", so
# "%tag%" presumably expands to the application name -- confirm.
add_test(
NAME rocprofv3-test-scratch-memory-tracing-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --scratch-memory-trace -d
${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json --log-level
env -- $<TARGET_FILE:scratch-memory>)
# The run fails if its output contains "threw an exception" or it exceeds 45 seconds.
set_tests_properties(
rocprofv3-test-scratch-memory-tracing-execute
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
"${scratch-memory-tracing-env}" FAIL_REGULAR_EXPRESSION
"threw an exception")
# Step 2: validate the JSON produced by step 1 with the pytest-based validate.py.
add_test(NAME rocprofv3-test-scratch-memory-tracing-validate
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --input
${CMAKE_CURRENT_BINARY_DIR}/scratch-memory-trace/out_results.json)
# Validation depends on the execute test and fails on any "AssertionError" in its output.
set_tests_properties(
rocprofv3-test-scratch-memory-tracing-validate
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS
rocprofv3-test-scratch-memory-tracing-execute FAIL_REGULAR_EXPRESSION
"AssertionError")
+23
파일 보기
@@ -0,0 +1,23 @@
#!/usr/bin/env python3
import json
import pytest
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
from rocprofiler_sdk.pytest_utils import collapse_dict_list
def pytest_addoption(parser):
    """Register the ``--input`` command-line option with pytest.

    ``--input`` is the path of the JSON results file to validate. The default
    uses the ``scratch-memory-trace`` directory name, matching the path the
    CTest validation step passes explicitly, so running pytest without
    ``--input`` looks in the same place.

    :param parser: the pytest option parser supplied to this hook.
    """
    parser.addoption(
        "--input",
        action="store",
        default="scratch-memory-trace/out_results.json",
        help="Input JSON",
    )
@pytest.fixture
def input_data(request):
    """Load the JSON file named by ``--input`` for use by the tests.

    The parsed JSON is passed through ``collapse_dict_list`` and wrapped in a
    ``dotdict`` before being returned.
    """
    json_path = request.config.getoption("--input")
    with open(json_path, "r") as stream:
        parsed = json.load(stream)
    return dotdict(collapse_dict_list(parsed))
+5
파일 보기
@@ -0,0 +1,5 @@
[pytest]
addopts = --durations=20 -rA -s -vv
testpaths = validate.py
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
+80
파일 보기
@@ -0,0 +1,80 @@
#!/usr/bin/env python3
import sys
import pytest
import json
from collections import defaultdict
# helper function
def node_exists(name, data, min_len=1):
    """Assert that ``data[name]`` exists, is not None and is not too short.

    The length check (``len(value) >= min_len``) is applied only when the
    value is a list, tuple, dict or set. Any failed check raises
    ``AssertionError``.
    """
    assert name in data
    value = data[name]
    assert value is not None
    sized_types = (list, tuple, dict, set)
    if isinstance(value, sized_types):
        assert len(value) >= min_len
def get_operation(record, kind_name, op_name=None):
    """Look up a buffer-record kind, or one of its operations, by name.

    Searches ``record["strings"]["buffer_records"]`` for entries whose
    ``"kind"`` equals ``kind_name``.

    Returns:
        - ``(kind_index, operations)`` when ``op_name`` is None and the kind
          is found;
        - the index of ``op_name`` within the kind's operations when
          ``op_name`` is given and found;
        - ``None`` when nothing matches.
    """
    kind_entries = record["strings"]["buffer_records"]
    for kind_idx, entry in enumerate(kind_entries):
        if kind_name != entry["kind"]:
            continue

        op_names = entry["operations"]
        if op_name is None:
            return kind_idx, op_names

        # a specific operation was requested: report its position only
        for op_idx, candidate in enumerate(op_names):
            if op_name == candidate:
                return op_idx

    return None
def test_scratch_memory(input_data):
    """Validate the scratch-memory buffer records in the rocprofv3 JSON output.

    Checks that every record carries the expected fields with positive values
    and ordered timestamps, that its kind/operation resolve to SCRATCH_MEMORY
    strings, and that the set of agents seen in the records equals the set of
    agents whose ``type`` is 2.
    """
    tool_data = input_data["rocprofiler-sdk-tool"]
    scratch_records = tool_data["buffer_records"]["scratch_memory"]

    _, scratch_op_names = get_operation(tool_data, "SCRATCH_MEMORY")
    assert len(scratch_op_names) == 4

    # NOTE(review): type == 2 presumably selects GPU agents -- confirm against the SDK enum
    expected_agent_ids = {
        agent["id"]["handle"] for agent in tool_data["agents"] if agent["type"] == 2
    }
    seen_agent_ids = set()

    required_fields = (
        "size",
        "kind",
        "flags",
        "thread_id",
        "end_timestamp",
        "start_timestamp",
        "queue_id",
        "agent_id",
        "operation",
    )

    # check buffering data
    for record in scratch_records:
        for field in required_fields:
            assert field in record
        assert "handle" in record["queue_id"]

        assert record.size > 0
        assert record.thread_id > 0
        assert record.agent_id.handle > 0
        assert record.queue_id.handle > 0
        assert record.start_timestamp > 0
        assert record.end_timestamp > 0
        assert record.start_timestamp < record.end_timestamp

        kind_strings = tool_data.strings.buffer_records[record.kind]
        assert kind_strings.kind == "SCRATCH_MEMORY"
        assert kind_strings.operations[record.operation] in scratch_op_names

        seen_agent_ids.add(record["agent_id"]["handle"])

    # NOTE(review): 2**64 - 1 looks like the "unresolved agent" sentinel handle -- confirm
    assert 2**64 - 1 not in seen_agent_ids
    assert seen_agent_ids == expected_agent_ids
if __name__ == "__main__":
    # Run this file's tests under pytest (stop at first failure), forwarding any
    # extra command-line arguments, and use pytest's result as the exit status.
    pytest_args = ["-x", __file__, *sys.argv[1:]]
    sys.exit(pytest.main(pytest_args))