4fa165ec1a
* Add ToolsApiTable Add ToolsApiTable wrapping for scratch memory tracking * Add initial support for scratch memory tracking Buffering is implemented * cmake formatting (cmake-format) (#525) Co-authored-by: MythreyaK <MythreyaK@users.noreply.github.com> * source formatting (clang-format v11) (#524) Co-authored-by: MythreyaK <MythreyaK@users.noreply.github.com> * Add callback tracing for scratch Fixed the error where scratch tracking init was called irrespective of whether any client requested for it * Apply suggestions from code review Co-authored-by: Jonathan R. Madsen <jrmadsen@users.noreply.github.com> * Fix tools api copy/update Table were saved/updated incorrectly in previous commit. Also adds passing user data through the callback * Fix OpKind sequence for scratch tracking Previously scratch was using OpKind from rocprofiler-sdk, but templates were instantiated using API ID. These differ by 1 * Integration tests for scratch reporting Added buffer and callback integration tests for scratch reporting * source formatting (clang-format v11) (#550) Co-authored-by: MythreyaK <26112391+MythreyaK@users.noreply.github.com> * cmake formatting (cmake-format) (#551) Co-authored-by: MythreyaK <26112391+MythreyaK@users.noreply.github.com> * python formatting (black) (#549) Co-authored-by: MythreyaK <26112391+MythreyaK@users.noreply.github.com> * CI fixes * source formatting (clang-format v11) (#554) Co-authored-by: MythreyaK <26112391+MythreyaK@users.noreply.github.com> * Update api Rebase on main and updates based on PR feedback * Update scratch reporting and address PR comments - Added agent id to buffer records - Updated `test_internal_correlation_ids` - Is almost identical to one in async-copy - Updated scratch test to check for agent id - Updated queue id serialization in callback records (prints handle as nested key) - Remove `marker_api_traces` from scratch `test_internal_correlation_ids` validation test - Rename `amd_tools_api` to `scratch_memory` - Added doxygen 
comments - Remove scratch callback from `tool.cpp` - Replace assert with `LOF_IF` in `scratch_memory.cpp` * Update tools table Changed to match up with changes to hsa tables in main branch * Rework scratch memory structure * Update tests - Added suggestions from PR review, and updated tests accordingly * Misc cleanup * Update scratch test As of Apr 4th, `hsa_amd_agent_set_async_scratch_limit` is disabled. Note, > This API: `hsa_amd_agent_set_async_scratch_limit` is currently > disabled. We need some changes in CP firmware to be able to do this > and these changes are not ready yet. > With the current code, you will also not get notifications for > alternate-scratch allocations because this feature has been disabled > while CP firmware is making additional changes > We are hoping to have that feature enabled by ROCm-6.3 * Minor update to lib/rocprofiler-sdk/internal_threading.* - delay destruction of shared_ptrs of the tasks to prevent rare (but possible) data race on the destruction of the shared_ptr --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: MythreyaK <MythreyaK@users.noreply.github.com> Co-authored-by: Jonathan R. Madsen <jrmadsen@users.noreply.github.com> Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
294 строки
11 KiB
Python
Исполняемый файл
294 строки
11 KiB
Python
Исполняемый файл
#!/usr/bin/env python3
|
|
|
|
import sys
|
|
import pytest
|
|
import json
|
|
|
|
from collections import defaultdict
|
|
|
|
|
|
# helper function
|
|
def node_exists(name, data, min_len=1):
    """Assert that ``data[name]`` is present, not None, and not too small.

    Args:
        name: Key expected to be present in ``data``.
        data: Mapping (or other container supporting ``in`` and indexing)
            to inspect.
        min_len: Minimum acceptable length of ``data[name]``. Only applied
            when the value is a list, tuple, dict or set.

    Raises:
        AssertionError: If the key is missing, the value is ``None``, or a
            sized container value holds fewer than ``min_len`` items.
    """
    assert name in data
    entry = data[name]
    assert entry is not None
    # Scalars and other non-container values have no length requirement.
    if not isinstance(entry, (list, tuple, dict, set)):
        return
    assert len(entry) >= min_len
|
|
|
|
|
|
def test_data_structure(input_data):
    """verify minimum amount of expected data is present

    Every structural requirement is checked through ``node_exists`` *before*
    the corresponding node is indexed, so that missing data is reported as an
    assertion failure instead of a ``KeyError``.

    Args:
        input_data: Parsed JSON output of the tool (the ``input_data``
            fixture); expected to contain a ``rocprofiler-sdk-json-tool``
            top-level node.
    """
    data = input_data

    node_exists("rocprofiler-sdk-json-tool", data)

    sdk_data = data["rocprofiler-sdk-json-tool"]

    node_exists("metadata", sdk_data)
    node_exists("pid", sdk_data["metadata"])
    node_exists("main_tid", sdk_data["metadata"])
    node_exists("init_time", sdk_data["metadata"])
    node_exists("fini_time", sdk_data["metadata"])

    node_exists("agents", sdk_data)
    node_exists("call_stack", sdk_data)
    node_exists("callback_records", sdk_data)
    node_exists("buffer_records", sdk_data)

    # Number of agents whose "type" is 2 (presumably GPU agents -- TODO
    # confirm against the SDK's agent-type enum). Computed only after the
    # "agents" node has been verified above.
    num_agents = sum(1 for agent in sdk_data["agents"] if agent["type"] == 2)

    node_exists("names", sdk_data["callback_records"])
    node_exists("code_objects", sdk_data["callback_records"])
    node_exists("kernel_symbols", sdk_data["callback_records"])
    node_exists("hsa_api_traces", sdk_data["callback_records"])
    # HIP traces must be present but are allowed to be empty.
    node_exists("hip_api_traces", sdk_data["callback_records"], 0)
    node_exists("scratch_memory_traces", sdk_data["callback_records"], min_len=8)

    node_exists("names", sdk_data["buffer_records"])
    node_exists("kernel_dispatches", sdk_data["buffer_records"])
    # At least one memory copy per type-2 agent is required.
    node_exists("memory_copies", sdk_data["buffer_records"], num_agents)
    node_exists("hsa_api_traces", sdk_data["buffer_records"])
    # HIP traces must be present but are allowed to be empty.
    node_exists("hip_api_traces", sdk_data["buffer_records"], 0)
    node_exists("retired_correlation_ids", sdk_data["buffer_records"])
    node_exists("scratch_memory_traces", sdk_data["buffer_records"], min_len=8)
|
|
|
|
|
|
def test_timestamps(input_data):
|
|
data = input_data
|
|
sdk_data = data["rocprofiler-sdk-json-tool"]
|
|
|
|
cb_start = {}
|
|
cb_end = {}
|
|
for titr in ["hsa_api_traces", "hip_api_traces"]:
|
|
for itr in sdk_data["callback_records"][titr]:
|
|
cid = itr["record"]["correlation_id"]["internal"]
|
|
phase = itr["record"]["phase"]
|
|
if phase == 1:
|
|
cb_start[cid] = itr["timestamp"]
|
|
elif phase == 2:
|
|
cb_end[cid] = itr["timestamp"]
|
|
assert cb_start[cid] <= itr["timestamp"]
|
|
else:
|
|
assert phase == 1 or phase == 2
|
|
|
|
for itr in sdk_data["buffer_records"][titr]:
|
|
assert itr["start_timestamp"] <= itr["end_timestamp"]
|
|
|
|
for titr in ["kernel_dispatches", "memory_copies"]:
|
|
for itr in sdk_data["buffer_records"][titr]:
|
|
assert itr["start_timestamp"] < itr["end_timestamp"]
|
|
assert itr["correlation_id"]["internal"] > 0
|
|
assert itr["correlation_id"]["external"] > 0
|
|
assert sdk_data["metadata"]["init_time"] < itr["start_timestamp"]
|
|
assert sdk_data["metadata"]["init_time"] < itr["end_timestamp"]
|
|
assert sdk_data["metadata"]["fini_time"] > itr["start_timestamp"]
|
|
assert sdk_data["metadata"]["fini_time"] > itr["end_timestamp"]
|
|
|
|
# TODO(Is this check applicable for scratch, which doesn't use any correlation id?)
|
|
# api_start = cb_start[itr["correlation_id"]["internal"]]
|
|
# api_end = cb_end[itr["correlation_id"]["internal"]]
|
|
# assert api_start < itr["start_timestamp"]
|
|
# assert api_end <= itr["end_timestamp"]
|
|
|
|
|
|
def test_internal_correlation_ids(input_data):
|
|
data = input_data
|
|
sdk_data = data["rocprofiler-sdk-json-tool"]
|
|
|
|
api_corr_ids = []
|
|
for titr in ["hsa_api_traces", "hip_api_traces"]:
|
|
for itr in sdk_data["callback_records"][titr]:
|
|
api_corr_ids.append(itr["record"]["correlation_id"]["internal"])
|
|
|
|
for itr in sdk_data["buffer_records"][titr]:
|
|
api_corr_ids.append(itr["correlation_id"]["internal"])
|
|
|
|
api_corr_ids_sorted = sorted(api_corr_ids)
|
|
api_corr_ids_unique = list(set(api_corr_ids))
|
|
|
|
for itr in sdk_data["buffer_records"]["kernel_dispatches"]:
|
|
assert itr["correlation_id"]["internal"] in api_corr_ids_unique
|
|
|
|
for itr in sdk_data["buffer_records"]["memory_copies"]:
|
|
assert itr["correlation_id"]["internal"] in api_corr_ids_unique
|
|
|
|
len_corr_id_unq = len(api_corr_ids_unique)
|
|
assert len(api_corr_ids) != len_corr_id_unq
|
|
assert max(api_corr_ids_sorted) == len_corr_id_unq
|
|
|
|
|
|
def test_external_correlation_ids(input_data):
|
|
data = input_data
|
|
sdk_data = data["rocprofiler-sdk-json-tool"]
|
|
|
|
extern_corr_ids = []
|
|
for titr in ["hsa_api_traces", "hip_api_traces"]:
|
|
for itr in sdk_data["callback_records"][titr]:
|
|
assert itr["record"]["correlation_id"]["external"] > 0
|
|
assert (
|
|
itr["record"]["thread_id"] == itr["record"]["correlation_id"]["external"]
|
|
)
|
|
extern_corr_ids.append(itr["record"]["correlation_id"]["external"])
|
|
|
|
extern_corr_ids = list(set(sorted(extern_corr_ids)))
|
|
for titr in ["hsa_api_traces", "hip_api_traces"]:
|
|
for itr in sdk_data["buffer_records"][titr]:
|
|
assert itr["correlation_id"]["external"] > 0
|
|
assert itr["thread_id"] == itr["correlation_id"]["external"]
|
|
assert itr["thread_id"] in extern_corr_ids
|
|
assert itr["correlation_id"]["external"] in extern_corr_ids
|
|
|
|
for itr in sdk_data["buffer_records"]["kernel_dispatches"]:
|
|
assert itr["correlation_id"]["external"] > 0
|
|
assert itr["correlation_id"]["external"] in extern_corr_ids
|
|
|
|
for itr in sdk_data["buffer_records"]["memory_copies"]:
|
|
assert itr["correlation_id"]["external"] > 0
|
|
assert itr["correlation_id"]["external"] in extern_corr_ids
|
|
|
|
|
|
def op_name(op_name, record):
    """Return the operation-names node belonging to the kind named ``op_name``.

    The kind name is first translated to its key via
    ``record["names"]["kind_names"]`` (if several entries share the name, the
    last one wins); the first entry of ``record["names"]["operation_names"]``
    with that key is then returned.

    Note: the parameter shadows this function's own name inside the body; it
    is kept as-is so keyword callers keep working.

    Args:
        op_name: Kind name to look up, e.g. ``"SCRATCH_MEMORY"``.
        record: Mapping with a ``"names"`` node holding ``"kind_names"`` and
            ``"operation_names"`` lists of ``{"key": ..., "value": ...}``.

    Returns:
        The matching ``operation_names`` entry, or ``None`` when no entry
        matches.
    """
    op_key = None

    for kind_node in record["names"]["kind_names"]:
        if kind_node["value"] == op_name:
            op_key = kind_node["key"]

    for op_node in record["names"]["operation_names"]:
        if op_node["key"] == op_key:
            return op_node

    return None
|
|
|
|
|
|
# Tests above are identical to async-copy. Update as needed
|
|
|
|
|
|
def test_scratch_memory_tracking(input_data):
    """Validate scratch-memory records from the callback and buffer services.

    Checks, in order:

    * both services report four ``SCRATCH_MEMORY`` operation names with
      identical name -> value mappings;
    * every buffer record carries the expected fields and ordered, positive
      timestamps, and the set of reported agent handles equals the set of
      handles of agents with ``type == 2``;
    * per thread, callback records come in consecutive pairs whose phases
      differ by one and whose timestamps increase; allocation pairs must
      also agree on queue handle and dispatch id and carry a positive size,
      a positive slot count and flags;
    * per thread, the i-th buffer record agrees with the i-th callback pair
      on operation, flags and thread id.

    Args:
        input_data: Parsed JSON output of the tool (the ``input_data``
            fixture).
    """
    sdk_data = input_data["rocprofiler-sdk-json-tool"]
    callback_records = sdk_data["callback_records"]
    buffer_records = sdk_data["buffer_records"]

    scratch_callback_data = callback_records["scratch_memory_traces"]
    scratch_buffer_data = buffer_records["scratch_memory_traces"]

    cb_op_names = op_name("SCRATCH_MEMORY", callback_records)["value"]
    bf_op_names = op_name("SCRATCH_MEMORY", buffer_records)["value"]

    assert len(cb_op_names) == 4
    assert len(bf_op_names) == 4

    # op name -> enum value
    scratch_cb_op_map = {node["value"]: node["key"] for node in cb_op_names}
    scratch_bf_op_map = {node["value"]: node["key"] for node in bf_op_names}
    assert scratch_cb_op_map == scratch_bf_op_map

    scratch_reported_agent_ids = set()
    # Handles of agents with type 2 (presumably GPU agents -- TODO confirm).
    detected_agents_ids = {
        agent["id"]["handle"] for agent in sdk_data["agents"] if agent["type"] == 2
    }

    required_fields = (
        "size",
        "kind",
        "flags",
        "thread_id",
        "end_timestamp",
        "start_timestamp",
        "queue_id",
        "agent_id",
        "operation",
    )

    # check buffering data
    for node in scratch_buffer_data:
        for field in required_fields:
            assert field in node, "buffer record is missing %r" % (field,)
        assert "handle" in node["queue_id"]

        assert node["start_timestamp"] > 0
        assert node["start_timestamp"] < node["end_timestamp"]

        scratch_reported_agent_ids.add(node["agent_id"]["handle"])

    # 2**64 - 1 is presumably the "invalid/unset handle" sentinel -- TODO
    # confirm; it must never be reported as an agent.
    assert 2**64 - 1 not in scratch_reported_agent_ids
    assert scratch_reported_agent_ids == detected_agents_ids

    # { thread-id -> [ events ], ... }
    cb_threads = defaultdict(list)
    bf_threads = defaultdict(list)

    for node in scratch_callback_data:
        cb_threads[node["record"]["thread_id"]].append(node)

    for node in scratch_buffer_data:
        bf_threads[node["thread_id"]].append(node)

    for thread_id, nodes in cb_threads.items():
        assert thread_id > 0

        # start must be followed by end: records are consumed pairwise, so an
        # odd count means one record has no partner.
        assert len(nodes) % 2 == 0, (
            "thread %r has an unpaired scratch callback record" % (thread_id,)
        )
        for this_node, next_node in zip(nodes[0::2], nodes[1::2]):
            this_record = this_node["record"]
            next_record = next_node["record"]

            assert this_record["phase"] + 1 == next_record["phase"]
            assert this_record["thread_id"] == next_record["thread_id"]
            assert this_node["timestamp"] < next_node["timestamp"]

            # alloc has more data vs free and async reclaim
            if this_record["operation"] != scratch_cb_op_map["SCRATCH_MEMORY_ALLOC"]:
                continue

            this_payload = this_node["payload"]
            next_payload = next_node["payload"]
            assert this_payload["queue_id"]["handle"] == next_payload["queue_id"]["handle"]
            assert this_node["args"]["dispatch_id"] == next_node["args"]["dispatch_id"]
            assert "size" in next_payload and next_payload["size"] > 0
            assert (
                "num_slots" in next_node["args"]
                and next_node["args"]["num_slots"] > 0
            )
            assert "flags" in next_payload

    # callback data and buffer data must agree with each other
    for bf_thr, bf_nodes in bf_threads.items():
        # .get() avoids inserting an empty entry into the defaultdict.
        cb_nodes = cb_threads.get(bf_thr, [])

        # Each buffer record needs a callback pair at the same position;
        # fail with a clear assertion instead of an IndexError otherwise.
        assert len(cb_nodes) >= 2 * len(bf_nodes), (
            "thread %r has fewer callback pairs than buffer records" % (bf_thr,)
        )

        # All these 3 should have same data
        # timestamps are not same as callback records them at
        # a different instant in time. Callback timestamp
        # should be more than buffer timestamp
        for bf_node, cb_enter, cb_exit in zip(bf_nodes, cb_nodes[0::2], cb_nodes[1::2]):
            enter_record = cb_enter["record"]
            exit_record = cb_exit["record"]

            assert (
                bf_node["operation"]
                == enter_record["operation"]
                == exit_record["operation"]
            )
            # The operation value is used as a positional index into the
            # operation-name lists (assumes the lists are ordered by enum
            # value -- TODO confirm).
            assert (
                bf_op_names[bf_node["operation"]]
                == cb_op_names[enter_record["operation"]]
                == cb_op_names[exit_record["operation"]]
            )

            assert bf_node["flags"] == cb_exit["payload"]["flags"]

            assert (
                bf_node["thread_id"]
                == enter_record["thread_id"]
                == exit_record["thread_id"]
            )
|
|
|
|
|
|
if __name__ == "__main__":
    # Run this file through pytest when executed directly: "-x" stops at the
    # first failure, and any extra command-line arguments are forwarded.
    pytest_args = ["-x", __file__]
    pytest_args.extend(sys.argv[1:])
    sys.exit(pytest.main(pytest_args))
|