b097e276a9
* [rocprofv3] Add rocpd output support (part 1: prelude)
- git submodules for sqlite3, GOTCHA, and pybind11
- HIP stream data
- rocprofiler_query_intercept_table_name(...)
- serialization load
- rocprofiler::sdk::get_perfetto_category(KindT)
- rocprofiler::sdk::parse::strip
- common library updates
- md5sum
- hasher
- simple_timer
- static_tl_object
- get_process_start_time_ns(pid_t)
- output library updates
- node_info
- file_generator (generator is now virtual base class)
- stream info updates
* Added submodules
* Code review updates
* Minor unused-but-set-X warning fixes
* Update CI
- install libsqlite3-dev package
* Update CI
- install libsqlite3-dev package
* Fix static thread-local object memory leak
- also fix signal handler chaining
* Remove URL from comment
* Remove page migration exception
* Enable ROCPROFILER_BUILD_SQLITE3 by default
- try find_package(SQLite3) first and then build when ROCPROFILER_BUILD_SQLITE3=ON
* Fix gotcha installation
- make install of target optional
* Validate tracing + counter collection dispatch data
- i.e. correlation ids, thread ids, timestamps
* Make find_package(SQLite3) optional
- ROCm CI does not have SQLite3 dev package installed and cannot build from source (missing tclsh)
* Fixes to tracing + counter collection test
* get_process_start_time_ns update
- original implementation did not work
* Fix pytest-packages test_perfetto_data for counter collection
- erroneous failure when used with same PMC + multiple agents
* cmake policy: option() honors normal variables
- for GOTCHA submodule
* Improve samples/api_buffered_tracing stability
- reduce likelihood of sporadic exception throw
* Update gotcha submodule
---------
Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
[ROCm/rocprofiler-sdk commit: 7166b1ab58]
335 строки
12 KiB
Python
335 строки
12 KiB
Python
#!/usr/bin/env python3

# MIT License
#
# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
import pytest
|
|
|
|
|
|
def test_hip_api_trace(json_data):
|
|
data = json_data["rocprofiler-sdk-tool"]
|
|
|
|
def get_operation_name(kind_id, op_id):
|
|
return data["strings"]["buffer_records"][kind_id]["operations"][op_id]
|
|
|
|
def get_kind_name(kind_id):
|
|
return data["strings"]["buffer_records"][kind_id]["kind"]
|
|
|
|
valid_domain_names = (
|
|
"HIP_RUNTIME_API",
|
|
"HIP_RUNTIME_API_EXT",
|
|
)
|
|
|
|
hip_api_data = data["buffer_records"]["hip_api"]
|
|
|
|
functions = []
|
|
for api in hip_api_data:
|
|
kind = get_kind_name(api["kind"])
|
|
assert kind in valid_domain_names
|
|
assert api["end_timestamp"] >= api["start_timestamp"]
|
|
functions.append(get_operation_name(api["kind"], api["operation"]))
|
|
|
|
functions = list(set(functions))
|
|
for itr in (
|
|
"__hipPushCallConfiguration",
|
|
"__hipPopCallConfiguration",
|
|
"__hipRegisterFatBinary",
|
|
"__hipRegisterFunction",
|
|
):
|
|
assert itr not in functions, f"{itr}"
|
|
|
|
for itr in (
|
|
"hipMallocAsync",
|
|
"hipMemcpyAsync",
|
|
"hipMemsetAsync",
|
|
"hipFreeAsync",
|
|
"hipLaunchKernel",
|
|
):
|
|
assert itr in functions, f"{itr}"
|
|
|
|
|
|
def test_kernel_trace(json_data):
|
|
data = json_data["rocprofiler-sdk-tool"]
|
|
|
|
def get_kernel_name(kernel_id):
|
|
return data["kernel_symbols"][kernel_id]["formatted_kernel_name"]
|
|
|
|
def get_kernel_rename(corr_id):
|
|
for itr in data.strings.correlation_id.external:
|
|
print(itr)
|
|
if itr.key == corr_id:
|
|
return itr.value
|
|
return None
|
|
|
|
def get_kind_name(kind_id):
|
|
return data["strings"]["buffer_records"][kind_id]["kind"]
|
|
|
|
valid_kernel_names = "|".join(
|
|
(
|
|
"run",
|
|
"run/iteration",
|
|
"run/rank-([0-9]+)/thread-([0-9]+)/device-([0-9]+)/(begin|end)",
|
|
)
|
|
)
|
|
valid_kernel_regex = re.compile("^({})$".format(valid_kernel_names))
|
|
|
|
kernel_dispatch_data = data["buffer_records"]["kernel_dispatch"]
|
|
for dispatch in kernel_dispatch_data:
|
|
assert get_kind_name(dispatch["kind"]) == "KERNEL_DISPATCH"
|
|
assert dispatch["correlation_id"]["internal"] > 0
|
|
assert dispatch["correlation_id"]["external"] > 0
|
|
|
|
dispatch_info = dispatch["dispatch_info"]
|
|
assert dispatch_info["agent_id"]["handle"] > 0
|
|
assert dispatch_info["queue_id"]["handle"] > 0
|
|
assert dispatch_info["kernel_id"] > 0
|
|
assert dispatch["end_timestamp"] >= dispatch["start_timestamp"]
|
|
|
|
kernel_name = get_kernel_name(dispatch_info["kernel_id"])
|
|
assert (
|
|
re.search(valid_kernel_regex, kernel_name) is None
|
|
), f"kernel '{kernel_name}' matches regular expression '{valid_kernel_names}'"
|
|
|
|
assert kernel_name not in valid_kernel_names
|
|
|
|
external_corr_id = dispatch["correlation_id"]["external"]
|
|
assert external_corr_id > 0
|
|
|
|
kernel_rename = get_kernel_rename(external_corr_id)
|
|
assert kernel_rename is not None, f"{dispatch}"
|
|
assert kernel_rename != kernel_name, f"{dispatch}"
|
|
assert (
|
|
re.search(valid_kernel_regex, kernel_rename) is not None
|
|
), f"renamed kernel '{kernel_rename}' does not match regular expression '{valid_kernel_names}'"
|
|
|
|
|
|
def test_memory_copy_trace(json_data):
|
|
data = json_data["rocprofiler-sdk-tool"]
|
|
|
|
buffer_records = data["buffer_records"]
|
|
agent_data = data["agents"]
|
|
memory_copy_data = buffer_records["memory_copy"]
|
|
|
|
def get_kind_name(kind_id):
|
|
return data["strings"]["buffer_records"][kind_id]["kind"]
|
|
|
|
def get_agent(node_id):
|
|
for agent in agent_data:
|
|
if agent["id"]["handle"] == node_id["handle"]:
|
|
return agent
|
|
return None
|
|
|
|
# two threads * two directions
|
|
assert len(memory_copy_data) >= (2 * 2), f"{memory_copy_data}"
|
|
assert (len(memory_copy_data) % (2 * 2)) == 0, f"{memory_copy_data}"
|
|
|
|
for row in memory_copy_data:
|
|
src_agent = get_agent(row["src_agent_id"])
|
|
dst_agent = get_agent(row["dst_agent_id"])
|
|
assert get_kind_name(row["kind"]) == "MEMORY_COPY"
|
|
assert src_agent is not None, f"{row}"
|
|
assert dst_agent is not None, f"{row}"
|
|
assert row["correlation_id"]["internal"] > 0
|
|
assert row["end_timestamp"] >= row["start_timestamp"]
|
|
|
|
|
|
def test_metadata_data(json_data):
|
|
data = json_data["rocprofiler-sdk-tool"]
|
|
|
|
# patch summary groups for testing purposes
|
|
# (it appears sometimes these contain single quotes and other times it doesn't)
|
|
data.metadata.config.summary_groups = [
|
|
f"{itr}".strip("'") for itr in data.metadata.config.summary_groups
|
|
]
|
|
|
|
num_summary_grps = len(data.metadata.config.summary_groups)
|
|
expected_summary_grps = (
|
|
[
|
|
"KERNEL_DISPATCH|MEMORY_COPY",
|
|
]
|
|
if num_summary_grps == 1
|
|
else [
|
|
"KERNEL_DISPATCH|MEMORY_COPY",
|
|
".*_API",
|
|
]
|
|
)
|
|
|
|
assert data.metadata.config.summary is True
|
|
assert data.metadata.config.summary_per_domain is True
|
|
assert data.metadata.config.summary_unit == "nsec"
|
|
assert data.metadata.config.summary_file == "summary"
|
|
assert data.metadata.config.summary_groups == expected_summary_grps
|
|
|
|
assert len(data.metadata.command) == 4
|
|
|
|
# patch command to make it easier to test
|
|
data.metadata.command[0] = os.path.basename(data.metadata.command[0])
|
|
|
|
assert data.metadata.command == ["transpose", "2", "500", "10"]
|
|
|
|
|
|
def test_summary_data(json_data):
|
|
data = json_data["rocprofiler-sdk-tool"]
|
|
|
|
domains = []
|
|
for itr in data.summary:
|
|
domains.append(itr.domain)
|
|
if itr.domain == "KERNEL_DISPATCH":
|
|
assert itr.stats.count == 1004
|
|
expected = dict([["run/iteration", 1000]])
|
|
for oitr in itr.stats.operations:
|
|
if oitr.key in expected.keys():
|
|
assert oitr.value.count == expected[oitr.key]
|
|
else:
|
|
assert oitr.key.startswith("run/rank-")
|
|
assert (
|
|
re.match(
|
|
r"run/rank-([0-9]+)/thread-([0-9]+)/device-([0-9]+)/begin",
|
|
oitr.key,
|
|
)
|
|
is not None
|
|
)
|
|
assert oitr.value.count == 2
|
|
elif itr.domain == "HIP_API":
|
|
assert itr.stats.count >= 2130 and itr.stats.count <= 2165
|
|
elif itr.domain == "MEMORY_COPY":
|
|
# two threads + two memory copies (H2D + D2H).
|
|
# HIP may decompose memory copies into more than one HSA memory copy
|
|
assert itr.stats.count >= 4 and (itr.stats.count % 4) == 0
|
|
elif itr.domain == "MEMORY_ALLOCATION":
|
|
memory_allocation_allocate_count = 0
|
|
memory_allocation_free_count = 0
|
|
memory_allocation_vmem_allocate_count = 0
|
|
memory_allocation_vmem_free_count = 0
|
|
for operation in itr.stats.operations:
|
|
if operation.key == "MEMORY_ALLOCATION_ALLOCATE":
|
|
memory_allocation_allocate_count = operation.value.count
|
|
elif operation.key == "MEMORY_ALLOCATION_FREE":
|
|
memory_allocation_free_count = operation.value.count
|
|
elif operation.key == "MEMORY_ALLOCATION_VMEM_ALLOCATE":
|
|
memory_allocation_vmem_allocate_count = operation.value.count
|
|
elif operation.key == "MEMORY_ALLOCATION_VMEM_FREE":
|
|
memory_allocation_vmem_free_count = operation.value.count
|
|
memory_allocation_allocate_and_free_count = (
|
|
memory_allocation_allocate_count + memory_allocation_free_count
|
|
)
|
|
assert (
|
|
memory_allocation_allocate_and_free_count >= 10
|
|
and memory_allocation_allocate_and_free_count <= 30
|
|
)
|
|
# check if hip-runtime memory management pools through virtual memory allocation count is equal to free count.
|
|
assert (
|
|
memory_allocation_vmem_allocate_count == memory_allocation_vmem_free_count
|
|
)
|
|
elif itr.domain == "MARKER_API":
|
|
assert itr.stats.count == 1106
|
|
expected = dict(
|
|
[
|
|
["run", 2],
|
|
["run/iteration", 1000],
|
|
["run/iteration/sync", 100],
|
|
]
|
|
)
|
|
for oitr in itr.stats.operations:
|
|
if oitr.key in expected.keys():
|
|
assert oitr.value.count == expected[oitr.key]
|
|
else:
|
|
assert oitr.key.startswith("run/rank-")
|
|
assert (
|
|
re.match(
|
|
r"run/rank-([0-9]+)/thread-([0-9]+)/device-([0-9]+)/(begin|end)",
|
|
oitr.key,
|
|
)
|
|
is not None
|
|
)
|
|
assert oitr.value.count == 1
|
|
else:
|
|
assert False, f"unhandled domain: {itr.domain}"
|
|
|
|
assert len(list(set(domains))) == len(domains)
|
|
|
|
|
|
def test_summary_display_data(json_data, summary_data):
|
|
data = json_data["rocprofiler-sdk-tool"]
|
|
num_summary_grps = len(data.metadata.config.summary_groups)
|
|
|
|
def get_df(domain):
|
|
return summary_data[domain]
|
|
|
|
def get_dims(df):
|
|
# return rows x cols
|
|
return [df.shape[0], df.shape[1]] if df is not None else [0, 0]
|
|
|
|
hip = get_df("HIP_API")
|
|
marker = get_df("MARKER_API")
|
|
dispatch = get_df("KERNEL_DISPATCH")
|
|
memcpy = get_df("MEMORY_COPY")
|
|
memalloc = get_df("MEMORY_ALLOCATION")
|
|
dispatch_and_copy = get_df("KERNEL_DISPATCH + MEMORY_COPY")
|
|
hip_and_marker = get_df("HIP_API + MARKER_API") if num_summary_grps > 1 else None
|
|
total = get_df("SUMMARY")
|
|
|
|
expected_hip_and_marker_dims = [21, 9] if hip_and_marker is not None else [0, 0]
|
|
|
|
assert get_dims(marker) == [7, 9], f"{marker}"
|
|
assert get_dims(memcpy) == [2, 9], f"{memcpy}"
|
|
assert get_dims(memalloc) in (
|
|
[2, 9], # [2,9] when hip-runtime doesn't use vmem.
|
|
[4, 9],
|
|
), f"{memalloc}"
|
|
assert get_dims(dispatch) == [3, 9], f"{dispatch}"
|
|
assert get_dims(dispatch_and_copy) == [5, 9], f"{dispatch_and_copy}"
|
|
assert get_dims(hip) == [14, 9], f"{hip}"
|
|
assert get_dims(hip_and_marker) == expected_hip_and_marker_dims, f"{hip_and_marker}"
|
|
if get_dims(memalloc) == [2, 9]: # [2,9] when hip-runtime doesn't use vmem alloc.
|
|
assert get_dims(total) == [25, 9], f"{total}"
|
|
elif get_dims(memalloc) == [4, 9]:
|
|
assert get_dims(total) == [27, 9], f"{total}"
|
|
|
|
|
|
def test_perfetto_data(pftrace_data, json_data):
    """Cross-check the perfetto trace data against the JSON output.

    Delegates to ``rocprofiler_sdk.tests.rocprofv3.test_perfetto_data``,
    passing both data sets and the tuple
    ``("hip", "marker", "kernel", "memory_copy")`` (presumably the trace
    categories to compare -- confirm against the helper's signature).

    Args:
        pftrace_data: perfetto trace data, forwarded unchanged.
        json_data: JSON output data, forwarded unchanged.
    """
    # function-scope import: the helper package is only needed by this test
    import rocprofiler_sdk.tests.rocprofv3 as rocprofv3

    rocprofv3.test_perfetto_data(
        pftrace_data,
        json_data,
        ("hip", "marker", "kernel", "memory_copy"),
    )
|
|
|
|
|
|
def test_otf2_data(otf2_data, json_data):
    """Cross-check the OTF2 trace data against the JSON output.

    Delegates to ``rocprofiler_sdk.tests.rocprofv3.test_otf2_data``, passing
    both data sets and the tuple
    ``("hip", "marker", "kernel", "memory_copy", "memory_allocation")``
    (presumably the trace categories to compare -- confirm against the
    helper's signature).

    Args:
        otf2_data: OTF2 trace data, forwarded unchanged.
        json_data: JSON output data, forwarded unchanged.
    """
    # function-scope import: the helper package is only needed by this test
    import rocprofiler_sdk.tests.rocprofv3 as rocprofv3

    rocprofv3.test_otf2_data(
        otf2_data,
        json_data,
        ("hip", "marker", "kernel", "memory_copy", "memory_allocation"),
    )
|
|
|
|
|
|
if __name__ == "__main__":
    # Run this file under pytest when executed directly: "-x" stops at the
    # first failure, and any extra command-line arguments are forwarded.
    exit_code = pytest.main(["-x", __file__] + sys.argv[1:])
    sys.exit(exit_code)
|