Handle special cases when stream value is hipStreamLegacy (0x01) or hipStreamPerThread (0x02) (#343)

* Updated stream code to handle special cases when stream value is 0x01 or 0x02

* Removed extra definitions and updated tests to account for special case

* Modified stream.cpp so that each thread is assigned a unique stream ID when hipStreamPerThread is used as the stream value. Modified tests to check that each thread is assigned a unique stream ID that stays the same across repeated calls using hipStreamPerThread

* Updated idx_offset, stream_map, and thread counter to be in one struct.

* Update stream.cpp to only use add_stream() and update tests to add a separate unit test for hipStreamPerThread

* Remove unnecessary comment

* Removed unnecessary line

* Updated tests and stream.cpp to update stream ID correctly

* Updated test structure
This commit is contained in:
itrowbri
2025-08-27 20:04:13 -05:00
committed by GitHub
parent 9aaad2017b
commit 4d98a0169f
15 changed files with 572 additions and 15 deletions
@@ -40,8 +40,6 @@
#include <sqlite3.h>
#include <algorithm>
#include <algorithm>
namespace rocpd
{
namespace interop
@@ -164,7 +164,7 @@ ThreadTraceAQLPacketFactory::ThreadTraceAQLPacketFactory(const hsa::AgentCache&
{buffer_size_hi}});
}
if(perf_exclude_mask)
if(perf_exclude_mask != 0u)
{
// Bitwise NOT because aqlprofile receives the mask, not the exclude mask
aql_params.push_back(
@@ -92,6 +92,13 @@ add_stream(hipStream_t stream)
if(!_data.emplace(_stream, rocprofiler_stream_id_t{.handle = idx}).second)
{
idx_offset += 1;
// Handle special hipStreamPerThread case where each thread has its own implicit
// stream ID. No need to update map since hipStreamPerThread is defined as 0x02
if(_stream == hipStreamPerThread)
{
return rocprofiler_stream_id_t{.handle = idx};
}
idx = _data.size() + idx_offset;
auto _existing = _data.at(_stream);
ROCP_INFO << "existing hipStream_t ("
<< sdk::utility::as_hex(static_cast<void*>(_stream))
@@ -99,7 +106,6 @@ add_stream(hipStream_t stream)
<< "} -> rocprofiler_stream_id_t{.handle = " << idx << "}";
_data.at(_stream) = rocprofiler_stream_id_t{.handle = idx};
}
return _data.at(_stream);
},
stream);
@@ -108,10 +114,26 @@ add_stream(hipStream_t stream)
auto
get_stream_id(hipStream_t stream)
{
// Handle special case where stream is hipStreamLegacy (0x01). Changes sync behavior of
// null stream, so the stream is assigned the value of the null stream
if(stream == hipStreamLegacy)
{
stream = nullptr;
}
// Handle special case where stream is hipStreamPerThread (0x02). Assigns implicit stream id to
// each thread
else if(stream == hipStreamPerThread)
{
static thread_local auto thr_stream_id = rocprofiler_stream_id_t{.handle = 0};
if(thr_stream_id.handle == 0) thr_stream_id = add_stream(stream);
return thr_stream_id;
}
return get_stream_map()->rlock(
[](const stream_map_t& _data, hipStream_t _stream) {
ROCP_ERROR_IF(_data.count(_stream) == 0)
<< "failed to retrieve stream ID in " << __FILE__;
<< fmt::format("failed to retrieve stream ID for hipStream_t ({}) in {}",
sdk::utility::as_hex(static_cast<void*>(_stream)),
__FILE__);
return _data.at(_stream);
},
stream);
@@ -39,3 +39,4 @@ if(rocJPEG_FOUND AND rocJPEG_VERSION VERSION_GREATER 0.6.0)
endif()
add_subdirectory(hsa-code-object)
add_subdirectory(hip-streams)
add_subdirectory(hip-streams-per-thread)
@@ -0,0 +1,42 @@
#
#
#
# Build script for the hip-streams-per-thread test executable.
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
# When no HIP compiler has been selected, search for amdclang++ under ROCM_PATH
# (CMake variable or environment variable) or /opt/rocm and use it if found.
# This has to happen before project() enables the HIP language below.
if(NOT CMAKE_HIP_COMPILER)
find_program(
amdclangpp_EXECUTABLE
NAMES amdclang++
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATH_SUFFIXES bin llvm/bin NO_CACHE)
mark_as_advanced(amdclangpp_EXECUTABLE)
if(amdclangpp_EXECUTABLE)
set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}")
endif()
endif()
project(rocprofiler-sdk-tests-bin-hip-streams-per-thread LANGUAGES CXX HIP)
# For every build type whose HIP flags are empty, reuse the C++ flags of the
# same build type.
foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO)
if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "")
set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}")
endif()
endforeach()
# Require standard 17 without compiler extensions for both C++ and HIP sources.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_HIP_STANDARD 17)
set(CMAKE_HIP_EXTENSIONS OFF)
set(CMAKE_HIP_STANDARD_REQUIRED ON)
# The source has a .cpp extension, so mark it explicitly as HIP.
set_source_files_properties(hip-streams-per-thread.cpp PROPERTIES LANGUAGE HIP)
add_executable(hip-streams-per-thread)
target_sources(hip-streams-per-thread PRIVATE hip-streams-per-thread.cpp)
target_link_libraries(hip-streams-per-thread PRIVATE rocprofiler-sdk::tests-build-flags)
# The test program launches std::thread workers, so link the platform thread library.
find_package(Threads REQUIRED)
target_link_libraries(hip-streams-per-thread PRIVATE Threads::Threads)
@@ -0,0 +1,98 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include <array>
#include <cstdlib>
#include <thread>
#include <vector>
#include "hip/hip_runtime.h"
/*
 * HIP_ASSERT(call): evaluate a HIP runtime call exactly once and check its result.
 * When the result is not hipSuccess, print the source location together with the
 * HIP error string and terminate the process with exit code 1.
 */
#define HIP_ASSERT(call)                                                                 \
    do                                                                                   \
    {                                                                                    \
        const hipError_t hip_assert_status_ = call;                                      \
        if(hip_assert_status_ != hipSuccess)                                             \
        {                                                                                \
            printf("GPU API Error - %s:%d: '%s'\n",                                      \
                   __FILE__,                                                             \
                   __LINE__,                                                             \
                   hipGetErrorString(hip_assert_status_));                               \
            exit(1);                                                                     \
        }                                                                                \
    } while(0)
// Allocate a host buffer (hipHostMalloc) and a device buffer (hipMalloc), fill the
// host buffer with a constant, and enqueue the same host-to-device copy twice on
// `stream`. Both buffers are released before returning. Any failing HIP call
// terminates the process via HIP_ASSERT.
static void
copy_to_dev(const hipStream_t stream)
{
    // Element count, not bytes: 32 * 1024 doubles (256 KiB per buffer)
    const unsigned int num_elems = (32 * 1024);
    const size_t       num_bytes = num_elems * sizeof(double);

    double* host_buf = nullptr;
    double* dev_buf  = nullptr;
    HIP_ASSERT(hipHostMalloc(&host_buf, num_bytes));
    HIP_ASSERT(hipMalloc(&dev_buf, num_bytes));

    for(double* itr = host_buf; itr != host_buf + num_elems; ++itr)
    {
        *itr = 123.5;
    }

    // Issue the identical copy twice so that the same stream is used by two
    // consecutive operations from this thread
    for(int rep = 0; rep < 2; ++rep)
    {
        HIP_ASSERT(hipMemcpyAsync(dev_buf, host_buf, num_bytes, hipMemcpyHostToDevice, stream));
    }

    // Device buffer first, then the host buffer
    HIP_ASSERT(hipFree(dev_buf));
    HIP_ASSERT(hipHostFree(host_buf));
}
int
main(int argc, char** argv)
{
// Test hipStreamPerThread with multiple threads
const size_t num_streams = 3;
const size_t thread_cnt = argc < 2 ? 9 : atoi(argv[1]);
std::vector<std::thread> threads{};
std::array<hipStream_t, num_streams> streams{};
threads.reserve(thread_cnt);
threads.emplace_back(std::thread(copy_to_dev, nullptr));
for(size_t i = 1, j = 0; i < thread_cnt; ++i)
{
if(i % 3 == 0)
{
threads.emplace_back(std::thread(copy_to_dev, hipStreamLegacy));
}
else if(i % 3 == 1)
{
threads.emplace_back(std::thread(copy_to_dev, hipStreamPerThread));
}
else
{
HIP_ASSERT(hipStreamCreate(&streams[j]));
threads.emplace_back(std::thread(copy_to_dev, streams[j++]));
}
}
for(auto& thread : threads)
{
thread.join();
}
return 0;
}
@@ -110,6 +110,9 @@ main()
const int gridSizePerStream = 104; //(int)ceil((float)elements_per_stream/blockSize);
HIP_ASSERT(hipEventRecord(start));
// Extra copy with null stream
HIP_ASSERT(hipMemcpyAsync(
&d_input1[0], &h_input1[0], bytes_per_stream, hipMemcpyHostToDevice, nullptr));
// split H2D copies and kernel calls into separate loops
for(int i = 0; i < num_streams; i++)
{
@@ -40,6 +40,7 @@ add_subdirectory(rocdecode-trace)
add_subdirectory(rocjpeg-trace)
add_subdirectory(advanced-thread-trace)
add_subdirectory(hip-stream-display)
add_subdirectory(hip-streams-per-thread)
add_subdirectory(agent-index)
add_subdirectory(negate-aggregate-tracing-options)
add_subdirectory(minimum-bytes)
@@ -41,7 +41,7 @@ add_test(
NAME rocprofv3-test-hip-stream-display-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --kernel-rename -s -d
${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json pftrace
${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json pftrace csv
--log-level env -- $<TARGET_FILE:hip-streams>)
set_tests_properties(
@@ -62,7 +62,11 @@ add_test(
COMMAND
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_results.json --pftrace-input
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_results.pftrace)
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_results.pftrace
--kernel-csv-input
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_kernel_trace.csv
--memory-copy-csv-input
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_memory_copy_trace.csv)
set_tests_properties(
rocprofv3-test-hip-stream-display-validate
@@ -45,6 +45,18 @@ def pytest_addoption(parser):
default="hip-stream-display/out_results.pftrace",
help="Input pftrace file",
)
parser.addoption(
"--kernel-csv-input",
action="store",
default="hip-stream-display/out_kernel_trace.csv",
help="Input csv file",
)
parser.addoption(
"--memory-copy-csv-input",
action="store",
default="hip-stream-display/out_memory_copy_trace.csv",
help="Input csv file",
)
@pytest.fixture
@@ -62,3 +74,29 @@ def pftrace_data(request):
if not os.path.isfile(filename):
return pytest.skip("stream tracing unavailable")
return PerfettoReader(filename).read()[0]
@pytest.fixture
def kernel_csv_data(request):
    """Rows of the kernel trace CSV selected by ``--kernel-csv-input``.

    Returns:
        list[dict]: one dict per CSV row, keyed by the CSV header names.

    Raises:
        FileNotFoundError: if the configured file does not exist.
    """
    filename = request.config.getoption("--kernel-csv-input")
    if not os.path.isfile(filename):
        # A missing input is "not found"; FileExistsError means the opposite
        raise FileNotFoundError(f"{filename} does not exist")
    # newline="" is how the csv module expects files to be opened
    with open(filename, "r", newline="") as inp:
        return list(csv.DictReader(inp))
@pytest.fixture
def memory_copy_csv_data(request):
    """Rows of the memory copy trace CSV selected by ``--memory-copy-csv-input``.

    Returns:
        list[dict]: one dict per CSV row, keyed by the CSV header names.

    Raises:
        FileNotFoundError: if the configured file does not exist.
    """
    filename = request.config.getoption("--memory-copy-csv-input")
    if not os.path.isfile(filename):
        # A missing input is "not found"; FileExistsError means the opposite
        raise FileNotFoundError(f"{filename} does not exist")
    # newline="" is how the csv module expects files to be opened
    with open(filename, "r", newline="") as inp:
        return list(csv.DictReader(inp))
@@ -60,11 +60,14 @@ def test_stream_trace(json_data):
# Expect stream ids to be set between 1 and 8 inclusive for transpose executable
expected_stream_ids = set([i for i in range(1, 9)])
kernel_stream_id_set = set()
memory_copy_streams = defaultdict(int)
# check buffering data
for titr in (kernel_dispatch_data, memory_copies_data):
stream_id_set = set()
for node in titr:
for titr in (
(kernel_dispatch_data, "KERNEL_DISPATCH"),
(memory_copies_data, "MEMORY_COPY"),
):
for node in titr[0]:
assert "size" in node
assert "kind" in node
assert "operation" in node
@@ -76,13 +79,21 @@ def test_stream_trace(json_data):
assert node.size > 0
assert node.thread_id > 0
assert node.start_timestamp > 0
assert node.end_timestamp > 0
assert node.start_timestamp < node.end_timestamp
stream_id = node.stream_id.handle
stream_id_set.add(stream_id)
assert stream_id_set == expected_stream_ids
if titr[1] == "KERNEL_DISPATCH":
assert stream_id not in kernel_stream_id_set
kernel_stream_id_set.add(stream_id)
elif titr[1] == "MEMORY_COPY":
memory_copy_streams[stream_id] += 1
# Exactly 1 kernel executed on streams 1 through 8
assert kernel_stream_id_set == expected_stream_ids
# One extra memory copy with the null stream
assert memory_copy_streams[0] == 1
# Exactly 1 memory copy to device and 1 memory copy to host
for i in expected_stream_ids:
assert memory_copy_streams[i] == 2
def test_perfetto_data(pftrace_data, json_data):
@@ -96,6 +107,35 @@ def test_perfetto_data(pftrace_data, json_data):
)
def test_csv_data(kernel_csv_data, memory_copy_csv_data):
assert len(kernel_csv_data) > 0, "Expected non-empty kernel csv data"
assert len(memory_copy_csv_data) > 0, "Expected non-empty memory copy csv data"
expected_stream_ids = set([i for i in range(1, 9)])
kernel_stream_id_set = set()
for row in kernel_csv_data:
assert "Stream_Id" in row
stream_id = int(row["Stream_Id"])
assert stream_id not in kernel_stream_id_set
kernel_stream_id_set.add(stream_id)
# Exactly 1 kernel executed on streams 1 through 8
assert kernel_stream_id_set == expected_stream_ids
memory_copy_streams = defaultdict(int)
for row in memory_copy_csv_data:
assert "Stream_Id" in row
stream_id = int(row["Stream_Id"])
memory_copy_streams[stream_id] += 1
# One extra memory copy with the null stream due to hipStreamLegacy
assert memory_copy_streams[0] == 1
# Exactly 1 memory copy to device and 1 memory copy to host
for i in expected_stream_ids:
assert memory_copy_streams[i] == 2
if __name__ == "__main__":
    # Run this file's tests directly: stop at the first failure (-x) and forward
    # any extra command-line arguments to pytest.
    sys.exit(pytest.main(["-x", __file__, *sys.argv[1:]]))
@@ -0,0 +1,83 @@
# MIT License
#
# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Registers the rocprofv3 hip-streams-per-thread integration test: one test runs
# the hip-streams-per-thread executable under rocprofv3, a second one validates
# the generated output files with pytest.
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)

project(
rocprofiler-sdk-tests-rocprofv3-hip-streams-per-thread
LANGUAGES CXX
VERSION 0.0.0)

find_package(rocprofiler-sdk REQUIRED)

# Project-provided helper; presumably configures pytest.ini and copies the Python
# files next to the test outputs -- confirm against the helper's definition.
rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py)

# Reuse the memcheck preload environment, but hand it over as ROCPROF_PRELOAD=
# instead of LD_PRELOAD=.
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")

set(hip-stream-env "${PRELOAD_ENV}")
# Thread count passed to the test executable. The stream counts asserted in
# validate.py correspond to this value, so keep the two in sync.
set(NUM_THREADS 9)

# Run the executable under rocprofv3 and write json, pftrace and csv output into
# the %tag%-trace directory.
add_test(
NAME rocprofv3-test-hip-streams-per-thread-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -s -d
${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json pftrace csv
--log-level env -- $<TARGET_FILE:hip-streams-per-thread> ${NUM_THREADS})

# The test is disabled when the hip-streams-per-thread target does not exist.
set_tests_properties(
rocprofv3-test-hip-streams-per-thread-execute
PROPERTIES TIMEOUT
60
LABELS
"integration-tests"
ENVIRONMENT
"${hip-stream-env}"
FAIL_REGULAR_EXPRESSION
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
DISABLED
$<NOT:$<TARGET_EXISTS:hip-streams-per-thread>>)

# Validate the json, pftrace and memory copy csv files written by the execute test.
add_test(
NAME rocprofv3-test-hip-streams-per-thread-validate
COMMAND
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-per-thread-trace/out_results.json
--pftrace-input
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-per-thread-trace/out_results.pftrace
--memory-copy-csv-input
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-per-thread-trace/out_memory_copy_trace.csv
)

# Runs after the execute test (DEPENDS) and fails on any "AssertionError" output.
set_tests_properties(
rocprofv3-test-hip-streams-per-thread-validate
PROPERTIES TIMEOUT
60
LABELS
"integration-tests"
DEPENDS
rocprofv3-test-hip-streams-per-thread-execute
FAIL_REGULAR_EXPRESSION
"AssertionError"
DISABLED
$<NOT:$<TARGET_EXISTS:hip-streams-per-thread>>)
@@ -0,0 +1,83 @@
#!/usr/bin/env python3
# MIT License
#
# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import csv
import json
import os
import pytest
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
from rocprofiler_sdk.pytest_utils import collapse_dict_list
from rocprofiler_sdk.pytest_utils.perfetto_reader import PerfettoReader
def pytest_addoption(parser):
    """Register the command-line options that locate the rocprofv3 output files.

    Options (all ``action="store"``):
        --json-input: JSON results file.
        --pftrace-input: Perfetto trace file.
        --memory-copy-csv-input: memory copy trace CSV file.

    The defaults point into ``hip-streams-per-thread/`` so that running the
    validation without explicit paths never falls back to the output of the
    hip-stream-display test.
    """
    default_dir = "hip-streams-per-thread"
    options = (
        ("--json-input", "out_results.json", "Input JSON"),
        ("--pftrace-input", "out_results.pftrace", "Input pftrace file"),
        ("--memory-copy-csv-input", "out_memory_copy_trace.csv", "Input csv file"),
    )
    for flag, filename, description in options:
        parser.addoption(
            flag,
            action="store",
            default=f"{default_dir}/{filename}",
            help=description,
        )
@pytest.fixture
def json_data(request):
    """Parsed contents of the JSON file selected by ``--json-input``.

    The loaded JSON is passed through ``collapse_dict_list`` and wrapped in a
    ``dotdict``. The test is skipped when the file does not exist.
    """
    json_path = request.config.getoption("--json-input")
    if os.path.isfile(json_path):
        with open(json_path, "r") as stream:
            parsed = json.load(stream)
        return dotdict(collapse_dict_list(parsed))
    # pytest.skip() raises, so nothing is returned on this path
    pytest.skip("stream tracing unavailable")
@pytest.fixture
def pftrace_data(request):
    """First element of what ``PerfettoReader`` reads from ``--pftrace-input``.

    The test is skipped when the file does not exist.
    """
    trace_path = request.config.getoption("--pftrace-input")
    if os.path.isfile(trace_path):
        contents = PerfettoReader(trace_path).read()
        return contents[0]
    # pytest.skip() raises, so nothing is returned on this path
    pytest.skip("stream tracing unavailable")
@pytest.fixture
def memory_copy_csv_data(request):
    """Rows of the memory copy trace CSV selected by ``--memory-copy-csv-input``.

    Returns:
        list[dict]: one dict per CSV row, keyed by the CSV header names.

    Raises:
        FileNotFoundError: if the configured file does not exist.
    """
    filename = request.config.getoption("--memory-copy-csv-input")
    if not os.path.isfile(filename):
        # A missing input is "not found"; FileExistsError means the opposite
        raise FileNotFoundError(f"{filename} does not exist")
    # newline="" is how the csv module expects files to be opened
    with open(filename, "r", newline="") as inp:
        return list(csv.DictReader(inp))
@@ -0,0 +1,27 @@
# MIT License
#
# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Pytest settings for this test directory.
[pytest]
# --durations=20: report the 20 slowest durations; -rA: summary for all outcomes;
# -s: do not capture output; -vv: extra-verbose reporting
addopts = --durations=20 -rA -s -vv
# Only collect tests from validate.py
testpaths = validate.py
# The @...@ token is a placeholder, presumably substituted when the file is
# configured by the build system -- TODO confirm
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
@@ -0,0 +1,117 @@
#!/usr/bin/env python3
# MIT License
#
# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import sys
import pytest
import json
from collections import defaultdict
# helper function
def node_exists(name, data, min_len=1):
    """Assert that ``data[name]`` exists, is not None and is not too short.

    The length check (``len(...) >= min_len``) only applies when the value is a
    list, tuple, dict or set.
    """
    assert name in data
    value = data[name]
    assert value is not None
    if not isinstance(value, (list, tuple, dict, set)):
        return
    assert len(value) >= min_len
def get_operation(record, kind_name, op_name=None):
    """Look up a buffer-record kind (and optionally one of its operations).

    Scans ``record["strings"]["buffer_records"]`` for entries whose ``"kind"``
    equals ``kind_name``.

    Returns:
        * ``(index, operations)`` of the first matching entry when ``op_name``
          is None,
        * the index of ``op_name`` within a matching entry's operations when
          ``op_name`` is given,
        * ``None`` when nothing matches.
    """
    entries = record["strings"]["buffer_records"]
    for position, entry in enumerate(entries):
        if entry["kind"] != kind_name:
            continue
        operations = entry["operations"]
        if op_name is None:
            return position, operations
        found = next(
            (
                op_index
                for op_index, candidate in enumerate(operations)
                if op_name == candidate
            ),
            None,
        )
        if found is not None:
            return found
    return None
def test_stream_trace(json_data):
data = json_data["rocprofiler-sdk-tool"]
buffer_records = data["buffer_records"]
memory_copies_data = buffer_records["memory_copy"]
assert len(memory_copies_data) > 0
# Expect non-null stream ids to be set between 1 and 6 inclusive
expected_stream_ids = set([i for i in range(1, 7)])
memory_copy_streams = defaultdict(int)
# check buffering data
for node in memory_copies_data:
assert "size" in node
assert "kind" in node
assert "operation" in node
assert "correlation_id" in node
assert "end_timestamp" in node
assert "start_timestamp" in node
assert "thread_id" in node
assert "stream_id" in node
assert node.size > 0
assert node.thread_id > 0
assert node.start_timestamp < node.end_timestamp
stream_id = node.stream_id.handle
memory_copy_streams[stream_id] += 1
# 2 memory copies with null stream and 4 with hipStreamPerThread
assert memory_copy_streams[0] == 6
# Exactly 1 memory copy to device and 1 memory copy to host
for i in expected_stream_ids:
assert memory_copy_streams[i] == 2
def test_perfetto_data(pftrace_data, json_data):
    """Check the perfetto trace against the JSON output.

    Requires a non-empty perfetto data set, then delegates to the shared
    ``rocprofv3.test_perfetto_data`` check for the "kernel" and "memory_copy"
    categories.
    """
    import rocprofiler_sdk.tests.rocprofv3 as shared_checks

    categories = ("kernel", "memory_copy")

    assert pftrace_data.empty == False
    shared_checks.test_perfetto_data(pftrace_data, json_data, categories)
def test_csv_data(memory_copy_csv_data):
assert len(memory_copy_csv_data) > 0, "Expected non-empty memory copy csv data"
expected_stream_ids = set([i for i in range(1, 7)])
memory_copy_streams = defaultdict(int)
for row in memory_copy_csv_data:
assert "Stream_Id" in row
stream_id = int(row["Stream_Id"])
memory_copy_streams[stream_id] += 1
# 2 memory copies with null stream and 4 with hipStreamPerThread
assert memory_copy_streams[0] == 6
# Exactly 1 memory copy to device and 1 memory copy to host
for i in expected_stream_ids:
assert memory_copy_streams[i] == 2
if __name__ == "__main__":
    # Run this file's tests directly: stop at the first failure (-x) and forward
    # any extra command-line arguments to pytest.
    sys.exit(pytest.main(["-x", __file__, *sys.argv[1:]]))