Handle special cases when stream value is hipStreamLegacy (0x01) or hipStreamPerThread (0x02) (#343)
* Updated stream code to handle special cases when stream value is 0x01 or 0x02 * Removed extra definitions and updated tests to account for special case * Modified stream.cpp so that each thread assigned a unique stream ID when hipStreamPerThread is used as stream value. Modified tests to check that threads are assigned unique, repeated values when hipStreamPerThread is called * Updated idx_offset, stream_map, and thread counter to be in one struct. * Update stream.cpp to only use add_stream() and update tests for seperate unit test for hipStreamPerThread * Remove unecessary comment * Removed unecessary line * Updated tests and stream.cpp to update stream ID correctly * Updated test structure
This commit is contained in:
@@ -40,8 +40,6 @@
|
||||
#include <sqlite3.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace rocpd
|
||||
{
|
||||
namespace interop
|
||||
|
||||
@@ -164,7 +164,7 @@ ThreadTraceAQLPacketFactory::ThreadTraceAQLPacketFactory(const hsa::AgentCache&
|
||||
{buffer_size_hi}});
|
||||
}
|
||||
|
||||
if(perf_exclude_mask)
|
||||
if(perf_exclude_mask != 0u)
|
||||
{
|
||||
// Bitwise NOT because aqlprofile receives the mask, not the exclude mask
|
||||
aql_params.push_back(
|
||||
|
||||
@@ -92,6 +92,13 @@ add_stream(hipStream_t stream)
|
||||
if(!_data.emplace(_stream, rocprofiler_stream_id_t{.handle = idx}).second)
|
||||
{
|
||||
idx_offset += 1;
|
||||
// Handle special hipStreamPerThread case where each thread has it's own implicit
|
||||
// stream ID. No need to update map since hipStreamPerThread is defined as 0x02
|
||||
if(_stream == hipStreamPerThread)
|
||||
{
|
||||
return rocprofiler_stream_id_t{.handle = idx};
|
||||
}
|
||||
idx = _data.size() + idx_offset;
|
||||
auto _existing = _data.at(_stream);
|
||||
ROCP_INFO << "existing hipStream_t ("
|
||||
<< sdk::utility::as_hex(static_cast<void*>(_stream))
|
||||
@@ -99,7 +106,6 @@ add_stream(hipStream_t stream)
|
||||
<< "} -> rocprofiler_stream_id_t{.handle = " << idx << "}";
|
||||
_data.at(_stream) = rocprofiler_stream_id_t{.handle = idx};
|
||||
}
|
||||
|
||||
return _data.at(_stream);
|
||||
},
|
||||
stream);
|
||||
@@ -108,10 +114,26 @@ add_stream(hipStream_t stream)
|
||||
auto
|
||||
get_stream_id(hipStream_t stream)
|
||||
{
|
||||
// Handle special case where stream is hipStreamLegacy (0x01). Changes sync behavior of
|
||||
// null stream, so the stream is assigned the value of the null stream
|
||||
if(stream == hipStreamLegacy)
|
||||
{
|
||||
stream = nullptr;
|
||||
}
|
||||
// Handle special case where stream is hipStreamPerThread (0x02). Assigns implicit stream id to
|
||||
// each thread
|
||||
else if(stream == hipStreamPerThread)
|
||||
{
|
||||
static thread_local auto thr_stream_id = rocprofiler_stream_id_t{.handle = 0};
|
||||
if(thr_stream_id.handle == 0) thr_stream_id = add_stream(stream);
|
||||
return thr_stream_id;
|
||||
}
|
||||
return get_stream_map()->rlock(
|
||||
[](const stream_map_t& _data, hipStream_t _stream) {
|
||||
ROCP_ERROR_IF(_data.count(_stream) == 0)
|
||||
<< "failed to retrieve stream ID in " << __FILE__;
|
||||
<< fmt::format("failed to retrieve stream ID for hipStream_t ({}) in {}",
|
||||
sdk::utility::as_hex(static_cast<void*>(_stream)),
|
||||
__FILE__);
|
||||
return _data.at(_stream);
|
||||
},
|
||||
stream);
|
||||
|
||||
@@ -39,3 +39,4 @@ if(rocJPEG_FOUND AND rocJPEG_VERSION VERSION_GREATER 0.6.0)
|
||||
endif()
|
||||
add_subdirectory(hsa-code-object)
|
||||
add_subdirectory(hip-streams)
|
||||
add_subdirectory(hip-streams-per-thread)
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
if(NOT CMAKE_HIP_COMPILER)
|
||||
find_program(
|
||||
amdclangpp_EXECUTABLE
|
||||
NAMES amdclang++
|
||||
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATH_SUFFIXES bin llvm/bin NO_CACHE)
|
||||
mark_as_advanced(amdclangpp_EXECUTABLE)
|
||||
|
||||
if(amdclangpp_EXECUTABLE)
|
||||
set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
project(rocprofiler-sdk-tests-bin-hip-streams-per-thread LANGUAGES CXX HIP)
|
||||
|
||||
foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO)
|
||||
if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "")
|
||||
set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_HIP_STANDARD 17)
|
||||
set(CMAKE_HIP_EXTENSIONS OFF)
|
||||
set(CMAKE_HIP_STANDARD_REQUIRED ON)
|
||||
|
||||
set_source_files_properties(hip-streams-per-thread.cpp PROPERTIES LANGUAGE HIP)
|
||||
|
||||
add_executable(hip-streams-per-thread)
|
||||
target_sources(hip-streams-per-thread PRIVATE hip-streams-per-thread.cpp)
|
||||
target_link_libraries(hip-streams-per-thread PRIVATE rocprofiler-sdk::tests-build-flags)
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
target_link_libraries(hip-streams-per-thread PRIVATE Threads::Threads)
|
||||
@@ -0,0 +1,98 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include <array>
|
||||
#include <cstdlib>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
/* Macro for checking GPU API return values */
|
||||
#define HIP_ASSERT(call) \
|
||||
do \
|
||||
{ \
|
||||
hipError_t gpuErr = call; \
|
||||
if(hipSuccess != gpuErr) \
|
||||
{ \
|
||||
printf( \
|
||||
"GPU API Error - %s:%d: '%s'\n", __FILE__, __LINE__, hipGetErrorString(gpuErr)); \
|
||||
exit(1); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
static void
|
||||
copy_to_dev(const hipStream_t stream)
|
||||
{
|
||||
unsigned int n = (32 * 1024); // 32KB
|
||||
double* A_h = nullptr;
|
||||
double* A_d = nullptr;
|
||||
|
||||
HIP_ASSERT(hipHostMalloc(&A_h, n * sizeof(double)));
|
||||
HIP_ASSERT(hipMalloc(&A_d, n * sizeof(double)));
|
||||
|
||||
for(unsigned int i = 0; i < n; ++i)
|
||||
{
|
||||
A_h[i] = 123.5;
|
||||
}
|
||||
HIP_ASSERT(hipMemcpyAsync(A_d, A_h, n * sizeof(double), hipMemcpyHostToDevice, stream));
|
||||
// Repeat to make sure streams remain the same
|
||||
HIP_ASSERT(hipMemcpyAsync(A_d, A_h, n * sizeof(double), hipMemcpyHostToDevice, stream));
|
||||
|
||||
// Release device memory
|
||||
HIP_ASSERT(hipFree(A_d));
|
||||
// Release host memory
|
||||
HIP_ASSERT(hipHostFree(A_h));
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
// Test hipStreamPerThread with multiple threads
|
||||
const size_t num_streams = 3;
|
||||
const size_t thread_cnt = argc < 2 ? 9 : atoi(argv[1]);
|
||||
std::vector<std::thread> threads{};
|
||||
std::array<hipStream_t, num_streams> streams{};
|
||||
threads.reserve(thread_cnt);
|
||||
threads.emplace_back(std::thread(copy_to_dev, nullptr));
|
||||
for(size_t i = 1, j = 0; i < thread_cnt; ++i)
|
||||
{
|
||||
if(i % 3 == 0)
|
||||
{
|
||||
threads.emplace_back(std::thread(copy_to_dev, hipStreamLegacy));
|
||||
}
|
||||
else if(i % 3 == 1)
|
||||
{
|
||||
threads.emplace_back(std::thread(copy_to_dev, hipStreamPerThread));
|
||||
}
|
||||
else
|
||||
{
|
||||
HIP_ASSERT(hipStreamCreate(&streams[j]));
|
||||
threads.emplace_back(std::thread(copy_to_dev, streams[j++]));
|
||||
}
|
||||
}
|
||||
for(auto& thread : threads)
|
||||
{
|
||||
thread.join();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -110,6 +110,9 @@ main()
|
||||
const int gridSizePerStream = 104; //(int)ceil((float)elements_per_stream/blockSize);
|
||||
|
||||
HIP_ASSERT(hipEventRecord(start));
|
||||
// Extra copy with null stream
|
||||
HIP_ASSERT(hipMemcpyAsync(
|
||||
&d_input1[0], &h_input1[0], bytes_per_stream, hipMemcpyHostToDevice, nullptr));
|
||||
// split H2D copies and kernel calls into separate loops
|
||||
for(int i = 0; i < num_streams; i++)
|
||||
{
|
||||
|
||||
@@ -40,6 +40,7 @@ add_subdirectory(rocdecode-trace)
|
||||
add_subdirectory(rocjpeg-trace)
|
||||
add_subdirectory(advanced-thread-trace)
|
||||
add_subdirectory(hip-stream-display)
|
||||
add_subdirectory(hip-streams-per-thread)
|
||||
add_subdirectory(agent-index)
|
||||
add_subdirectory(negate-aggregate-tracing-options)
|
||||
add_subdirectory(minimum-bytes)
|
||||
|
||||
@@ -41,7 +41,7 @@ add_test(
|
||||
NAME rocprofv3-test-hip-stream-display-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --kernel-rename -s -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json pftrace
|
||||
${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json pftrace csv
|
||||
--log-level env -- $<TARGET_FILE:hip-streams>)
|
||||
|
||||
set_tests_properties(
|
||||
@@ -62,7 +62,11 @@ add_test(
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_results.json --pftrace-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_results.pftrace)
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_results.pftrace
|
||||
--kernel-csv-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_kernel_trace.csv
|
||||
--memory-copy-csv-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_memory_copy_trace.csv)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hip-stream-display-validate
|
||||
|
||||
@@ -45,6 +45,18 @@ def pytest_addoption(parser):
|
||||
default="hip-stream-display/out_results.pftrace",
|
||||
help="Input pftrace file",
|
||||
)
|
||||
parser.addoption(
|
||||
"--kernel-csv-input",
|
||||
action="store",
|
||||
default="hip-stream-display/out_kernel_trace.csv",
|
||||
help="Input csv file",
|
||||
)
|
||||
parser.addoption(
|
||||
"--memory-copy-csv-input",
|
||||
action="store",
|
||||
default="hip-stream-display/out_memory_copy_trace.csv",
|
||||
help="Input csv file",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -62,3 +74,29 @@ def pftrace_data(request):
|
||||
if not os.path.isfile(filename):
|
||||
return pytest.skip("stream tracing unavailable")
|
||||
return PerfettoReader(filename).read()[0]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def kernel_csv_data(request):
|
||||
filename = request.config.getoption("--kernel-csv-input")
|
||||
data = []
|
||||
if not os.path.isfile(filename):
|
||||
raise FileExistsError(f"{filename} does not exist")
|
||||
with open(filename, "r") as inp:
|
||||
reader = csv.DictReader(inp)
|
||||
for row in reader:
|
||||
data.append(row)
|
||||
return data
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def memory_copy_csv_data(request):
|
||||
filename = request.config.getoption("--memory-copy-csv-input")
|
||||
data = []
|
||||
if not os.path.isfile(filename):
|
||||
raise FileExistsError(f"{filename} does not exist")
|
||||
with open(filename, "r") as inp:
|
||||
reader = csv.DictReader(inp)
|
||||
for row in reader:
|
||||
data.append(row)
|
||||
return data
|
||||
|
||||
@@ -60,11 +60,14 @@ def test_stream_trace(json_data):
|
||||
|
||||
# Expect stream ids to be set between 1 and 8 inclusive for transpose executable
|
||||
expected_stream_ids = set([i for i in range(1, 9)])
|
||||
|
||||
kernel_stream_id_set = set()
|
||||
memory_copy_streams = defaultdict(int)
|
||||
# check buffering data
|
||||
for titr in (kernel_dispatch_data, memory_copies_data):
|
||||
stream_id_set = set()
|
||||
for node in titr:
|
||||
for titr in (
|
||||
(kernel_dispatch_data, "KERNEL_DISPATCH"),
|
||||
(memory_copies_data, "MEMORY_COPY"),
|
||||
):
|
||||
for node in titr[0]:
|
||||
assert "size" in node
|
||||
assert "kind" in node
|
||||
assert "operation" in node
|
||||
@@ -76,13 +79,21 @@ def test_stream_trace(json_data):
|
||||
|
||||
assert node.size > 0
|
||||
assert node.thread_id > 0
|
||||
assert node.start_timestamp > 0
|
||||
assert node.end_timestamp > 0
|
||||
assert node.start_timestamp < node.end_timestamp
|
||||
|
||||
stream_id = node.stream_id.handle
|
||||
stream_id_set.add(stream_id)
|
||||
assert stream_id_set == expected_stream_ids
|
||||
if titr[1] == "KERNEL_DISPATCH":
|
||||
assert stream_id not in kernel_stream_id_set
|
||||
kernel_stream_id_set.add(stream_id)
|
||||
elif titr[1] == "MEMORY_COPY":
|
||||
memory_copy_streams[stream_id] += 1
|
||||
# Exactly 1 kernel executed on streams 1 through 8
|
||||
assert kernel_stream_id_set == expected_stream_ids
|
||||
# One extra memory copy with the null stream
|
||||
assert memory_copy_streams[0] == 1
|
||||
# Exactly 1 memory copy to device and 1 memory copy to host
|
||||
for i in expected_stream_ids:
|
||||
assert memory_copy_streams[i] == 2
|
||||
|
||||
|
||||
def test_perfetto_data(pftrace_data, json_data):
|
||||
@@ -96,6 +107,35 @@ def test_perfetto_data(pftrace_data, json_data):
|
||||
)
|
||||
|
||||
|
||||
def test_csv_data(kernel_csv_data, memory_copy_csv_data):
|
||||
assert len(kernel_csv_data) > 0, "Expected non-empty kernel csv data"
|
||||
assert len(memory_copy_csv_data) > 0, "Expected non-empty memory copy csv data"
|
||||
|
||||
expected_stream_ids = set([i for i in range(1, 9)])
|
||||
kernel_stream_id_set = set()
|
||||
for row in kernel_csv_data:
|
||||
assert "Stream_Id" in row
|
||||
|
||||
stream_id = int(row["Stream_Id"])
|
||||
assert stream_id not in kernel_stream_id_set
|
||||
kernel_stream_id_set.add(stream_id)
|
||||
# Exactly 1 kernel executed on streams 1 through 8
|
||||
assert kernel_stream_id_set == expected_stream_ids
|
||||
|
||||
memory_copy_streams = defaultdict(int)
|
||||
for row in memory_copy_csv_data:
|
||||
assert "Stream_Id" in row
|
||||
|
||||
stream_id = int(row["Stream_Id"])
|
||||
memory_copy_streams[stream_id] += 1
|
||||
|
||||
# One extra memory copy with the null stream due to hipStreamLegacy
|
||||
assert memory_copy_streams[0] == 1
|
||||
# Exactly 1 memory copy to device and 1 memory copy to host
|
||||
for i in expected_stream_ids:
|
||||
assert memory_copy_streams[i] == 2
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit_code = pytest.main(["-x", __file__] + sys.argv[1:])
|
||||
sys.exit(exit_code)
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
project(
|
||||
rocprofiler-sdk-tests-rocprofv3-hip-streams-per-thread
|
||||
LANGUAGES CXX
|
||||
VERSION 0.0.0)
|
||||
|
||||
find_package(rocprofiler-sdk REQUIRED)
|
||||
|
||||
rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py)
|
||||
|
||||
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
|
||||
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
|
||||
|
||||
set(hip-stream-env "${PRELOAD_ENV}")
|
||||
set(NUM_THREADS 9)
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-hip-streams-per-thread-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -s -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json pftrace csv
|
||||
--log-level env -- $<TARGET_FILE:hip-streams-per-thread> ${NUM_THREADS})
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hip-streams-per-thread-execute
|
||||
PROPERTIES TIMEOUT
|
||||
60
|
||||
LABELS
|
||||
"integration-tests"
|
||||
ENVIRONMENT
|
||||
"${hip-stream-env}"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
|
||||
DISABLED
|
||||
$<NOT:$<TARGET_EXISTS:hip-streams-per-thread>>)
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-hip-streams-per-thread-validate
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-per-thread-trace/out_results.json
|
||||
--pftrace-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-per-thread-trace/out_results.pftrace
|
||||
--memory-copy-csv-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-per-thread-trace/out_memory_copy_trace.csv
|
||||
)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hip-streams-per-thread-validate
|
||||
PROPERTIES TIMEOUT
|
||||
60
|
||||
LABELS
|
||||
"integration-tests"
|
||||
DEPENDS
|
||||
rocprofv3-test-hip-streams-per-thread-execute
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"AssertionError"
|
||||
DISABLED
|
||||
$<NOT:$<TARGET_EXISTS:hip-streams-per-thread>>)
|
||||
@@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env python3
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import pytest
|
||||
|
||||
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
|
||||
from rocprofiler_sdk.pytest_utils import collapse_dict_list
|
||||
from rocprofiler_sdk.pytest_utils.perfetto_reader import PerfettoReader
|
||||
|
||||
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption(
|
||||
"--json-input",
|
||||
action="store",
|
||||
default="hip-stream-display/out_results.json",
|
||||
help="Input JSON",
|
||||
)
|
||||
parser.addoption(
|
||||
"--pftrace-input",
|
||||
action="store",
|
||||
default="hip-stream-display/out_results.pftrace",
|
||||
help="Input pftrace file",
|
||||
)
|
||||
parser.addoption(
|
||||
"--memory-copy-csv-input",
|
||||
action="store",
|
||||
default="hip-stream-display/out_memory_copy_trace.csv",
|
||||
help="Input csv file",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def json_data(request):
|
||||
filename = request.config.getoption("--json-input")
|
||||
if not os.path.isfile(filename):
|
||||
return pytest.skip("stream tracing unavailable")
|
||||
with open(filename, "r") as inp:
|
||||
return dotdict(collapse_dict_list(json.load(inp)))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def pftrace_data(request):
|
||||
filename = request.config.getoption("--pftrace-input")
|
||||
if not os.path.isfile(filename):
|
||||
return pytest.skip("stream tracing unavailable")
|
||||
return PerfettoReader(filename).read()[0]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def memory_copy_csv_data(request):
|
||||
filename = request.config.getoption("--memory-copy-csv-input")
|
||||
data = []
|
||||
if not os.path.isfile(filename):
|
||||
raise FileExistsError(f"{filename} does not exist")
|
||||
with open(filename, "r") as inp:
|
||||
reader = csv.DictReader(inp)
|
||||
for row in reader:
|
||||
data.append(row)
|
||||
return data
|
||||
@@ -0,0 +1,27 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
[pytest]
|
||||
addopts = --durations=20 -rA -s -vv
|
||||
testpaths = validate.py
|
||||
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
|
||||
@@ -0,0 +1,117 @@
|
||||
#!/usr/bin/env python3
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
import sys
|
||||
import pytest
|
||||
import json
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
# helper function
|
||||
def node_exists(name, data, min_len=1):
|
||||
assert name in data
|
||||
assert data[name] is not None
|
||||
if isinstance(data[name], (list, tuple, dict, set)):
|
||||
assert len(data[name]) >= min_len
|
||||
|
||||
|
||||
def get_operation(record, kind_name, op_name=None):
|
||||
for idx, itr in enumerate(record["strings"]["buffer_records"]):
|
||||
if kind_name == itr["kind"]:
|
||||
if op_name is None:
|
||||
return idx, itr["operations"]
|
||||
else:
|
||||
for oidx, oname in enumerate(itr["operations"]):
|
||||
if op_name == oname:
|
||||
return oidx
|
||||
return None
|
||||
|
||||
|
||||
def test_stream_trace(json_data):
|
||||
data = json_data["rocprofiler-sdk-tool"]
|
||||
buffer_records = data["buffer_records"]
|
||||
|
||||
memory_copies_data = buffer_records["memory_copy"]
|
||||
assert len(memory_copies_data) > 0
|
||||
|
||||
# Expect non-null stream ids to be set between 1 and 6 inclusive
|
||||
expected_stream_ids = set([i for i in range(1, 7)])
|
||||
memory_copy_streams = defaultdict(int)
|
||||
# check buffering data
|
||||
for node in memory_copies_data:
|
||||
assert "size" in node
|
||||
assert "kind" in node
|
||||
assert "operation" in node
|
||||
assert "correlation_id" in node
|
||||
assert "end_timestamp" in node
|
||||
assert "start_timestamp" in node
|
||||
assert "thread_id" in node
|
||||
assert "stream_id" in node
|
||||
|
||||
assert node.size > 0
|
||||
assert node.thread_id > 0
|
||||
assert node.start_timestamp < node.end_timestamp
|
||||
|
||||
stream_id = node.stream_id.handle
|
||||
memory_copy_streams[stream_id] += 1
|
||||
# 2 memory copies with null stream and 4 with hipStreamPerThread
|
||||
assert memory_copy_streams[0] == 6
|
||||
# Exactly 1 memory copy to device and 1 memory copy to host
|
||||
for i in expected_stream_ids:
|
||||
assert memory_copy_streams[i] == 2
|
||||
|
||||
|
||||
def test_perfetto_data(pftrace_data, json_data):
|
||||
import rocprofiler_sdk.tests.rocprofv3 as rocprofv3
|
||||
|
||||
assert pftrace_data.empty == False
|
||||
rocprofv3.test_perfetto_data(
|
||||
pftrace_data,
|
||||
json_data,
|
||||
("kernel", "memory_copy"),
|
||||
)
|
||||
|
||||
|
||||
def test_csv_data(memory_copy_csv_data):
|
||||
assert len(memory_copy_csv_data) > 0, "Expected non-empty memory copy csv data"
|
||||
|
||||
expected_stream_ids = set([i for i in range(1, 7)])
|
||||
memory_copy_streams = defaultdict(int)
|
||||
for row in memory_copy_csv_data:
|
||||
assert "Stream_Id" in row
|
||||
|
||||
stream_id = int(row["Stream_Id"])
|
||||
memory_copy_streams[stream_id] += 1
|
||||
|
||||
# 2 memory copies with null stream and 4 with hipStreamPerThread
|
||||
assert memory_copy_streams[0] == 6
|
||||
# Exactly 1 memory copy to device and 1 memory copy to host
|
||||
for i in expected_stream_ids:
|
||||
assert memory_copy_streams[i] == 2
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit_code = pytest.main(["-x", __file__] + sys.argv[1:])
|
||||
sys.exit(exit_code)
|
||||
Reference in New Issue
Block a user