From 4d98a0169f16362bb09531ff2c777474082425e5 Mon Sep 17 00:00:00 2001 From: itrowbri Date: Wed, 27 Aug 2025 20:04:13 -0500 Subject: [PATCH] Handle special cases when stream value is hipStreamLegacy (0x01) or hipStreamPerThread (0x02) (#343) * Updated stream code to handle special cases when stream value is 0x01 or 0x02 * Removed extra definitions and updated tests to account for special case * Modified stream.cpp so that each thread assigned a unique stream ID when hipStreamPerThread is used as stream value. Modified tests to check that threads are assigned unique, repeated values when hipStreamPerThread is called * Updated idx_offset, stream_map, and thread counter to be in one struct. * Update stream.cpp to only use add_stream() and update tests for seperate unit test for hipStreamPerThread * Remove unecessary comment * Removed unecessary line * Updated tests and stream.cpp to update stream ID correctly * Updated test structure --- .../lib/python/rocpd/source/interop.cpp | 2 - .../rocprofiler-sdk/aql/packet_construct.cpp | 2 +- .../source/lib/rocprofiler-sdk/hip/stream.cpp | 26 +++- .../rocprofiler-sdk/tests/bin/CMakeLists.txt | 1 + .../bin/hip-streams-per-thread/CMakeLists.txt | 42 +++++++ .../hip-streams-per-thread.cpp | 98 +++++++++++++++ .../bin/hip-streams/compute_comm_overlap.cpp | 3 + .../tests/rocprofv3/CMakeLists.txt | 1 + .../hip-stream-display/CMakeLists.txt | 8 +- .../rocprofv3/hip-stream-display/conftest.py | 38 ++++++ .../rocprofv3/hip-stream-display/validate.py | 56 +++++++-- .../hip-streams-per-thread/CMakeLists.txt | 83 +++++++++++++ .../hip-streams-per-thread/conftest.py | 83 +++++++++++++ .../hip-streams-per-thread/pytest.ini | 27 ++++ .../hip-streams-per-thread/validate.py | 117 ++++++++++++++++++ 15 files changed, 572 insertions(+), 15 deletions(-) create mode 100644 projects/rocprofiler-sdk/tests/bin/hip-streams-per-thread/CMakeLists.txt create mode 100644 projects/rocprofiler-sdk/tests/bin/hip-streams-per-thread/hip-streams-per-thread.cpp create mode 100644 projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/CMakeLists.txt create mode 100644 projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/conftest.py create mode 100644 projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/pytest.ini create mode 100644 projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/validate.py diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/interop.cpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/interop.cpp index 31010c9411..bdde41b0b9 100644 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/interop.cpp +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/interop.cpp @@ -40,8 +40,6 @@ #include #include -#include - namespace rocpd { namespace interop diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/packet_construct.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/packet_construct.cpp index 51d04542cb..3417bd090d 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/packet_construct.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/packet_construct.cpp @@ -164,7 +164,7 @@ ThreadTraceAQLPacketFactory::ThreadTraceAQLPacketFactory(const hsa::AgentCache& {buffer_size_hi}}); } - if(perf_exclude_mask) + if(perf_exclude_mask != 0u) { // Bitwise NOT because aqlprofile receives the mask, not the exclude mask aql_params.push_back( diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/stream.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/stream.cpp index c2a74eb401..284d0700ff 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/stream.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hip/stream.cpp @@ -92,6 +92,13 @@ add_stream(hipStream_t stream) if(!_data.emplace(_stream, rocprofiler_stream_id_t{.handle = idx}).second) { idx_offset += 1; + // Handle special hipStreamPerThread case where each thread has it's own implicit + // stream ID. No need to update map since hipStreamPerThread is defined as 0x02 + if(_stream == hipStreamPerThread) + { + return rocprofiler_stream_id_t{.handle = idx}; + } + idx = _data.size() + idx_offset; auto _existing = _data.at(_stream); ROCP_INFO << "existing hipStream_t (" << sdk::utility::as_hex(static_cast(_stream)) @@ -99,7 +106,6 @@ add_stream(hipStream_t stream) << "} -> rocprofiler_stream_id_t{.handle = " << idx << "}"; _data.at(_stream) = rocprofiler_stream_id_t{.handle = idx}; } - return _data.at(_stream); }, stream); @@ -108,10 +114,26 @@ add_stream(hipStream_t stream) auto get_stream_id(hipStream_t stream) { + // Handle special case where stream is hipStreamLegacy (0x01). Changes sync behavior of + // null stream, so the stream is assigned the value of the null stream + if(stream == hipStreamLegacy) + { + stream = nullptr; + } + // Handle special case where stream is hipStreamPerThread (0x02). Assigns implicit stream id to + // each thread + else if(stream == hipStreamPerThread) + { + static thread_local auto thr_stream_id = rocprofiler_stream_id_t{.handle = 0}; + if(thr_stream_id.handle == 0) thr_stream_id = add_stream(stream); + return thr_stream_id; + } return get_stream_map()->rlock( [](const stream_map_t& _data, hipStream_t _stream) { ROCP_ERROR_IF(_data.count(_stream) == 0) - << "failed to retrieve stream ID in " << __FILE__; + << fmt::format("failed to retrieve stream ID for hipStream_t ({}) in {}", + sdk::utility::as_hex(static_cast(_stream)), + __FILE__); return _data.at(_stream); }, stream); diff --git a/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt index 7104eea016..9f50f8c014 100644 --- a/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/bin/CMakeLists.txt @@ -39,3 +39,4 @@ if(rocJPEG_FOUND AND rocJPEG_VERSION VERSION_GREATER 0.6.0) endif() add_subdirectory(hsa-code-object) add_subdirectory(hip-streams) +add_subdirectory(hip-streams-per-thread) diff --git a/projects/rocprofiler-sdk/tests/bin/hip-streams-per-thread/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/hip-streams-per-thread/CMakeLists.txt new file mode 100644 index 0000000000..7dcc39ef85 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/bin/hip-streams-per-thread/CMakeLists.txt @@ -0,0 +1,42 @@ +# +# +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +if(NOT CMAKE_HIP_COMPILER) + find_program( + amdclangpp_EXECUTABLE + NAMES amdclang++ + HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATH_SUFFIXES bin llvm/bin NO_CACHE) + mark_as_advanced(amdclangpp_EXECUTABLE) + + if(amdclangpp_EXECUTABLE) + set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}") + endif() +endif() + +project(rocprofiler-sdk-tests-bin-hip-streams-per-thread LANGUAGES CXX HIP) + +foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO) + if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "") + set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}") + endif() +endforeach() + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_HIP_STANDARD 17) +set(CMAKE_HIP_EXTENSIONS OFF) +set(CMAKE_HIP_STANDARD_REQUIRED ON) + +set_source_files_properties(hip-streams-per-thread.cpp PROPERTIES LANGUAGE HIP) + +add_executable(hip-streams-per-thread) +target_sources(hip-streams-per-thread PRIVATE hip-streams-per-thread.cpp) +target_link_libraries(hip-streams-per-thread PRIVATE rocprofiler-sdk::tests-build-flags) + +find_package(Threads REQUIRED) +target_link_libraries(hip-streams-per-thread PRIVATE Threads::Threads) diff --git a/projects/rocprofiler-sdk/tests/bin/hip-streams-per-thread/hip-streams-per-thread.cpp b/projects/rocprofiler-sdk/tests/bin/hip-streams-per-thread/hip-streams-per-thread.cpp new file mode 100644 index 0000000000..e4eedb7829 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/bin/hip-streams-per-thread/hip-streams-per-thread.cpp @@ -0,0 +1,98 @@ +// MIT License +// +// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include +#include +#include + +#include "hip/hip_runtime.h" + +/* Macro for checking GPU API return values */ +#define HIP_ASSERT(call) \ + do \ + { \ + hipError_t gpuErr = call; \ + if(hipSuccess != gpuErr) \ + { \ + printf( \ + "GPU API Error - %s:%d: '%s'\n", __FILE__, __LINE__, hipGetErrorString(gpuErr)); \ + exit(1); \ + } \ + } while(0) + +static void +copy_to_dev(const hipStream_t stream) +{ + unsigned int n = (32 * 1024); // 32KB + double* A_h = nullptr; + double* A_d = nullptr; + + HIP_ASSERT(hipHostMalloc(&A_h, n * sizeof(double))); + HIP_ASSERT(hipMalloc(&A_d, n * sizeof(double))); + + for(unsigned int i = 0; i < n; ++i) + { + A_h[i] = 123.5; + } + HIP_ASSERT(hipMemcpyAsync(A_d, A_h, n * sizeof(double), hipMemcpyHostToDevice, stream)); + // Repeat to make sure streams remain the same + HIP_ASSERT(hipMemcpyAsync(A_d, A_h, n * sizeof(double), hipMemcpyHostToDevice, stream)); + + // Release device memory + HIP_ASSERT(hipFree(A_d)); + // Release host memory + HIP_ASSERT(hipHostFree(A_h)); +} + +int +main(int argc, char** argv) +{ + // Test hipStreamPerThread with multiple threads + const size_t num_streams = 3; + const size_t thread_cnt = argc < 2 ? 9 : atoi(argv[1]); + std::vector threads{}; + std::array streams{}; + threads.reserve(thread_cnt); + threads.emplace_back(std::thread(copy_to_dev, nullptr)); + for(size_t i = 1, j = 0; i < thread_cnt; ++i) + { + if(i % 3 == 0) + { + threads.emplace_back(std::thread(copy_to_dev, hipStreamLegacy)); + } + else if(i % 3 == 1) + { + threads.emplace_back(std::thread(copy_to_dev, hipStreamPerThread)); + } + else + { + HIP_ASSERT(hipStreamCreate(&streams[j])); + threads.emplace_back(std::thread(copy_to_dev, streams[j++])); + } + } + for(auto& thread : threads) + { + thread.join(); + } + return 0; +} diff --git a/projects/rocprofiler-sdk/tests/bin/hip-streams/compute_comm_overlap.cpp b/projects/rocprofiler-sdk/tests/bin/hip-streams/compute_comm_overlap.cpp index fdb314fde1..b44a156927 100644 --- a/projects/rocprofiler-sdk/tests/bin/hip-streams/compute_comm_overlap.cpp +++ b/projects/rocprofiler-sdk/tests/bin/hip-streams/compute_comm_overlap.cpp @@ -110,6 +110,9 @@ main() const int gridSizePerStream = 104; //(int)ceil((float)elements_per_stream/blockSize); HIP_ASSERT(hipEventRecord(start)); + // Extra copy with null stream + HIP_ASSERT(hipMemcpyAsync( + &d_input1[0], &h_input1[0], bytes_per_stream, hipMemcpyHostToDevice, nullptr)); // split H2D copies and kernel calls into separate loops for(int i = 0; i < num_streams; i++) { diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt index e1699d90b6..bea39c9d9f 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/rocprofv3/CMakeLists.txt @@ -40,6 +40,7 @@ add_subdirectory(rocdecode-trace) add_subdirectory(rocjpeg-trace) add_subdirectory(advanced-thread-trace) add_subdirectory(hip-stream-display) +add_subdirectory(hip-streams-per-thread) add_subdirectory(agent-index) add_subdirectory(negate-aggregate-tracing-options) add_subdirectory(minimum-bytes) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/hip-stream-display/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/hip-stream-display/CMakeLists.txt index a0aacd0687..d76bcff15a 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/hip-stream-display/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/rocprofv3/hip-stream-display/CMakeLists.txt @@ -41,7 +41,7 @@ add_test( NAME rocprofv3-test-hip-stream-display-execute COMMAND $ --kernel-rename -s -d - ${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json pftrace + ${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json pftrace csv --log-level env -- $) set_tests_properties( @@ -62,7 +62,11 @@ add_test( COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input ${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_results.json --pftrace-input - ${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_results.pftrace) + ${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_results.pftrace + --kernel-csv-input + ${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_kernel_trace.csv + --memory-copy-csv-input + ${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_memory_copy_trace.csv) set_tests_properties( rocprofv3-test-hip-stream-display-validate diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/hip-stream-display/conftest.py b/projects/rocprofiler-sdk/tests/rocprofv3/hip-stream-display/conftest.py index 291719c398..afc5cbdc56 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/hip-stream-display/conftest.py +++ b/projects/rocprofiler-sdk/tests/rocprofv3/hip-stream-display/conftest.py @@ -45,6 +45,18 @@ def pytest_addoption(parser): default="hip-stream-display/out_results.pftrace", help="Input pftrace file", ) + parser.addoption( + "--kernel-csv-input", + action="store", + default="hip-stream-display/out_kernel_trace.csv", + help="Input csv file", + ) + parser.addoption( + "--memory-copy-csv-input", + action="store", + default="hip-stream-display/out_memory_copy_trace.csv", + help="Input csv file", + ) @pytest.fixture @@ -62,3 +74,29 @@ def pftrace_data(request): if not os.path.isfile(filename): return pytest.skip("stream tracing unavailable") return PerfettoReader(filename).read()[0] + + +@pytest.fixture +def kernel_csv_data(request): + filename = request.config.getoption("--kernel-csv-input") + data = [] + if not os.path.isfile(filename): + raise FileExistsError(f"{filename} does not exist") + with open(filename, "r") as inp: + reader = csv.DictReader(inp) + for row in reader: + data.append(row) + return data + + +@pytest.fixture +def memory_copy_csv_data(request): + filename = request.config.getoption("--memory-copy-csv-input") + data = [] + if not os.path.isfile(filename): + raise FileExistsError(f"{filename} does not exist") + with open(filename, "r") as inp: + reader = csv.DictReader(inp) + for row in reader: + data.append(row) + return data diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/hip-stream-display/validate.py b/projects/rocprofiler-sdk/tests/rocprofv3/hip-stream-display/validate.py index f5c9ea2c48..999aa4bb7e 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/hip-stream-display/validate.py +++ b/projects/rocprofiler-sdk/tests/rocprofv3/hip-stream-display/validate.py @@ -60,11 +60,14 @@ def test_stream_trace(json_data): # Expect stream ids to be set between 1 and 8 inclusive for transpose executable expected_stream_ids = set([i for i in range(1, 9)]) - + kernel_stream_id_set = set() + memory_copy_streams = defaultdict(int) # check buffering data - for titr in (kernel_dispatch_data, memory_copies_data): - stream_id_set = set() - for node in titr: + for titr in ( + (kernel_dispatch_data, "KERNEL_DISPATCH"), + (memory_copies_data, "MEMORY_COPY"), + ): + for node in titr[0]: assert "size" in node assert "kind" in node assert "operation" in node @@ -76,13 +79,21 @@ def test_stream_trace(json_data): assert node.size > 0 assert node.thread_id > 0 - assert node.start_timestamp > 0 - assert node.end_timestamp > 0 assert node.start_timestamp < node.end_timestamp stream_id = node.stream_id.handle - stream_id_set.add(stream_id) - assert stream_id_set == expected_stream_ids + if titr[1] == "KERNEL_DISPATCH": + assert stream_id not in kernel_stream_id_set + kernel_stream_id_set.add(stream_id) + elif titr[1] == "MEMORY_COPY": + memory_copy_streams[stream_id] += 1 + # Exactly 1 kernel executed on streams 1 through 8 + assert kernel_stream_id_set == expected_stream_ids + # One extra memory copy with the null stream + assert memory_copy_streams[0] == 1 + # Exactly 1 memory copy to device and 1 memory copy to host + for i in expected_stream_ids: + assert memory_copy_streams[i] == 2 def test_perfetto_data(pftrace_data, json_data): @@ -96,6 +107,35 @@ def test_perfetto_data(pftrace_data, json_data): ) +def test_csv_data(kernel_csv_data, memory_copy_csv_data): + assert len(kernel_csv_data) > 0, "Expected non-empty kernel csv data" + assert len(memory_copy_csv_data) > 0, "Expected non-empty memory copy csv data" + + expected_stream_ids = set([i for i in range(1, 9)]) + kernel_stream_id_set = set() + for row in kernel_csv_data: + assert "Stream_Id" in row + + stream_id = int(row["Stream_Id"]) + assert stream_id not in kernel_stream_id_set + kernel_stream_id_set.add(stream_id) + # Exactly 1 kernel executed on streams 1 through 8 + assert kernel_stream_id_set == expected_stream_ids + + memory_copy_streams = defaultdict(int) + for row in memory_copy_csv_data: + assert "Stream_Id" in row + + stream_id = int(row["Stream_Id"]) + memory_copy_streams[stream_id] += 1 + + # One extra memory copy with the null stream due to hipStreamLegacy + assert memory_copy_streams[0] == 1 + # Exactly 1 memory copy to device and 1 memory copy to host + for i in expected_stream_ids: + assert memory_copy_streams[i] == 2 + + if __name__ == "__main__": exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) sys.exit(exit_code) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/CMakeLists.txt new file mode 100644 index 0000000000..2073fd29cf --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/CMakeLists.txt @@ -0,0 +1,83 @@ +# MIT License +# +# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +project( + rocprofiler-sdk-tests-rocprofv3-hip-streams-per-thread + LANGUAGES CXX + VERSION 0.0.0) + +find_package(rocprofiler-sdk REQUIRED) + +rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py) + +string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV + "${ROCPROFILER_MEMCHECK_PRELOAD_ENV}") + +set(hip-stream-env "${PRELOAD_ENV}") +set(NUM_THREADS 9) + +add_test( + NAME rocprofv3-test-hip-streams-per-thread-execute + COMMAND + $ -s -d + ${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json pftrace csv + --log-level env -- $ ${NUM_THREADS}) + +set_tests_properties( + rocprofv3-test-hip-streams-per-thread-execute + PROPERTIES TIMEOUT + 60 + LABELS + "integration-tests" + ENVIRONMENT + "${hip-stream-env}" + FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}" + DISABLED + $>) + +add_test( + NAME rocprofv3-test-hip-streams-per-thread-validate + COMMAND + ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input + ${CMAKE_CURRENT_BINARY_DIR}/hip-streams-per-thread-trace/out_results.json + --pftrace-input + ${CMAKE_CURRENT_BINARY_DIR}/hip-streams-per-thread-trace/out_results.pftrace + --memory-copy-csv-input + ${CMAKE_CURRENT_BINARY_DIR}/hip-streams-per-thread-trace/out_memory_copy_trace.csv + ) + +set_tests_properties( + rocprofv3-test-hip-streams-per-thread-validate + PROPERTIES TIMEOUT + 60 + LABELS + "integration-tests" + DEPENDS + rocprofv3-test-hip-streams-per-thread-execute + FAIL_REGULAR_EXPRESSION + "AssertionError" + DISABLED + $>) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/conftest.py b/projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/conftest.py new file mode 100644 index 0000000000..665da09fb7 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/conftest.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +# MIT License +# +# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +import csv +import json +import os +import pytest + +from rocprofiler_sdk.pytest_utils.dotdict import dotdict +from rocprofiler_sdk.pytest_utils import collapse_dict_list +from rocprofiler_sdk.pytest_utils.perfetto_reader import PerfettoReader + + +def pytest_addoption(parser): + parser.addoption( + "--json-input", + action="store", + default="hip-stream-display/out_results.json", + help="Input JSON", + ) + parser.addoption( + "--pftrace-input", + action="store", + default="hip-stream-display/out_results.pftrace", + help="Input pftrace file", + ) + parser.addoption( + "--memory-copy-csv-input", + action="store", + default="hip-stream-display/out_memory_copy_trace.csv", + help="Input csv file", + ) + + +@pytest.fixture +def json_data(request): + filename = request.config.getoption("--json-input") + if not os.path.isfile(filename): + return pytest.skip("stream tracing unavailable") + with open(filename, "r") as inp: + return dotdict(collapse_dict_list(json.load(inp))) + + +@pytest.fixture +def pftrace_data(request): + filename = request.config.getoption("--pftrace-input") + if not os.path.isfile(filename): + return pytest.skip("stream tracing unavailable") + return PerfettoReader(filename).read()[0] + + +@pytest.fixture +def memory_copy_csv_data(request): + filename = request.config.getoption("--memory-copy-csv-input") + data = [] + if not os.path.isfile(filename): + raise FileExistsError(f"{filename} does not exist") + with open(filename, "r") as inp: + reader = csv.DictReader(inp) + for row in reader: + data.append(row) + return data diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/pytest.ini b/projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/pytest.ini new file mode 100644 index 0000000000..e2027e2fd5 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/pytest.ini @@ -0,0 +1,27 @@ +# MIT License +# +# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +[pytest] +addopts = --durations=20 -rA -s -vv +testpaths = validate.py +pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/validate.py b/projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/validate.py new file mode 100644 index 0000000000..eb1f7e9368 --- /dev/null +++ b/projects/rocprofiler-sdk/tests/rocprofv3/hip-streams-per-thread/validate.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +# MIT License +# +# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +import sys +import pytest +import json + +from collections import defaultdict + + +# helper function +def node_exists(name, data, min_len=1): + assert name in data + assert data[name] is not None + if isinstance(data[name], (list, tuple, dict, set)): + assert len(data[name]) >= min_len + + +def get_operation(record, kind_name, op_name=None): + for idx, itr in enumerate(record["strings"]["buffer_records"]): + if kind_name == itr["kind"]: + if op_name is None: + return idx, itr["operations"] + else: + for oidx, oname in enumerate(itr["operations"]): + if op_name == oname: + return oidx + return None + + +def test_stream_trace(json_data): + data = json_data["rocprofiler-sdk-tool"] + buffer_records = data["buffer_records"] + + memory_copies_data = buffer_records["memory_copy"] + assert len(memory_copies_data) > 0 + + # Expect non-null stream ids to be set between 1 and 6 inclusive + expected_stream_ids = set([i for i in range(1, 7)]) + memory_copy_streams = defaultdict(int) + # check buffering data + for node in memory_copies_data: + assert "size" in node + assert "kind" in node + assert "operation" in node + assert "correlation_id" in node + assert "end_timestamp" in node + assert "start_timestamp" in node + assert "thread_id" in node + assert "stream_id" in node + + assert node.size > 0 + assert node.thread_id > 0 + assert node.start_timestamp < node.end_timestamp + + stream_id = node.stream_id.handle + memory_copy_streams[stream_id] += 1 + # 2 memory copies with null stream and 4 with hipStreamPerThread + assert memory_copy_streams[0] == 6 + # Exactly 1 memory copy to device and 1 memory copy to host + for i in expected_stream_ids: + assert memory_copy_streams[i] == 2 + + +def test_perfetto_data(pftrace_data, json_data): + import rocprofiler_sdk.tests.rocprofv3 as rocprofv3 + + assert pftrace_data.empty == False + rocprofv3.test_perfetto_data( + pftrace_data, + json_data, + ("kernel", "memory_copy"), + ) + + +def test_csv_data(memory_copy_csv_data): + assert len(memory_copy_csv_data) > 0, "Expected non-empty memory copy csv data" + + expected_stream_ids = set([i for i in range(1, 7)]) + memory_copy_streams = defaultdict(int) + for row in memory_copy_csv_data: + assert "Stream_Id" in row + + stream_id = int(row["Stream_Id"]) + memory_copy_streams[stream_id] += 1 + + # 2 memory copies with null stream and 4 with hipStreamPerThread + assert memory_copy_streams[0] == 6 + # Exactly 1 memory copy to device and 1 memory copy to host + for i in expected_stream_ids: + assert memory_copy_streams[i] == 2 + + +if __name__ == "__main__": + exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) + sys.exit(exit_code)