From 498b1f2bd7c438aeae0162a8951aab2346044091 Mon Sep 17 00:00:00 2001 From: Vladimir Indic <139573562+vlaindic@users.noreply.github.com> Date: Thu, 4 Jul 2024 07:49:27 +0200 Subject: [PATCH] PC sampling: integration test with instruction decoding (#929) * PC sampling: integration test with instruction decoding * PC sampling: verifying internal and external CIDs The PC sampling integration test has been extended to verify internal and external correlation IDs. * tmp solution of using Instructions as keys * wrapper for HIP call * PCS integration test: ld_addr as instruction id For the sake of the integration test, use as the instruction identifier. To support code object unloading and relocations, use as the identifier (the change in the decoder is required). * PCS integration test: removing shared_ptr Completely removing usage of shared pointers. * PCS integration test: removing decoder When a code object has been unloaded, ensure all PC samples corresponding to that object are decoded, prior to removing the decoder. * PCS integration test: fixing build flags and imports * PCS integration test: fixing labels * PCS integration test: cmake flags fix * PC sampling cmake labels renamed * PCS integration test refactoring * PCS integration test: minimize usage of raw pointers * PCS integration test: at least one sample should be delivered. * PC sampling lables: pc-sampling --- .github/workflows/continuous_integration.yml | 2 +- tests/CMakeLists.txt | 1 + tests/pc_sampling/CMakeLists.txt | 142 ++++++ tests/pc_sampling/address_translation.cpp | 197 ++++++++ tests/pc_sampling/address_translation.hpp | 273 ++++++++++ tests/pc_sampling/cid_retirement.cpp | 129 +++++ tests/pc_sampling/cid_retirement.hpp | 38 ++ tests/pc_sampling/client.cpp | 225 +++++++++ tests/pc_sampling/client.hpp | 44 ++ tests/pc_sampling/codeobj.cpp | 261 ++++++++++ tests/pc_sampling/codeobj.hpp | 38 ++ tests/pc_sampling/external_cid.cpp | 110 ++++ tests/pc_sampling/external_cid.hpp | 42 ++ tests/pc_sampling/kernel_tracing.cpp | 78 +++ tests/pc_sampling/kernel_tracing.hpp | 41 ++ tests/pc_sampling/main.cpp | 224 +++++++++ tests/pc_sampling/pcs.cpp | 504 +++++++++++++++++++ tests/pc_sampling/pcs.hpp | 55 ++ tests/pc_sampling/utils.cpp | 37 ++ tests/pc_sampling/utils.hpp | 65 +++ 20 files changed, 2505 insertions(+), 1 deletion(-) create mode 100644 tests/pc_sampling/CMakeLists.txt create mode 100644 tests/pc_sampling/address_translation.cpp create mode 100644 tests/pc_sampling/address_translation.hpp create mode 100644 tests/pc_sampling/cid_retirement.cpp create mode 100644 tests/pc_sampling/cid_retirement.hpp create mode 100644 tests/pc_sampling/client.cpp create mode 100644 tests/pc_sampling/client.hpp create mode 100644 tests/pc_sampling/codeobj.cpp create mode 100644 tests/pc_sampling/codeobj.hpp create mode 100644 tests/pc_sampling/external_cid.cpp create mode 100644 tests/pc_sampling/external_cid.hpp create mode 100644 tests/pc_sampling/kernel_tracing.cpp create mode 100644 tests/pc_sampling/kernel_tracing.hpp create mode 100644 tests/pc_sampling/main.cpp create mode 100644 tests/pc_sampling/pcs.cpp create mode 100644 tests/pc_sampling/pcs.hpp create mode 100644 tests/pc_sampling/utils.cpp create mode 100644 tests/pc_sampling/utils.hpp diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml index cb2b339589..0ec06a9d9f 100644 --- a/.github/workflows/continuous_integration.yml +++ b/.github/workflows/continuous_integration.yml @@ -21,7 +21,7 @@ env: ROCM_PATH: "/opt/rocm" GPU_TARGETS: "gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102" PATH: "/usr/bin:$PATH" - PC_SAMPLING_TESTS_REGEX: ".*pc_sampling.*" + PC_SAMPLING_TESTS_REGEX: ".*pc-sampling.*" jobs: core: diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 397e3a3387..6bbec14e81 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -57,6 +57,7 @@ add_subdirectory(async-copy-tracing) add_subdirectory(scratch-memory-tracing) add_subdirectory(c-tool) add_subdirectory(page-migration) +add_subdirectory(pc_sampling) add_subdirectory(thread-trace) add_subdirectory(hip-graph-tracing) diff --git a/tests/pc_sampling/CMakeLists.txt b/tests/pc_sampling/CMakeLists.txt new file mode 100644 index 0000000000..a9096b2cac --- /dev/null +++ b/tests/pc_sampling/CMakeLists.txt @@ -0,0 +1,142 @@ +# +# +# +cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) + +if(NOT CMAKE_HIP_COMPILER) + find_program( + amdclangpp_EXECUTABLE + NAMES amdclang++ + HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATH_SUFFIXES bin llvm/bin NO_CACHE) + mark_as_advanced(amdclangpp_EXECUTABLE) + + if(amdclangpp_EXECUTABLE) + set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}") + endif() +endif() + +project(rocprofiler-sdk-samples-pc-sampling-integration-test LANGUAGES CXX HIP) + +foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO) + if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "") + set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}") + endif() +endforeach() + +find_package(rocprofiler-sdk REQUIRED) + +find_package(PkgConfig) + +if(PkgConfig_FOUND) + set(ENV{PKG_CONFIG_SYSTEM_INCLUDE_PATH} "") + pkg_check_modules(DW libdw) + + if(DW_FOUND + AND DW_INCLUDE_DIRS + AND DW_LIBRARIES) + set(libdw_INCLUDE_DIR + "${DW_INCLUDE_DIRS}" + CACHE FILEPATH "libdw include directory") + set(libdw_LIBRARY + "${DW_LIBRARIES}" + CACHE FILEPATH "libdw libraries") + endif() +endif() + +if(NOT libdw_INCLUDE_DIR OR NOT libdw_LIBRARY) + find_path( + libdw_ROOT_DIR + NAMES include/elfutils/libdw.h + HINTS ${libdw_ROOT} + PATHS ${libdw_ROOT}) + + mark_as_advanced(libdw_ROOT_DIR) + + find_path( + libdw_INCLUDE_DIR + NAMES elfutils/libdw.h + HINTS ${libdw_ROOT} + PATHS ${libdw_ROOT} + PATH_SUFFIXES include) + + find_library( + libdw_LIBRARY + NAMES dw + HINTS ${libdw_ROOT} + PATHS ${libdw_ROOT} + PATH_SUFFIXES lib lib64) +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(libdw DEFAULT_MSG libdw_LIBRARY libdw_INCLUDE_DIR) + +if(libdw_FOUND AND NOT TARGET libdw::libdw) + add_library(libdw::libdw INTERFACE IMPORTED) + if(TARGET PkgConfig::DW AND DW_FOUND) + target_link_libraries(libdw::libdw INTERFACE PkgConfig::DW) + else() + target_link_libraries(libdw::libdw INTERFACE ${libdw_LIBRARY}) + target_include_directories(libdw::libdw SYSTEM INTERFACE ${libdw_INCLUDE_DIR}) + endif() +endif() + +add_library(pc-sampling-integration-test-client SHARED) +target_sources( + pc-sampling-integration-test-client + PRIVATE address_translation.cpp + address_translation.hpp + client.cpp + client.hpp + cid_retirement.cpp + cid_retirement.hpp + codeobj.cpp + codeobj.hpp + external_cid.cpp + external_cid.hpp + kernel_tracing.cpp + kernel_tracing.hpp + pcs.hpp + pcs.cpp + utils.hpp + utils.cpp) +target_link_libraries( + pc-sampling-integration-test-client + PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler-sdk::tests-build-flags + rocprofiler-sdk::tests-common-library amd_comgr dw) + +set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP) +find_package(Threads REQUIRED) + +add_executable(pc-sampling-integration-test) +target_sources(pc-sampling-integration-test PRIVATE main.cpp) +target_link_libraries( + pc-sampling-integration-test + PRIVATE pc-sampling-integration-test-client Threads::Threads + rocprofiler-sdk::tests-build-flags) + +# rocprofiler_pc-sampling-integration_get_preload_env(PRELOAD_ENV +# pc-sampling-integration-test-client) +# rocprofiler_pc-sampling-integration_get_ld_library_path_env(LIBRARY_PATH_ENV) + +# set(pc-sampling-integration-test-env ${PRELOAD_ENV} ${LIBRARY_PATH_ENV}) + +add_test(NAME pc-sampling-integration-test + COMMAND $) + +set_tests_properties( + pc-sampling-integration-test + PROPERTIES + TIMEOUT + 45 + LABELS + "integration-tests;pc-sampling" + # ENVIRONMENT + # "${ROCPROFILER_MEMCHECK_PRELOAD_ENV};HSA_TOOLS_LIB=$" + SKIP_REGULAR_EXPRESSION + "PC sampling unavailable" + ENVIRONMENT + "${pc-sampling-integration-test-env}" + FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}") diff --git a/tests/pc_sampling/address_translation.cpp b/tests/pc_sampling/address_translation.cpp new file mode 100644 index 0000000000..0632f7ac64 --- /dev/null +++ b/tests/pc_sampling/address_translation.cpp @@ -0,0 +1,197 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// undefine NDEBUG so asserts are implemented +#ifdef NDEBUG +# undef NDEBUG +#endif + +#include "address_translation.hpp" +#include "pcs.hpp" +#include "utils.hpp" + +#include +#include +#include +#include +#include +#include + +namespace client +{ +namespace address_translation +{ +namespace +{ +struct FlatProfiler +{ +public: + FlatProfiler() = default; + ~FlatProfiler() = default; + + CodeobjAddressTranslate translator; + KernelObjectMap kernel_object_map; + FlatProfile flat_profile; + std::mutex global_mut; +}; +} // namespace + +// Raw pointer to prevent early destruction of static objects +FlatProfiler* flat_profiler = nullptr; + +void +init() +{ + flat_profiler = new FlatProfiler(); +} + +void +fini() +{ + delete flat_profiler; +} + +CodeobjAddressTranslate& +get_address_translator() +{ + return flat_profiler->translator; +} + +KernelObjectMap& +get_kernel_object_map() +{ + return flat_profiler->kernel_object_map; +} + +FlatProfile& +get_flat_profile() +{ + return flat_profiler->flat_profile; +} + +std::mutex& +get_global_mutex() +{ + return flat_profiler->global_mut; +} + +KernelObject::KernelObject(uint64_t code_object_id, + std::string kernel_name, + uint64_t begin_address, + uint64_t end_address) +: code_object_id_(code_object_id) +, kernel_name_(kernel_name) +, begin_address_(begin_address) +, end_address_(end_address) +{ + auto& translator = get_address_translator(); + uint64_t vaddr = begin_address; + while(vaddr < end_address) + { + auto inst = translator.get(vaddr); + vaddr += inst->size; + this->add_instruction(std::move(inst)); + } +} + +void +dump_flat_profile() +{ + // It seems that an instruction can be part of multiple + // instances of the same kernel loaded on two different devices. + // We need to prevent counting the same instruction multiple times. + std::unordered_set visited_instructions; + + const auto& kernel_object_map = get_kernel_object_map(); + const auto& flat_profile = get_flat_profile(); + + std::stringstream ss; + uint64_t samples_num = 0; + kernel_object_map.iterate_kernel_objects([&](const KernelObject* kernel_obj) { + ss << "\n===================================="; + ss << "The kernel: " << kernel_obj->kernel_name() + << " with the begin address: " << kernel_obj->begin_address() + << " from code object with id: " << kernel_obj->code_object_id() << std::endl; + kernel_obj->iterate_instrunctions([&](const Instruction& inst) { + ss << "\t"; + ss << inst.inst << "\t"; + ss << inst.comment << "\t"; + ss << "samples: "; + const auto* _sample_instruction = flat_profile.get_sample_instruction(inst); + if(_sample_instruction == nullptr) + ss << "0"; + else + { + _sample_instruction->process([&](const SampleInstruction& sample_instruction) { + ss << sample_instruction.sample_count(); + // Assure that each instruction is counted once. + if(visited_instructions.count(sample_instruction.inst()) == 0) + { + samples_num += sample_instruction.sample_count(); + visited_instructions.insert(sample_instruction.inst()); + } + + if(sample_instruction.exec_mask_counts().size() <= 1) + { + ss << ", exec_mask: " << std::hex; + ss << sample_instruction.exec_mask_counts().begin()->first; + ss << std::dec; + assert(sample_instruction.sample_count() == + sample_instruction.exec_mask_counts().begin()->second); + } + else + { + uint64_t num_samples_sum = 0; + // More than one exec_mask + for(auto& [exec_mask, samples_per_exec] : + sample_instruction.exec_mask_counts()) + { + ss << std::endl; + ss << "\t\t" + << "exec_mask: " << std::hex << exec_mask; + ss << "\t" + << "samples: " << std::dec << samples_per_exec; + num_samples_sum += samples_per_exec; + ss << std::endl; + } + assert(sample_instruction.sample_count() == num_samples_sum); + } + }); + } + ss << std::endl; + }); + ss << "====================================\n" << std::endl; + }); + + ss << "The total number of decoded samples: " << samples_num << std::endl; + ss << "The total number of collected samples: " << client::pcs::total_samples_num() + << std::endl; + + *utils::get_output_stream() << ss.str() << std::endl; + + assert(samples_num == client::pcs::total_samples_num()); + // We expect at least one PC sample to be decoded/delivered; + assert(samples_num > 0); +} + +} // namespace address_translation +} // namespace client diff --git a/tests/pc_sampling/address_translation.hpp b/tests/pc_sampling/address_translation.hpp new file mode 100644 index 0000000000..1426fcfe83 --- /dev/null +++ b/tests/pc_sampling/address_translation.hpp @@ -0,0 +1,273 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace client +{ +namespace address_translation +{ +using Instruction = rocprofiler::codeobj::disassembly::Instruction; +using CodeobjAddressTranslate = rocprofiler::codeobj::disassembly::CodeobjAddressTranslate; + +class KernelObject +{ +private: + using process_inst_fn = std::function; + +public: + KernelObject() = default; + KernelObject(uint64_t code_object_id, + std::string kernel_name, + uint64_t begin_address, + uint64_t end_address); + + // write lock required + void add_instruction(std::unique_ptr instruction) + { + auto lock = std::unique_lock{mut}; + + instructions_.push_back(std::move(instruction)); + } + + // read lock required + void iterate_instrunctions(process_inst_fn fn) const + { + auto lock = std::shared_lock{mut}; + + for(const auto& inst : this->instructions_) + fn(*inst); + } + + uint64_t code_object_id() const { return code_object_id_; }; + std::string kernel_name() const { return kernel_name_; }; + uint64_t begin_address() const { return begin_address_; }; + uint64_t end_address() const { return end_address_; }; + +private: + mutable std::shared_mutex mut; + uint64_t code_object_id_; + std::string kernel_name_; + uint64_t begin_address_; + uint64_t end_address_; + std::vector> instructions_; +}; + +class KernelObjectMap +{ +private: + using process_kernel_fn = std::function; + +public: + KernelObjectMap() = default; + + // write lock required + void add_kernel(uint64_t code_object_id, + std::string name, + uint64_t begin_address, + uint64_t end_address) + { + auto lock = std::unique_lock{mut}; + + auto key = form_key(code_object_id, name, begin_address); + auto it = kernel_object_map.find(key); + assert(it == kernel_object_map.end()); + kernel_object_map.insert( + {key, + std::make_unique(code_object_id, name, begin_address, end_address)}); + } + +#if 0 + // read lock required + KernelObject* get_kernel(uint64_t code_object_id, std::string name) + { + auto lock = std::shared_lock{mut}; + + auto key = form_key(code_object_id, name); + auto it = kernel_object_map.find(key); + if(it == kernel_object_map.end()) + { + return nullptr; + } + + return it->second.get(); + } +#endif + + // read lock required + void iterate_kernel_objects(process_kernel_fn fn) const + { + auto lock = std::shared_lock{mut}; + + for(auto& [_, kernel_obj] : kernel_object_map) + fn(kernel_obj.get()); + } + +private: + std::unordered_map> kernel_object_map; + mutable std::shared_mutex mut; + + std::string form_key(uint64_t code_object_id, std::string kernel_name, uint64_t begin_address) + { + return std::to_string(code_object_id) + "_" + kernel_name + "_" + + std::to_string(begin_address); + } +}; + +class SampleInstruction +{ +private: + using proces_sample_inst_fn = std::function; + +public: + SampleInstruction() = default; + SampleInstruction(std::unique_ptr inst) + : inst_(std::move(inst)) + {} + + // write lock required + void add_sample(uint64_t exec_mask) + { + auto lock = std::unique_lock{mut}; + + if(exec_mask_counts_.find(exec_mask) == exec_mask_counts_.end()) + { + exec_mask_counts_[exec_mask] = 0; + } + exec_mask_counts_[exec_mask]++; + sample_count_++; + } + + // read lock required + void process(proces_sample_inst_fn fn) const + { + auto lock = std::shared_lock{mut}; + + fn(*this); + } + + Instruction* inst() const { return inst_.get(); }; + // In case an instruction is samples with different exec masks, + // keep track of how many time each exec_mask was observed. + const std::map& exec_mask_counts() const { return exec_mask_counts_; } + // How many time this instruction is samples + uint64_t sample_count() const { return sample_count_; }; + +private: + mutable std::shared_mutex mut; + + // FIXME: prevent direct access of the following fields. + // The following fields should be accessible only from within `process` function. + std::unique_ptr inst_; + // In case an instruction is samples with different exec masks, + // keep track of how many time each exec_mask was observed. + std::map exec_mask_counts_; + // How many time this instruction is samples + uint64_t sample_count_ = 0; +}; + +class FlatProfile +{ +public: + FlatProfile() = default; + + // write lock required + void add_sample(std::unique_ptr instruction, uint64_t exec_mask) + { + auto lock = std::unique_lock{mut}; + + auto inst_id = get_instruction_id(*instruction); + auto itr = samples.find(inst_id); + if(itr == samples.end()) + { + // Add new instruction + samples.insert({inst_id, std::make_unique(std::move(instruction))}); + itr = samples.find(inst_id); + } + + auto* sample_instruction = itr->second.get(); + sample_instruction->add_sample(exec_mask); + } + + // read lock required + const SampleInstruction* get_sample_instruction(const Instruction& inst) const + { + auto lock = std::shared_lock{mut}; + + auto inst_id = get_instruction_id(inst); + auto itr = samples.find(inst_id); + if(itr == samples.end()) return nullptr; + return itr->second.get(); + } + +private: + // For the sake of this test, we use `ld_addr` as the instruction identifier. + // TODO: To cover code object loading/unloading and relocations, + // use `(code_object_id + ld_addr)` as the unique identifier. + // This assumes the decoder chage to return code_object_id as part + // of the `LoadedCodeobjDecoder::get(uint64_t ld_addr)` method. + using instrution_id_t = uint64_t; + instrution_id_t get_instruction_id(const Instruction& instruction) const + { + // Ensure the decoder determined the `ld_addr`. + assert(instruction.ld_addr > 0); + return instruction.ld_addr; + } + + std::unordered_map> samples; + mutable std::shared_mutex mut; +}; + +std::mutex& +get_global_mutex(); + +CodeobjAddressTranslate& +get_address_translator(); + +KernelObjectMap& +get_kernel_object_map(); + +FlatProfile& +get_flat_profile(); + +void +dump_flat_profile(); + +void +init(); + +void +fini(); +} // namespace address_translation +} // namespace client diff --git a/tests/pc_sampling/cid_retirement.cpp b/tests/pc_sampling/cid_retirement.cpp new file mode 100644 index 0000000000..fe2bb1473f --- /dev/null +++ b/tests/pc_sampling/cid_retirement.cpp @@ -0,0 +1,129 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// undefine NDEBUG so asserts are implemented +#ifdef NDEBUG +# undef NDEBUG +#endif + +/** + * @file samples/pc_sampling_library/client.cpp + * + * @brief Example rocprofiler client (tool) + */ + +#include "utils.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace client +{ +namespace cid_retirement +{ +constexpr size_t BUFFER_SIZE_BYTES = 8192; +constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 4); + +rocprofiler_buffer_id_t cid_retirement_buffer; + +void +cid_retirement_tracing_buffered(rocprofiler_context_id_t /*context*/, + rocprofiler_buffer_id_t /*buffer_id*/, + rocprofiler_record_header_t** headers, + size_t num_headers, + void* /*user_data*/, + uint64_t /*drop_count*/) +{ + std::stringstream ss; + + for(size_t i = 0; i < num_headers; ++i) + { + auto* header = headers[i]; + + if(header == nullptr) + { + throw std::runtime_error{ + "rocprofiler provided a null pointer to header. this should never happen"}; + } + else if(header->hash != + rocprofiler_record_header_compute_hash(header->category, header->kind)) + { + throw std::runtime_error{"rocprofiler_record_header_t (category | kind) != hash"}; + } + else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING) + { + if(header->kind == ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT) + { + auto* cid_record = + static_cast( + header->payload); + ss << "... The retired internal correlation id is: " + << cid_record->internal_correlation_id; + ss << ", the timestamp is: " << cid_record->timestamp; + ss << std::endl; + // TODO: assert that the retiring timestamp is greater than + // the greatest timestamp of PC samples matching the retired CID. + } + } + } + + *utils::get_output_stream() << ss.str(); +} + +void +configure_cid_retirement_tracing(rocprofiler_context_id_t context) +{ + ROCPROFILER_CALL(rocprofiler_create_buffer(context, + BUFFER_SIZE_BYTES, + WATERMARK, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, + cid_retirement_tracing_buffered, + nullptr, + &cid_retirement_buffer), + "buffer creation"); + + ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service( + context, + ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT, + nullptr, + 0, + cid_retirement_buffer), + "buffer tracing service for cid retirement configure"); +} + +void +flush_retired_cids() +{ + ROCPROFILER_CALL(rocprofiler_flush_buffer(cid_retirement_buffer), + "Cannot flush retired CIDs buffer"); + *utils::get_output_stream() << "Retired CIDs flushed..." << std::endl; +} + +} // namespace cid_retirement +} // namespace client diff --git a/tests/pc_sampling/cid_retirement.hpp b/tests/pc_sampling/cid_retirement.hpp new file mode 100644 index 0000000000..1585f7e3b6 --- /dev/null +++ b/tests/pc_sampling/cid_retirement.hpp @@ -0,0 +1,38 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include + +namespace client +{ +namespace cid_retirement +{ +void +configure_cid_retirement_tracing(rocprofiler_context_id_t context); + +void +flush_retired_cids(); +} // namespace cid_retirement +} // namespace client diff --git a/tests/pc_sampling/client.cpp b/tests/pc_sampling/client.cpp new file mode 100644 index 0000000000..b18062e7c0 --- /dev/null +++ b/tests/pc_sampling/client.cpp @@ -0,0 +1,225 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// undefine NDEBUG so asserts are implemented +#ifdef NDEBUG +# undef NDEBUG +#endif + +/** + * @file samples/pc_sampling_library/client.cpp + * + * @brief Example rocprofiler client (tool) + */ + +#include "client.hpp" + +#include "address_translation.hpp" +#include "cid_retirement.hpp" +#include "codeobj.hpp" +#include "external_cid.hpp" +#include "kernel_tracing.hpp" +#include "pcs.hpp" +#include "utils.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace client +{ +namespace +{ +rocprofiler_client_id_t* client_id = nullptr; +rocprofiler_client_finalize_t client_fini_func = nullptr; +rocprofiler_context_id_t client_ctx; + +int +tool_init(rocprofiler_client_finalize_t fini_func, void* /*tool_data*/) +{ + client_fini_func = fini_func; + + address_translation::init(); + external_cid::init(); + pcs::init(); + + ROCPROFILER_CALL(rocprofiler_create_context(&client_ctx), "Cannot create context\n"); + + pcs::configure_pc_sampling_on_all_agents(client_ctx); + + // Enable code object tracing service, to match PC samples to corresponding code object + ROCPROFILER_CALL( + rocprofiler_configure_callback_tracing_service(client_ctx, + ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, + nullptr, + 0, + client::codeobj::codeobj_tracing_callback, + nullptr), + "code object tracing service configure"); + + cid_retirement::configure_cid_retirement_tracing(client_ctx); + // Kernel tracing service need for external correlation service. + kernel_tracing::configure_kernel_tracing_service(client_ctx); + external_cid::configure_external_correlation_service(client_ctx); + + int valid_ctx = 0; + ROCPROFILER_CALL(rocprofiler_context_is_valid(client_ctx, &valid_ctx), + "failure checking context validity"); + if(valid_ctx == 0) + { + // notify rocprofiler that initialization failed + // and all the contexts, buffers, etc. created + // should be ignored + return -1; + } + + ROCPROFILER_CALL(rocprofiler_start_context(client_ctx), "rocprofiler context start failed"); + + return 0; +} + +void +tool_fini(void* /*tool_data*/) +{ + // Drain all retired correlation IDs + client::sync(); + + if(client_id) + { + // Assert the context is inactive. + int state = -1; + ROCPROFILER_CALL(rocprofiler_context_is_active(client_ctx, &state), + "Cannot inspect the stat of the context.") + assert(state == 0); + + // No need to stop the context, since it has been stopped implicitly by the rocprofiler-SDK. + + // Flush remaining PC samples + pcs::flush_and_destroy_buffers(); + } + + address_translation::dump_flat_profile(); + // deallocation + address_translation::fini(); + external_cid::fini(); + pcs::fini(); +} + +} // namespace + +// forward declaration +void +setup(); + +void +setup() +{ + // Do not force configuration + if(int status = 0; + rocprofiler_is_initialized(&status) == ROCPROFILER_STATUS_SUCCESS && status == 0) + { + *utils::get_output_stream() << "Client forces rocprofiler configuration.\n" << std::endl; + ROCPROFILER_CALL(rocprofiler_force_configure(&rocprofiler_configure), + "failed to force configuration"); + } +} + +void +shutdown() +{} + +void +sync() +{ + // Flush rocprofiler-SDK's buffers containing PC samples. + pcs::flush_buffers(); + + // Flush retired correlation IDs. + cid_retirement::flush_retired_cids(); +} + +} // namespace client + +extern "C" rocprofiler_tool_configure_result_t* +rocprofiler_configure(uint32_t version, + const char* runtime_version, + uint32_t priority, + rocprofiler_client_id_t* id) +{ + // only activate if main tool + if(priority > 0) return nullptr; + + // set the client name + id->name = "PCSamplingExampleTool"; + + // store client info + client::client_id = id; + + // compute major/minor/patch version info + uint32_t major = version / 10000; + uint32_t minor = (version % 10000) / 100; + uint32_t patch = version % 100; + + // generate info string + auto info = std::stringstream{}; + info << id->name << " is using rocprofiler v" << major << "." << minor << "." << patch << " (" + << runtime_version << ")"; + + std::clog << info.str() << std::endl; + + std::ostream* output_stream = nullptr; + std::string filename = "pc_sampling_integration_test.log"; + if(auto* outfile = getenv("ROCPROFILER_SAMPLE_OUTPUT_FILE"); outfile) filename = outfile; + if(filename == "stdout") + output_stream = &std::cout; + else if(filename == "stderr") + output_stream = &std::cerr; + else + output_stream = new std::ofstream{filename}; + + client::utils::get_output_stream() = output_stream; + + // create configure data + static auto cfg = + rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t), + &client::tool_init, + &client::tool_fini, + static_cast(output_stream)}; + + // return pointer to configure data + return &cfg; +} diff --git a/tests/pc_sampling/client.hpp b/tests/pc_sampling/client.hpp new file mode 100644 index 0000000000..b82f27d7b4 --- /dev/null +++ b/tests/pc_sampling/client.hpp @@ -0,0 +1,44 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#ifdef pc_sampling_code_obj_tracing_client_EXPORTS +# define CLIENT_API __attribute__((visibility("default"))) +#else +# define CLIENT_API +#endif + +#define USE_CLIENT_SHUTDOWN_EXPLICITLY 1 + +namespace client +{ +void +setup() CLIENT_API; + +void +shutdown() CLIENT_API; + +void +sync() CLIENT_API; + +} // namespace client diff --git a/tests/pc_sampling/codeobj.cpp b/tests/pc_sampling/codeobj.cpp new file mode 100644 index 0000000000..a9cd688ee5 --- /dev/null +++ b/tests/pc_sampling/codeobj.cpp @@ -0,0 +1,261 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// undefine NDEBUG so asserts are implemented +#ifdef NDEBUG +# undef NDEBUG +#endif + +/** + * @file samples/pc_sampling_library/client.cpp + * + * @brief Example rocprofiler client (tool) + */ + +#include "address_translation.hpp" +#include "client.hpp" +#include "pcs.hpp" +#include "utils.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace client +{ +namespace codeobj +{ +#define CODEOBJ_DEBUG 0 + +constexpr bool COPY_MEMORY_CODEOBJ = true; + +std::string +cxa_demangle(std::string_view _mangled_name, int* _status) +{ + constexpr size_t buffer_len = 4096; + // return the mangled since there is no buffer + if(_mangled_name.empty()) + { + *_status = -2; + return std::string{}; + } + + auto _demangled_name = std::string{_mangled_name}; + + // PARAMETERS to __cxa_demangle + // mangled_name: + // A NULL-terminated character string containing the name to be demangled. + // buffer: + // A region of memory, allocated with malloc, of *length bytes, into which the + // demangled name is stored. If output_buffer is not long enough, it is expanded + // using realloc. output_buffer may instead be NULL; in that case, the demangled + // name is placed in a region of memory allocated with malloc. + // _buflen: + // If length is non-NULL, the length of the buffer containing the demangled name + // is placed in *length. + // status: + // *status is set to one of the following values + size_t _demang_len = 0; + char* _demang = abi::__cxa_demangle(_demangled_name.c_str(), nullptr, &_demang_len, _status); + switch(*_status) + { + // 0 : The demangling operation succeeded. + // -1 : A memory allocation failure occurred. + // -2 : mangled_name is not a valid name under the C++ ABI mangling rules. + // -3 : One of the arguments is invalid. + case 0: + { + if(_demang) _demangled_name = std::string{_demang}; + break; + } + case -1: + { + char _msg[buffer_len]; + ::memset(_msg, '\0', buffer_len * sizeof(char)); + ::snprintf(_msg, + buffer_len, + "memory allocation failure occurred demangling %s", + _demangled_name.c_str()); + ::perror(_msg); + break; + } + case -2: break; + case -3: + { + char _msg[buffer_len]; + ::memset(_msg, '\0', buffer_len * sizeof(char)); + ::snprintf(_msg, + buffer_len, + "Invalid argument in: (\"%s\", nullptr, nullptr, %p)", + _demangled_name.c_str(), + (void*) _status); + ::perror(_msg); + break; + } + default: break; + }; + + // if it "demangled" but the length is zero, set the status to -2 + if(_demang_len == 0 && *_status == 0) *_status = -2; + + // free allocated buffer + ::free(_demang); + return _demangled_name; +} + +template +std::string +as_hex(Tp _v, size_t _width = 16) +{ + auto _ss = std::stringstream{}; + _ss.fill('0'); + _ss << "0x" << std::hex << std::setw(_width) << _v; + return _ss.str(); +} + +void +codeobj_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* /*user_data*/, + void* /*callback_data*/) +{ + std::stringstream info; + + info << "-----------------------------\n"; + if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT && + record.operation == ROCPROFILER_CODE_OBJECT_LOAD) + { + auto* data = + static_cast(record.payload); + + if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD) + { + auto& global_mut = address_translation::get_global_mutex(); + { + auto lock = std::unique_lock{global_mut}; + + auto& translator = client::address_translation::get_address_translator(); + // register code object inside the decoder + if(std::string_view(data->uri).find("file:///") == 0) + { + translator.addDecoder( + data->uri, data->code_object_id, data->load_delta, data->load_size); + } + else if(COPY_MEMORY_CODEOBJ) + { + translator.addDecoder(reinterpret_cast(data->memory_base), + data->memory_size, + data->code_object_id, + data->load_delta, + data->load_size); + } + else + { + return; + } + + // extract symbols from code object + auto& kernel_object_map = client::address_translation::get_kernel_object_map(); + auto symbolmap = translator.getSymbolMap(); + for(auto& [vaddr, symbol] : symbolmap) + { + kernel_object_map.add_kernel( + data->code_object_id, symbol.name, vaddr, vaddr + symbol.mem_size); + } + } + + info << "code object load :: "; + } + else if(record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD) + { + // Ensure all PC samples of the unloaded code object are decoded, + // prior to removing the decoder. + client::sync(); + auto& global_mut = address_translation::get_global_mutex(); + { + auto lock = std::unique_lock{global_mut}; + auto& translator = client::address_translation::get_address_translator(); + translator.removeDecoder(data->code_object_id, data->load_delta); + } + + info << "code object unload :: "; + } + + info << "code_object_id=" << data->code_object_id + << ", rocp_agent=" << data->rocp_agent.handle << ", uri=" << data->uri + << ", load_base=" << as_hex(data->load_base) << ", load_size=" << data->load_size + << ", load_delta=" << as_hex(data->load_delta); + if(data->storage_type == ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE) + info << ", storage_file_descr=" << data->storage_file; + else if(data->storage_type == ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY) + info << ", storage_memory_base=" << as_hex(data->memory_base) + << ", storage_memory_size=" << data->memory_size; + + info << std::endl; + } + if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT && + record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER) + { + auto* data = + static_cast( + record.payload); + + if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD) + { + info << "kernel symbol load :: "; + } + else if(record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD) + { + info << "kernel symbol unload :: "; + // client_kernels.erase(data->kernel_id); + } + + auto kernel_name = std::regex_replace(data->kernel_name, std::regex{"(\\.kd)$"}, ""); + int demangle_status = 0; + kernel_name = cxa_demangle(kernel_name, &demangle_status); + + info << "code_object_id=" << data->code_object_id << ", kernel_id=" << data->kernel_id + << ", kernel_object=" << as_hex(data->kernel_object) + << ", kernarg_segment_size=" << data->kernarg_segment_size + << ", kernarg_segment_alignment=" << data->kernarg_segment_alignment + << ", group_segment_size=" << data->group_segment_size + << ", private_segment_size=" << data->private_segment_size + << ", kernel_name=" << kernel_name; + + info << std::endl; + } + + *utils::get_output_stream() << info.str() << std::endl; +} + +} // namespace codeobj +} // namespace client diff --git a/tests/pc_sampling/codeobj.hpp b/tests/pc_sampling/codeobj.hpp new file mode 100644 index 0000000000..4dc303e9b9 --- /dev/null +++ b/tests/pc_sampling/codeobj.hpp @@ -0,0 +1,38 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include + +namespace client +{ +namespace codeobj +{ +void +codeobj_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* user_data, + void* callback_data); + +} // namespace codeobj +} // namespace client diff --git a/tests/pc_sampling/external_cid.cpp b/tests/pc_sampling/external_cid.cpp new file mode 100644 index 0000000000..4592fa63b4 --- /dev/null +++ b/tests/pc_sampling/external_cid.cpp @@ -0,0 +1,110 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// undefine NDEBUG so asserts are implemented +#ifdef NDEBUG +# undef NDEBUG +#endif + +/** + * @file samples/pc_sampling_library/client.cpp + * + * @brief Example rocprofiler client (tool) + */ + +#include "utils.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace client +{ +namespace external_cid +{ +namespace +{ +template +auto +make_array(Arg arg, Args&&... args) +{ + constexpr auto N = 1 + sizeof...(Args); + return std::array{std::forward(arg), std::forward(args)...}; +} +} // namespace + +/** + * @brief Must be called at the beginning of the `tool_ini`. + */ +void +init() +{} + +/** + * @brief Should be called at the of the `tool_fini` + */ +void +fini() +{} + +int +set_external_correlation_id(rocprofiler_thread_id_t /*thr_id*/, + rocprofiler_context_id_t /*ctx_id*/, + rocprofiler_external_correlation_id_request_kind_t /*kind*/, + rocprofiler_tracing_operation_t /*op*/, + uint64_t internal_corr_id, + rocprofiler_user_data_t* external_corr_id, + void* /*user_data*/) +{ + // In multi-queues (devices) scenario, incrementing external correlation IDs + // might not always match with incrementing internal correlation IDs. + // Thus, use the value of internal correlation ID and verify that both + // externall correlation IDs and internal correlation IDs are the same + // in delivered PC samples. + external_corr_id->value = internal_corr_id; + return 0; +} + +void +configure_external_correlation_service(rocprofiler_context_id_t context) +{ + auto external_corr_id_request_kinds = + make_array(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH); + + ROCPROFILER_CHECK(rocprofiler_configure_external_correlation_id_request_service( + context, + external_corr_id_request_kinds.data(), + external_corr_id_request_kinds.size(), + set_external_correlation_id, + nullptr)); +} + +} // namespace external_cid +} // namespace client diff --git a/tests/pc_sampling/external_cid.hpp b/tests/pc_sampling/external_cid.hpp new file mode 100644 index 0000000000..7e2d667518 --- /dev/null +++ b/tests/pc_sampling/external_cid.hpp @@ -0,0 +1,42 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include +#include + +namespace client +{ +namespace external_cid +{ +void +configure_external_correlation_service(rocprofiler_context_id_t context); + +void +init(); + +void +fini(); +} // namespace external_cid +} // namespace client diff --git a/tests/pc_sampling/kernel_tracing.cpp b/tests/pc_sampling/kernel_tracing.cpp new file mode 100644 index 0000000000..986feb598f --- /dev/null +++ b/tests/pc_sampling/kernel_tracing.cpp @@ -0,0 +1,78 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// undefine NDEBUG so asserts are implemented +#ifdef NDEBUG +# undef NDEBUG +#endif + +/** + * @file samples/pc_sampling_library/client.cpp + * + * @brief Example rocprofiler client (tool) + */ + +#include "utils.hpp" + +#include +#include +#include + +#include +#include +#include + +namespace client +{ +namespace kernel_tracing +{ +constexpr size_t BUFFER_SIZE_BYTES = 8192; +constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 4); + +rocprofiler_buffer_id_t kernel_tracing_buffer; + +void +kernel_tracing_buffered(rocprofiler_context_id_t /*context*/, + rocprofiler_buffer_id_t /*buffer_id*/, + rocprofiler_record_header_t** /*headers*/, + size_t /*num_headers*/, + void* /*user_data*/, + uint64_t /*drop_count*/) +{} + +void +configure_kernel_tracing_service(rocprofiler_context_id_t context) +{ + ROCPROFILER_CHECK(rocprofiler_create_buffer(context, + BUFFER_SIZE_BYTES, + WATERMARK, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, + kernel_tracing_buffered, + nullptr, + &kernel_tracing_buffer)); + + ROCPROFILER_CHECK(rocprofiler_configure_buffer_tracing_service( + context, ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, nullptr, 0, kernel_tracing_buffer)); +} + +} // namespace kernel_tracing +} // namespace client diff --git a/tests/pc_sampling/kernel_tracing.hpp b/tests/pc_sampling/kernel_tracing.hpp new file mode 100644 index 0000000000..226337d489 --- /dev/null +++ b/tests/pc_sampling/kernel_tracing.hpp @@ -0,0 +1,41 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include + +namespace client +{ +namespace kernel_tracing +{ +void +kernel_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* user_data, + void* callback_data); + +void +configure_kernel_tracing_service(rocprofiler_context_id_t context); + +} // namespace kernel_tracing +} // namespace client diff --git a/tests/pc_sampling/main.cpp b/tests/pc_sampling/main.cpp new file mode 100644 index 0000000000..bc73037777 --- /dev/null +++ b/tests/pc_sampling/main.cpp @@ -0,0 +1,224 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include + +#include +#include +#include +#include + +namespace +{ +#define M 8192 +#define N 8192 +#define K 8192 +#define TileSize 16 +#define BLOCK_SIZE_X 16 +#define BLOCK_SIZE_Y 16 +#define GRID_SIZE_X (M + BLOCK_SIZE_X - 1) / BLOCK_SIZE_X +#define GRID_SIZE_Y (N + BLOCK_SIZE_Y - 1) / BLOCK_SIZE_Y +#define WAVES_PER_BLOCK_MI200_PLUS (BLOCK_SIZE_X * BLOCK_SIZE_Y) / 64 + +#define HIP_API_CALL(CALL) \ + { \ + hipError_t error_ = (CALL); \ + if(error_ != hipSuccess) \ + { \ + fprintf(stderr, \ + "%s:%d :: HIP error : %s\n", \ + __FILE__, \ + __LINE__, \ + hipGetErrorString(error_)); \ + throw std::runtime_error("hip_api_call"); \ + } \ + } +} // namespace + +namespace +{ +void +check_hip_error(void); +} // namespace + +__global__ void +matrix_multiply(float* A, float* B, float* Out, int /*m*/, int n, int k) +{ + int gid_x = blockDim.x * blockIdx.x + threadIdx.x; + int gid_y = blockDim.y * blockIdx.y + threadIdx.y; + + if(gid_x < N && gid_y < M) + { + float sum = 0; + for(int i = 0; i < k; ++i) + { + sum += A[gid_y * k + i] * B[i * n + gid_x]; + } + + Out[gid_y * n + gid_x] = sum; + } +} + +#if 1 +__global__ void +matrix_multiply_tile(float* A, float* B, float* Out, int m, int n, int k) +{ + __shared__ float subTileM[TileSize][TileSize]; + __shared__ float subTileN[TileSize][TileSize]; + + int bx = blockIdx.x; + int by = blockIdx.y; + int tx = threadIdx.x; + int ty = threadIdx.y; + + int row = by * TileSize + ty; + int col = bx * TileSize + tx; + + float sum = 0; + for(int i = 0; i < ((k - 1) / TileSize + 1); i++) + { + int curr_l = row * k + i * TileSize + tx; + int curr_r = (i * TileSize + ty) * n + col; + + if(i * TileSize + tx < k && row < m) + { + subTileM[ty][tx] = A[curr_l]; + } + else + { + subTileM[ty][tx] = 0.0; + } + + if(i * TileSize + ty < k && col < n) + { + subTileN[ty][tx] = B[curr_r]; + } + else + { + subTileN[ty][tx] = 0.0; + } + + __syncthreads(); + + for(int j = 0; j < TileSize; j++) + { + if(j + TileSize * i < k) + { + sum += subTileM[ty][j] * subTileN[j][tx]; + } + } + + __syncthreads(); + } + + if(row < m && col < n) + { + Out[row * n + col] = sum; + } +} +#endif + +void +run_hip_app() +{ + std::vector A(M * K); + std::vector B(K * N); + std::vector Out(M * N); + + // Randomly initialize the matrices + for(int i = 0; i < M * K; ++i) + { + A[i] = (float) rand() / (float) RAND_MAX; + } + + for(int i = 0; i < K * N; ++i) + { + B[i] = (float) rand() / (float) RAND_MAX; + } + + // Allocate GPU Memory + float *d_A, *d_B, *d_Out; + HIP_API_CALL(hipMalloc(&d_A, sizeof(float) * M * K)); + HIP_API_CALL(hipMalloc(&d_B, sizeof(float) * K * N)); + HIP_API_CALL(hipMalloc(&d_Out, sizeof(float) * M * N)); + + // Copy data to GPU + HIP_API_CALL(hipMemcpy(d_A, A.data(), sizeof(float) * M * K, hipMemcpyHostToDevice)); + HIP_API_CALL(hipMemcpy(d_B, B.data(), sizeof(float) * K * N, hipMemcpyHostToDevice)); + + // Run the kernel + dim3 block_size(BLOCK_SIZE_X, BLOCK_SIZE_Y); + dim3 grid_size((M + block_size.x - 1) / block_size.x, (N + block_size.y - 1) / block_size.y); + matrix_multiply<<>>(d_A, d_B, d_Out, M, N, K); + check_hip_error(); + matrix_multiply_tile<<>>(d_A, d_B, d_Out, M, N, K); + check_hip_error(); + + // Copy data back to CPU + HIP_API_CALL(hipMemcpy(Out.data(), d_Out, sizeof(float) * M * N, hipMemcpyDeviceToHost)); + + // Free GPU Memory + HIP_API_CALL(hipFree(d_A)); + HIP_API_CALL(hipFree(d_B)); + HIP_API_CALL(hipFree(d_Out)); +} + +#define DEVICE_ID 0 + +int +main(int /*argc*/, char** /*argv*/) +{ + int deviceId = DEVICE_ID; + + auto status = hipSetDevice(deviceId); + assert(status == hipSuccess); + HIP_API_CALL(status); + + int currDeviceId = -1; + status = hipGetDevice(&currDeviceId); + HIP_API_CALL(status); + assert(status == hipSuccess); + assert(deviceId == currDeviceId); + + for(int i = 0; i < 1; i++) + { + std::cout << "<<< MatMul starts" << std::endl; + run_hip_app(); + std::cout << ">>> MatMul ends" << std::endl; + } + + return 0; +} + +namespace +{ +void +check_hip_error(void) +{ + hipError_t err = hipGetLastError(); + if(err != hipSuccess) + { + std::cerr << "Error: " << hipGetErrorString(err) << std::endl; + throw std::runtime_error("hip_api_call"); + } +} +} // namespace diff --git a/tests/pc_sampling/pcs.cpp b/tests/pc_sampling/pcs.cpp new file mode 100644 index 0000000000..dbd6f0bae9 --- /dev/null +++ b/tests/pc_sampling/pcs.cpp @@ -0,0 +1,504 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// undefine NDEBUG so asserts are implemented +#ifdef NDEBUG +# undef NDEBUG +#endif + +#include "pcs.hpp" +#include "address_translation.hpp" +#include "codeobj.hpp" +#include "external_cid.hpp" +#include "utils.hpp" + +#include +#include +#include +#include +#include +#include + +namespace client +{ +namespace pcs +{ +namespace +{ +constexpr int MAX_FAILURES = 10; +constexpr size_t BUFFER_SIZE_BYTES = 8192; +constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 4); + +struct tool_agent_info; +using avail_configs_vec_t = std::vector; +using tool_agent_info_vec_t = std::vector>; +using pc_sampling_buffer_id_vec_t = std::vector; + +struct tool_agent_info +{ + rocprofiler_agent_id_t agent_id; + std::unique_ptr avail_configs; + const rocprofiler_agent_t* agent; +}; + +struct PCSampler +{ +private: + using code_object_id_t = uint64_t; + using code_object_id_set_t = std::unordered_set; + +public: + PCSampler() = default; + + ~PCSampler() + { + // Assert that `active_code_objects` is empty. + // For more information, refer to the comments above. + assert(active_code_objects.empty()); + // Clear the data + buffer_ids.clear(); + } + + // GPU agents supporting PC sampling + tool_agent_info_vec_t gpu_agents; + // The total number of collected samples + std::atomic total_samples_num{0}; + // ROCProfiler-SDK PC sampling buffers + pc_sampling_buffer_id_vec_t buffer_ids; + // The set that keeps track of reported code object loading/unloading events. + // At the end of the test, the sets needs to be empty. + // Namely, each loading event will insert a code object id into the set, + // while each unloading event will delete a code ojbect id from the set. + code_object_id_set_t active_code_objects; +}; + +// The reason for using raw pointers is the following. +// Sometimes, statically created objects of the client::pcs +// namespace might be freed prior to the `tool_fini`, +// meaning objects of `pcs` namespace become unusable inside `tool_fini`. +// Instead, use raw pointers to control objects deallocation time. +PCSampler* pc_sampler = nullptr; + +// forward declaration +bool +query_avail_configs_for_agent(tool_agent_info* agent_info); + +rocprofiler_status_t +find_all_gpu_agents_supporting_pc_sampling_impl(rocprofiler_agent_version_t version, + const void** agents, + size_t num_agents, + void* user_data) +{ + assert(version == ROCPROFILER_AGENT_INFO_VERSION_0); + // user_data represent the pointer to the array where gpu_agent will be stored + if(!user_data) return ROCPROFILER_STATUS_ERROR; + + std::stringstream ss; + + auto* _out_agents = static_cast(user_data); + auto* _agents = reinterpret_cast(agents); + for(size_t i = 0; i < num_agents; i++) + { + if(_agents[i]->type == ROCPROFILER_AGENT_TYPE_GPU) + { + // Instantiate the tool_agent_info. + // Store pointer to the rocprofiler_agent_t and instatiate a vector of + // available configurations. + // Move the ownership to the _out_agents + auto tool_gpu_agent = std::make_unique(); + tool_gpu_agent->agent_id = _agents[i]->id; + tool_gpu_agent->avail_configs = std::make_unique(); + tool_gpu_agent->agent = _agents[i]; + // Check if the GPU agent supports PC sampling. If so, add it to the + // output list `_out_agents`. + if(query_avail_configs_for_agent(tool_gpu_agent.get())) + _out_agents->push_back(std::move(tool_gpu_agent)); + } + + ss << "[" << __FUNCTION__ << "] " << _agents[i]->name << " :: " + << "id=" << _agents[i]->id.handle << ", " + << "type=" << _agents[i]->type << "\n"; + } + + *utils::get_output_stream() << ss.str() << std::endl; + + return ROCPROFILER_STATUS_SUCCESS; +} + +void +find_all_gpu_agents_supporting_pc_sampling() +{ + // This function returns the all gpu agents supporting some kind of PC sampling + ROCPROFILER_CALL( + rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, + &find_all_gpu_agents_supporting_pc_sampling_impl, + sizeof(rocprofiler_agent_t), + static_cast(&pc_sampler->gpu_agents)), + "Failed to find GPU agents"); +} + +/** + * @brief The function queries available PC sampling configurations. + * If there is at least one available configuration, it returns true. + * Otherwise, this function returns false to indicate the agent does + * not support PC sampling. + */ +bool +query_avail_configs_for_agent(tool_agent_info* agent_info) +{ + // Clear the available configurations vector + agent_info->avail_configs->clear(); + + auto cb = [](const rocprofiler_pc_sampling_configuration_t* configs, + size_t num_config, + void* user_data) { + auto* avail_configs = static_cast(user_data); + for(size_t i = 0; i < num_config; i++) + { + avail_configs->emplace_back(configs[i]); + } + return ROCPROFILER_STATUS_SUCCESS; + }; + + auto status = rocprofiler_query_pc_sampling_agent_configurations( + agent_info->agent_id, cb, agent_info->avail_configs.get()); + + std::stringstream ss; + + if(status != ROCPROFILER_STATUS_SUCCESS) + { + // The query operation failed, so consider the PC sampling is unsupported at the agent. + // This can happen if the PC sampling service is invoked within the ROCgdb. + ss << "Querying PC sampling capabilities failed with status: " << status << std::endl; + *utils::get_output_stream() << ss.str() << std::endl; + return false; + } + else if(agent_info->avail_configs->size() == 0) + { + // No available configuration at the moment, so mark the PC sampling as unsupported. + return false; + } + + ss << "The agent with the id: " << agent_info->agent_id.handle << " supports the " + << agent_info->avail_configs->size() << " configurations: " << std::endl; + size_t ind = 0; + for(auto& cfg : *agent_info->avail_configs) + { + ss << "(" << ++ind << ".) " + << "method: " << cfg.method << ", " + << "unit: " << cfg.unit << ", " + << "min_interval: " << cfg.min_interval << ", " + << "max_interval: " << cfg.max_interval << ", " + << "flags: " << std::hex << cfg.flags << std::dec << std::endl; + } + + *utils::get_output_stream() << ss.str() << std::flush; + + return true; +} + +void +configure_pc_sampling_prefer_stochastic(tool_agent_info* agent_info, + rocprofiler_context_id_t context_id, + rocprofiler_buffer_id_t buffer_id) +{ + int failures = MAX_FAILURES; + size_t interval = 0; + do + { + // Update the list of available configurations + auto success = query_avail_configs_for_agent(agent_info); + if(!success) + { + // An error occured while querying PC sampling capabilities, + // so avoid trying configuring PC sampling service. + // Instead return false to indicated a failure. + ROCPROFILER_CALL(ROCPROFILER_STATUS_ERROR, + "Could not configuring PC sampling service due to failure with query " + "capabilities."); + } + + const rocprofiler_pc_sampling_configuration_t* first_host_trap_config = nullptr; + const rocprofiler_pc_sampling_configuration_t* first_stochastic_config = nullptr; + // Search until encountering on the stochastic configuration, if any. + // Otherwise, use the host trap config + for(auto const& cfg : *agent_info->avail_configs) + { + if(cfg.method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC) + { + first_stochastic_config = &cfg; + break; + } + else if(!first_host_trap_config && + cfg.method == ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP) + { + first_host_trap_config = &cfg; + } + } + + // Check if the stochastic config is found. Use host trap config otherwise. + const rocprofiler_pc_sampling_configuration_t* picked_cfg = + (first_stochastic_config != nullptr) ? first_stochastic_config : first_host_trap_config; + + interval = picked_cfg->min_interval; + + auto status = rocprofiler_configure_pc_sampling_service(context_id, + agent_info->agent_id, + picked_cfg->method, + picked_cfg->unit, + interval, + buffer_id); + if(status == ROCPROFILER_STATUS_SUCCESS) + { + *utils::get_output_stream() + << ">>> We chose PC sampling interval: " << interval + << " on the agent: " << agent_info->agent->id.handle << std::endl; + return; + } + else if(status != ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE) + { + ROCPROFILER_CALL(status, "Failed to configure PC sampling"); + } + // status == ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE + // means another process P2 already configured PC sampling. + // Query available configurations again and receive the configurations picked by P2. + // However, if P2 destroys PC sampling service after query function finished, + // but before the `rocprofiler_configure_pc_sampling_service` is called, + // then the `rocprofiler_configure_pc_sampling_service` will fail again. + // The process P1 executing this loop can spin wait (starve) if it is unlucky enough + // to always be interuppted by some other process P2 that creates/destroys + // PC sampling service on the same device while P1 is executing the code + // after the `query_avail_configs_for_agent` and + // before the `rocprofiler_configure_pc_sampling_service`. + // This should happen very rarely, but just to be sure, we introduce a counter `failures` + // that will allow certain amount of failures to process P1. + } while(--failures); + + // The process failed too many times configuring PC sampling, + // report this to user; + ROCPROFILER_CALL(ROCPROFILER_STATUS_ERROR, + "Failed too many times configuring PC sampling service"); +} + +void +rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/, + rocprofiler_buffer_id_t /*buffer_id*/, + rocprofiler_record_header_t** headers, + size_t num_headers, + void* /*data*/, + uint64_t drop_count) +{ + std::stringstream ss; + ss << "The number of delivered samples is: " << num_headers << ", " + << "while the number of dropped samples is: " << drop_count << std::endl; + + auto& flat_profile = client::address_translation::get_flat_profile(); + auto& translator = client::address_translation::get_address_translator(); + auto& global_mut = address_translation::get_global_mutex(); + + { + auto lock = std::unique_lock{global_mut}; + + for(size_t i = 0; i < num_headers; i++) + { + auto* cur_header = headers[i]; + + if(cur_header == nullptr) + { + throw std::runtime_error{ + "rocprofiler provided a null pointer to header. this should never happen"}; + } + else if(cur_header->hash != + rocprofiler_record_header_compute_hash(cur_header->category, cur_header->kind)) + { + throw std::runtime_error{"rocprofiler_record_header_t (category | kind) != hash"}; + } + else if(cur_header->category == ROCPROFILER_BUFFER_CATEGORY_PC_SAMPLING) + { + if(cur_header->kind == ROCPROFILER_PC_SAMPLING_RECORD_SAMPLE) + { + auto* pc_sample = + static_cast(cur_header->payload); + + ss << "pc: " << std::hex << pc_sample->pc << ", " + << "timestamp: " << std::dec << pc_sample->timestamp << ", " + << "exec: " << std::hex << std::setw(16) << pc_sample->exec_mask << ", " + << "workgroup_id_(x=" << std::dec << std::setw(5) + << pc_sample->workgroup_id.x << ", " + << "y=" << std::setw(5) << pc_sample->workgroup_id.y << ", " + << "z=" << std::setw(5) << pc_sample->workgroup_id.z << "), " + << "wave_id: " << std::setw(2) + << static_cast(pc_sample->wave_id) << ", " + << "cu_id: " << pc_sample->hw_id << ", " + << "correlation: {internal=" << std::setw(7) + << pc_sample->correlation_id.internal << ", " + << "external=" << std::setw(5) << pc_sample->correlation_id.external.value + << "}" << std::endl; + + // Ignore samples from blit kernels. + if(pc_sample->correlation_id.internal == + ROCPROFILER_CORRELATION_ID_INTERNAL_NONE) + continue; + + total_samples_num() += 1; + + auto corr_id = pc_sample->correlation_id; + // Internal correlation IDs are generated by the ROCProfiler-SDK for + // kernel dispatches only. Similarly, the test tool generate external + // correlation IDs for the kernel dispatches only. + // Thus, we should expect them to be equal. + assert(corr_id.internal == corr_id.external.value); + assert(corr_id.external.value > 0); + + // Decoding the PC + auto inst = translator.get(pc_sample->pc); + flat_profile.add_sample(std::move(inst), pc_sample->exec_mask); + } + else if(cur_header->kind == ROCPROFILER_PC_SAMPLING_RECORD_CODE_OBJECT_LOAD_MARKER) + { + auto* marker = static_cast( + cur_header->payload); + auto code_object_id = marker->code_object_id; + ss << "code object loading: " << code_object_id << std::endl; + // The code object load event can be reported once per code object id. + assert(pc_sampler->active_code_objects.count(code_object_id) == 0); + pc_sampler->active_code_objects.emplace(code_object_id); + } + else if(cur_header->kind == + ROCPROFILER_PC_SAMPLING_RECORD_CODE_OBJECT_UNLOAD_MARKER) + { + auto* marker = + static_cast( + cur_header->payload); + auto code_object_id = marker->code_object_id; + ss << "code object unloading: " << code_object_id << std::endl; + // The code object unload event can be reported once per code object id. + assert(pc_sampler->active_code_objects.count(code_object_id) == 1); + pc_sampler->active_code_objects.erase(code_object_id); + } + } + else + { + throw std::runtime_error{"unexpected rocprofiler_record_header_t category + kind"}; + } + } + + // TODO: do we need some sync here? + *utils::get_output_stream() << ss.str() << std::endl; + } +} +} // namespace + +void +init() +{ + pc_sampler = new PCSampler(); +} + +void +fini() +{ + delete pc_sampler; +} + +std::atomic& +total_samples_num() +{ + return pc_sampler->total_samples_num; +} + +void +configure_pc_sampling_on_all_agents(rocprofiler_context_id_t context) +{ + find_all_gpu_agents_supporting_pc_sampling(); + + if(pc_sampler->gpu_agents.empty()) + { + *utils::get_output_stream() << "No availabe gpu agents supporting PC sampling" << std::endl; + *utils::get_output_stream() << "PC sampling unavailable" << std::endl; + // Exit with no error if none of the GPUs support PC sampling. + exit(0); + } + + auto& buff_ids_vec = pc_sampler->buffer_ids; + + for(auto& gpu_agent : pc_sampler->gpu_agents) + { + // creating a buffer that will hold pc sampling information + rocprofiler_buffer_policy_t drop_buffer_action = ROCPROFILER_BUFFER_POLICY_LOSSLESS; + auto buffer_id = rocprofiler_buffer_id_t{}; + ROCPROFILER_CALL(rocprofiler_create_buffer(context, + client::pcs::BUFFER_SIZE_BYTES, + client::pcs::WATERMARK, + drop_buffer_action, + client::pcs::rocprofiler_pc_sampling_callback, + nullptr, + &buffer_id), + "Cannot create pc sampling buffer"); + + client::pcs::configure_pc_sampling_prefer_stochastic(gpu_agent.get(), context, buffer_id); + + // One helper thread per GPU agent's buffer. + auto client_agent_thread = rocprofiler_callback_thread_t{}; + ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_agent_thread), + "failure creating callback thread"); + + ROCPROFILER_CALL(rocprofiler_assign_callback_thread(buffer_id, client_agent_thread), + "failed to assign thread for buffer"); + + buff_ids_vec.emplace_back(buffer_id); + } +} + +void +flush_buffers() +{ + // Flush rocproifler-SDK's buffers containing PC samples. + for(const auto& buff_id : pc_sampler->buffer_ids) + { + // Flush the buffer explicitly + ROCPROFILER_CALL(rocprofiler_flush_buffer(buff_id), "Failure flushing buffer"); + } +} + +void +flush_and_destroy_buffers() +{ + for(const auto& buff_id : pc_sampler->buffer_ids) + { + // Flush the buffer explicitly + ROCPROFILER_CALL(rocprofiler_flush_buffer(buff_id), "Failure flushing buffer"); + // Destroying the buffer + rocprofiler_status_t status = rocprofiler_destroy_buffer(buff_id); + if(status == ROCPROFILER_STATUS_ERROR_BUFFER_BUSY) + { + *utils::get_output_stream() + << "The buffer is busy, so we cannot destroy it at the moment." << std::endl; + } + else + { + ROCPROFILER_CALL(status, "Cannot destroy buffer"); + } + } +} +} // namespace pcs +} // namespace client diff --git a/tests/pc_sampling/pcs.hpp b/tests/pc_sampling/pcs.hpp new file mode 100644 index 0000000000..4b85461371 --- /dev/null +++ b/tests/pc_sampling/pcs.hpp @@ -0,0 +1,55 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include + +#include +#include + +namespace client +{ +namespace pcs +{ +// Must be called first (prior to any other function from this namespace) +void +init(); + +// Must be called at the end of the `tool_fini` +void +fini(); + +std::atomic& +total_samples_num(); + +void +configure_pc_sampling_on_all_agents(rocprofiler_context_id_t context); + +void +flush_buffers(); + +void +flush_and_destroy_buffers(); +} // namespace pcs +} // namespace client diff --git a/tests/pc_sampling/utils.cpp b/tests/pc_sampling/utils.cpp new file mode 100644 index 0000000000..4fed10bd62 --- /dev/null +++ b/tests/pc_sampling/utils.cpp @@ -0,0 +1,37 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "utils.hpp" + +namespace client +{ +namespace utils +{ +std::ostream*& +get_output_stream() +{ + // The output strea is initially unitialized + static std::ostream* _v = nullptr; + return _v; +} +} // namespace utils +} // namespace client diff --git a/tests/pc_sampling/utils.hpp b/tests/pc_sampling/utils.hpp new file mode 100644 index 0000000000..e9275160a1 --- /dev/null +++ b/tests/pc_sampling/utils.hpp @@ -0,0 +1,65 @@ +// MIT License +// +// Copyright (c) 2024 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include + +#include +#include + +#define ROCPROFILER_VAR_NAME_COMBINE(X, Y) X##Y +#define ROCPROFILER_VARIABLE(X, Y) ROCPROFILER_VAR_NAME_COMBINE(X, Y) + +#define ROCPROFILER_CALL(result, msg) \ + { \ + rocprofiler_status_t CHECKSTATUS = result; \ + if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \ + { \ + std::cerr << #result << " failed with error code " << CHECKSTATUS << std::endl; \ + throw std::runtime_error(#result " failure"); \ + } \ + } + +#define ROCPROFILER_CHECK(result) \ + { \ + rocprofiler_status_t ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) = result; \ + if(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) != ROCPROFILER_STATUS_SUCCESS) \ + { \ + std::string status_msg = \ + rocprofiler_get_status_string(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__)); \ + std::stringstream errmsg{}; \ + errmsg << "[" << __FILE__ << ":" << __LINE__ << "] " << #result \ + << " failed with error code " << ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) \ + << " :: " << status_msg; \ + throw std::runtime_error(errmsg.str()); \ + } \ + } + +namespace client +{ +namespace utils +{ +std::ostream*& +get_output_stream(); +} +} // namespace client