diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f826ec039..16e535104f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -328,3 +328,9 @@ Example for file plugin output: - Fixed ROCprofiler to match versioning changes in HIP Runtime. - Fixed plugins race condition. - Updated metrics to MI300. + +## ROCprofiler for rocm 6.2 + +### Removed + +- pcsampler sample code has been removed due to deprecation from v2. \ No newline at end of file diff --git a/plugin/cli/cli.cpp b/plugin/cli/cli.cpp index 92e7cd4c68..0c6a7bdac3 100644 --- a/plugin/cli/cli.cpp +++ b/plugin/cli/cli.cpp @@ -321,6 +321,7 @@ class file_plugin_t { break; } case ROCPROFILER_PC_SAMPLING_RECORD: { + [[deprecated("PC Sampling is deprecated")]] const rocprofiler_record_pc_sample_t* pc_sampling_record = reinterpret_cast(begin); FlushPCSamplingRecord(pc_sampling_record); diff --git a/plugin/file/file.cpp b/plugin/file/file.cpp index 63f468af4d..012458d01a 100644 --- a/plugin/file/file.cpp +++ b/plugin/file/file.cpp @@ -443,6 +443,7 @@ class file_plugin_t { break; } case ROCPROFILER_PC_SAMPLING_RECORD: { + [[deprecated("PC Sampling is deprecated")]] const rocprofiler_record_pc_sample_t* pc_sampling_record = reinterpret_cast(begin); FlushPCSamplingRecord(pc_sampling_record); diff --git a/plugin/file/file_v1.cpp b/plugin/file/file_v1.cpp index 95b8d666d9..9bc965f4ed 100644 --- a/plugin/file/file_v1.cpp +++ b/plugin/file/file_v1.cpp @@ -441,6 +441,7 @@ class file_plugin_t { break; } case ROCPROFILER_PC_SAMPLING_RECORD: { + [[deprecated("PC Sampling is deprecated")]] const rocprofiler_record_pc_sample_t* pc_sampling_record = reinterpret_cast(begin); FlushPCSamplingRecord(pc_sampling_record); diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 2eaadd4945..6534a27841 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -184,50 +184,6 @@ install(TARGETS tracer_hip_hsa_async RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples COMPONENT samples) -# ######################################################################################## -# PC Sampling Samples -# ######################################################################################## - -set(CODE_PRINTING_SAMPLE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/pcsampler/code_printing_sample) -file(GLOB PC_SAMPLING_CODE_PRINTING_FILES ${CODE_PRINTING_SAMPLE_DIR}/*.cpp) -set_source_files_properties(${PC_SAMPLING_CODE_PRINTING_FILES} - PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) -hip_add_executable( - pc_sampling_code_printing ${PC_SAMPLING_CODE_PRINTING_FILES} HIPCC_OPTIONS -std=c++17 - # Include debugging symbols and source for the contextual disassembly - -gdwarf-4) -rocprofiler_sample_add_test(pc_sampling_code_printing "-d;0;-n;100000000;10;43532") - -check_c_source_compiles( - " - #define _GNU_SOURCE - #include - int main() { return memfd_create (\"cmake_test\", 0); } - " - HAVE_MEMFD_CREATE) -if(HAVE_MEMFD_CREATE) - target_compile_definitions(pc_sampling_code_printing PRIVATE HAVE_MEMFD_CREATE) -endif() - -target_link_libraries( - pc_sampling_code_printing - PRIVATE rocprofiler-v2 rocm-dbgapi ${LIBELF_LIBRARIES} ${LIBDW_LIBRARIES} - hsa-runtime64::hsa-runtime64 Threads::Threads dl) -target_include_directories( - pc_sampling_code_printing PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} - ${PROJECT_SOURCE_DIR}) -target_link_options(pc_sampling_code_printing PRIVATE "-Wl,--build-id=md5") -add_dependencies(samples pc_sampling_code_printing) -install(TARGETS pc_sampling_code_printing - RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples - COMPONENT samples) - -install( - DIRECTORY "${PROJECT_SOURCE_DIR}/samples/" - DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples-src - OPTIONAL - COMPONENT samples) - # ######################################################################################## # Scripts to run samples # ######################################################################################## diff --git a/samples/common/common.h b/samples/common/common.h index 39cb17e5b5..3eefe4ca4f 100644 --- a/samples/common/common.h +++ b/samples/common/common.h @@ -249,6 +249,7 @@ int WriteBufferRecords(const rocprofiler_record_header_t* begin, break; } case ROCPROFILER_PC_SAMPLING_RECORD: { + [[deprecated("PC Sampling is deprecated")]] const rocprofiler_record_pc_sample_t* pc_sampling_record = reinterpret_cast(begin); FlushPCSamplingRecord(pc_sampling_record); diff --git a/samples/pcsampler/code_printing_sample/.clangd b/samples/pcsampler/code_printing_sample/.clangd deleted file mode 100644 index de0ea1038d..0000000000 --- a/samples/pcsampler/code_printing_sample/.clangd +++ /dev/null @@ -1,10 +0,0 @@ ---- -If: - PathMatch: main.cpp - -CompileFlags: - Add: ['-x', 'hip'] - -# Local Variables: -# mode: yaml -# End: diff --git a/samples/pcsampler/code_printing_sample/Makefile b/samples/pcsampler/code_printing_sample/Makefile deleted file mode 100644 index bdc38ae3a9..0000000000 --- a/samples/pcsampler/code_printing_sample/Makefile +++ /dev/null @@ -1,70 +0,0 @@ -# -*- makefile-gmake -*- - -ROCM_PATH ?= /opt/rocm -HIP_PATH ?= $(ROCM_PATH)/hip -HIPCC := $(HIP_PATH)/bin/hipcc - -ROCM_PATH ?=/opt/rocm -ROCPROFILER_LIBS_PATH ?=$(ROCM_PATH)/lib -ROCPROFILER_INCLUDES=$(ROCPROFILER_LIBS_PATH)/../include/rocprofiler/ - -ifndef ROCPROFILER_PATH -$(warning You may need to set ROCPROFILER_PATH to the path of the rocprofiler source) -endif - -CXXFLAGS += -std=c++17 -Wall - -ifdef DEBUG -CXXFLAGS += -gdwarf-4 -O0 -else -ifdef DEBUGOPT -CXXFLAGS += -gdwarf-4 -Og -else -CXXFLAGS += -gdwarf-4 -O2 -endif -endif - -### - -srcs := $(wildcard *.cpp) -prog := main - -objs := $(srcs:%.cpp=%.o) -deps := $(srcs:%.cpp=%.d) - -# Kernel program - -CPPFLAGS += -DHAVE_MEMFD_CREATE - -$(prog): CC = $(HIPCC) -$(prog): CPPFLAGS += -I$(ROCPROFILER_INCLUDES) -I$(ROCM_PATH)/include -$(prog): LDFLAGS := -L$(ROCPROFILER_LIBS_PATH) -L$(ROCM_PATH)/lib -$(prog): LDLIBS += -ldl -lpthread -lhsa-runtime64 -lrocprofiler64v2 -lrocm-dbgapi -ldw -lelf -$(objs): CXX = $(HIPCC) - -# Targets - -all: $(prog) - -$(prog): $(objs) - --include $(deps) - -OUTPUT_OPTION = -MMD -MP -o $@ - -%.so: %.o - $(LINK.o) $(OUTPUT_OPTION) $^ $(LDLIBS) - -#COMPILE.hip = $(COMPILE.cpp) -#LINK.hip = $(LINK.cpp) - -#%.o: %.hip -# $(COMPILE.hip) $(OUTPUT_OPTION) $< - -clean: - $(RM) $(prog) $(objs) $(deps) - -distclean: | clean - $(RM) compile_commands.json - -.PHONY: all clean distclean diff --git a/samples/pcsampler/code_printing_sample/README.md b/samples/pcsampler/code_printing_sample/README.md deleted file mode 100644 index 8bf0f38a79..0000000000 --- a/samples/pcsampler/code_printing_sample/README.md +++ /dev/null @@ -1,149 +0,0 @@ -# ROCProfiler PC sampling example code - -The ROCProfiler library includes an API to enable periodic sampling of the GPU -program counter during kernel execution. This program demonstrates the PC -sampling API, with additional code to illustrate a typical non-trivial use case: -correlation of sampled PC addresses with their disassembled machine code, as -well as source code and symbolic debugging information if available. - -## Building the demo program - -If your ROCm installation already includes ROCProfiler, the only requirements to -build the demo program are: - -* GNU `make` -* libdw (**not** libdwarf) -* libelf - -If ROCm is installed in the standard location (`/opt/rocm`), running `make` in -the same directory as this README should work; otherwise, set `ROCM_PATH` to the -location of the ROCm installation in your environment and `ROCPROFILER_PATH` to -the location of the ROCProfiler source repo before running `make`. - -If your ROCm installation does **not** include ROCProfiler, you will need to build -it yourself. This demo program will be built as part of that process. See the -main ROCProfiler README for additional requirements and directions. - -## Running the demo program - -The demo program simply fills a vector with random 64-bit unsigned integers and -tallies the count of those greater than the mandatory `MIN` argument: - -``` -usage: code_printing_sample [OPTION]... MIN [SEED] - -d DEV HIP device number - -n LEN Length of random integer array - -D Print kernel disassembly - -P Print source and disassembly of sampled PC locations -where - DEV : i32 - MIN : u64 - LEN : u64 - SEED : u64 -``` - -### Defaults and troubleshooting - -* `-d`: use HIP device 0 -* `-n`: 4194304 (1024 * 1024 * 4) -* `-D`: false -* `-P`: false -* `SEED`: random seed; taken from the system's monotonic clock - -The program contains two trivial GPU kernels: an implementation of `memset`, and -the parallel counting procedure. Because the actual point is to demonstrate the -PC sampling functionality, it is recommended to use the `-n` option with an -argument such that the allocated vector fits in the smaller of available host as -well as device memory, but sufficiently large argument such that the kernels run -long enough for ROCProfiler to actually collect some samples. - -In order for the `-P` option to display source, the demo program must have been -built with debug symbols (at least `-gdwarf-4`). Any optimization level is -fine, but if the kernels run too quickly for ROCProfiler to collect any samples -even when a very large vector is given with the `-n` option, try rebuilding the -demo program without optimizations by adding `-O0` to the `hipcc` compilation -flags. - -## Files - -* `main.cpp`: initializes ROCProfiler and PC sampling and runs the GPU kernels -* `code_printing.cpp`: inspects the ELF and DWARF info for the GPU programs - embedded in the host binary and uses amd-dbgapi to print disassembly and - source -* `disassembly.cpp`: wrapper for `code_printing.cpp` - -## PC sampling API - -Adding PC sampling to a program already using the ROCProfiler API requires only -two changes: - -1. Call `rocprofiler_create_filter` to create a `ROCPROFILER_PC_SAMPLING_COLLECTION` - filter, then `rocprofiler_set_filter_buffer` to add the filter to the desired - buffer (see functions `main` and `run_kernel` in `main.cpp`) - -2. Handle records of kind `ROCPROFILER_PC_SAMPLING_RECORD` in the buffer callback - function. These should be cast to `rocprofiler_record_pc_sample_t *` (see - function `callback_flush_fn` in `main.cpp`) - -Like all ROCProfiler records, PC sample records contain a standard header followed -by one or more payloads: - -```c -/** - * PC sample record: contains the program counter/instruction pointer observed - * during periodic sampling of a kernel - */ -typedef struct { - /** - * ROCProfiler General Record base header to identify the id and kind of every - * record - */ - rocprofiler_record_header_t header; - /** - * PC sample data - */ - rocprofiler_pc_sample_t pc_sample; -} rocprofiler_record_pc_sample_t; -``` - -PC samples are delivered via the normal ROCProfiler buffer callback mechanism, -along with some additional information allowing each sample to be associated -with a unique, individual kernel execution: - -```c -/** - * An individual PC sample - */ -typedef struct { - /** - * Kernel dispatch ID. This is used by PC sampling to associate samples with - * individual dispatches and is unrelated to any user-supplied correlation ID - */ - rocprofiler_kernel_dispatch_id_t dispatch_id; - union { - /** - * Host timestamp - */ - rocprofiler_timestamp_t timestamp; - /** - * GPU clock counter (not currently used) - */ - uint64_t cycle; - }; - /** - * Sampled program counter - */ - uint64_t pc; - /** - * Sampled shader element - */ - uint32_t se; - /** - * Sampled GPU agent - */ - rocprofiler_agent_id_t gpu_id; -} rocprofiler_pc_sample_t; -``` - -PC sampling is started and stopped with `rocprofiler_start_session` and -`rocprofiler_terminate_session`, just like other profiling activities. diff --git a/samples/pcsampler/code_printing_sample/code_printing.cpp b/samples/pcsampler/code_printing_sample/code_printing.cpp deleted file mode 100644 index a9e3774a90..0000000000 --- a/samples/pcsampler/code_printing_sample/code_printing.cpp +++ /dev/null @@ -1,1122 +0,0 @@ -/* Copyright (c) 2022 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#if !defined(_GNU_SOURCE) || !defined(_XOPEN_SOURCE) -#define _XOPEN_SOURCE 700 -#endif - -#include "code_printing.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include - -#include - -#include -#include - -/// From rocr_debug_agent -namespace amd::debug_agent { - -enum class log_level_t { - /* Print no messages. */ - none = 0, - /* Print error messages. */ - error = 1, - /* Print error, and warning messages. */ - warning = 2, - /* Print error, warning, and info messages. */ - info = 3, - /* Print error, warning, info, and verbose messages. */ - verbose = 4 -}; - -static log_level_t log_level = log_level_t::warning; - -static std::ofstream agent_out; - -namespace detail { - -/* A macro instead of a variadic template so that the __VAR_ARGS__ are not - evaluated unless the log level indicated they are needed. */ -static void log(log_level_t level, const char* format, ...) -#if defined(__GNUC__) - __attribute__((format(printf, 2, 3))) -#endif // defined(__GNUC__) - ; - -static void log(log_level_t level, const char* format, ...) { - va_list va; - - agent_out << "rocm-debug-agent: "; - - if (level == log_level_t::error) { - agent_out << "error: "; - } else if (level == log_level_t::warning) { - agent_out << "warning: "; - } - - va_start(va, format); - size_t size = vsnprintf(NULL, 0, format, va); - va_end(va); - - va_start(va, format); - std::string str(size, '\0'); - vsprintf(&str[0], format, va); - va_end(va); - - agent_out << str << std::endl; -} - -} // namespace detail - -#define agent_log(level, format, ...) \ - do { \ - if (level <= amd::debug_agent::log_level) { \ - amd::debug_agent::detail::log(level, format, ##__VA_ARGS__); \ - } \ - } while (0) - -static void set_log_level(log_level_t level) { - log_level = level; - switch (level) { - case log_level_t::none: - amd_dbgapi_set_log_level(AMD_DBGAPI_LOG_LEVEL_NONE); - break; - case log_level_t::verbose: - amd_dbgapi_set_log_level(AMD_DBGAPI_LOG_LEVEL_VERBOSE); - break; - case log_level_t::info: - amd_dbgapi_set_log_level(AMD_DBGAPI_LOG_LEVEL_INFO); - break; - case log_level_t::warning: - amd_dbgapi_set_log_level(AMD_DBGAPI_LOG_LEVEL_WARNING); - break; - case log_level_t::error: - amd_dbgapi_set_log_level(AMD_DBGAPI_LOG_LEVEL_FATAL_ERROR); - break; - } -} - -/* A macro instead of a variadic template so that format is still a string - literal when passed to agent_log. */ -#define agent_warning(format, ...) agent_log(log_level_t::warning, format, ##__VA_ARGS__) - -#define agent_error(format, ...) \ - do { \ - agent_log(log_level_t::error, format, ##__VA_ARGS__); \ - abort(); \ - } while (false) - -#define agent_assert_fail(assertion, file, line) \ - []() { agent_error("%s:%d: Assertion `%s' failed.", file, line, assertion); }() - -#define DBGAPI_CHECK(expr) \ - do { \ - if (amd_dbgapi_status_t status = (expr); status != AMD_DBGAPI_STATUS_SUCCESS) { \ - agent_error("%s:%d: %s failed (rc=%d)", __FILE__, __LINE__, #expr, status); \ - } \ - } while (false) - -#define DEBUG_AGENT_ASSERTION_ENABLED 1 - -#if defined(DEBUG_AGENT_ASSERTION_ENABLED) -#define agent_assert(expr) ((void)((expr) ? 0 : (agent_assert_fail(#expr, __FILE__, __LINE__), 0))) -#else // !defined(DEBUG_AGENT_ASSERTION_ENABLED) -#define agent_assert(expr) ((void)0) -#endif // !defined(DEBUG_AGENT_ASSERTION_ENABLED) - -code_object_t::code_object_t(amd_dbgapi_code_object_id_t code_object_id) - : m_code_object_id(code_object_id) { - if (amd_dbgapi_code_object_get_info(code_object_id, AMD_DBGAPI_CODE_OBJECT_INFO_LOAD_ADDRESS, - sizeof(m_load_address), - &m_load_address) != AMD_DBGAPI_STATUS_SUCCESS) { - agent_warning("could not get the code object's load address"); - return; - } - - char* value; - if (amd_dbgapi_code_object_get_info(m_code_object_id, AMD_DBGAPI_CODE_OBJECT_INFO_URI_NAME, - sizeof(value), &value) != AMD_DBGAPI_STATUS_SUCCESS) { - agent_warning("could not get the code object's URI"); - return; - } - - m_uri.assign(value); - free(value); -} - -code_object_t::code_object_t(code_object_t&& rhs) - : m_load_address(rhs.m_load_address), - m_mem_size(rhs.m_mem_size), - m_uri(std::move(rhs.m_uri)), - m_code_object_id(rhs.m_code_object_id), - m_elf_amdgpu_machine(rhs.m_elf_amdgpu_machine) { - m_fd = rhs.m_fd; - rhs.m_fd.reset(); -} - -code_object_t::~code_object_t() { - if (m_fd) { - ::close(*m_fd); - } -} - -std::optional code_object_t::find_symbol( - amd_dbgapi_global_address_t address) { - /* Load the symbol table. */ - load_symbol_map(); - - if (auto it = m_symbol_map->upper_bound(address); it != m_symbol_map->begin()) { - if (auto&& [symbol_value, symbol] = *std::prev(it); address < (symbol_value + symbol.second)) { - std::string symbol_name = symbol.first; - - if (int status; auto* demangled_name = - abi::__cxa_demangle(symbol_name.c_str(), nullptr, nullptr, &status)) { - symbol_name = demangled_name; - free(demangled_name); - } - - return symbol_info_t{std::move(symbol_name), symbol_value, symbol.second}; - } - } - - return {}; -} - -void code_object_t::open() { - const std::string protocol_delim{"://"}; - - size_t protocol_end = m_uri.find(protocol_delim); - std::string protocol = m_uri.substr(0, protocol_end); - protocol_end += protocol_delim.length(); - - std::transform(protocol.begin(), protocol.end(), protocol.begin(), - [](unsigned char c) { return std::tolower(c); }); - - std::string path; - size_t path_end = m_uri.find_first_of("#?", protocol_end); - if (path_end != std::string::npos) { - path = m_uri.substr(protocol_end, path_end++ - protocol_end); - } else { - path = m_uri.substr(protocol_end); - } - - /* %-decode the string. */ - std::string decoded_path; - decoded_path.reserve(path.length()); - for (size_t i = 0; i < path.length(); ++i) - if (path[i] == '%' && std::isxdigit(path[i + 1]) && std::isxdigit(path[i + 2])) { - decoded_path += std::stoi(path.substr(i + 1, 2), 0, 16); - i += 2; - } else { - decoded_path += path[i]; - } - - /* Tokenize the query/fragment. */ - std::vector tokens; - size_t pos, last = path_end; - while ((pos = m_uri.find('&', last)) != std::string::npos) { - tokens.emplace_back(m_uri.substr(last, pos - last)); - last = pos + 1; - } - if (last != std::string::npos) { - tokens.emplace_back(m_uri.substr(last)); - } - - /* Create a tag-value map from the tokenized query/fragment. */ - std::unordered_map params; - std::for_each(tokens.begin(), tokens.end(), [&](std::string& token) { - size_t delim = token.find('='); - if (delim != std::string::npos) { - params.emplace(token.substr(0, delim), token.substr(delim + 1)); - } - }); - - std::vector buffer; - try { - size_t offset{0}, size{0}; - - if (auto offset_it = params.find("offset"); offset_it != params.end()) { - offset = std::stoul(offset_it->second, nullptr, 0); - } - - if (auto size_it = params.find("size"); size_it != params.end()) { - if (!(size = std::stoul(size_it->second, nullptr, 0))) { - return; - } - } - - if (protocol == "file") { - std::ifstream file(decoded_path, std::ios::in | std::ios::binary); - if (!file) { - agent_warning("could not open `%s'", decoded_path.c_str()); - return; - } - - if (!size) { - file.ignore(std::numeric_limits::max()); - size_t bytes = file.gcount(); - file.clear(); - - if (bytes < offset) { - agent_warning("invalid uri `%s' (file size < offset)", decoded_path.c_str()); - return; - } - size = bytes - offset; - } - - file.seekg(offset, std::ios_base::beg); - buffer.resize(size); - file.read(&buffer[0], size); - } else if (protocol == "memory") { - if (!offset || !size) { - agent_warning("invalid uri `%s' (offset and size must be != 0", m_uri.c_str()); - return; - } - - amd_dbgapi_process_id_t process_id; - if (amd_dbgapi_code_object_get_info(m_code_object_id, AMD_DBGAPI_CODE_OBJECT_INFO_PROCESS, - sizeof(process_id), - &process_id) != AMD_DBGAPI_STATUS_SUCCESS) - agent_error("could not get the process from the agent"); - - buffer.resize(size); - if (amd_dbgapi_read_memory(process_id, AMD_DBGAPI_WAVE_NONE, AMD_DBGAPI_LANE_NONE, - AMD_DBGAPI_ADDRESS_SPACE_GLOBAL, offset, &size, - buffer.data()) != AMD_DBGAPI_STATUS_SUCCESS) { - agent_warning("could not read memory at 0x%lx", offset); - return; - } - } else { - agent_warning("\"%s\" protocol not supported", protocol.c_str()); - return; - } - } catch (...) { - } - - int fd = -#if HAVE_MEMFD_CREATE - ::memfd_create(m_uri.c_str(), MFD_ALLOW_SEALING | MFD_CLOEXEC); -#else // !HAVE_MEMFD_CREATE - ::open("/tmp", O_TMPFILE | O_RDWR, 0666); -#endif // !HAVE_MEMFD_CREATE - if (fd == -1) { - agent_warning("could not create a temporary file for code object"); - return; - } - - if (size_t size = ::write(fd, buffer.data(), buffer.size()); size != buffer.size()) { - agent_warning("could not write to the temporary file"); - return; - } - - ::lseek(fd, 0, SEEK_SET); - - /* Calculate the size of the code object as loaded in memory. Its size is - the distance of the end of the highest segment from the load address. */ - std::unique_ptr elf(elf_begin(fd, ELF_C_READ, nullptr), - [](Elf* elf) { elf_end(elf); }); - if (!elf) { - agent_warning("elf_begin failed for `%s'", m_uri.c_str()); - return; - } - - Elf64_Ehdr* ehdr = elf64_getehdr(elf.get()); - if (!ehdr) { - agent_warning("elf64_getehdr failed for `%s'", m_uri.c_str()); - return; - } - m_elf_amdgpu_machine = ehdr->e_flags & ELF::EF_AMDGPU_MACH; - - size_t phnum; - if (elf_getphdrnum(elf.get(), &phnum) != 0) { - agent_warning("elf_getphdrnum failed for `%s'", m_uri.c_str()); - return; - } - - for (size_t i = 0; i < phnum; ++i) { - GElf_Phdr phdr_mem; - GElf_Phdr* phdr = gelf_getphdr(elf.get(), i, &phdr_mem); - if (!phdr) { - agent_warning("gelf_getphdr failed for `%s'", m_uri.c_str()); - return; - } - - if (phdr->p_type == PT_LOAD) { - m_mem_size = std::max(m_mem_size, phdr->p_vaddr + phdr->p_memsz); - } - } - - m_fd.emplace(fd); -} - -static amd_dbgapi_callbacks_t dbgapi_callbacks = { - .allocate_memory = malloc, - .deallocate_memory = free, - - .get_os_pid = - [](amd_dbgapi_client_process_id_t client_process_id, pid_t* pid) { - *pid = getpid(); - return AMD_DBGAPI_STATUS_SUCCESS; - }, - - .insert_breakpoint = - [](amd_dbgapi_client_process_id_t client_process_id, amd_dbgapi_global_address_t address, - amd_dbgapi_breakpoint_id_t breakpoint_id) { return AMD_DBGAPI_STATUS_SUCCESS; }, - - .remove_breakpoint = - [](amd_dbgapi_client_process_id_t client_process_id, - amd_dbgapi_breakpoint_id_t breakpoint_id) { return AMD_DBGAPI_STATUS_SUCCESS; }, - - .log_message = - [](amd_dbgapi_log_level_t level, const char* message) { - agent_out << "rocm-dbgapi: " << message << std::endl; - }}; - -static std::optional>> get_source_file_index( - const std::string& file_name) { - static std::unordered_map> file_map; - - if (auto it = file_map.find(file_name); it != file_map.end()) { - return it->second; - } - - std::ifstream file(file_name); - if (!file) { - return std::nullopt; - } - - auto [it, success] = file_map.emplace(file_name, std::vector{}); - agent_assert(success && "emplace should have succeeded"); - - auto& lines = it->second; - std::string line; - - while (std::getline(file, line)) { - lines.emplace_back(line); - } - - return lines; -} - -void code_object_t::load_symbol_map() { - agent_assert(is_open() && "code object is not opened"); - - if (m_symbol_map.has_value()) { - return; - } - - m_symbol_map.emplace(); - - std::unique_ptr elf(elf_begin(*m_fd, ELF_C_READ, nullptr), - [](Elf* elf) { elf_end(elf); }); - - if (!elf) { - return; - } - - /* Slurp the symbol table. */ - Elf_Scn* scn = nullptr; - while ((scn = elf_nextscn(elf.get(), scn)) != nullptr) { - GElf_Shdr shdr_mem; - GElf_Shdr* shdr = gelf_getshdr(scn, &shdr_mem); - if (shdr->sh_type != SHT_SYMTAB && shdr->sh_type != SHT_DYNSYM) { - continue; - } - - Elf_Data* data = elf_getdata(scn, nullptr); - if (!data) { - continue; - } - - size_t symbol_count = data->d_size / gelf_fsize(elf.get(), ELF_T_SYM, 1, EV_CURRENT); - for (size_t j = 0; j < symbol_count; ++j) { - GElf_Sym sym_mem; - GElf_Sym* sym = gelf_getsym(data, j, &sym_mem); - - if (GELF_ST_TYPE(sym->st_info) != STT_FUNC || sym->st_shndx == SHN_UNDEF) { - continue; - } - - std::string symbol_name{elf_strptr(elf.get(), shdr->sh_link, sym->st_name)}; - - auto [it, success] = m_symbol_map->emplace(m_load_address + sym->st_value, - std::make_pair(symbol_name, sym->st_size)); - - /* If there already was a symbol defined at this address, but this - new symbol covers a larger address range, replace the old symbol - with this new one. */ - if (!success && sym->st_size > it->second.second) { - it->second = std::make_pair(symbol_name, sym->st_size); - } - } - } - - /* TODO: If we did not see a symbtab, check the dynamic segment. */ -} - -void code_object_t::load_debug_info() { - agent_assert(is_open() && "code object is not opened"); - - if (m_line_number_map.has_value() && m_pc_ranges_map.has_value()) { - return; - } - - m_line_number_map.emplace(); - m_pc_ranges_map.emplace(); - - std::unique_ptr dbg(dwarf_begin(*m_fd, DWARF_C_READ), - [](Dwarf* dbg) { dwarf_end(dbg); }); - - if (!dbg) { - return; - } - - Dwarf_Off cu_offset{0}, next_offset; - size_t header_size; - - while ( - !dwarf_nextcu(dbg.get(), cu_offset, &next_offset, &header_size, nullptr, nullptr, nullptr)) { - Dwarf_Die die; - if (!dwarf_offdie(dbg.get(), cu_offset + header_size, &die)) { - continue; - } - - ptrdiff_t offset = 0; - Dwarf_Addr base, start{0}, end{0}; - - /* dwarf_ranges returns a single contiguous range - (DW_AT_low_pc/DW_AT_high_pc), or a series of non-contiguous ranges - (DW_AT_ranges). */ - while ((offset = dwarf_ranges(&die, offset, &base, &start, &end) > 0)) { - m_pc_ranges_map->emplace(m_load_address + start, m_load_address + end); - } - - Dwarf_Lines* lines; - size_t line_count; - if (dwarf_getsrclines(&die, &lines, &line_count)) { - continue; - } - - for (size_t i = 0; i < line_count; ++i) { - Dwarf_Addr addr; - int line_number; - - if (Dwarf_Line* line = dwarf_onesrcline(lines, i); line && !dwarf_lineaddr(line, &addr) && - !dwarf_lineno(line, &line_number) && line_number) { - m_line_number_map->emplace( - m_load_address + addr, - std::make_pair(dwarf_linesrc(line, nullptr, nullptr), line_number)); - } - } - - cu_offset = next_offset; - } -} - -void code_object_t::disassemble_around(amd_dbgapi_architecture_id_t architecture_id, - amd_dbgapi_global_address_t pc) { - amd_dbgapi_process_id_t process_id; - if (amd_dbgapi_code_object_get_info(m_code_object_id, AMD_DBGAPI_CODE_OBJECT_INFO_PROCESS, - sizeof(process_id), - &process_id) != AMD_DBGAPI_STATUS_SUCCESS) { - agent_error("could not get the process from the agent"); - } - - amd_dbgapi_size_t largest_instruction_size; - if (amd_dbgapi_architecture_get_info(architecture_id, - AMD_DBGAPI_ARCHITECTURE_INFO_LARGEST_INSTRUCTION_SIZE, - sizeof(largest_instruction_size), - &largest_instruction_size) != AMD_DBGAPI_STATUS_SUCCESS) { - agent_error("could not get the instruction size from the architecture"); - } - - /* Load the line number table, and low/high pc for all CUs. */ - load_debug_info(); - - constexpr int context_byte_size = 24; - amd_dbgapi_global_address_t start_pc; - - /* Try to find a line number that precedes `pc` by `context_byte_size` bytes. - If we don't have a line number map, simply start the disassembly from the - current pc. */ - - if (auto it = m_line_number_map->upper_bound(pc); it != m_line_number_map->begin()) { - do { - it = std::prev(it); - if ((pc - it->first) >= context_byte_size) { - break; - } - } while (it != m_line_number_map->begin()); - - start_pc = it->first; - } else { - /* Don't print any instructions before the current pc. The instructions - are of variable size so we can't reliably tell if we'll land on a - valid instruction. */ - start_pc = pc; - } - - amd_dbgapi_global_address_t end_pc = pc + context_byte_size; - - /* If pc is included in a [lowpc,highpc] interval, clamp start_pc and - end_pc. */ - - if (auto it = m_pc_ranges_map->upper_bound(pc); it != m_pc_ranges_map->begin()) { - if (auto [low_pc, high_pc] = *std::prev(it); pc < high_pc) { - start_pc = std::max(start_pc, low_pc); - end_pc = std::min(end_pc, high_pc); - } - } - - auto symbol = find_symbol(pc); - - agent_out << std::endl << "Disassembly"; - if (symbol) { - agent_out << " for function " << symbol->m_name; - } - agent_out << ":" << std::endl; - - agent_out << " code object: " << m_uri << std::endl; - agent_out << " loaded at: " - << "[0x" << std::hex << m_load_address << "-" - << "0x" << std::hex << (m_load_address + m_mem_size) << "]" << std::endl; - - /* Remember the start_pc address to print the first source line. */ - amd_dbgapi_global_address_t saved_start_pc{start_pc}; - - /* Now that we know start_pc is a valid instruction address, skip ahead until - the distance between start_pc and pc is <= context_byte_size. */ - while ((pc - start_pc) > context_byte_size) { - std::vector buffer(largest_instruction_size); - - amd_dbgapi_size_t size = buffer.size(); - if (amd_dbgapi_read_memory(process_id, AMD_DBGAPI_WAVE_NONE, AMD_DBGAPI_LANE_NONE, - AMD_DBGAPI_ADDRESS_SPACE_GLOBAL, start_pc, &size, - buffer.data()) != AMD_DBGAPI_STATUS_SUCCESS) { - break; - } - - if (amd_dbgapi_disassemble_instruction(architecture_id, start_pc, &size, buffer.data(), nullptr, - amd_dbgapi_symbolizer_id_t{}, - nullptr) != AMD_DBGAPI_STATUS_SUCCESS) { - break; - } - - if ((pc - (start_pc + size)) < context_byte_size) { - break; - } - - start_pc += size; - } - - std::string prev_file_name; - size_t prev_line_number{0}; - amd_dbgapi_global_address_t addr{start_pc}; - - while (addr < end_pc) { - if (auto it = m_line_number_map->find(addr == start_pc ? saved_start_pc : addr); - it != m_line_number_map->end()) { - const std::string& file_name = it->second.first; - size_t line_number = it->second.second; - - if (file_name != prev_file_name || line_number != prev_line_number) { - agent_out << std::endl; - } - - if (file_name != prev_file_name) { - agent_out << file_name << ":" << std::endl; - } - - /* If the source line for `addr` is a different line than the - previous one printed, then print it. If the previous line printed - is in the same file and an earlier line, and if all the lines - between it and the source line for `addr` have no associated - instructions (indicated by their being no entries in the line - number map that mention them), then display those lines as well as - a source line block. That allows the disassembly to show all the - source file lines, including those that have no associated code. - */ - if (file_name != prev_file_name || line_number != prev_line_number) { - size_t first_line = line_number; - size_t last_line = line_number; - - /* Find the first line to print between prev_line_number and - line_number that does not appear in the line number table. - */ - if (file_name == prev_file_name && (line_number + 1) > prev_line_number) { - while (--first_line > prev_line_number) { - if (std::find_if(m_line_number_map->begin(), m_line_number_map->end(), - [first_line, &file_name](const std::remove_reference_t::value_type& value) { - return file_name == value.second.first && - first_line == value.second.second; - }) != m_line_number_map->end()) - break; - } - /* First is either prev_line_number, or a line associated - with another address, so start at the next line. */ - ++first_line; - } - - for (size_t line = first_line; line <= last_line; ++line) { - agent_out << std::setfill(' ') << std::setw(8) << std::left << std::dec << line; - - if (auto lines = get_source_file_index(file_name); !lines) { - agent_out << file_name << ": No such file or directory."; - } else if (line && line <= lines->get().size()) { - agent_out << lines->get()[line - 1]; - } - - agent_out << std::endl; - } - } - - prev_file_name = file_name; - prev_line_number = line_number; - - /* If the start_pc address is not the begining of a line number - block, then print ... to show that the following instruction is - not the first in the block. */ - if (addr == start_pc && start_pc != saved_start_pc) { - agent_out << " ..." << std::endl; - } - } - - std::vector buffer(largest_instruction_size); - - amd_dbgapi_size_t size = buffer.size(); - if (amd_dbgapi_read_memory(process_id, AMD_DBGAPI_WAVE_NONE, AMD_DBGAPI_LANE_NONE, - AMD_DBGAPI_ADDRESS_SPACE_GLOBAL, addr, &size, - buffer.data()) != AMD_DBGAPI_STATUS_SUCCESS) { - agent_out << "Cannot access memory at address 0x" << std::hex << addr << std::endl; - break; - } - - auto symbolizer = [](amd_dbgapi_symbolizer_id_t symbolizer_id, - amd_dbgapi_global_address_t address, char** symbol_text) { - auto& code_object = *reinterpret_cast(symbolizer_id); - std::stringstream ss; - - ss << "0x" << std::hex << address; - - if (auto&& symbol = code_object.find_symbol(address)) { - ss << " <" << symbol->m_name; - ss << "+" << std::dec << (address - symbol->m_value); - ss << ">"; - } - - *symbol_text = strdup(ss.str().c_str()); - return AMD_DBGAPI_STATUS_SUCCESS; - }; - - char* value; - if (amd_dbgapi_disassemble_instruction(architecture_id, addr, &size, buffer.data(), &value, - reinterpret_cast(this), - symbolizer) != AMD_DBGAPI_STATUS_SUCCESS) - agent_error("amd_dbgapi_disassemble_instruction failed"); - - std::string instruction(value); - free(value); - - agent_out << ((addr == pc) ? " => " : " "); - - agent_out << "0x" << std::hex << addr; - if (symbol) { - agent_out << " <"; - if (addr >= symbol->m_value) { - agent_out << "+" << std::dec << (addr - symbol->m_value); - } else { - agent_out << "-" << std::dec << (symbol->m_value - addr); - } - agent_out << ">"; - } - - agent_out << ": " << instruction << std::endl; - - addr += size; - } - - /* If the end_pc address (addr) is not the beginning of a new line number - block, then print ... to show that the previous instruction was - not the last of the instructions associated with the previous source ine - printed. */ - if (auto it = m_line_number_map->find(addr); it == m_line_number_map->end()) - agent_out << " ..." << std::endl; - - agent_out << std::endl << "End of disassembly." << std::endl; -} - -void code_object_t::disassemble_kernel(amd_dbgapi_architecture_id_t architecture_id, - amd_dbgapi_global_address_t addr, bool const print_src) { - amd_dbgapi_process_id_t process_id; - if (amd_dbgapi_code_object_get_info(m_code_object_id, AMD_DBGAPI_CODE_OBJECT_INFO_PROCESS, - sizeof(process_id), - &process_id) != AMD_DBGAPI_STATUS_SUCCESS) { - agent_error("could not get the process from the agent"); - } - - amd_dbgapi_size_t largest_instruction_size; - if (amd_dbgapi_architecture_get_info(architecture_id, - AMD_DBGAPI_ARCHITECTURE_INFO_LARGEST_INSTRUCTION_SIZE, - sizeof(largest_instruction_size), - &largest_instruction_size) != AMD_DBGAPI_STATUS_SUCCESS) { - agent_error("could not get the instruction size from the architecture"); - } - - /* Load the line number table, and low/high pc for all CUs. */ - load_debug_info(); - - amd_dbgapi_global_address_t start_addr = addr; - - auto symbol = find_symbol(start_addr); - if (!symbol) { - agent_out << std::endl - << "No symbol found at address " << std::hex << std::showbase << start_addr - << std::endl; - return; - } - - amd_dbgapi_global_address_t end_addr = addr + symbol->m_size; - - agent_out << std::endl << "Dump of assembler code"; - if (symbol) { - agent_out << " for function " << symbol->m_name; - } - agent_out << ":" << std::endl; - - agent_out << " code object: " << m_uri << std::endl; - agent_out << " loaded at: " - << "[0x" << std::hex << m_load_address << "-" - << "0x" << std::hex << (m_load_address + m_mem_size) << "]" << std::endl; - - /* Remember the start_pc address to print the first source line. */ - amd_dbgapi_global_address_t saved_start_addr{addr}; - - std::string prev_file_name; - size_t prev_line_number{0}; - - while (addr < end_addr) { - if (!print_src) { - goto do_disassemble; - } - - if (auto it = m_line_number_map->find(addr == start_addr ? saved_start_addr : addr); - it != m_line_number_map->end()) { - const std::string& file_name = it->second.first; - size_t line_number = it->second.second; - - if (file_name != prev_file_name || line_number != prev_line_number) { - agent_out << std::endl; - } - - if (file_name != prev_file_name) { - agent_out << file_name << ":" << std::endl; - } - - /* If the source line for `addr` is a different line than the - previous one printed, then print it. If the previous line printed - is in the same file and an earlier line, and if all the lines - between it and the source line for `addr` have no associated - instructions (indicated by their being no entries in the line - number map that mention them), then display those lines as well as - a source line block. That allows the disassembly to show all the - source file lines, including those that have no associated code. - */ - if (file_name != prev_file_name || line_number != prev_line_number) { - size_t first_line = line_number; - size_t last_line = line_number; - - /* Find the first line to print between prev_line_number and - line_number that does not appear in the line number table. - */ - if (file_name == prev_file_name && (line_number + 1) > prev_line_number) { - while (--first_line > prev_line_number) { - if (std::find_if(m_line_number_map->begin(), m_line_number_map->end(), - [first_line, &file_name](const std::remove_reference_t::value_type& value) { - return file_name == value.second.first && - first_line == value.second.second; - }) != m_line_number_map->end()) - break; - } - /* First is either prev_line_number, or a line associated - with another address, so start at the next line. */ - ++first_line; - } - - for (size_t line = first_line; line <= last_line; ++line) { - agent_out << std::setfill(' ') << std::setw(8) << std::left << std::dec << line; - - if (auto lines = get_source_file_index(file_name); !lines) { - agent_out << file_name << ": No such file or directory."; - } else if (line && line <= lines->get().size()) { - agent_out << lines->get()[line - 1]; - } - - agent_out << std::endl; - } - } - - prev_file_name = file_name; - prev_line_number = line_number; - - /* If the start_pc address is not the begining of a line number - block, then print ... to show that the following instruction is - not the first in the block. */ - if (addr == start_addr && start_addr != saved_start_addr) { - agent_out << " ..." << std::endl; - } - } - - do_disassemble: - std::vector buffer(largest_instruction_size); - - amd_dbgapi_size_t size = buffer.size(); - if (amd_dbgapi_read_memory(process_id, AMD_DBGAPI_WAVE_NONE, AMD_DBGAPI_LANE_NONE, - AMD_DBGAPI_ADDRESS_SPACE_GLOBAL, addr, &size, - buffer.data()) != AMD_DBGAPI_STATUS_SUCCESS) { - agent_out << "Cannot access memory at address 0x" << std::hex << addr << std::endl; - break; - } - - auto symbolizer = [](amd_dbgapi_symbolizer_id_t symbolizer_id, - amd_dbgapi_global_address_t address, char** symbol_text) { - auto& code_object = *reinterpret_cast(symbolizer_id); - std::stringstream ss; - - ss << "0x" << std::hex << address; - - if (auto&& symbol = code_object.find_symbol(address)) { - ss << " <" << symbol->m_name; - ss << "+" << std::dec << (address - symbol->m_value); - ss << ">"; - } - - *symbol_text = strdup(ss.str().c_str()); - return AMD_DBGAPI_STATUS_SUCCESS; - }; - - char* value; - if (amd_dbgapi_disassemble_instruction(architecture_id, addr, &size, buffer.data(), &value, - reinterpret_cast(this), - symbolizer) != AMD_DBGAPI_STATUS_SUCCESS) - agent_error("amd_dbgapi_disassemble_instruction failed"); - - std::string instruction(value); - free(value); - - agent_out << " "; - - agent_out << "0x" << std::hex << addr; - if (symbol) { - agent_out << " <"; - if (addr >= symbol->m_value) { - agent_out << "+" << std::dec << (addr - symbol->m_value); - } else { - agent_out << "-" << std::dec << (symbol->m_value - addr); - } - agent_out << ">"; - } - - agent_out << ": " << instruction << std::endl; - - addr += size; - } - - /* If the end_pc address (addr) is not the beginning of a new line number - block, then print ... to show that the previous instruction was - not the last of the instructions associated with the previous source ine - printed. */ - if (auto it = m_line_number_map->find(addr); it == m_line_number_map->end()) - agent_out << " ..." << std::endl; - - agent_out << std::endl << "End of assembler dump." << std::endl; -} - -bool code_object_t::save(const std::string& directory) const { - agent_assert(is_open() && "code object is not opened"); - - std::string name{m_uri}; - - size_t pos{}; - while ((pos = name.find_first_of(":/#?&="), pos) != std::string::npos) { - name[pos] = '_'; - } - - std::string file_path = directory + '/' + name; - std::ofstream file(file_path, std::ios::out | std::ios::binary); - std::vector buffer(lseek(*m_fd, 0, SEEK_END)); - - ::lseek(*m_fd, 0, SEEK_SET); - if (size_t size = ::read(*m_fd, buffer.data(), buffer.size()); size != buffer.size()) { - return false; - } - - file.write(buffer.data(), buffer.size()); - file.close(); - - return file.good(); -} - -} // namespace amd::debug_agent - -using namespace amd::debug_agent; - -std::tuple> -init_disassembly() { - set_log_level(log_level_t::warning); - if (!agent_out.is_open()) { - agent_out.copyfmt(std::cerr); - agent_out.clear(std::cerr.rdstate()); - agent_out.basic_ios::rdbuf(std::cerr.rdbuf()); - } - - DBGAPI_CHECK(amd_dbgapi_initialize(&dbgapi_callbacks)); - - amd_dbgapi_process_id_t process_id; - DBGAPI_CHECK(amd_dbgapi_process_attach((amd_dbgapi_client_process_id_t)&process_id, &process_id)); - - /* Check the runtime state. */ - for (;;) { - amd_dbgapi_event_id_t event_id; - amd_dbgapi_event_kind_t event_kind; - - DBGAPI_CHECK(amd_dbgapi_process_next_pending_event(process_id, &event_id, &event_kind)); - - if (event_kind == AMD_DBGAPI_EVENT_KIND_RUNTIME) { - amd_dbgapi_runtime_state_t runtime_state; - - DBGAPI_CHECK(amd_dbgapi_event_get_info(event_id, AMD_DBGAPI_EVENT_INFO_RUNTIME_STATE, - sizeof(runtime_state), &runtime_state)); - - switch (runtime_state) { - case AMD_DBGAPI_RUNTIME_STATE_LOADED_SUCCESS: - break; - - case AMD_DBGAPI_RUNTIME_STATE_UNLOADED: - agent_error("invalid runtime state %d", runtime_state); - - case AMD_DBGAPI_RUNTIME_STATE_LOADED_ERROR_RESTRICTION: - agent_error( - "unable to enable GPU debugging due to a " - "restriction error"); - break; - } - } - - /* No more events. */ - if (event_kind == AMD_DBGAPI_EVENT_KIND_NONE) { - break; - } - - DBGAPI_CHECK(amd_dbgapi_event_processed(event_id)); - } - - auto ret = std::make_tuple(process_id, std::map{}); - auto& code_object_map = std::get<1>(ret); - - amd_dbgapi_code_object_id_t* code_objects_id; - size_t code_object_count; - DBGAPI_CHECK(amd_dbgapi_process_code_object_list(process_id, &code_object_count, &code_objects_id, - nullptr)); - - for (size_t i = 0; i < code_object_count; ++i) { - code_object_t code_object(code_objects_id[i]); - - code_object.open(); - if (!code_object.is_open()) { - agent_warning("could not open code_object_%ld", code_objects_id[i].handle); - continue; - } - - code_object_map.emplace(code_object.load_address(), std::move(code_object)); - } - free(code_objects_id); - - return ret; -} - -void disassemble(disassembly_mode const mode, amd_dbgapi_process_id_t const process_id, - std::map& code_object_map, - uint64_t const addr) { - /* This function is not thread-safe and not re-entrant. */ - static std::mutex lock; - if (!lock.try_lock()) { - return; - } - /* Make sure the lock is released when this function returns. */ - std::scoped_lock sl(std::adopt_lock, lock); - - code_object_t* code_object_found{nullptr}; - if (auto it = code_object_map.upper_bound(addr); it != code_object_map.begin()) { - if (auto&& [load_address, code_object] = *std::prev(it); - (addr - load_address) <= code_object.mem_size()) { - code_object_found = &code_object; - } - } - - if (code_object_found) { - amd_dbgapi_architecture_id_t architecture_id; - DBGAPI_CHECK( - amd_dbgapi_get_architecture(code_object_found->elf_amdgpu_machine(), &architecture_id)); - switch (mode) { - case disassembly_mode::KERNEL: - code_object_found->disassemble_kernel(architecture_id, addr); - break; - case disassembly_mode::AROUND: - code_object_found->disassemble_around(architecture_id, addr); - break; - } - } else { - // TODO: Add disassembly even if we did not find a code object - } -} - -void print_pc_context(amd_dbgapi_process_id_t const process_id, - std::map& code_object_map, - amd_dbgapi_global_address_t const pc) { - disassemble(disassembly_mode::AROUND, process_id, code_object_map, pc); -} diff --git a/samples/pcsampler/code_printing_sample/code_printing.hpp b/samples/pcsampler/code_printing_sample/code_printing.hpp deleted file mode 100644 index a3caf0ccb5..0000000000 --- a/samples/pcsampler/code_printing_sample/code_printing.hpp +++ /dev/null @@ -1,104 +0,0 @@ -/* Copyright (c) 2022 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_CODE_PRINTING_HPP_ -#define SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_CODE_PRINTING_HPP_ - -#include -#include -#include -#include - -#include - -namespace amd::debug_agent { - -class code_object_t { - struct symbol_info_t { - const std::string m_name; - amd_dbgapi_global_address_t m_value; - amd_dbgapi_size_t m_size; - }; - - using symbol_map_t = std::optional< - std::map>>; - - public: - void load_symbol_map(); - void load_debug_info(); - - std::optional find_symbol(amd_dbgapi_global_address_t address); - - code_object_t(amd_dbgapi_code_object_id_t code_object_id); - code_object_t(code_object_t&& rhs); - - ~code_object_t(); - - void open(); - bool is_open() const { return m_fd.has_value(); } - - amd_dbgapi_global_address_t load_address() const { return m_load_address; } - amd_dbgapi_size_t mem_size() const { return m_mem_size; } - // FIXME(?): extra function not in rocr-debug-agent - uint32_t elf_amdgpu_machine() const { return m_elf_amdgpu_machine; } - - void disassemble_around(amd_dbgapi_architecture_id_t architecture_id, - amd_dbgapi_global_address_t pc); - - void disassemble_kernel(amd_dbgapi_architecture_id_t architecture_id, - amd_dbgapi_global_address_t start_addr, bool const print_src = false); - - bool save(const std::string& directory) const; - - amd_dbgapi_global_address_t m_load_address{0}; - amd_dbgapi_size_t m_mem_size{0}; - std::optional m_fd; - - std::optional>> - m_line_number_map; - - std::optional> m_pc_ranges_map; - - symbol_map_t m_symbol_map; - std::string m_uri; - amd_dbgapi_code_object_id_t const m_code_object_id; - // FIXME(?): extra field not in rocr-debug-agent - uint32_t m_elf_amdgpu_machine{0}; -}; - -} // namespace amd::debug_agent - -enum struct disassembly_mode { AROUND, KERNEL }; - -std::tuple> -init_disassembly(); - -void disassemble( - disassembly_mode const mode, amd_dbgapi_process_id_t const process_id, - std::map& code_object_map, - uint64_t const addr); - -void print_pc_context( - amd_dbgapi_process_id_t const process_id, - std::map& code_object_map, - amd_dbgapi_global_address_t const pc); - -#endif // SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_CODE_PRINTING_HPP_ diff --git a/samples/pcsampler/code_printing_sample/disassembly.cpp b/samples/pcsampler/code_printing_sample/disassembly.cpp deleted file mode 100644 index 326df76036..0000000000 --- a/samples/pcsampler/code_printing_sample/disassembly.cpp +++ /dev/null @@ -1,176 +0,0 @@ -/* Copyright (c) 2022 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -#include - -#include "code_printing.hpp" -#include "program.hpp" - -struct libc_freer { - void operator()(char* p) { free(p); } -}; - -namespace util { - -template -static void hash_combine(size_t& hsh, T const& v, Ts const&... rest) { - hsh ^= std::hash{}(v) + 0x9e3779b9 + (hsh << 6) + (hsh >> 2); - (hash_combine(hsh, rest), ...); -} - -} // namespace util - -[[maybe_unused]] static inline bool operator==(hsa_executable_t const& l, - hsa_executable_t const& r) { - return l.handle == r.handle; -} - -[[maybe_unused]] static inline bool operator==(rocprofiler_kernel_dispatch_id_t const& l, - rocprofiler_kernel_dispatch_id_t const& r) { - return l.value == r.value; -} - -static inline bool operator==(amd_dbgapi_process_id_t const& l, amd_dbgapi_process_id_t const& r) { - return l.handle == r.handle; -} - -static inline bool operator!=(amd_dbgapi_process_id_t const& l, amd_dbgapi_process_id_t const& r) { - return !(l == r); -} - -namespace std { - -template <> struct hash { - size_t operator()(hsa_executable_t const& v) const { - size_t ret = 0; - util::hash_combine(ret, v.handle); - return ret; - } -}; - -template <> struct hash { - size_t operator()(rocprofiler_kernel_dispatch_id_t const& v) const { - size_t ret = 0; - util::hash_combine(ret, v.value); - return ret; - } -}; - -} // namespace std - -struct disassembly_ctx_t { - disassembly_ctx_t(); - ~disassembly_ctx_t(); - - void disassemble_kernels(bool const reinitialize); - void init(); - bool inited() const; - void reset(); - - amd_dbgapi_process_id_t process_id; - std::map codeobjs; -}; - -disassembly_ctx_t::disassembly_ctx_t() : process_id(AMD_DBGAPI_PROCESS_NONE), codeobjs() {} - -disassembly_ctx_t::~disassembly_ctx_t() { reset(); } - -void disassembly_ctx_t::disassemble_kernels(bool const reinitialize) { - if (reinitialize) { - reset(); - } - if (!inited()) { - init(); - } - - auto it = codeobjs.begin(); - auto const end = codeobjs.end(); - auto const pred = [](decltype(*it)& x) { - /* - * A lame filter for the kernels in the current file, because nothing - * else in this little demo will have the URL prefix of `file://`. - */ - return x.second.m_uri.find("file://", 0, 7) != std::string::npos; - }; - while (end != (it = std::find_if(it, end, pred))) { - auto& codeobj = it->second; - codeobj.load_symbol_map(); - if (!codeobj.m_symbol_map) { - fputs(PROGNAME ": error: failed to load symbol map\n", stderr); - break; - } - - for (auto const& sym : *codeobj.m_symbol_map) { - auto const& addr = sym.first; - ::disassemble(disassembly_mode::KERNEL, process_id, codeobjs, addr); - } - - ++it; - } -} - -inline void disassembly_ctx_t::init() { std::tie(process_id, codeobjs) = init_disassembly(); } - -inline bool disassembly_ctx_t::inited() const { return AMD_DBGAPI_PROCESS_NONE != process_id; } - -void disassembly_ctx_t::reset() { - codeobjs.clear(); - if (AMD_DBGAPI_PROCESS_NONE.handle != process_id.handle) { - amd_dbgapi_process_detach(process_id); - amd_dbgapi_finalize(); - process_id = AMD_DBGAPI_PROCESS_NONE; - } -} - -static disassembly_ctx_t g_dis; - -void disassembly_disassemble_kernels(bool const reinitialize) { - g_dis.disassemble_kernels(reinitialize); -} - -void disassembly_print_pc_sample_context(amd_dbgapi_global_address_t const pc) { - if (!g_dis.inited()) { - g_dis.init(); - } - print_pc_context(g_dis.process_id, g_dis.codeobjs, pc); -} diff --git a/samples/pcsampler/code_printing_sample/disassembly.hpp b/samples/pcsampler/code_printing_sample/disassembly.hpp deleted file mode 100644 index 5ea9912301..0000000000 --- a/samples/pcsampler/code_printing_sample/disassembly.hpp +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright (c) 2022 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_DISASSEMBLY_HPP_ -#define SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_DISASSEMBLY_HPP_ - -#include - -void disassembly_disassemble_kernels(bool const); - -void disassembly_print_pc_sample_context(amd_dbgapi_global_address_t const); - -#endif // SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_DISASSEMBLY_HPP_ diff --git a/samples/pcsampler/code_printing_sample/main.cpp b/samples/pcsampler/code_printing_sample/main.cpp deleted file mode 100644 index 3b698a883e..0000000000 --- a/samples/pcsampler/code_printing_sample/main.cpp +++ /dev/null @@ -1,383 +0,0 @@ -/* Copyright (c) 2022 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -#include - -#include "program.hpp" -#include "program_options.hpp" -#include "disassembly.hpp" - -#define XSTR(x) STR(x) -#define STR(x) #x -#define DBL_FMT "." XSTR(DBL_DECIMAL_DIG) "f" - -namespace util { - -struct hipMalloc_freer { - void operator()(void* const ptr) { (void)hipFree(ptr); } -}; - -} // namespace util - -namespace prng { - -static uint64_t splitmix64_next(uint64_t* const sm64_state) { - uint64_t z = (*sm64_state += 0x9e3779b97f4a7c15); - z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; - z = (z ^ (z >> 27)) * 0x94d049bb133111eb; - return z ^ (z >> 31); -} - -static inline uint64_t rotl64(const uint64_t x, int k) { return (x << k) | (x >> (64 - k)); } - -static uint64_t xrs_next(uint64_t* const xrs_state) { - const uint64_t result = rotl64(xrs_state[0] + xrs_state[3], 23) + xrs_state[0]; - - const uint64_t t = xrs_state[1] << 17; - - xrs_state[2] ^= xrs_state[0]; - xrs_state[3] ^= xrs_state[1]; - xrs_state[1] ^= xrs_state[2]; - xrs_state[0] ^= xrs_state[3]; - - xrs_state[2] ^= t; - - xrs_state[3] = rotl64(xrs_state[3], 45); - - return result; -} - -} // namespace prng - -namespace kernel { - -template __global__ static void memset_gpu(T* const s, T const c, size_t const n) { - size_t i_start = threadIdx.x + blockIdx.x * blockDim.x; - size_t i_shift = blockDim.x * gridDim.x; - for (size_t i = i_start; i < n; i += i_shift) { - s[i] = c; - } -} - -template -__global__ static void count_gpu(T const* const xs, T* const out, size_t const n, - size_t const nblocks, T const gt) { - size_t i_start = threadIdx.x + blockIdx.x * blockDim.x; - size_t i_shift = blockDim.x * gridDim.x; - for (size_t i = i_start; i < n; i += i_shift) { - if (xs[i] > gt) { - atomicAdd(&out[i % nblocks], 1); - } - } -} - -} // namespace kernel - -static char const GETOPT_ARGS[] = "cd:mn:DP"; - -static void usage() { - fputs("usage: " PROGNAME - " [OPTION]... MIN [SEED]\n" - " -d DEV\tHIP device number\n" - " -n LEN\tLength of random integer array\n" - " -D\t\tPrint kernel disassembly\n" - " -P\t\tPrint source and disassembly of sampled PC locations\n" - "where\n" - " DEV : i32\n" - " MIN : u64\n" - " LEN : u64\n" - " SEED : u64\n", - stderr); -} - -static int get_options(int argc, char** argv, program_options* const opts) { - int opt; - - while (-1 != (opt = getopt(argc, argv, GETOPT_ARGS))) { - switch (opt) { - case 'd': - // TODO error checking - opts->device = strtol(optarg, nullptr, 10); - break; - case 'n': - // TODO error checking - opts->rands_len = strtoul(optarg, nullptr, 10); - break; - case 'D': - opts->disassemble = true; - break; - case 'P': - opts->pc_sampling = true; - break; - default: - usage(); - return EXIT_FAILURE; - } - } - - auto const optcount = argc - optind; - if (!(1 == optcount || 2 == optcount)) { - usage(); - return EXIT_FAILURE; - } - - // TODO error checking - opts->gt = strtoul(argv[optind], nullptr, 10); - if (2 == argc - optind) { - opts->seed = strtoull(argv[optind + 1], nullptr, 10); - } - - return EXIT_SUCCESS; -} - -static program_options g_opts; - -static void callback_flush_fn(rocprofiler_record_header_t const* record, - rocprofiler_record_header_t const* end_record, - rocprofiler_session_id_t session_id, - rocprofiler_buffer_id_t buffer_id) { - while (record < end_record) { - if (nullptr == record) { - break; - } - if (ROCPROFILER_PC_SAMPLING_RECORD == record->kind) { - auto const& pcr = (rocprofiler_record_pc_sample_t&)*record; - printf("dispatch[%" PRIu64 "] timestamp(%" PRIu64 ") gpu_id(%#" PRIx64 ") pc-sample(%#" PRIx64 - ") se(%" PRIu32 ")\n", - pcr.pc_sample.dispatch_id.value, pcr.pc_sample.timestamp.value, - pcr.pc_sample.gpu_id.handle, pcr.pc_sample.pc, pcr.pc_sample.se); - if (g_opts.pc_sampling) { - disassembly_print_pc_sample_context(pcr.pc_sample.pc); - } - } - rocprofiler_next_record(record, &record, session_id, buffer_id); - } -} - -static int run_kernel(program_options const& opts) { - rocprofiler_session_id_t sid; - rocprofiler_filter_id_t fid, fid2; - rocprofiler_buffer_id_t bid; - auto rocprofiler_ok = ROCPROFILER_STATUS_SUCCESS; - - if (opts.pc_sampling) { - ROCPROFILER_CHECK(rocprofiler_create_session(ROCPROFILER_NONE_REPLAY_MODE, &sid), - rocprofiler_ok); - if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) { - fputs("error: failed to create rocprofiler session\n", stderr); - return EXIT_FAILURE; - } - - rocprofiler_filter_property_t property{}; - - ROCPROFILER_CHECK( - rocprofiler_create_buffer(sid, callback_flush_fn, static_cast(0x1000), &bid), - rocprofiler_ok); - if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) { - fputs("error: failed to add PC sampling session mode\n", stderr); - goto out; - } - - ROCPROFILER_CHECK(rocprofiler_create_filter(sid, ROCPROFILER_PC_SAMPLING_COLLECTION, - rocprofiler_filter_data_t{}, 0, &fid, property), - rocprofiler_ok); - if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) { - goto cleanup; - } - - ROCPROFILER_CHECK(rocprofiler_create_filter(sid, ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION, - rocprofiler_filter_data_t{}, 0, &fid2, property), - rocprofiler_ok); - if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) { - goto cleanup; - } - - ROCPROFILER_CHECK(rocprofiler_set_filter_buffer(sid, fid, bid), rocprofiler_ok); - if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) { - goto cleanup; - } - - ROCPROFILER_CHECK(rocprofiler_set_filter_buffer(sid, fid2, bid), rocprofiler_ok); - if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) { - goto cleanup; - } - - ROCPROFILER_CHECK(rocprofiler_start_session(sid), rocprofiler_ok); - if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) { - goto cleanup; - } - } - - { - printf("seed = %" PRIu64 "\n", opts.seed); - - std::vector rands(opts.rands_len); - using rands_elt_t = decltype(rands)::value_type; - - uint64_t sm64_state = opts.seed, xrs_state[4]; - - { - using prng::splitmix64_next; - using prng::xrs_next; - - // Initialize the Xoroshiro PRNG - xrs_state[0] = splitmix64_next(&sm64_state); - xrs_state[1] = splitmix64_next(&sm64_state); - xrs_state[2] = splitmix64_next(&sm64_state); - xrs_state[3] = splitmix64_next(&sm64_state); - - // Fill rands with random integers - for (auto& i : rands) { - i = xrs_next(xrs_state); - } - } - - struct tm { - using monoclk = std::chrono::steady_clock; - using dur = std::chrono::duration; - }; - - using util::hipMalloc_freer; - - auto const begin_time = tm::monoclk::now(); - - auto hip_ok = hipSuccess; - do { - HIP_CHECK_BREAK(hipSetDevice(opts.device), hip_ok); - - auto const rands_nbytes = rands.size() * sizeof(rands_elt_t); - std::unique_ptr rands_gpu; - { - rands_elt_t* rands_gpu_ptr; - HIP_CHECK_BREAK(hipMalloc(&rands_gpu_ptr, rands_nbytes), hip_ok); - rands_gpu.reset(rands_gpu_ptr); - } - - HIP_CHECK_BREAK(hipMemcpy(rands_gpu.get(), rands.data(), rands_nbytes, hipMemcpyHostToDevice), - hip_ok); - (void)hipDeviceSynchronize(); - - uint32_t constexpr nthreads = 256U; - uint32_t const nblocks = (rands.size() + nthreads - 1) / nthreads; - - using count_elt_t = size_t; - - auto const count_subtotals_nbytes = nblocks * sizeof(count_elt_t); - std::unique_ptr count_subtotals_gpu; - { - count_elt_t* count_subtotals_gpu_ptr; - HIP_CHECK_BREAK(hipMalloc(&count_subtotals_gpu_ptr, count_subtotals_nbytes), hip_ok); - count_subtotals_gpu.reset(count_subtotals_gpu_ptr); - } - - hipLaunchKernelGGL(kernel::memset_gpu, nblocks, nthreads, 0, 0, count_subtotals_gpu.get(), - 0UL, static_cast(nblocks)); - HIP_CHECK_BREAK(hipGetLastError(), hip_ok); - (void)hipDeviceSynchronize(); - - auto const kernel_begin_time = tm::monoclk::now(); - - hipLaunchKernelGGL(kernel::count_gpu, nblocks, nthreads, 0, 0, rands_gpu.get(), - count_subtotals_gpu.get(), rands.size(), static_cast(nblocks), - opts.gt); - HIP_CHECK_BREAK(hipGetLastError(), hip_ok); - (void)hipDeviceSynchronize(); - - auto const kernel_end_time = tm::monoclk::now(); - - std::vector count_subtotals(nblocks); - HIP_CHECK_BREAK(hipMemcpy(count_subtotals.data(), count_subtotals_gpu.get(), - count_subtotals_nbytes, hipMemcpyDeviceToHost), - hip_ok); - (void)hipDeviceSynchronize(); - - // TODO parallel sum on GPU - auto const total = - std::accumulate(count_subtotals.cbegin(), count_subtotals.cend(), static_cast(0)); - - auto const all_end_time = tm::monoclk::now(); - - tm::dur const kernel_time(kernel_end_time - kernel_begin_time); - auto total_time(all_end_time - begin_time); - tm::dur const total_time_without_tool_init(total_time); - printf( - "len(rands) = %zu; gt = %zu; count(rands, gt) = %zu\n" - "main kernel time elapsed: %" DBL_FMT - "\n" - "full time elapsed: %" DBL_FMT "\n", - rands.size(), opts.gt, total, kernel_time.count(), total_time_without_tool_init.count()); - } while (false); - - if (opts.disassemble) { - disassembly_disassemble_kernels(false); - } - } - -cleanup: - if (opts.pc_sampling) { - rocprofiler_terminate_session(sid); - rocprofiler_flush_data(sid, bid); - rocprofiler_destroy_session(sid); - } - -out: - return ROCPROFILER_STATUS_SUCCESS == rocprofiler_ok ? EXIT_SUCCESS : EXIT_FAILURE; -} - -int main(int argc, char** argv) { - if (auto const ret = get_options(argc, argv, &g_opts); EXIT_SUCCESS != ret) { - return ret; - } - - if (hsa_init() != HSA_STATUS_SUCCESS) { - return EXIT_FAILURE; - } - - int ret = EXIT_FAILURE; - auto ok = ROCPROFILER_STATUS_SUCCESS; - - ROCPROFILER_CHECK(rocprofiler_initialize(), ok); - if (ROCPROFILER_STATUS_SUCCESS == ok) { - ret = run_kernel(g_opts); - } else { - goto out; - } - - rocprofiler_finalize(); - -out: - hsa_shut_down(); - return ROCPROFILER_STATUS_SUCCESS == ok && EXIT_FAILURE != ret ? EXIT_SUCCESS : EXIT_FAILURE; -} diff --git a/samples/pcsampler/code_printing_sample/program.hpp b/samples/pcsampler/code_printing_sample/program.hpp deleted file mode 100644 index 521b854216..0000000000 --- a/samples/pcsampler/code_printing_sample/program.hpp +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright (c) 2022 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_HPP_ -#define SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_HPP_ - -#define PROGNAME "code_printing_sample" - -#define HIP_ERROR(code) \ - do { \ - fprintf(stderr, PROGNAME ": Assertion failed at %s:%d, HIP error: %s\n", __FILE__, __LINE__, \ - hipGetErrorString((code))); \ - fflush(stderr); \ - } while (false); - -#define HIP_CHECK_BREAK(expr, var) \ - if (auto const code = (expr); hipSuccess != code) { \ - HIP_ERROR(code); \ - (var) = code; \ - break; \ - } - -#define ROCPROFILER_ERROR(code) \ - do { \ - fprintf(stderr, PROGNAME ": Assertion failed at %s:%d, ROCProfiler error: %s\n", __FILE__, \ - __LINE__, rocprofiler_error_str(code)); \ - fflush(stderr); \ - } while (false); - -#define ROCPROFILER_CHECK(expr, var) \ - if ((var) = (expr); ROCPROFILER_STATUS_SUCCESS != (var)) { \ - ROCPROFILER_ERROR((var)); \ - } - -#endif // SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_HPP_ diff --git a/samples/pcsampler/code_printing_sample/program_options.hpp b/samples/pcsampler/code_printing_sample/program_options.hpp deleted file mode 100644 index 9eff15131b..0000000000 --- a/samples/pcsampler/code_printing_sample/program_options.hpp +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2022 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_OPTIONS_HPP_ -#define SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_OPTIONS_HPP_ - -#include -#include - -struct program_options { - program_options() - : device(0), - no_gpu(false), - hip_memset(false), - rands_len(1024 * 1024 * 4), - gt(0), - seed(std::chrono::steady_clock::now().time_since_epoch().count()), - disassemble(false), - pc_sampling(false) {} - - int device; - bool no_gpu; - bool hip_memset; - size_t rands_len; - uint64_t gt; - uint64_t seed; - bool disassemble; - bool pc_sampling; -}; - -#endif // SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_OPTIONS_HPP_