From 4f40fb8a4020f6e3bbc119d2a1d01efb1022c17c Mon Sep 17 00:00:00 2001 From: Ammar ELWazir Date: Sat, 12 Aug 2023 04:49:49 +0000 Subject: [PATCH] SWDEV-391589: Addressing changes requested by Omniperf: Adding Versioning to rocprof main script Adding Versioning to file plugins to support legacy v1 output and new v2 output Fixing headers for v2 output Change-Id: I352e8c8ed03871466269a792d1bfa572da58bd8a --- bin/rocprofv2 | 13 +- bin/rpl_run.sh | 16 +- plugin/cli/cli.cpp | 7 +- plugin/exportmap | 2 +- plugin/file/CMakeLists.txt | 47 +- plugin/file/file.cpp | 8 +- plugin/file/file_v1.cpp | 515 ++++++++++++++++++ .../hip_helloworld_golden_traces.txt | 2 +- .../hip_vectoradd_golden_traces.txt | 2 +- .../mpi_vectoradd_golden_traces.txt | 4 +- .../openmp_helloworld_golden_traces.txt | 3 +- tests-v2/featuretests/utils/test_utils.cpp | 8 +- tests-v2/featuretests/utils/test_utils.h | 6 +- 13 files changed, 605 insertions(+), 28 deletions(-) create mode 100644 plugin/file/file_v1.cpp diff --git a/bin/rocprofv2 b/bin/rocprofv2 index 36c010abf9..38a2e5bb94 100755 --- a/bin/rocprofv2 +++ b/bin/rocprofv2 @@ -24,7 +24,7 @@ GREY='\033[0;90m' RESET='\033[0m' usage() { - echo -e "ROCProfilerV2 Run Script Usage:" + echo -e "${RESET}ROCProfilerV2 Run Script Usage:" echo -e "${GREEN}-h | --help ${RESET} For showing this message" echo -e "${GREEN}--list-counters ${RESET} For showing all available counters for the current GPUs" if [ $RUN_FROM_BUILD == 1 ]; then @@ -41,11 +41,12 @@ usage() { echo -e "${GREEN}--kernel-trace ${RESET} For Collecting Kernel dispatch Traces" echo -e "${GREEN}--sys-trace ${RESET} For Collecting HIP and HSA APIs and their Activities Traces along ROCTX and Kernel Dispatch traces\n" echo -e "\t#${GREY}usage e.g: rocprofv2 --[hip-trace|hsa-trace|roctx-trace|kernel-trace|sys-trace] \n"${RESET} - echo -e "${GREEN}--plugin ${RESET} PLUGIN_NAME For enabling a plugin (file/perfetto/att/ctf)" echo -e "\t#${GREY} usage(file/perfetto/ctf) e.g: rocprofv2 -i pmc.txt --plugin [file/perfetto/ctf] -d out_dir " echo -e "\t# usage(att): rocprofv2 --plugin att " echo -e "\t# use \"rocprofv2 --plugin att --help\" for ATT-specific parameters help.${RESET}\n" + echo -e "${GREEN}--plugin-version ${RESET} <1|2> For selecting the version for the plugin (1/2)" + echo -e "\t#${GREY} 1 - Legacy output format, 2 - New output format (default)${RESET}\n" echo -e "${GREEN}-i | --input ${RESET} For adding counters file path (every line in the text file represents a counter)" echo -e "\t#${GREY} usage: rocprofv2 -i pmc.txt -d ${RESET}\n" echo -e "${GREEN}-o | --output-file ${RESET} For the output file name" @@ -177,6 +178,12 @@ while [ 1 ]; do shift elif [ "$1" = "--basenames" ]; then export ROCPROFILER_TRUNCATE_KERNEL_PATH=1 + elif [ "$1" = "--version" ]; then + shift + shift + elif [ "$1" = "--plugin-version" ]; then + export ROCPROFILER_PLUGIN_LIB=$ROCPROFILER_PLUGIN_LIB.$2 + shift shift elif [ "$1" = "--plugin" ]; then if [ -n $2 ]; then @@ -186,7 +193,7 @@ while [ 1 ]; do usage exit 1 fi - export ROCPROFILER_PLUGIN_LIB=lib${PLUGIN}_plugin.so + export ROCPROFILER_PLUGIN_LIB=lib${PLUGIN}_plugin.so$ROCPROFILER_PLUGIN_LIB else echo -e "Wrong input \"$2\" for plugin!" usage diff --git a/bin/rpl_run.sh b/bin/rpl_run.sh index 9959a50b12..e114e7a3c4 100755 --- a/bin/rpl_run.sh +++ b/bin/rpl_run.sh @@ -22,6 +22,7 @@ # THE SOFTWARE. ################################################################################ +ROCPROF_ARGS="$*" time_stamp=`date +%y%m%d_%H%M%S` BIN_DIR=$(dirname $(realpath ${BASH_SOURCE[0]})) ROOT_DIR=$(dirname $BIN_DIR) @@ -110,6 +111,9 @@ usage() { echo "" echo "Options:" echo " -h - this help" + echo " --tool-version <1|2> - to use specific version of rocprof tool, by default v1 is used" + echo " 1 - rocprofiler tool v1" + echo " 2 - rocprofiler tool v2" echo " --verbose - verbose mode, dumping all base counters used in the input metrics" echo " --list-basic - to print the list of basic HW counters" echo " --list-derived - to print the list of derived metrics with formulas" @@ -367,7 +371,17 @@ ARG_IN="" while [ 1 ] ; do ARG_IN=$1 ARG_VAL=1 - if [ "$1" = "-h" ] ; then + if [ "$1" = "--tool-version" ] ; then + if [ $2 = 1 ] ; then + : + elif [ $2 = 2 ] ; then + eval $BIN_DIR/rocprofv2 $ROCPROF_ARGS + exit 0 + else + echo "Wrong option '$1 $2'" + usage + fi + elif [ "$1" = "-h" ] ; then usage elif [ "$1" = "-i" ] ; then INPUT_FILE="$2" diff --git a/plugin/cli/cli.cpp b/plugin/cli/cli.cpp index fd96aa4688..92e7cd4c68 100644 --- a/plugin/cli/cli.cpp +++ b/plugin/cli/cli.cpp @@ -223,23 +223,22 @@ class file_plugin_t { *output_file << "Dispatch_ID(" << std::to_string(profiler_record->header.id.handle) << "), " << "GPU_ID(" << std::to_string(profiler_record->gpu_id.handle) << "), " << "Queue_ID(" << std::to_string(profiler_record->queue_id.handle) << "), " - << "Queue_Index(" << std::to_string(profiler_record->queue_idx.value) << "), " << "Process_ID(" << std::to_string(GetPid()) << "), " << "Thread_ID(" << std::to_string(profiler_record->thread_id.value) << "), " << "Grid_Size(" << std::to_string(profiler_record->kernel_properties.grid_size) << "), " << "Workgroup_Size(" << std::to_string(profiler_record->kernel_properties.workgroup_size) << "), " - << "LDS(" + << "LDS_Per_Workgroup(" << std::to_string( ((profiler_record->kernel_properties.lds_size + (lds_block_size - 1)) & ~(lds_block_size - 1))) << "), " - << "Scratch_Size(" + << "Scratch_Per_Workitem(" << std::to_string(profiler_record->kernel_properties.scratch_size) << "), " << "Arch_VGPR(" << std::to_string(profiler_record->kernel_properties.arch_vgpr_count) << "), " - << "Accumulative_VGPR(" + << "Accum_VGPR(" << std::to_string(profiler_record->kernel_properties.accum_vgpr_count) << "), " << "SGPR(" << std::to_string(profiler_record->kernel_properties.sgpr_count) << "), " diff --git a/plugin/exportmap b/plugin/exportmap index e5ec30164a..3eb22197f7 100644 --- a/plugin/exportmap +++ b/plugin/exportmap @@ -1,4 +1,4 @@ -{ +ROCPROFILER_PLUGINS_1.0.0{ global: rocprofiler_plugin_initialize; rocprofiler_plugin_finalize; rocprofiler_plugin_write_buffer_records; diff --git a/plugin/file/CMakeLists.txt b/plugin/file/CMakeLists.txt index 3248dc2564..816ff6432d 100644 --- a/plugin/file/CMakeLists.txt +++ b/plugin/file/CMakeLists.txt @@ -22,15 +22,58 @@ file(GLOB ROCPROFILER_UTIL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/utils/helper.cpp) -file(GLOB FILE_SOURCES "*.cpp") +file(GLOB FILE_SOURCES_V1 "file_v1.cpp") +add_library(file_plugin_v1 SHARED ${FILE_SOURCES_V1} ${ROCPROFILER_UTIL_SRC_FILES}) + +set_target_properties( + file_plugin_v1 + PROPERTIES CXX_VISIBILITY_PRESET hidden + DEFINE_SYMBOL "ROCPROFILER_EXPORTS" + LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../exportmap + LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib/rocprofiler/v1 + INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}" + OUTPUT_NAME file_plugin + VERSION 1.0.0 + SOVERSION 1) + +target_compile_definitions(file_plugin_v1 PRIVATE HIP_PROF_HIP_API_STRING=1 + __HIP_PLATFORM_HCC__=1) + +target_include_directories(file_plugin_v1 PRIVATE ${PROJECT_SOURCE_DIR}) + +target_link_options( + file_plugin_v1 PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap + -Wl,--no-undefined) + +target_link_libraries(file_plugin_v1 PRIVATE rocprofiler-v2 hsa-runtime64::hsa-runtime64 + stdc++fs amd_comgr dl) + +# Prepare Name Link SO files for V1 file plugin Library +add_custom_command( + TARGET file_plugin_v1 + POST_BUILD + COMMAND + ${CMAKE_COMMAND} -E copy + ${PROJECT_BINARY_DIR}/lib/rocprofiler/v1/libfile_plugin.so.1* + ${PROJECT_BINARY_DIR}/lib/rocprofiler/.) + +install(TARGETS file_plugin_v1 LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME} + COMPONENT asan NAMELINK_SKIP) +install(TARGETS file_plugin_v1 LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME} + COMPONENT runtime NAMELINK_SKIP) + +file(GLOB FILE_SOURCES "file.cpp") add_library(file_plugin SHARED ${FILE_SOURCES} ${ROCPROFILER_UTIL_SRC_FILES}) set_target_properties( file_plugin PROPERTIES CXX_VISIBILITY_PRESET hidden + DEFINE_SYMBOL "ROCPROFILER_EXPORTS" LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../exportmap LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib/rocprofiler - INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}") + INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}" + VERSION 2.0.0 + SOVERSION 2) target_compile_definitions(file_plugin PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1) diff --git a/plugin/file/file.cpp b/plugin/file/file.cpp index 51cdfa3458..53d95f3805 100644 --- a/plugin/file/file.cpp +++ b/plugin/file/file.cpp @@ -229,8 +229,8 @@ class file_plugin_t { if (kernel_dispatches_header_written_.load(std::memory_order_relaxed)) return; output_file = get_output_file(output_type_t::COUNTER); *output_file - << "Dispatch_ID,GPU_ID,Queue_ID,Queue_Index,PID,TID,GRD,WGR,LDS,SCR,Arch_VGPR," - "ACCUM_VGPR,SGPR,Wave_Size,SIG,OBJ,Kernel_Name,Start_Timestamp,End_Timestamp," + << "Dispatch_ID,GPU_ID,Queue_ID,PID,TID,Grid_Size,Workgroup_Size,LDS_Per_Workgroup,Scratch_Per_Workitem,Arch_VGPR," + "Accum_VGPR,SGPR,Wave_Size,Kernel_Name,Start_Timestamp,End_Timestamp," "Correlation_ID"; kernel_dispatches_header_written_.exchange(true, std::memory_order_release); return; @@ -367,7 +367,6 @@ class file_plugin_t { *output_file << std::to_string(profiler_record->header.id.handle) << "," << std::to_string(profiler_record->gpu_id.handle) << "," << std::to_string(profiler_record->queue_id.handle) << "," - << std::to_string(profiler_record->queue_idx.value) << "," << std::to_string(GetPid()) << "," << std::to_string(profiler_record->thread_id.value); *output_file << "," << std::to_string(profiler_record->kernel_properties.grid_size) << "," @@ -379,8 +378,7 @@ class file_plugin_t { << std::to_string(profiler_record->kernel_properties.arch_vgpr_count) << "," << std::to_string(profiler_record->kernel_properties.accum_vgpr_count) << "," << std::to_string(profiler_record->kernel_properties.sgpr_count) << "," - << std::to_string(profiler_record->kernel_properties.wave_size) << "," - << std::to_string(profiler_record->kernel_properties.signal_handle); + << std::to_string(profiler_record->kernel_properties.wave_size); std::string kernel_name = ""; if (name_length > 1) { kernel_name = rocprofiler::cxx_demangle(kernel_name_c); diff --git a/plugin/file/file_v1.cpp b/plugin/file/file_v1.cpp new file mode 100644 index 0000000000..eedc14d8bf --- /dev/null +++ b/plugin/file/file_v1.cpp @@ -0,0 +1,515 @@ +/* Copyright (c) 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rocprofiler.h" +#include "rocprofiler_plugin.h" +#include "../utils.h" + +namespace fs = std::experimental::filesystem; + +namespace { + +std::vector GetCounterNames() { + std::vector counters; + const char* line_c_str = getenv("ROCPROFILER_COUNTERS"); + if (line_c_str) { + std::string line = line_c_str; + // skip commented lines + auto found = line.find_first_not_of(" \t"); + if (found != std::string::npos) { + if (line[found] == '#') return {}; + } + if (line.find("pmc") == std::string::npos) return counters; + char seperator = ' '; + std::string::size_type prev_pos = 0, pos = line.find(seperator, prev_pos); + prev_pos = ++pos; + if (pos != std::string::npos) { + while ((pos = line.find(seperator, pos)) != std::string::npos) { + std::string substring(line.substr(prev_pos, pos - prev_pos)); + if (substring.length() > 0 && substring != ":") { + counters.push_back(substring); + } + prev_pos = ++pos; + } + if (!line.substr(prev_pos, pos - prev_pos).empty()) { + counters.push_back(line.substr(prev_pos, pos - prev_pos)); + } + } + } + return counters; +} + +static std::string output_file_name; +class file_plugin_t { + private: + enum class output_type_t { COUNTER, TRACER, PC_SAMPLING }; + + class output_file_t { + public: + output_file_t(std::string name, bool bOpenOnInit = false) : name_(std::move(name)) { + if (bOpenOnInit) open(); + } + + std::string name() const { return name_; } + + template std::ostream& operator<<(T&& value) { + if (!is_open()) open(); + return stream_ << std::forward(value); + } + + std::ostream& operator<<(std::ostream& (*func)(std::ostream&)) { + if (!is_open()) open(); + return stream_ << func; + } + + void open() { + // If the stream is already in the failed state, there's no need to try + // to open the file. + if (fail()) return; + + const char* output_dir = getenv("OUTPUT_PATH"); + output_file_name = getenv("OUT_FILE_NAME") ? std::string(getenv("OUT_FILE_NAME")) : ""; + + if (output_dir == nullptr && getenv("OUT_FILE_NAME") == nullptr) { + stream_.copyfmt(std::cout); + stream_.clear(std::cout.rdstate()); + stream_.basic_ios::rdbuf(std::cout.rdbuf()); + bPrintToStdout = true; + return; + } + if (output_dir == nullptr) output_dir = "."; + + fs::path output_prefix(output_dir); + if (!fs::is_directory(fs::status(output_prefix))) { + if (!stream_.fail()) rocprofiler::warning("Cannot open output directory '%s'", output_dir); + stream_.setstate(std::ios_base::failbit); + return; + } + + output_file_name = replace_MPI_macros(output_file_name); + + std::stringstream ss; + ss << name_ << "_" << ((output_file_name.empty()) ? std::to_string(GetPid()) : "") + << output_file_name << ".csv"; + std::cout << "Results File: " << output_prefix / ss.str() << std::endl; + stream_.open(output_prefix / ss.str()); + } + + bool is_open() const { return stream_.is_open(); } + bool fail() const { return stream_.fail(); } + bool isStdOut() const { return bPrintToStdout; } + + // Returns a string with the MPI %macro replaced with the corresponding envvar + std::string replace_MPI_macros(std::string output_file_name) { + std::unordered_map MPI_BUILTINS = { + {"MPI_RANK", "%rank"}, + {"OMPI_COMM_WORLD_RANK", "%rank"}, + {"MV2_COMM_WORLD_RANK", "%rank"}}; + + for (const auto& [envvar, key] : MPI_BUILTINS) { + size_t key_find = output_file_name.rfind(key); + if (key_find == std::string::npos) continue; // Does not contain a %?rank var + + const char* env_var_set = getenv(envvar); + if (env_var_set == nullptr) continue; // MPI_COMM_WORLD_x var is does not exist + + int rank = atoi(env_var_set); + output_file_name = output_file_name.substr(0, key_find) + std::to_string(rank) + + output_file_name.substr(key_find + std::string(key).size()); + } + + return output_file_name; + } + + private: + const std::string name_; + std::ofstream stream_; + bool bPrintToStdout = false; + }; + + output_file_t* get_output_file(output_type_t output_type, uint32_t domain = 0) { + switch (output_type) { + case output_type_t::COUNTER: + return &output_file_; + case output_type_t::TRACER: + switch (domain) { + case ACTIVITY_DOMAIN_ROCTX: + return &roctx_file_; + case ACTIVITY_DOMAIN_HSA_API: + return &hsa_api_file_; + case ACTIVITY_DOMAIN_HIP_API: + return &hip_api_file_; + case ACTIVITY_DOMAIN_HIP_OPS: + return &hip_activity_file_; + case ACTIVITY_DOMAIN_HSA_OPS: + return &hsa_async_copy_file_; + default: + assert(!"domain/op not supported!"); + break; + } + break; + case output_type_t::PC_SAMPLING: + return &pc_sample_file_; + } + return nullptr; + } + + public: + file_plugin_t(void* data) { + if (data) counter_names_ = GetCounterNames(); + + valid_ = true; + } + + void WriteHeader(output_type_t type, rocprofiler_tracer_activity_domain_t domain) { + output_file_t* output_file; + switch (domain) { + case ACTIVITY_DOMAIN_HSA_API: { + if (hsa_api_header_written_.load(std::memory_order_relaxed)) return; + output_file = get_output_file(output_type_t::TRACER, ACTIVITY_DOMAIN_HSA_API); + *output_file << "Domain,Function,Start_Timestamp,End_Timestamp,Correlation_ID" << std::endl; + *output_file << std::endl; + hsa_api_header_written_.exchange(true, std::memory_order_release); + return; + } + case ACTIVITY_DOMAIN_HIP_API: { + if (hip_api_header_written_.load(std::memory_order_relaxed)) return; + output_file = get_output_file(output_type_t::TRACER, ACTIVITY_DOMAIN_HIP_API); + *output_file << "Domain,Function,Start_Timestamp,End_Timestamp,Correlation_ID" << std::endl; + *output_file << std::endl; + hip_api_header_written_.exchange(true, std::memory_order_release); + return; + } + case ACTIVITY_DOMAIN_ROCTX: { + if (roctx_header_written_.load(std::memory_order_relaxed)) return; + output_file = get_output_file(output_type_t::TRACER, ACTIVITY_DOMAIN_ROCTX); + *output_file << "Domain,ROCTX_ID,Message,Timestamp" << std::endl; + *output_file << std::endl; + roctx_header_written_.exchange(true, std::memory_order_release); + return; + } + case ACTIVITY_DOMAIN_HSA_OPS: { + if (hsa_async_copy_header_written_.load(std::memory_order_relaxed)) return; + output_file = get_output_file(output_type_t::TRACER, ACTIVITY_DOMAIN_HSA_OPS); + *output_file << "Domain,Operation,Start_Timestamp,Stop_Timestamp,Correlation_ID" + << std::endl; + *output_file << std::endl; + hsa_async_copy_header_written_.exchange(true, std::memory_order_release); + return; + } + case ACTIVITY_DOMAIN_HIP_OPS: { + if (hip_activity_header_written_.load(std::memory_order_relaxed)) return; + output_file = get_output_file(output_type_t::TRACER, ACTIVITY_DOMAIN_HIP_OPS); + *output_file << "Domain,Operation,Kernel_Name,Start_Timestamp,Stop_Timestamp," + "Correlation_ID" + << std::endl; + *output_file << std::endl; + hip_activity_header_written_.exchange(true, std::memory_order_release); + return; + } + default: { + if (type == output_type_t::COUNTER) { + if (kernel_dispatches_header_written_.load(std::memory_order_relaxed)) return; + output_file = get_output_file(output_type_t::COUNTER); + + *output_file << "Index,KernelName,gpu-id,queue-id,queue-index,pid,tid,grd,wgr,lds,scr," + "arch_vgpr,accum_vgpr,sgpr,wave_size,sig,obj"; + if (counter_names_.size() > 0) { + for (uint32_t i = 0; i < counter_names_.size(); i++) + *output_file << "," << counter_names_[i]; + } + *output_file << ",DispatchNs,BeginNs,EndNs,CompleteNs"; + *output_file << std::endl; + *output_file << std::endl; + kernel_dispatches_header_written_.exchange(true, std::memory_order_release); + return; + } else if (type == output_type_t::PC_SAMPLING) { + if (pc_sample_header_written_.load(std::memory_order_relaxed)) return; + output_file = get_output_file(output_type_t::PC_SAMPLING); + *output_file << "Dispatch_ID,Timestamp,GPU_ID,PC_Sample,Shader_Engines" << std::endl; + *output_file << std::endl; + pc_sample_header_written_.exchange(true, std::memory_order_release); + return; + } + return; + } + } + } + + std::mutex writing_lock; + + const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) { + switch (domain) { + case ACTIVITY_DOMAIN_ROCTX: + return "ROCTX_DOMAIN"; + break; + case ACTIVITY_DOMAIN_HIP_API: + return "HIP_API_DOMAIN"; + break; + case ACTIVITY_DOMAIN_HIP_OPS: + return "HIP_OPS_DOMAIN"; + break; + case ACTIVITY_DOMAIN_HSA_API: + return "HSA_API_DOMAIN"; + break; + case ACTIVITY_DOMAIN_HSA_OPS: + return "HSA_OPS_DOMAIN"; + break; + case ACTIVITY_DOMAIN_HSA_EVT: + return "HSA_EVT_DOMAIN"; + break; + default: + return ""; + } + } + + void FlushTracerRecord(rocprofiler_record_tracer_t tracer_record, + rocprofiler_session_id_t session_id, + rocprofiler_buffer_id_t buffer_id = rocprofiler_buffer_id_t{0}) { + std::lock_guard lock(writing_lock); + if (tracer_record.timestamps.end.value <= 0 && tracer_record.domain != ACTIVITY_DOMAIN_ROCTX) + return; + WriteHeader(output_type_t::TRACER, tracer_record.domain); + std::string roctx_message; + if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX && tracer_record.name) { + roctx_message = tracer_record.name; + } + + const char* operation_name_c = nullptr; + // ROCTX domain Operation ID doesn't have a name + // It depends on the user input of the roctx functions. + // ROCTX message is the tracer_record.name + if (tracer_record.domain != ACTIVITY_DOMAIN_ROCTX) { + CHECK_ROCPROFILER(rocprofiler_query_tracer_operation_name( + tracer_record.domain, tracer_record.operation_id, &operation_name_c)); + } + output_file_t* output_file = get_output_file(output_type_t::TRACER, tracer_record.domain); + *output_file << GetDomainName(tracer_record.domain); + if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX && tracer_record.external_id.id >= 0) + *output_file << "," << tracer_record.external_id.id; + if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) { + if (roctx_message.size() > 1) + *output_file << ",\"" << roctx_message << "\""; + else + *output_file << ","; + } + if (operation_name_c) *output_file << ",\"" << operation_name_c << "\""; + if (tracer_record.name && tracer_record.domain != ACTIVITY_DOMAIN_ROCTX) { + *output_file << ",\"" << rocprofiler::cxx_demangle(tracer_record.name) << "\""; + } else if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS) { + *output_file << ","; + } + if (tracer_record.domain != ACTIVITY_DOMAIN_ROCTX) { + *output_file << "," << tracer_record.timestamps.begin.value << "," + << tracer_record.timestamps.end.value; + *output_file << "," << tracer_record.correlation_id.value; + } else { + *output_file << "," << tracer_record.timestamps.begin.value; + } + *output_file << std::endl; + } + + void FlushProfilerRecord(const rocprofiler_record_profiler_t* profiler_record, + rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) { + std::lock_guard lock(writing_lock); + WriteHeader(output_type_t::COUNTER, ACTIVITY_DOMAIN_NUMBER); + size_t name_length = 0; + output_file_t* output_file{nullptr}; + output_file = get_output_file(output_type_t::COUNTER); + CHECK_ROCPROFILER(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME, + profiler_record->kernel_id, &name_length)); + // Taken from rocprofiler: The size hasn't changed in recent past + static const uint32_t lds_block_size = 128 * 4; + const char* kernel_name_c = nullptr; + if (name_length > 1) { + CHECK_ROCPROFILER(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, + profiler_record->kernel_id, &kernel_name_c)); + } + *output_file << std::to_string(profiler_record->header.id.handle) << ","; + std::string kernel_name = ""; + if (name_length > 1) { + kernel_name = rocprofiler::cxx_demangle(kernel_name_c); + std::string key = "\""; + std::size_t found = kernel_name.rfind(key); + while (found != std::string::npos) { + kernel_name.replace(found, key.length(), "'"); + found = kernel_name.rfind(key, found - 1); + } + } + *output_file << "\"" << kernel_name << "\","; + *output_file << std::to_string(profiler_record->gpu_id.handle) << "," + << std::to_string(profiler_record->queue_id.handle) << "," + << std::to_string(profiler_record->queue_idx.value) << "," + << std::to_string(GetPid()) << "," + << std::to_string(profiler_record->thread_id.value) << "," + << std::to_string(profiler_record->kernel_properties.grid_size) << "," + << std::to_string(profiler_record->kernel_properties.workgroup_size) << "," + << std::to_string( + ((profiler_record->kernel_properties.lds_size + (lds_block_size - 1)) & + ~(lds_block_size - 1))) + << "," << std::to_string(profiler_record->kernel_properties.scratch_size) << "," + << std::to_string(profiler_record->kernel_properties.arch_vgpr_count) << "," + << std::to_string(profiler_record->kernel_properties.accum_vgpr_count) << "," + << std::to_string(profiler_record->kernel_properties.sgpr_count) << "," + << std::to_string(profiler_record->kernel_properties.wave_size); + + // For Counters + if (profiler_record->counters) { + for (uint64_t i = 0; i < profiler_record->counters_count.value; i++) { + if (profiler_record->counters[i].counter_handler.handle > 0) { + *output_file << "," << std::to_string(profiler_record->counters[i].value.value); + } + } + } + *output_file << ",0," + << std::to_string(profiler_record->timestamps.begin.value) << "," + << std::to_string(profiler_record->timestamps.end.value) << ",0"; + *output_file << '\n'; + if (kernel_name_c) { + free(const_cast(kernel_name_c)); + } + } + + void FlushPCSamplingRecord(const rocprofiler_record_pc_sample_t* pc_sampling_record) { + WriteHeader(output_type_t::PC_SAMPLING, ACTIVITY_DOMAIN_NUMBER); + output_file_t* output_file{nullptr}; + output_file = get_output_file(output_type_t::PC_SAMPLING); + const auto& sample = pc_sampling_record->pc_sample; + *output_file << sample.dispatch_id.value << "," << sample.timestamp.value << "," + << sample.gpu_id.handle << "," << std::hex << std::showbase << sample.pc << "," + << sample.se << std::endl; + } + int WriteBufferRecords(const rocprofiler_record_header_t* begin, + const rocprofiler_record_header_t* end, + rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) { + while (begin < end) { + if (!begin) return 0; + switch (begin->kind) { + case ROCPROFILER_PROFILER_RECORD: { + const rocprofiler_record_profiler_t* profiler_record = + reinterpret_cast(begin); + FlushProfilerRecord(profiler_record, session_id, buffer_id); + break; + } + case ROCPROFILER_TRACER_RECORD: { + rocprofiler_record_tracer_t* tracer_record = const_cast( + reinterpret_cast(begin)); + FlushTracerRecord(*tracer_record, session_id, buffer_id); + break; + } + case ROCPROFILER_ATT_TRACER_RECORD: { + break; + } + case ROCPROFILER_PC_SAMPLING_RECORD: { + const rocprofiler_record_pc_sample_t* pc_sampling_record = + reinterpret_cast(begin); + FlushPCSamplingRecord(pc_sampling_record); + break; + } + default: + break; + } + rocprofiler_next_record(begin, &begin, session_id, buffer_id); + } + return 0; + } + + bool is_valid() const { return valid_; } + + private: + bool valid_{false}; + std::vector counter_names_; + + std::atomic roctx_header_written_{false}, hsa_api_header_written_{false}, + hip_api_header_written_{false}, hip_activity_header_written_{false}, + hsa_async_copy_header_written_{false}, pc_sample_header_written_{false}, + kernel_dispatches_header_written_{false}; + + output_file_t roctx_file_{"roctx_trace"}, hsa_api_file_{"hsa_api_trace"}, + hip_api_file_{"hip_api_trace"}, hip_activity_file_{"hcc_ops_trace"}, + hsa_async_copy_file_{"async_copy_trace"}, pc_sample_file_{"pcs_trace"}, + output_file_{"results"}; +}; + +file_plugin_t* file_plugin = nullptr; + +} // namespace + +ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version, + uint32_t rocprofiler_minor_version, + void* data) { + if (rocprofiler_major_version != ROCPROFILER_VERSION_MAJOR || + rocprofiler_minor_version < ROCPROFILER_VERSION_MINOR) + return -1; + + if (file_plugin != nullptr) return -1; + + file_plugin = new file_plugin_t(data); + if (file_plugin->is_valid()) return 0; + + // The plugin failed to initialized, destroy it and return an error. + delete file_plugin; + file_plugin = nullptr; + return -1; +} + +ROCPROFILER_EXPORT void rocprofiler_plugin_finalize() { + if (!file_plugin) return; + delete file_plugin; + file_plugin = nullptr; +} + +ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records( + const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end, + rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) { + if (!file_plugin || !file_plugin->is_valid()) return -1; + return file_plugin->WriteBufferRecords(begin, end, session_id, buffer_id); +} + +ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record) { + if (!file_plugin || !file_plugin->is_valid()) return -1; + if (record.header.id.handle == 0) return 0; + file_plugin->FlushTracerRecord(record, rocprofiler_session_id_t{0}, rocprofiler_buffer_id_t{0}); + return 0; +} diff --git a/tests-v2/featuretests/profiler/apps/goldentraces/hip_helloworld_golden_traces.txt b/tests-v2/featuretests/profiler/apps/goldentraces/hip_helloworld_golden_traces.txt index 1cbc5bcc02..70cc1bfd97 100755 --- a/tests-v2/featuretests/profiler/apps/goldentraces/hip_helloworld_golden_traces.txt +++ b/tests-v2/featuretests/profiler/apps/goldentraces/hip_helloworld_golden_traces.txt @@ -1,4 +1,4 @@ ROCProfilerV2: Collecting the following counters: - GRBM_COUNT Enabling Counter Collection -Dispatch_ID(1), GPU_ID(4), Queue_ID(1), Queue_Index(1), Process_ID(2185227), Thread_ID(2185227), Grid_Size(10), Workgroup_Size(10), LDS(0), Scratch_Size(0), Arch_VGPR(8), Accumulative_VGPR(0), SGPR(16), Wave_Size(64), Kernel_Name("helloworld"), Begin_Timestamp(140136998244448), End_Timestamp(140141431998688), Correlation_ID(0), GRBM_COUNT(14222.000000) +Dispatch_ID(1), GPU_ID(4), Queue_ID(1), Process_ID(2185227), Thread_ID(2185227), Grid_Size(10), Workgroup_Size(10), LDS(0), Scratch_Size(0), Arch_VGPR(8), Accumulative_VGPR(0), SGPR(16), Wave_Size(64), Kernel_Name("helloworld"), Begin_Timestamp(140136998244448), End_Timestamp(140141431998688), Correlation_ID(0), GRBM_COUNT(14222.000000) diff --git a/tests-v2/featuretests/profiler/apps/goldentraces/hip_vectoradd_golden_traces.txt b/tests-v2/featuretests/profiler/apps/goldentraces/hip_vectoradd_golden_traces.txt index e5a020f52e..953660ce25 100755 --- a/tests-v2/featuretests/profiler/apps/goldentraces/hip_vectoradd_golden_traces.txt +++ b/tests-v2/featuretests/profiler/apps/goldentraces/hip_vectoradd_golden_traces.txt @@ -1,4 +1,4 @@ ROCProfilerV2: Collecting the following counters: - GRBM_COUNT Enabling Counter Collection -Dispatch_ID(1), GPU_ID(4), Queue_ID(1), Queue_Index(0), Process_ID(2185273), Thread_ID(2185273), Grid_Size(1048576), Workgroup_Size(256), LDS(0), Scratch_Size(0), Arch_VGPR(8), Accumulative_VGPR(0), SGPR(16), Wave_Size(64), Kernel_Name("vectoradd_float"), Begin_Timestamp(140169881587808), End_Timestamp(140174279043296), Correlation_ID(0), GRBM_COUNT(40539.000000) +Dispatch_ID(1), GPU_ID(4), Queue_ID(1), Process_ID(2185273), Thread_ID(2185273), Grid_Size(1048576), Workgroup_Size(256), LDS(0), Scratch_Size(0), Arch_VGPR(8), Accumulative_VGPR(0), SGPR(16), Wave_Size(64), Kernel_Name("vectoradd_float"), Begin_Timestamp(140169881587808), End_Timestamp(140174279043296), Correlation_ID(0), GRBM_COUNT(40539.000000) diff --git a/tests-v2/featuretests/profiler/apps/goldentraces/mpi_vectoradd_golden_traces.txt b/tests-v2/featuretests/profiler/apps/goldentraces/mpi_vectoradd_golden_traces.txt index 59107d0ab6..677f71e7ea 100755 --- a/tests-v2/featuretests/profiler/apps/goldentraces/mpi_vectoradd_golden_traces.txt +++ b/tests-v2/featuretests/profiler/apps/goldentraces/mpi_vectoradd_golden_traces.txt @@ -7,4 +7,6 @@ Enabling Counter Collection device count and rank is1: 1 Rank Id: 0 | Device Id : 0 | Num Devices: 1 Max error: 0.000000 -Dispatch_ID(1), GPU_ID(1), Queue_ID(1), Queue_Index(0), Process_ID(6293), Thread_ID(6293), Grid_Size(1048576), Workgroup_Size(256), LDS(0), Scratch_Size(0), Arch_VGPR(12), Accumulative_VGPR(0), SGPR(32), Wave_Size(64), Kernel_Name("add"), Begin_Timestamp(140016470724832), End_Timestamp(5), Correlation_ID(0), GRBM_COUNT(1108537.000000) +Max error: 0.000000 +Dispatch_ID(1), GPU_ID(5), Queue_ID(1), Process_ID(2185441), Thread_ID(2185441), Grid_Size(1048576), Workgroup_Size(256), LDS(0), Scratch_Size(0), Arch_VGPR(12), Accumulative_VGPR(4), SGPR(32), Wave_Size(64), Kernel_Name("add"), Begin_Timestamp(139857691152944), End_Timestamp(139857835223272), Correlation_ID(0), GRBM_COUNT(499551.000000) +Dispatch_ID(1), GPU_ID(4), Queue_ID(1), Process_ID(2185436), Thread_ID(2185436), Grid_Size(1048576), Workgroup_Size(256), LDS(0), Scratch_Size(0), Arch_VGPR(12), Accumulative_VGPR(4), SGPR(32), Wave_Size(64), Kernel_Name("add"), Begin_Timestamp(140429257347632), End_Timestamp(140429483317480), Correlation_ID(0), GRBM_COUNT(499406.000000) diff --git a/tests-v2/featuretests/profiler/apps/goldentraces/openmp_helloworld_golden_traces.txt b/tests-v2/featuretests/profiler/apps/goldentraces/openmp_helloworld_golden_traces.txt index 5333b219e1..4f04f401ec 100755 --- a/tests-v2/featuretests/profiler/apps/goldentraces/openmp_helloworld_golden_traces.txt +++ b/tests-v2/featuretests/profiler/apps/goldentraces/openmp_helloworld_golden_traces.txt @@ -1,5 +1,4 @@ ROCProfilerV2: Collecting the following counters: - GRBM_COUNT Enabling Counter Collection -PASSED! -Dispatch_ID(1), GPU_ID(1), Queue_ID(1), Queue_Index(0), Process_ID(11822), Thread_ID(11822), Grid_Size(1), Workgroup_Size(1), LDS(0), Scratch_Size(0), Arch_VGPR(4), Accumulative_VGPR(0), SGPR(16), Wave_Size(64), Kernel_Name("hip_helloworld"), Begin_Timestamp(140470675179888), End_Timestamp(140470675179776), Correlation_ID(0), GRBM_COUNT(22315.000000) \ No newline at end of file +Dispatch_ID(1), GPU_ID(4), Queue_ID(1), Process_ID(2186189), Thread_ID(2186189), Grid_Size(10), Workgroup_Size(10), LDS(0), Scratch_Size(0), Arch_VGPR(8), Accumulative_VGPR(0), SGPR(16), Wave_Size(64), Kernel_Name("helloworld"), Begin_Timestamp(140284033765472), End_Timestamp(140288419293408), Correlation_ID(0), GRBM_COUNT(13839.000000) diff --git a/tests-v2/featuretests/utils/test_utils.cpp b/tests-v2/featuretests/utils/test_utils.cpp index 4e5813ed4a..eda460a17c 100644 --- a/tests-v2/featuretests/utils/test_utils.cpp +++ b/tests-v2/featuretests/utils/test_utils.cpp @@ -91,8 +91,6 @@ void tokenize_profiler_output(std::string line, profiler_kernel_info_t& kinfo) { std::getline(tokenStream, token, ','); kinfo.queue_id = token; std::getline(tokenStream, token, ','); - kinfo.queue_index = token; - std::getline(tokenStream, token, ','); kinfo.process_id = token; std::getline(tokenStream, token, ','); kinfo.thread_id = token; @@ -101,9 +99,9 @@ void tokenize_profiler_output(std::string line, profiler_kernel_info_t& kinfo) { std::getline(tokenStream, token, ','); kinfo.workgroup_size = token; std::getline(tokenStream, token, ','); - kinfo.lds = token; + kinfo.lds_per_workgroup = token; std::getline(tokenStream, token, ','); - kinfo.scratch_size = token; + kinfo.scratch_per_workitem = token; std::getline(tokenStream, token, ','); kinfo.arch_vgpr = token; std::getline(tokenStream, token, ','); @@ -119,6 +117,8 @@ void tokenize_profiler_output(std::string line, profiler_kernel_info_t& kinfo) { std::getline(tokenStream, token, ','); kinfo.end_time = token; std::getline(tokenStream, token, ','); + kinfo.correlation_id = token; + std::getline(tokenStream, token, ','); kinfo.counter = token; } diff --git a/tests-v2/featuretests/utils/test_utils.h b/tests-v2/featuretests/utils/test_utils.h index 80543ab93d..7b0b9fe4b4 100644 --- a/tests-v2/featuretests/utils/test_utils.h +++ b/tests-v2/featuretests/utils/test_utils.h @@ -43,13 +43,12 @@ typedef struct { std::string dispatch_id; std::string gpu_id; std::string queue_id; - std::string queue_index; std::string process_id; std::string thread_id; std::string grid_size; std::string workgroup_size; - std::string lds; - std::string scratch_size; + std::string lds_per_workgroup; + std::string scratch_per_workitem; std::string arch_vgpr; std::string accum_vgpr; std::string sgpr; @@ -57,6 +56,7 @@ typedef struct { std::string kernel_name; std::string begin_time; std::string end_time; + std::string correlation_id; std::string counter; } profiler_kernel_info_t;