SWDEV-391589: Addressing changes requested by Omniperf:

Adding Versioning to rocprof main script
Adding Versioning to file plugins to support legacy v1 output and new v2 output
Fixing headers for v2 output

Change-Id: I352e8c8ed03871466269a792d1bfa572da58bd8a
This commit is contained in:
Ammar ELWazir
2023-08-12 04:49:49 +00:00
committed by Ammar Elwazir
orang tua def7fa710a
melakukan 4f40fb8a40
13 mengubah file dengan 605 tambahan dan 28 penghapusan
+10 -3
Melihat File
@@ -24,7 +24,7 @@ GREY='\033[0;90m'
RESET='\033[0m'
usage() {
echo -e "ROCProfilerV2 Run Script Usage:"
echo -e "${RESET}ROCProfilerV2 Run Script Usage:"
echo -e "${GREEN}-h | --help ${RESET} For showing this message"
echo -e "${GREEN}--list-counters ${RESET} For showing all available counters for the current GPUs"
if [ $RUN_FROM_BUILD == 1 ]; then
@@ -41,11 +41,12 @@ usage() {
echo -e "${GREEN}--kernel-trace ${RESET} For Collecting Kernel dispatch Traces"
echo -e "${GREEN}--sys-trace ${RESET} For Collecting HIP and HSA APIs and their Activities Traces along ROCTX and Kernel Dispatch traces\n"
echo -e "\t#${GREY}usage e.g: rocprofv2 --[hip-trace|hsa-trace|roctx-trace|kernel-trace|sys-trace] <executable>\n"${RESET}
echo -e "${GREEN}--plugin ${RESET} PLUGIN_NAME For enabling a plugin (file/perfetto/att/ctf)"
echo -e "\t#${GREY} usage(file/perfetto/ctf) e.g: rocprofv2 -i pmc.txt --plugin [file/perfetto/ctf] -d out_dir <executable>"
echo -e "\t# usage(att): rocprofv2 <rocprofv2_params> --plugin att <ISA_file> <att_parameters> <executable>"
echo -e "\t# use \"rocprofv2 --plugin att --help\" for ATT-specific parameters help.${RESET}\n"
echo -e "${GREEN}--plugin-version ${RESET} <1|2> For selecting the version for the plugin (1/2)"
echo -e "\t#${GREY} 1 - Legacy output format, 2 - New output format (default)${RESET}\n"
echo -e "${GREEN}-i | --input ${RESET} For adding counters file path (every line in the text file represents a counter)"
echo -e "\t#${GREY} usage: rocprofv2 -i pmc.txt -d <executable>${RESET}\n"
echo -e "${GREEN}-o | --output-file ${RESET} For the output file name"
@@ -177,6 +178,12 @@ while [ 1 ]; do
shift
elif [ "$1" = "--basenames" ]; then
export ROCPROFILER_TRUNCATE_KERNEL_PATH=1
elif [ "$1" = "--version" ]; then
shift
shift
elif [ "$1" = "--plugin-version" ]; then
export ROCPROFILER_PLUGIN_LIB=$ROCPROFILER_PLUGIN_LIB.$2
shift
shift
elif [ "$1" = "--plugin" ]; then
if [ -n $2 ]; then
@@ -186,7 +193,7 @@ while [ 1 ]; do
usage
exit 1
fi
export ROCPROFILER_PLUGIN_LIB=lib${PLUGIN}_plugin.so
export ROCPROFILER_PLUGIN_LIB=lib${PLUGIN}_plugin.so$ROCPROFILER_PLUGIN_LIB
else
echo -e "Wrong input \"$2\" for plugin!"
usage
+15 -1
Melihat File
@@ -22,6 +22,7 @@
# THE SOFTWARE.
################################################################################
ROCPROF_ARGS="$*"
time_stamp=`date +%y%m%d_%H%M%S`
BIN_DIR=$(dirname $(realpath ${BASH_SOURCE[0]}))
ROOT_DIR=$(dirname $BIN_DIR)
@@ -110,6 +111,9 @@ usage() {
echo ""
echo "Options:"
echo " -h - this help"
echo " --tool-version <1|2> - to use specific version of rocprof tool, by default v1 is used"
echo " 1 - rocprofiler tool v1"
echo " 2 - rocprofiler tool v2"
echo " --verbose - verbose mode, dumping all base counters used in the input metrics"
echo " --list-basic - to print the list of basic HW counters"
echo " --list-derived - to print the list of derived metrics with formulas"
@@ -367,7 +371,17 @@ ARG_IN=""
while [ 1 ] ; do
ARG_IN=$1
ARG_VAL=1
if [ "$1" = "-h" ] ; then
if [ "$1" = "--tool-version" ] ; then
if [ $2 = 1 ] ; then
:
elif [ $2 = 2 ] ; then
eval $BIN_DIR/rocprofv2 $ROCPROF_ARGS
exit 0
else
echo "Wrong option '$1 $2'"
usage
fi
elif [ "$1" = "-h" ] ; then
usage
elif [ "$1" = "-i" ] ; then
INPUT_FILE="$2"
+3 -4
Melihat File
@@ -223,23 +223,22 @@ class file_plugin_t {
*output_file << "Dispatch_ID(" << std::to_string(profiler_record->header.id.handle) << "), "
<< "GPU_ID(" << std::to_string(profiler_record->gpu_id.handle) << "), "
<< "Queue_ID(" << std::to_string(profiler_record->queue_id.handle) << "), "
<< "Queue_Index(" << std::to_string(profiler_record->queue_idx.value) << "), "
<< "Process_ID(" << std::to_string(GetPid()) << "), "
<< "Thread_ID(" << std::to_string(profiler_record->thread_id.value) << "), "
<< "Grid_Size(" << std::to_string(profiler_record->kernel_properties.grid_size)
<< "), "
<< "Workgroup_Size("
<< std::to_string(profiler_record->kernel_properties.workgroup_size) << "), "
<< "LDS("
<< "LDS_Per_Workgroup("
<< std::to_string(
((profiler_record->kernel_properties.lds_size + (lds_block_size - 1)) &
~(lds_block_size - 1)))
<< "), "
<< "Scratch_Size("
<< "Scratch_Per_Workitem("
<< std::to_string(profiler_record->kernel_properties.scratch_size) << "), "
<< "Arch_VGPR("
<< std::to_string(profiler_record->kernel_properties.arch_vgpr_count) << "), "
<< "Accumulative_VGPR("
<< "Accum_VGPR("
<< std::to_string(profiler_record->kernel_properties.accum_vgpr_count) << "), "
<< "SGPR(" << std::to_string(profiler_record->kernel_properties.sgpr_count)
<< "), "
+1 -1
Melihat File
@@ -1,4 +1,4 @@
{
ROCPROFILER_PLUGINS_1.0.0{
global: rocprofiler_plugin_initialize;
rocprofiler_plugin_finalize;
rocprofiler_plugin_write_buffer_records;
+45 -2
Melihat File
@@ -22,15 +22,58 @@
file(GLOB ROCPROFILER_UTIL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/utils/helper.cpp)
file(GLOB FILE_SOURCES "*.cpp")
file(GLOB FILE_SOURCES_V1 "file_v1.cpp")
add_library(file_plugin_v1 SHARED ${FILE_SOURCES_V1} ${ROCPROFILER_UTIL_SRC_FILES})
set_target_properties(
file_plugin_v1
PROPERTIES CXX_VISIBILITY_PRESET hidden
DEFINE_SYMBOL "ROCPROFILER_EXPORTS"
LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib/rocprofiler/v1
INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}"
OUTPUT_NAME file_plugin
VERSION 1.0.0
SOVERSION 1)
target_compile_definitions(file_plugin_v1 PRIVATE HIP_PROF_HIP_API_STRING=1
__HIP_PLATFORM_HCC__=1)
target_include_directories(file_plugin_v1 PRIVATE ${PROJECT_SOURCE_DIR})
target_link_options(
file_plugin_v1 PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
-Wl,--no-undefined)
target_link_libraries(file_plugin_v1 PRIVATE rocprofiler-v2 hsa-runtime64::hsa-runtime64
stdc++fs amd_comgr dl)
# Prepare Name Link SO files for V1 file plugin Library
add_custom_command(
TARGET file_plugin_v1
POST_BUILD
COMMAND
${CMAKE_COMMAND} -E copy
${PROJECT_BINARY_DIR}/lib/rocprofiler/v1/libfile_plugin.so.1*
${PROJECT_BINARY_DIR}/lib/rocprofiler/.)
install(TARGETS file_plugin_v1 LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
COMPONENT asan NAMELINK_SKIP)
install(TARGETS file_plugin_v1 LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
COMPONENT runtime NAMELINK_SKIP)
file(GLOB FILE_SOURCES "file.cpp")
add_library(file_plugin SHARED ${FILE_SOURCES} ${ROCPROFILER_UTIL_SRC_FILES})
set_target_properties(
file_plugin
PROPERTIES CXX_VISIBILITY_PRESET hidden
DEFINE_SYMBOL "ROCPROFILER_EXPORTS"
LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib/rocprofiler
INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}")
INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}"
VERSION 2.0.0
SOVERSION 2)
target_compile_definitions(file_plugin PRIVATE HIP_PROF_HIP_API_STRING=1
__HIP_PLATFORM_AMD__=1)
+3 -5
Melihat File
@@ -229,8 +229,8 @@ class file_plugin_t {
if (kernel_dispatches_header_written_.load(std::memory_order_relaxed)) return;
output_file = get_output_file(output_type_t::COUNTER);
*output_file
<< "Dispatch_ID,GPU_ID,Queue_ID,Queue_Index,PID,TID,GRD,WGR,LDS,SCR,Arch_VGPR,"
"ACCUM_VGPR,SGPR,Wave_Size,SIG,OBJ,Kernel_Name,Start_Timestamp,End_Timestamp,"
<< "Dispatch_ID,GPU_ID,Queue_ID,PID,TID,Grid_Size,Workgroup_Size,LDS_Per_Workgroup,Scratch_Per_Workitem,Arch_VGPR,"
"Accum_VGPR,SGPR,Wave_Size,Kernel_Name,Start_Timestamp,End_Timestamp,"
"Correlation_ID";
kernel_dispatches_header_written_.exchange(true, std::memory_order_release);
return;
@@ -367,7 +367,6 @@ class file_plugin_t {
*output_file << std::to_string(profiler_record->header.id.handle) << ","
<< std::to_string(profiler_record->gpu_id.handle) << ","
<< std::to_string(profiler_record->queue_id.handle) << ","
<< std::to_string(profiler_record->queue_idx.value) << ","
<< std::to_string(GetPid()) << ","
<< std::to_string(profiler_record->thread_id.value);
*output_file << "," << std::to_string(profiler_record->kernel_properties.grid_size) << ","
@@ -379,8 +378,7 @@ class file_plugin_t {
<< std::to_string(profiler_record->kernel_properties.arch_vgpr_count) << ","
<< std::to_string(profiler_record->kernel_properties.accum_vgpr_count) << ","
<< std::to_string(profiler_record->kernel_properties.sgpr_count) << ","
<< std::to_string(profiler_record->kernel_properties.wave_size) << ","
<< std::to_string(profiler_record->kernel_properties.signal_handle);
<< std::to_string(profiler_record->kernel_properties.wave_size);
std::string kernel_name = "";
if (name_length > 1) {
kernel_name = rocprofiler::cxx_demangle(kernel_name_c);
+515
Melihat File
@@ -0,0 +1,515 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <cxxabi.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <experimental/filesystem>
#include <fstream>
#include <iostream>
#include <memory>
#include <optional>
#include <ostream>
#include <sstream>
#include <string>
#include <hsa/hsa.h>
#include <mutex>
#include <unordered_map>
#include "rocprofiler.h"
#include "rocprofiler_plugin.h"
#include "../utils.h"
namespace fs = std::experimental::filesystem;
namespace {
std::vector<std::string> GetCounterNames() {
std::vector<std::string> counters;
const char* line_c_str = getenv("ROCPROFILER_COUNTERS");
if (line_c_str) {
std::string line = line_c_str;
// skip commented lines
auto found = line.find_first_not_of(" \t");
if (found != std::string::npos) {
if (line[found] == '#') return {};
}
if (line.find("pmc") == std::string::npos) return counters;
char seperator = ' ';
std::string::size_type prev_pos = 0, pos = line.find(seperator, prev_pos);
prev_pos = ++pos;
if (pos != std::string::npos) {
while ((pos = line.find(seperator, pos)) != std::string::npos) {
std::string substring(line.substr(prev_pos, pos - prev_pos));
if (substring.length() > 0 && substring != ":") {
counters.push_back(substring);
}
prev_pos = ++pos;
}
if (!line.substr(prev_pos, pos - prev_pos).empty()) {
counters.push_back(line.substr(prev_pos, pos - prev_pos));
}
}
}
return counters;
}
static std::string output_file_name;
class file_plugin_t {
private:
enum class output_type_t { COUNTER, TRACER, PC_SAMPLING };
class output_file_t {
public:
output_file_t(std::string name, bool bOpenOnInit = false) : name_(std::move(name)) {
if (bOpenOnInit) open();
}
std::string name() const { return name_; }
template <typename T> std::ostream& operator<<(T&& value) {
if (!is_open()) open();
return stream_ << std::forward<T>(value);
}
std::ostream& operator<<(std::ostream& (*func)(std::ostream&)) {
if (!is_open()) open();
return stream_ << func;
}
void open() {
// If the stream is already in the failed state, there's no need to try
// to open the file.
if (fail()) return;
const char* output_dir = getenv("OUTPUT_PATH");
output_file_name = getenv("OUT_FILE_NAME") ? std::string(getenv("OUT_FILE_NAME")) : "";
if (output_dir == nullptr && getenv("OUT_FILE_NAME") == nullptr) {
stream_.copyfmt(std::cout);
stream_.clear(std::cout.rdstate());
stream_.basic_ios<char>::rdbuf(std::cout.rdbuf());
bPrintToStdout = true;
return;
}
if (output_dir == nullptr) output_dir = ".";
fs::path output_prefix(output_dir);
if (!fs::is_directory(fs::status(output_prefix))) {
if (!stream_.fail()) rocprofiler::warning("Cannot open output directory '%s'", output_dir);
stream_.setstate(std::ios_base::failbit);
return;
}
output_file_name = replace_MPI_macros(output_file_name);
std::stringstream ss;
ss << name_ << "_" << ((output_file_name.empty()) ? std::to_string(GetPid()) : "")
<< output_file_name << ".csv";
std::cout << "Results File: " << output_prefix / ss.str() << std::endl;
stream_.open(output_prefix / ss.str());
}
bool is_open() const { return stream_.is_open(); }
bool fail() const { return stream_.fail(); }
bool isStdOut() const { return bPrintToStdout; }
// Returns a string with the MPI %macro replaced with the corresponding envvar
std::string replace_MPI_macros(std::string output_file_name) {
std::unordered_map<const char*, const char*> MPI_BUILTINS = {
{"MPI_RANK", "%rank"},
{"OMPI_COMM_WORLD_RANK", "%rank"},
{"MV2_COMM_WORLD_RANK", "%rank"}};
for (const auto& [envvar, key] : MPI_BUILTINS) {
size_t key_find = output_file_name.rfind(key);
if (key_find == std::string::npos) continue; // Does not contain a %?rank var
const char* env_var_set = getenv(envvar);
if (env_var_set == nullptr) continue; // MPI_COMM_WORLD_x var is does not exist
int rank = atoi(env_var_set);
output_file_name = output_file_name.substr(0, key_find) + std::to_string(rank) +
output_file_name.substr(key_find + std::string(key).size());
}
return output_file_name;
}
private:
const std::string name_;
std::ofstream stream_;
bool bPrintToStdout = false;
};
output_file_t* get_output_file(output_type_t output_type, uint32_t domain = 0) {
switch (output_type) {
case output_type_t::COUNTER:
return &output_file_;
case output_type_t::TRACER:
switch (domain) {
case ACTIVITY_DOMAIN_ROCTX:
return &roctx_file_;
case ACTIVITY_DOMAIN_HSA_API:
return &hsa_api_file_;
case ACTIVITY_DOMAIN_HIP_API:
return &hip_api_file_;
case ACTIVITY_DOMAIN_HIP_OPS:
return &hip_activity_file_;
case ACTIVITY_DOMAIN_HSA_OPS:
return &hsa_async_copy_file_;
default:
assert(!"domain/op not supported!");
break;
}
break;
case output_type_t::PC_SAMPLING:
return &pc_sample_file_;
}
return nullptr;
}
public:
file_plugin_t(void* data) {
if (data) counter_names_ = GetCounterNames();
valid_ = true;
}
void WriteHeader(output_type_t type, rocprofiler_tracer_activity_domain_t domain) {
output_file_t* output_file;
switch (domain) {
case ACTIVITY_DOMAIN_HSA_API: {
if (hsa_api_header_written_.load(std::memory_order_relaxed)) return;
output_file = get_output_file(output_type_t::TRACER, ACTIVITY_DOMAIN_HSA_API);
*output_file << "Domain,Function,Start_Timestamp,End_Timestamp,Correlation_ID" << std::endl;
*output_file << std::endl;
hsa_api_header_written_.exchange(true, std::memory_order_release);
return;
}
case ACTIVITY_DOMAIN_HIP_API: {
if (hip_api_header_written_.load(std::memory_order_relaxed)) return;
output_file = get_output_file(output_type_t::TRACER, ACTIVITY_DOMAIN_HIP_API);
*output_file << "Domain,Function,Start_Timestamp,End_Timestamp,Correlation_ID" << std::endl;
*output_file << std::endl;
hip_api_header_written_.exchange(true, std::memory_order_release);
return;
}
case ACTIVITY_DOMAIN_ROCTX: {
if (roctx_header_written_.load(std::memory_order_relaxed)) return;
output_file = get_output_file(output_type_t::TRACER, ACTIVITY_DOMAIN_ROCTX);
*output_file << "Domain,ROCTX_ID,Message,Timestamp" << std::endl;
*output_file << std::endl;
roctx_header_written_.exchange(true, std::memory_order_release);
return;
}
case ACTIVITY_DOMAIN_HSA_OPS: {
if (hsa_async_copy_header_written_.load(std::memory_order_relaxed)) return;
output_file = get_output_file(output_type_t::TRACER, ACTIVITY_DOMAIN_HSA_OPS);
*output_file << "Domain,Operation,Start_Timestamp,Stop_Timestamp,Correlation_ID"
<< std::endl;
*output_file << std::endl;
hsa_async_copy_header_written_.exchange(true, std::memory_order_release);
return;
}
case ACTIVITY_DOMAIN_HIP_OPS: {
if (hip_activity_header_written_.load(std::memory_order_relaxed)) return;
output_file = get_output_file(output_type_t::TRACER, ACTIVITY_DOMAIN_HIP_OPS);
*output_file << "Domain,Operation,Kernel_Name,Start_Timestamp,Stop_Timestamp,"
"Correlation_ID"
<< std::endl;
*output_file << std::endl;
hip_activity_header_written_.exchange(true, std::memory_order_release);
return;
}
default: {
if (type == output_type_t::COUNTER) {
if (kernel_dispatches_header_written_.load(std::memory_order_relaxed)) return;
output_file = get_output_file(output_type_t::COUNTER);
*output_file << "Index,KernelName,gpu-id,queue-id,queue-index,pid,tid,grd,wgr,lds,scr,"
"arch_vgpr,accum_vgpr,sgpr,wave_size,sig,obj";
if (counter_names_.size() > 0) {
for (uint32_t i = 0; i < counter_names_.size(); i++)
*output_file << "," << counter_names_[i];
}
*output_file << ",DispatchNs,BeginNs,EndNs,CompleteNs";
*output_file << std::endl;
*output_file << std::endl;
kernel_dispatches_header_written_.exchange(true, std::memory_order_release);
return;
} else if (type == output_type_t::PC_SAMPLING) {
if (pc_sample_header_written_.load(std::memory_order_relaxed)) return;
output_file = get_output_file(output_type_t::PC_SAMPLING);
*output_file << "Dispatch_ID,Timestamp,GPU_ID,PC_Sample,Shader_Engines" << std::endl;
*output_file << std::endl;
pc_sample_header_written_.exchange(true, std::memory_order_release);
return;
}
return;
}
}
}
std::mutex writing_lock;
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
switch (domain) {
case ACTIVITY_DOMAIN_ROCTX:
return "ROCTX_DOMAIN";
break;
case ACTIVITY_DOMAIN_HIP_API:
return "HIP_API_DOMAIN";
break;
case ACTIVITY_DOMAIN_HIP_OPS:
return "HIP_OPS_DOMAIN";
break;
case ACTIVITY_DOMAIN_HSA_API:
return "HSA_API_DOMAIN";
break;
case ACTIVITY_DOMAIN_HSA_OPS:
return "HSA_OPS_DOMAIN";
break;
case ACTIVITY_DOMAIN_HSA_EVT:
return "HSA_EVT_DOMAIN";
break;
default:
return "";
}
}
void FlushTracerRecord(rocprofiler_record_tracer_t tracer_record,
rocprofiler_session_id_t session_id,
rocprofiler_buffer_id_t buffer_id = rocprofiler_buffer_id_t{0}) {
std::lock_guard<std::mutex> lock(writing_lock);
if (tracer_record.timestamps.end.value <= 0 && tracer_record.domain != ACTIVITY_DOMAIN_ROCTX)
return;
WriteHeader(output_type_t::TRACER, tracer_record.domain);
std::string roctx_message;
if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX && tracer_record.name) {
roctx_message = tracer_record.name;
}
const char* operation_name_c = nullptr;
// ROCTX domain Operation ID doesn't have a name
// It depends on the user input of the roctx functions.
// ROCTX message is the tracer_record.name
if (tracer_record.domain != ACTIVITY_DOMAIN_ROCTX) {
CHECK_ROCPROFILER(rocprofiler_query_tracer_operation_name(
tracer_record.domain, tracer_record.operation_id, &operation_name_c));
}
output_file_t* output_file = get_output_file(output_type_t::TRACER, tracer_record.domain);
*output_file << GetDomainName(tracer_record.domain);
if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX && tracer_record.external_id.id >= 0)
*output_file << "," << tracer_record.external_id.id;
if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) {
if (roctx_message.size() > 1)
*output_file << ",\"" << roctx_message << "\"";
else
*output_file << ",";
}
if (operation_name_c) *output_file << ",\"" << operation_name_c << "\"";
if (tracer_record.name && tracer_record.domain != ACTIVITY_DOMAIN_ROCTX) {
*output_file << ",\"" << rocprofiler::cxx_demangle(tracer_record.name) << "\"";
} else if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS) {
*output_file << ",";
}
if (tracer_record.domain != ACTIVITY_DOMAIN_ROCTX) {
*output_file << "," << tracer_record.timestamps.begin.value << ","
<< tracer_record.timestamps.end.value;
*output_file << "," << tracer_record.correlation_id.value;
} else {
*output_file << "," << tracer_record.timestamps.begin.value;
}
*output_file << std::endl;
}
void FlushProfilerRecord(const rocprofiler_record_profiler_t* profiler_record,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
std::lock_guard<std::mutex> lock(writing_lock);
WriteHeader(output_type_t::COUNTER, ACTIVITY_DOMAIN_NUMBER);
size_t name_length = 0;
output_file_t* output_file{nullptr};
output_file = get_output_file(output_type_t::COUNTER);
CHECK_ROCPROFILER(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME,
profiler_record->kernel_id, &name_length));
// Taken from rocprofiler: The size hasn't changed in recent past
static const uint32_t lds_block_size = 128 * 4;
const char* kernel_name_c = nullptr;
if (name_length > 1) {
CHECK_ROCPROFILER(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME,
profiler_record->kernel_id, &kernel_name_c));
}
*output_file << std::to_string(profiler_record->header.id.handle) << ",";
std::string kernel_name = "";
if (name_length > 1) {
kernel_name = rocprofiler::cxx_demangle(kernel_name_c);
std::string key = "\"";
std::size_t found = kernel_name.rfind(key);
while (found != std::string::npos) {
kernel_name.replace(found, key.length(), "'");
found = kernel_name.rfind(key, found - 1);
}
}
*output_file << "\"" << kernel_name << "\",";
*output_file << std::to_string(profiler_record->gpu_id.handle) << ","
<< std::to_string(profiler_record->queue_id.handle) << ","
<< std::to_string(profiler_record->queue_idx.value) << ","
<< std::to_string(GetPid()) << ","
<< std::to_string(profiler_record->thread_id.value) << ","
<< std::to_string(profiler_record->kernel_properties.grid_size) << ","
<< std::to_string(profiler_record->kernel_properties.workgroup_size) << ","
<< std::to_string(
((profiler_record->kernel_properties.lds_size + (lds_block_size - 1)) &
~(lds_block_size - 1)))
<< "," << std::to_string(profiler_record->kernel_properties.scratch_size) << ","
<< std::to_string(profiler_record->kernel_properties.arch_vgpr_count) << ","
<< std::to_string(profiler_record->kernel_properties.accum_vgpr_count) << ","
<< std::to_string(profiler_record->kernel_properties.sgpr_count) << ","
<< std::to_string(profiler_record->kernel_properties.wave_size);
// For Counters
if (profiler_record->counters) {
for (uint64_t i = 0; i < profiler_record->counters_count.value; i++) {
if (profiler_record->counters[i].counter_handler.handle > 0) {
*output_file << "," << std::to_string(profiler_record->counters[i].value.value);
}
}
}
*output_file << ",0,"
<< std::to_string(profiler_record->timestamps.begin.value) << ","
<< std::to_string(profiler_record->timestamps.end.value) << ",0";
*output_file << '\n';
if (kernel_name_c) {
free(const_cast<char*>(kernel_name_c));
}
}
void FlushPCSamplingRecord(const rocprofiler_record_pc_sample_t* pc_sampling_record) {
WriteHeader(output_type_t::PC_SAMPLING, ACTIVITY_DOMAIN_NUMBER);
output_file_t* output_file{nullptr};
output_file = get_output_file(output_type_t::PC_SAMPLING);
const auto& sample = pc_sampling_record->pc_sample;
*output_file << sample.dispatch_id.value << "," << sample.timestamp.value << ","
<< sample.gpu_id.handle << "," << std::hex << std::showbase << sample.pc << ","
<< sample.se << std::endl;
}
int WriteBufferRecords(const rocprofiler_record_header_t* begin,
const rocprofiler_record_header_t* end,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
while (begin < end) {
if (!begin) return 0;
switch (begin->kind) {
case ROCPROFILER_PROFILER_RECORD: {
const rocprofiler_record_profiler_t* profiler_record =
reinterpret_cast<const rocprofiler_record_profiler_t*>(begin);
FlushProfilerRecord(profiler_record, session_id, buffer_id);
break;
}
case ROCPROFILER_TRACER_RECORD: {
rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
FlushTracerRecord(*tracer_record, session_id, buffer_id);
break;
}
case ROCPROFILER_ATT_TRACER_RECORD: {
break;
}
case ROCPROFILER_PC_SAMPLING_RECORD: {
const rocprofiler_record_pc_sample_t* pc_sampling_record =
reinterpret_cast<const rocprofiler_record_pc_sample_t*>(begin);
FlushPCSamplingRecord(pc_sampling_record);
break;
}
default:
break;
}
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
}
return 0;
}
bool is_valid() const { return valid_; }
private:
bool valid_{false};
std::vector<std::string> counter_names_;
std::atomic<bool> roctx_header_written_{false}, hsa_api_header_written_{false},
hip_api_header_written_{false}, hip_activity_header_written_{false},
hsa_async_copy_header_written_{false}, pc_sample_header_written_{false},
kernel_dispatches_header_written_{false};
output_file_t roctx_file_{"roctx_trace"}, hsa_api_file_{"hsa_api_trace"},
hip_api_file_{"hip_api_trace"}, hip_activity_file_{"hcc_ops_trace"},
hsa_async_copy_file_{"async_copy_trace"}, pc_sample_file_{"pcs_trace"},
output_file_{"results"};
};
file_plugin_t* file_plugin = nullptr;
} // namespace
ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
uint32_t rocprofiler_minor_version,
void* data) {
if (rocprofiler_major_version != ROCPROFILER_VERSION_MAJOR ||
rocprofiler_minor_version < ROCPROFILER_VERSION_MINOR)
return -1;
if (file_plugin != nullptr) return -1;
file_plugin = new file_plugin_t(data);
if (file_plugin->is_valid()) return 0;
// The plugin failed to initialized, destroy it and return an error.
delete file_plugin;
file_plugin = nullptr;
return -1;
}
ROCPROFILER_EXPORT void rocprofiler_plugin_finalize() {
if (!file_plugin) return;
delete file_plugin;
file_plugin = nullptr;
}
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
if (!file_plugin || !file_plugin->is_valid()) return -1;
return file_plugin->WriteBufferRecords(begin, end, session_id, buffer_id);
}
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record) {
if (!file_plugin || !file_plugin->is_valid()) return -1;
if (record.header.id.handle == 0) return 0;
file_plugin->FlushTracerRecord(record, rocprofiler_session_id_t{0}, rocprofiler_buffer_id_t{0});
return 0;
}
@@ -1,4 +1,4 @@
ROCProfilerV2: Collecting the following counters:
- GRBM_COUNT
Enabling Counter Collection
Dispatch_ID(1), GPU_ID(4), Queue_ID(1), Queue_Index(1), Process_ID(2185227), Thread_ID(2185227), Grid_Size(10), Workgroup_Size(10), LDS(0), Scratch_Size(0), Arch_VGPR(8), Accumulative_VGPR(0), SGPR(16), Wave_Size(64), Kernel_Name("helloworld"), Begin_Timestamp(140136998244448), End_Timestamp(140141431998688), Correlation_ID(0), GRBM_COUNT(14222.000000)
Dispatch_ID(1), GPU_ID(4), Queue_ID(1), Process_ID(2185227), Thread_ID(2185227), Grid_Size(10), Workgroup_Size(10), LDS(0), Scratch_Size(0), Arch_VGPR(8), Accumulative_VGPR(0), SGPR(16), Wave_Size(64), Kernel_Name("helloworld"), Begin_Timestamp(140136998244448), End_Timestamp(140141431998688), Correlation_ID(0), GRBM_COUNT(14222.000000)
@@ -1,4 +1,4 @@
ROCProfilerV2: Collecting the following counters:
- GRBM_COUNT
Enabling Counter Collection
Dispatch_ID(1), GPU_ID(4), Queue_ID(1), Queue_Index(0), Process_ID(2185273), Thread_ID(2185273), Grid_Size(1048576), Workgroup_Size(256), LDS(0), Scratch_Size(0), Arch_VGPR(8), Accumulative_VGPR(0), SGPR(16), Wave_Size(64), Kernel_Name("vectoradd_float"), Begin_Timestamp(140169881587808), End_Timestamp(140174279043296), Correlation_ID(0), GRBM_COUNT(40539.000000)
Dispatch_ID(1), GPU_ID(4), Queue_ID(1), Process_ID(2185273), Thread_ID(2185273), Grid_Size(1048576), Workgroup_Size(256), LDS(0), Scratch_Size(0), Arch_VGPR(8), Accumulative_VGPR(0), SGPR(16), Wave_Size(64), Kernel_Name("vectoradd_float"), Begin_Timestamp(140169881587808), End_Timestamp(140174279043296), Correlation_ID(0), GRBM_COUNT(40539.000000)
@@ -7,4 +7,6 @@ Enabling Counter Collection
device count and rank is1: 1
Rank Id: 0 | Device Id : 0 | Num Devices: 1
Max error: 0.000000
Dispatch_ID(1), GPU_ID(1), Queue_ID(1), Queue_Index(0), Process_ID(6293), Thread_ID(6293), Grid_Size(1048576), Workgroup_Size(256), LDS(0), Scratch_Size(0), Arch_VGPR(12), Accumulative_VGPR(0), SGPR(32), Wave_Size(64), Kernel_Name("add"), Begin_Timestamp(140016470724832), End_Timestamp(5), Correlation_ID(0), GRBM_COUNT(1108537.000000)
Max error: 0.000000
Dispatch_ID(1), GPU_ID(5), Queue_ID(1), Process_ID(2185441), Thread_ID(2185441), Grid_Size(1048576), Workgroup_Size(256), LDS(0), Scratch_Size(0), Arch_VGPR(12), Accumulative_VGPR(4), SGPR(32), Wave_Size(64), Kernel_Name("add"), Begin_Timestamp(139857691152944), End_Timestamp(139857835223272), Correlation_ID(0), GRBM_COUNT(499551.000000)
Dispatch_ID(1), GPU_ID(4), Queue_ID(1), Process_ID(2185436), Thread_ID(2185436), Grid_Size(1048576), Workgroup_Size(256), LDS(0), Scratch_Size(0), Arch_VGPR(12), Accumulative_VGPR(4), SGPR(32), Wave_Size(64), Kernel_Name("add"), Begin_Timestamp(140429257347632), End_Timestamp(140429483317480), Correlation_ID(0), GRBM_COUNT(499406.000000)
@@ -1,5 +1,4 @@
ROCProfilerV2: Collecting the following counters:
- GRBM_COUNT
Enabling Counter Collection
PASSED!
Dispatch_ID(1), GPU_ID(1), Queue_ID(1), Queue_Index(0), Process_ID(11822), Thread_ID(11822), Grid_Size(1), Workgroup_Size(1), LDS(0), Scratch_Size(0), Arch_VGPR(4), Accumulative_VGPR(0), SGPR(16), Wave_Size(64), Kernel_Name("hip_helloworld"), Begin_Timestamp(140470675179888), End_Timestamp(140470675179776), Correlation_ID(0), GRBM_COUNT(22315.000000)
Dispatch_ID(1), GPU_ID(4), Queue_ID(1), Process_ID(2186189), Thread_ID(2186189), Grid_Size(10), Workgroup_Size(10), LDS(0), Scratch_Size(0), Arch_VGPR(8), Accumulative_VGPR(0), SGPR(16), Wave_Size(64), Kernel_Name("helloworld"), Begin_Timestamp(140284033765472), End_Timestamp(140288419293408), Correlation_ID(0), GRBM_COUNT(13839.000000)
@@ -91,8 +91,6 @@ void tokenize_profiler_output(std::string line, profiler_kernel_info_t& kinfo) {
std::getline(tokenStream, token, ',');
kinfo.queue_id = token;
std::getline(tokenStream, token, ',');
kinfo.queue_index = token;
std::getline(tokenStream, token, ',');
kinfo.process_id = token;
std::getline(tokenStream, token, ',');
kinfo.thread_id = token;
@@ -101,9 +99,9 @@ void tokenize_profiler_output(std::string line, profiler_kernel_info_t& kinfo) {
std::getline(tokenStream, token, ',');
kinfo.workgroup_size = token;
std::getline(tokenStream, token, ',');
kinfo.lds = token;
kinfo.lds_per_workgroup = token;
std::getline(tokenStream, token, ',');
kinfo.scratch_size = token;
kinfo.scratch_per_workitem = token;
std::getline(tokenStream, token, ',');
kinfo.arch_vgpr = token;
std::getline(tokenStream, token, ',');
@@ -119,6 +117,8 @@ void tokenize_profiler_output(std::string line, profiler_kernel_info_t& kinfo) {
std::getline(tokenStream, token, ',');
kinfo.end_time = token;
std::getline(tokenStream, token, ',');
kinfo.correlation_id = token;
std::getline(tokenStream, token, ',');
kinfo.counter = token;
}
+3 -3
Melihat File
@@ -43,13 +43,12 @@ typedef struct {
std::string dispatch_id;
std::string gpu_id;
std::string queue_id;
std::string queue_index;
std::string process_id;
std::string thread_id;
std::string grid_size;
std::string workgroup_size;
std::string lds;
std::string scratch_size;
std::string lds_per_workgroup;
std::string scratch_per_workitem;
std::string arch_vgpr;
std::string accum_vgpr;
std::string sgpr;
@@ -57,6 +56,7 @@ typedef struct {
std::string kernel_name;
std::string begin_time;
std::string end_time;
std::string correlation_id;
std::string counter;
} profiler_kernel_info_t;