4fa165ec1a
* Add ToolsApiTable Add ToolsApiTable wrapping for scratch memory tracking * Add initial support for scratch memory tracking Buffering is implemented * cmake formatting (cmake-format) (#525) Co-authored-by: MythreyaK <MythreyaK@users.noreply.github.com> * source formatting (clang-format v11) (#524) Co-authored-by: MythreyaK <MythreyaK@users.noreply.github.com> * Add callback tracing for scratch Fixed the error where scratch tracking init was called irrespective of whether any client requested for it * Apply suggestions from code review Co-authored-by: Jonathan R. Madsen <jrmadsen@users.noreply.github.com> * Fix tools api copy/update Table were saved/updated incorrectly in previous commit. Also adds passing user data through the callback * Fix OpKind sequence for scratch tracking Previously scratch was using OpKind from rocprofiler-sdk, but templates were instantiated using API ID. These differ by 1 * Integration tests for scratch reporting Added buffer and callback integration tests for scratch reporting * source formatting (clang-format v11) (#550) Co-authored-by: MythreyaK <26112391+MythreyaK@users.noreply.github.com> * cmake formatting (cmake-format) (#551) Co-authored-by: MythreyaK <26112391+MythreyaK@users.noreply.github.com> * python formatting (black) (#549) Co-authored-by: MythreyaK <26112391+MythreyaK@users.noreply.github.com> * CI fixes * source formatting (clang-format v11) (#554) Co-authored-by: MythreyaK <26112391+MythreyaK@users.noreply.github.com> * Update api Rebase on main and updates based on PR feedback * Update scratch reporting and address PR comments - Added agent id to buffer records - Updated `test_internal_correlation_ids` - Is almost identical to one in async-copy - Updated scratch test to check for agent id - Updated queue id serialization in callback records (prints handle as nested key) - Remove `marker_api_traces` from scratch `test_internal_correlation_ids` validation test - Rename `amd_tools_api` to `scratch_memory` - Added doxygen 
comments - Remove scratch callback from `tool.cpp` - Replace assert with `LOF_IF` in `scratch_memory.cpp` * Update tools table Changed to match up with changes to hsa tables in main branch * Rework scratch memory structure * Update tests - Added suggestions from PR review, and updated tests accordingly * Misc cleanup * Update scratch test As of Apr 4th, `hsa_amd_agent_set_async_scratch_limit` is disabled. Note, > This API: `hsa_amd_agent_set_async_scratch_limit` is currently > disabled. We need some changes in CP firmware to be able to do this > and these changes are not ready yet. > With the current code, you will also not get notifications for > alternate-scratch allocations because this feature has been disabled > while CP firmware is making additional changes > We are hoping to have that feature enabled by ROCm-6.3 * Minor update to lib/rocprofiler-sdk/internal_threading.* - delay destruction of shared_ptrs of the tasks to prevent rare (but possible) data race on the destruction of the shared_ptr --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: MythreyaK <MythreyaK@users.noreply.github.com> Co-authored-by: Jonathan R. Madsen <jrmadsen@users.noreply.github.com> Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
230 wiersze
8.2 KiB
Bash
Executable File
230 wiersze
8.2 KiB
Bash
Executable File
#!/bin/bash
#
# rocprofv3 launcher: translates command-line options into ROCPROF_* environment
# variables and runs the target application with the rocprofiler-sdk tool
# library preloaded.

# Abort on the first failing command and on failures inside pipelines.
set -eo pipefail

# Directory containing this script (symlinks resolved) and its parent, which is
# used as the root for locating the libraries below.
ROCPROFV3_DIR=$(dirname -- "$(realpath "${BASH_SOURCE[0]}")")
ROCM_DIR=$(dirname -- "${ROCPROFV3_DIR}")

# Defaults; a value already present in the environment takes precedence.
# The expansions are quoted so that the values are never word-split or globbed.
: "${ROCPROFILER_LIBRARY_CTOR:=1}"
: "${ROCPROF_OUTPUT_PATH:="."}"
: "${ROCPROF_OUTPUT_PATH_INTERNAL:="."}"
: "${ROCPROF_OUTPUT_FILE_NAME:=""}"
: "${ROCPROF_COUNTERS_PATH:=""}"
: "${ROCPROF_PRELOAD:=""}"
: "${ROCPROF_TOOL_LIBRARY:="${ROCM_DIR}/lib/rocprofiler-sdk/librocprofiler-sdk-tool.so"}"
: "${ROCPROF_SDK_LIBRARY:="${ROCM_DIR}/lib/librocprofiler-sdk.so"}"

export ROCPROFILER_LIBRARY_CTOR

# Define color codes
# These are stored as literal backslash sequences; `echo -e` turns them into
# the actual ANSI escapes when the usage text is printed.
GREEN='\033[0;32m'
GREY='\033[0;90m'
RESET='\033[0m'

#######################################
# Print the rocprofv3 help text and terminate the script.
# Globals:   GREEN, GREY, RESET (read) - color sequences for `echo -e`
# Arguments: $1 - exit status to terminate with (defaults to 1 when omitted
#                 or empty)
# Outputs:   writes the help text to stdout
# Returns:   never returns; always exits with the requested status
#######################################
usage() {
  local EC=${1:-1}

  echo -e "${RESET}ROCProfilerV3 Run Script Usage:"
  echo -e "${GREEN}-h | --help ${RESET} For showing this message"
  echo -e ""
  echo -e "${GREEN}--hip-trace ${RESET} For Collecting HIP Traces (runtime + compiler)"
  echo -e "${GREEN}--hip-runtime-trace ${RESET} For Collecting HIP Runtime API Traces"
  echo -e "${GREEN}--hip-compiler-trace ${RESET} For Collecting HIP Compiler generated code Traces"
  echo -e ""
  echo -e "${GREEN}--marker-trace ${RESET} For Collecting Marker (ROCTx) Traces"
  echo -e "${GREEN}--kernel-trace ${RESET} For Collecting Kernel Dispatch Traces"
  echo -e "${GREEN}--memory-copy-trace ${RESET} For Collecting Memory Copy Traces"
  echo -e "${GREEN}--scratch-memory-trace ${RESET} For Collecting Scratch Memory operations Traces"
  echo -e ""
  echo -e "${GREEN}--hsa-trace ${RESET} For Collecting HSA API Traces (core + amd + image + finalizer)"
  echo -e "${GREEN}--hsa-core-trace ${RESET} For Collecting HSA API Traces (core API)"
  echo -e "${GREEN}--hsa-amd-trace ${RESET} For Collecting HSA API Traces (AMD-extension API)"
  echo -e "${GREEN}--hsa-image-trace ${RESET} For Collecting HSA API Traces (Image-extension API)"
  echo -e "${GREEN}--hsa-finalizer-trace ${RESET} For Collecting HSA API Traces (Finalizer-extension API)"
  echo -e ""
  echo -e "${GREEN}--sys-trace ${RESET} For Collecting HIP,HSA, Memory Copy, (marker)ROCTx and Kernel dispatch traces\n"
  echo -e ""
  # The long spelling shown here is the one the option parser matches.
  echo -e "${GREEN}-o | --output-file-name ${RESET} For the output file name"
  echo -e "\t#${GREY} usage (with current dir): rocprofv3 --hsa-trace -o <file_name> <executable>"
  echo -e "\t#${GREY} usage (with custom dir): rocprofv3 --hsa-trace -d <out_dir> -o <file_name> <executable>${RESET}\n"
  echo -e ""
  echo -e "${GREEN}-d | --output-directory ${RESET} For adding output path where the output files will be saved"
  echo -e "\t#${GREY} usage (with custom dir): rocprofv3 --hsa-trace -d <out_dir> <executable>${RESET}"
  echo -e ""
  echo -e "${GREEN}-M | --mangled-kernels ${RESET} Do not demangle the kernel names"
  echo -e "${GREEN}-T | --truncate-kernels ${RESET} Truncate the demangled kernel names"
  echo -e ""
  echo -e "${GREEN}-L | --list-metrics ${RESET} List metrics for counter collection"
  echo -e "${GREEN}-i | --input ${RESET} For counter collection "
  echo -e "\t#${GREY} Input file .txt format, automatically rerun application for every profiling features line"
  echo -e "\t# Perf counters group 1"
  echo -e "\tpmc : Wavefronts VALUInsts SALUInsts SFetchInsts FlatVMemInsts LDSInsts"
  echo -e "\t# Perf counters group 2"
  echo -e "\tpmc : WriteSize L2CacheHit ${RESET}"
  echo -e ""

  exit "${EC}"
}

# Nothing to do without at least one (non-empty) argument: print the help text
# and exit with status 1.
if [[ -z "${1-}" ]]; then
  usage 1
fi

# Libraries to preload into the profiled application: anything already
# requested through ROCPROF_PRELOAD comes first, followed by the tool library
# and the SDK library.
ROCPROF_PRELOAD="${ROCPROF_PRELOAD:+${ROCPROF_PRELOAD}:}${ROCPROF_TOOL_LIBRARY}:${ROCPROF_SDK_LIBRARY}"

# Append the tool library to any tool libraries already listed in the
# environment.
ROCP_TOOL_LIBRARIES="${ROCP_TOOL_LIBRARIES:+${ROCP_TOOL_LIBRARIES}:}${ROCPROF_TOOL_LIBRARY}"

# Put ${ROCM_DIR}/lib in front of the library search path. The separator is
# only added when LD_LIBRARY_PATH already has content: a trailing ':' would be
# an empty entry, which the dynamic loader interprets as the current directory.
LD_LIBRARY_PATH="${ROCM_DIR}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

export ROCP_TOOL_LIBRARIES
export LD_LIBRARY_PATH

#######################################
# Consume the leading rocprofv3 options of the given argument list and export
# the ROCPROF_* variables that correspond to them. Parsing stops at "--"
# (which is consumed), at the first argument that does not start with "-",
# or when the arguments run out.
# Globals:   ROCPROF_* (exported per option), ROCPROF_OUTPUT_PATH_INTERNAL
#            (written by -d), ROCPROF_NUM_OPTION_ARGS (written: number of
#            arguments consumed, so the caller can shift them away)
# Arguments: the script's command line
# Outputs:   error text and the help text (via usage) on invalid input
# Returns:   0; invalid input and -h/--help terminate the script through usage
#######################################
parse_options() {
  local -r num_args=$#

  while true; do
    case "${1-}" in
      -h | --help)
        usage 0
        ;;
      -M | --mangled-kernels)
        export ROCPROF_DEMANGLE_KERNELS=0
        shift
        ;;
      -T | --truncate-kernels)
        export ROCPROF_TRUNCATE_KERNELS=1
        shift
        ;;
      -i | --input)
        # The counter input file must be given and readable.
        if [[ -n "${2-}" && -r "$2" ]]; then
          export ROCPROF_COUNTERS_PATH=$2
          export ROCPROF_COUNTER_COLLECTION=1
        else
          echo -e "Error: input file \"${2-}\" doesn't exist!"
          usage 1
        fi
        shift 2
        ;;
      # --output-file is accepted as an alias of --output-file-name.
      -o | --output-file-name | --output-file)
        if [[ -n "${2-}" ]]; then
          export ROCPROF_OUTPUT_FILE_NAME=$2
          export ROCPROF_OUTPUT_LIST_METRICS_FILE=1
        else
          usage 1
        fi
        shift 2
        ;;
      -d | --output-directory)
        if [[ -n "${2-}" ]]; then
          ROCPROF_OUTPUT_PATH_INTERNAL=$2
          export ROCPROF_OUTPUT_PATH=$ROCPROF_OUTPUT_PATH_INTERNAL
          export ROCPROF_OUTPUT_LIST_METRICS_FILE=1
        else
          usage 1
        fi
        shift 2
        ;;
      --hsa-trace)
        export ROCPROF_HSA_CORE_API_TRACE=1
        export ROCPROF_HSA_AMD_EXT_API_TRACE=1
        export ROCPROF_HSA_IMAGE_EXT_API_TRACE=1
        export ROCPROF_HSA_FINALIZER_EXT_API_TRACE=1
        shift
        ;;
      --hsa-core-trace)
        export ROCPROF_HSA_CORE_API_TRACE=1
        shift
        ;;
      --hsa-amd-trace)
        export ROCPROF_HSA_AMD_EXT_API_TRACE=1
        shift
        ;;
      --hsa-image-trace)
        export ROCPROF_HSA_IMAGE_EXT_API_TRACE=1
        shift
        ;;
      --hsa-finalizer-trace)
        export ROCPROF_HSA_FINALIZER_EXT_API_TRACE=1
        shift
        ;;
      -L | --list-metrics)
        export ROCPROF_LIST_METRICS=1
        shift
        ;;
      --kernel-trace)
        export ROCPROF_KERNEL_TRACE=1
        shift
        ;;
      --memory-copy-trace)
        export ROCPROF_MEMORY_COPY_TRACE=1
        shift
        ;;
      --scratch-memory-trace)
        export ROCPROF_SCRATCH_MEMORY_TRACE=1
        shift
        ;;
      --marker-trace)
        export ROCPROF_MARKER_API_TRACE=1
        shift
        ;;
      --hip-trace)
        export ROCPROF_HIP_RUNTIME_API_TRACE=1
        export ROCPROF_HIP_COMPILER_API_TRACE=1
        shift
        ;;
      --hip-runtime-trace)
        export ROCPROF_HIP_RUNTIME_API_TRACE=1
        shift
        ;;
      --hip-compiler-trace)
        export ROCPROF_HIP_COMPILER_API_TRACE=1
        shift
        ;;
      --sys-trace)
        export ROCPROF_HSA_CORE_API_TRACE=1
        export ROCPROF_HSA_AMD_EXT_API_TRACE=1
        export ROCPROF_HSA_IMAGE_EXT_API_TRACE=1
        export ROCPROF_HSA_FINALIZER_EXT_API_TRACE=1
        export ROCPROF_HIP_RUNTIME_API_TRACE=1
        export ROCPROF_HIP_COMPILER_API_TRACE=1
        export ROCPROF_MARKER_API_TRACE=1
        export ROCPROF_KERNEL_TRACE=1
        export ROCPROF_MEMORY_COPY_TRACE=1
        shift
        ;;
      --)
        # Explicit end of options; everything after it is the command to run.
        shift
        break
        ;;
      -*)
        echo -e "Wrong option \"$1\", Please use the following options:\n"
        usage 1
        ;;
      *)
        # First non-option argument (or no arguments left).
        break
        ;;
    esac
  done

  ROCPROF_NUM_OPTION_ARGS=$((num_args - $#))
}

# Parse the options, then drop them so that "$@" is the command to profile.
parse_options "$@"
shift "${ROCPROF_NUM_OPTION_ARGS}"

# read input counter file
PMC_LINES=()

#######################################
# Append every non-empty line of a file to the PMC_LINES array. A final line
# without a trailing newline is still picked up.
# Globals:   PMC_LINES (appended to)
# Arguments: $1 - path of the file to read
#######################################
read_counter_lines() {
  local entry
  while IFS= read -r entry || [[ -n "${entry}" ]]; do
    # skip empty lines
    [[ -n "${entry}" ]] || continue
    PMC_LINES+=("${entry}")
  done <"$1"
}

if [[ -n "${ROCPROF_COUNTERS_PATH}" ]]; then
  read_counter_lines "${ROCPROF_COUNTERS_PATH}"
fi

# Only the metrics listing (without counter input lines) can run without an
# application. In every other mode an empty command line would make the
# launch below a silent no-op that exits successfully, so reject it here.
if [[ $# -eq 0 && (${#PMC_LINES[@]} -gt 0 || -z "${ROCPROF_LIST_METRICS:-}") ]]; then
  echo -e "Error: no application to profile was specified\n" >&2
  usage 1
fi

if [[ ${#PMC_LINES[@]} -gt 0 ]]; then
  # for counter collection: run the application once per "pmc" line
  COUNTER=1
  for counters in "${PMC_LINES[@]}"; do
    export ROCPROF_COUNTERS="${counters}"
    # Lines that do not mention "pmc" do not trigger a run.
    if [[ "${counters}" != *pmc* ]]; then
      continue
    fi

    # Each run gets its own pmc_<N> directory below the output directory.
    RESULT_PATH="${ROCPROF_OUTPUT_PATH_INTERNAL}/pmc_${COUNTER}"
    if [[ -n "${ROCPROF_OUTPUT_FILE_NAME}" || -n "${ROCPROF_OUTPUT_PATH}" ]]; then
      export ROCPROF_OUTPUT_PATH=${RESULT_PATH}
    fi
    # Plain arithmetic assignment: unlike ((COUNTER++)) its exit status can
    # never be non-zero, so it cannot trip `set -e`.
    COUNTER=$((COUNTER + 1))

    LD_PRELOAD="${ROCPROF_PRELOAD}" "$@"

    if [[ -n "${ROCPROF_OUTPUT_PATH}" ]]; then
      echo -e "\nThe output path for the following counters: $ROCPROF_OUTPUT_PATH"
    fi
  done
elif [[ -n "${ROCPROF_LIST_METRICS:-}" ]]; then
  LD_PRELOAD="${ROCPROF_PRELOAD}" exec "${ROCM_DIR}/lib/rocprofiler-sdk/rocprofv3-trigger-list-metrics"
else
  # for non counter collection. e.g: tracing
  LD_PRELOAD="${ROCPROF_PRELOAD}" exec "$@"
fi