ROC profiler prototype sources importing
Cette révision appartient à :
@@ -0,0 +1,100 @@
|
||||
################################################################################
|
||||
##
|
||||
## Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
##
|
||||
## MIT LICENSE:
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
## this software and associated documentation files (the "Software"), to deal in
|
||||
## the Software without restriction, including without limitation the rights to
|
||||
## use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
## of the Software, and to permit persons to whom the Software is furnished to do
|
||||
## so, subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in all
|
||||
## copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
## SOFTWARE.
|
||||
##
|
||||
################################################################################
|
||||
|
||||
cmake_minimum_required ( VERSION 3.5.0 )
|
||||
|
||||
## Verbose output.
|
||||
set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE )
|
||||
|
||||
## Set module name and project name.
|
||||
set ( ROCPROFILER_NAME "rocprofiler" )
|
||||
set ( ROCPROFILER_TARGET "${ROCPROFILER_NAME}64" )
|
||||
set ( ROCPROFILER_LIBRARY "lib${ROCPROFILER_TARGET}" )
|
||||
project ( ${ROCPROFILER_TARGET} )
|
||||
|
||||
## Adding default path cmake modules
|
||||
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules" )
|
||||
## Include common cmake modules
|
||||
include ( utils )
|
||||
## Set build environment
|
||||
include ( env )
|
||||
|
||||
## Setup the package version.
|
||||
get_version ( "1.0.0" )
|
||||
message ( "-- LIB-VERSION: ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}" )
|
||||
|
||||
set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} )
|
||||
set ( BUILD_VERSION_MINOR ${VERSION_MINOR} )
|
||||
set ( BUILD_VERSION_PATCH ${VERSION_PATCH} )
|
||||
set ( LIB_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )
|
||||
if ( DEFINED VERSION_BUILD AND NOT ${VERSION_BUILD} STREQUAL "" )
|
||||
message ( "VERSION BUILD DEFINED ${VERSION_BUILD}" )
|
||||
set ( BUILD_VERSION_PATCH "${BUILD_VERSION_PATCH}-${VERSION_BUILD}" )
|
||||
endif ()
|
||||
set ( BUILD_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )
|
||||
|
||||
## Set target and root/lib/test directory
|
||||
set ( TARGET_NAME "${ROCPROFILER_TARGET}" )
|
||||
set ( ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" )
|
||||
set ( LIB_DIR "${ROOT_DIR}/src" )
|
||||
set ( TEST_DIR "${ROOT_DIR}/test" )
|
||||
|
||||
## Build library
|
||||
include ( ${LIB_DIR}/CMakeLists.txt )
|
||||
|
||||
## Set the VERSION and SOVERSION values
|
||||
set_property ( TARGET ${TARGET_NAME} PROPERTY VERSION "${LIB_VERSION_STRING}" )
|
||||
set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${BUILD_VERSION_MAJOR}" )
|
||||
|
||||
## If the library is a release, strip the target library
|
||||
if ( "${CMAKE_BUILD_TYPE}" STREQUAL release )
|
||||
add_custom_command ( TARGET ${ROCPROFILER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so )
|
||||
endif ()
|
||||
|
||||
## Build tests
|
||||
add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test )
|
||||
|
||||
## Add the install directives for the runtime library.
|
||||
install ( TARGETS ${ROCPROFILER_TARGET} LIBRARY DESTINATION ${ROCPROFILER_NAME}/lib COMPONENT ${ROCPROFILER_LIBRARY} )
|
||||
|
||||
## Add the packaging directives for the runtime library.
|
||||
set ( CPACK_PACKAGE_NAME ${ROCPROFILER_NAME} )
|
||||
set ( CPACK_PACKAGE_VENDOR "AMD" )
|
||||
set ( CPACK_PACKAGE_VERSION_MAJOR ${BUILD_VERSION_MAJOR} )
|
||||
set ( CPACK_PACKAGE_VERSION_MINOR ${BUILD_VERSION_MINOR} )
|
||||
set ( CPACK_PACKAGE_VERSION_PATCH ${BUILD_VERSION_PATCH} )
|
||||
set ( CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc." )
|
||||
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "ROCPROFILER library for AMD HSA runtime API extension support" )
|
||||
set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md" )
|
||||
|
||||
## Debian package specific variables
|
||||
set ( CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/RadeonOpenCompute/HSA-RocProfiler" )
|
||||
set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst;${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm" )
|
||||
|
||||
## RPM package specific variables
|
||||
set ( CPACK_RPM_PRE_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" )
|
||||
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun" )
|
||||
|
||||
include ( CPack )
|
||||
@@ -0,0 +1,19 @@
|
||||
#/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
do_ldconfig() {
|
||||
echo /opt/rocm/librocprofiler/lib > /etc/ld.so.conf.d/libhsa-rocprofiler64.conf && ldconfig
|
||||
}
|
||||
|
||||
case "$1" in
|
||||
configure)
|
||||
do_ldconfig
|
||||
;;
|
||||
abort-upgrade|abort-remove|abort-deconfigure)
|
||||
echo "$1"
|
||||
;;
|
||||
*)
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
@@ -0,0 +1,18 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
rm_ldconfig() {
|
||||
rm -f /etc/ld.so.conf.d/libhsa-rocprofiler64.conf && ldconfig
|
||||
}
|
||||
|
||||
case "$1" in
|
||||
remove)
|
||||
rm_ldconfig
|
||||
;;
|
||||
purge)
|
||||
;;
|
||||
*)
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
@@ -0,0 +1,39 @@
|
||||
HSA ROC Profiler LICENSE
|
||||
|
||||
The University of Illinois/NCSA
|
||||
Open Source License (NCSA)
|
||||
|
||||
Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Developed by:
|
||||
|
||||
AMD Research and AMD HSA Software Development
|
||||
|
||||
Advanced Micro Devices, Inc.
|
||||
|
||||
www.amd.com
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal with the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimers.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimers in
|
||||
the documentation and/or other materials provided with the distribution.
|
||||
- Neither the names of Advanced Micro Devices, Inc,
|
||||
nor the names of its contributors may be used to endorse or promote
|
||||
products derived from this Software without specific prior written
|
||||
permission.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS WITH THE SOFTWARE.
|
||||
@@ -0,0 +1 @@
|
||||
echo /opt/rocm/librocprofiler/lib > /etc/ld.so.conf.d/libhsa-rocprofiler64.conf && ldconfig
|
||||
@@ -0,0 +1 @@
|
||||
rm -f /etc/ld.so.conf.d/libhsa-rocprofiler64.conf && ldconfig
|
||||
@@ -0,0 +1,50 @@
|
||||
ROC Profiler library.
|
||||
Profiling with metrics and traces based perfcounters (PMC) and SQ threadtraces (SQTT)
|
||||
and AqlProfile HSA extension.
|
||||
|
||||
Library supports GFX8/GFX9 APIs.
|
||||
The library source tree:
|
||||
- doc - Documantation
|
||||
- inc/rocprofiler.h - Library public API
|
||||
- src - Library sources
|
||||
- core - Library API sources
|
||||
- util - Library utils sources
|
||||
- xml - XML parser
|
||||
- test - Library test suite
|
||||
- ctrl - Test controll
|
||||
- util - Test utils
|
||||
- simple_convolution - Simple convolution test kernel
|
||||
|
||||
Build environment:
|
||||
|
||||
$ export CMAKE_PREFIX_PATH="<path to hsa-runtime includes>;<path to hsa-runtime library>"
|
||||
$ export CMAKE_BUILD_TYPE=<debug|release> # release by default
|
||||
$ export CMAKE_DEBUG_TRACE=1 # 1 to enable debug tracing
|
||||
|
||||
To build with the current installed ROCM:
|
||||
|
||||
$ cd .../rocprofiler
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
|
||||
$ export CMAKE_PREFIX_PATH=/home/evgeny/git/compute/out/ubuntu-16.04/16.04
|
||||
$ cmake ..
|
||||
$ make
|
||||
|
||||
$ cmake -DCMAKE_PREFIX_PATH=/opt/rocm ..
|
||||
$ make
|
||||
|
||||
To run the test:
|
||||
|
||||
$ cd .../rocprofiler/build
|
||||
$ export LD_LIBRARY_PATH=$PWD
|
||||
$ export HSA_TOOLS_LIB=librocprofiler64.so
|
||||
$ run.sh
|
||||
|
||||
To enabled error messages logging to '/tmp/rocprofiler_log.txt':
|
||||
|
||||
$ export ROCPROFILER_LOG=1
|
||||
|
||||
To enable verbose tracing:
|
||||
|
||||
$ export ROCPROFILER_TRACE=1
|
||||
@@ -0,0 +1,60 @@
|
||||
---
|
||||
Language: Cpp
|
||||
# BasedOnStyle: Google
|
||||
AccessModifierOffset: -1
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
AlignEscapedNewlinesLeft: false
|
||||
AlignTrailingComments: true
|
||||
AlignConsecutiveAssignments: false
|
||||
AlignOperands: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortBlocksOnASingleLine: false
|
||||
AllowShortIfStatementsOnASingleLine: true
|
||||
AllowShortLoopsOnASingleLine: true
|
||||
AllowShortFunctionsOnASingleLine: All
|
||||
AlwaysBreakAfterDefinitionReturnType: false
|
||||
AlwaysBreakTemplateDeclarations: false
|
||||
AlwaysBreakBeforeMultilineStrings: true
|
||||
BreakBeforeBinaryOperators: false
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializersBeforeComma: false
|
||||
BinPackParameters: true
|
||||
ColumnLimit: 100
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
IndentCaseLabels: true
|
||||
IndentWrappedFunctionNames: false
|
||||
IndentFunctionDeclarationAfterType: false
|
||||
MaxEmptyLinesToKeep: 2
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
NamespaceIndentation: None
|
||||
ObjCSpaceAfterProperty: false
|
||||
ObjCSpaceBeforeProtocolList: false
|
||||
PenaltyBreakBeforeFirstCallParameter: 1
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 200
|
||||
DerivePointerAlignment: false
|
||||
PointerAlignment: Left
|
||||
SpacesBeforeTrailingComments: 2
|
||||
Cpp11BracedListStyle: true
|
||||
Standard: Auto
|
||||
IndentWidth: 2
|
||||
TabWidth: 8
|
||||
UseTab: Never
|
||||
BreakBeforeBraces: Attach
|
||||
SpacesInParentheses: false
|
||||
SpacesInAngles: false
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
ContinuationIndentWidth: 4
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
|
||||
SpaceBeforeParens: ControlStatements
|
||||
DisableFormat: false
|
||||
SortIncludes: false
|
||||
...
|
||||
@@ -0,0 +1,112 @@
|
||||
## Build is not supported on Windows plaform
|
||||
if ( WIN32 )
|
||||
message ( FATAL_ERROR "Windows build is not supported." )
|
||||
endif ()
|
||||
|
||||
## Compiler Preprocessor definitions.
|
||||
add_definitions ( -D__linux__ )
|
||||
add_definitions ( -DUNIX_OS )
|
||||
add_definitions ( -DLINUX )
|
||||
add_definitions ( -D__AMD64__ )
|
||||
add_definitions ( -D__x86_64__ )
|
||||
add_definitions ( -DAMD_INTERNAL_BUILD )
|
||||
add_definitions ( -DLITTLEENDIAN_CPU=1 )
|
||||
add_definitions ( -DHSA_LARGE_MODEL= )
|
||||
add_definitions ( -DHSA_DEPRECATED= )
|
||||
|
||||
## Linux Compiler options
|
||||
set ( CMAKE_CXX_FLAGS "-std=c++11")
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=return-type" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC" )
|
||||
|
||||
set ( CMAKE_SHARED_LINKER_FLAGS "-Wl,-Bdynamic -Wl,-z,noexecstack" )
|
||||
|
||||
set ( CMAKE_SKIP_BUILD_RPATH TRUE )
|
||||
|
||||
## CLANG options
|
||||
if ( "$ENV{CXX}" STREQUAL "/usr/bin/clang++" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ferror-limit=1000000" )
|
||||
endif()
|
||||
|
||||
## Enable debug trace
|
||||
if ( DEFINED ENV{CMAKE_DEBUG_TRACE} )
|
||||
add_definitions ( -DDEBUG_TRACE=1 )
|
||||
endif()
|
||||
|
||||
## Enable HSA APIs intersepting
|
||||
if ( NOT "$ENV{CMAKE_HSA_INTERCEPT}" STREQUAL "no" )
|
||||
add_definitions ( -DROCP_HSA_INTERCEPT=1 )
|
||||
endif()
|
||||
|
||||
## Enable HSA proxy queue
|
||||
if ( NOT "$ENV{CMAKE_HSA_PROXY}" STREQUAL "no" )
|
||||
add_definitions ( -DROCP_HSA_PROXY=1 )
|
||||
endif()
|
||||
|
||||
## Check env vars
|
||||
if ( NOT DEFINED CMAKE_BUILD_TYPE OR "${CMAKE_BUILD_TYPE}" STREQUAL "" )
|
||||
if ( DEFINED ENV{CMAKE_BUILD_TYPE} )
|
||||
set ( CMAKE_BUILD_TYPE $ENV{CMAKE_BUILD_TYPE} )
|
||||
endif()
|
||||
endif()
|
||||
if ( NOT DEFINED CMAKE_PREFIX_PATH AND DEFINED ENV{CMAKE_PREFIX_PATH} )
|
||||
set ( CMAKE_PREFIX_PATH $ENV{CMAKE_PREFIX_PATH} )
|
||||
endif()
|
||||
|
||||
## Extend Compiler flags based on build type
|
||||
string ( TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE )
|
||||
if ( "${CMAKE_BUILD_TYPE}" STREQUAL debug )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb" )
|
||||
set ( CMAKE_BUILD_TYPE "debug" )
|
||||
else ()
|
||||
set ( CMAKE_BUILD_TYPE "release" )
|
||||
endif ()
|
||||
|
||||
## Extend Compiler flags based on Processor architecture
|
||||
if ( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64" )
|
||||
set ( NBIT 64 )
|
||||
set ( NBITSTR "64" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2" )
|
||||
elseif ( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86" )
|
||||
set ( NBIT 32 )
|
||||
set ( NBITSTR "" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" )
|
||||
endif ()
|
||||
|
||||
## Find hsa-runtime headers/lib
|
||||
find_file ( HSA_RUNTIME_INC "hsa/hsa.h" )
|
||||
get_filename_component ( HSA_RUNTIME_INC_PATH ${HSA_RUNTIME_INC} DIRECTORY )
|
||||
if ( "${HSA_RUNTIME_INC_PATH}" STREQUAL "" )
|
||||
find_file ( HSA_RUNTIME_INC "hsa.h" )
|
||||
get_filename_component ( HSA_RUNTIME_INC_PATH ${HSA_RUNTIME_INC} DIRECTORY )
|
||||
endif()
|
||||
|
||||
set ( HSA_RUNTIME_NAME "hsa-runtime64" )
|
||||
find_library ( HSA_RUNTIME_LIB "lib${HSA_RUNTIME_NAME}.so" )
|
||||
get_filename_component ( HSA_RUNTIME_LIB_PATH ${HSA_RUNTIME_LIB} DIRECTORY )
|
||||
|
||||
set ( HSA_KMT_NAME "hsakmt" )
|
||||
find_library ( HSA_KMT_LIB "lib${HSA_KMT_NAME}.so" )
|
||||
get_filename_component ( HSA_KMT_LIB_PATH ${HSA_KMT_LIB} DIRECTORY )
|
||||
|
||||
set ( API_PATH ${HSA_RUNTIME_INC_PATH} )
|
||||
|
||||
## Basic Tool Chain Information
|
||||
message ( "----------------NBIT: ${NBIT}" )
|
||||
message ( "-----------BuildType: ${CMAKE_BUILD_TYPE}" )
|
||||
message ( "------------Compiler: ${CMAKE_CXX_COMPILER}" )
|
||||
message ( "----Compiler-Version: ${CMAKE_CXX_COMPILER_VERSION}" )
|
||||
message ( "-----HSA-Runtime-Inc: ${HSA_RUNTIME_INC_PATH}" )
|
||||
message ( "-----HSA-Runtime-Lib: ${HSA_RUNTIME_LIB_PATH}" )
|
||||
message ( "------------API-path: ${API_PATH}" )
|
||||
message ( "-----CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}" )
|
||||
message ( "---CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}" )
|
||||
@@ -0,0 +1,116 @@
|
||||
################################################################################
|
||||
##
|
||||
## The University of Illinois/NCSA
|
||||
## Open Source License (NCSA)
|
||||
##
|
||||
## Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved.
|
||||
##
|
||||
## Developed by:
|
||||
##
|
||||
## AMD Research and AMD HSA Software Development
|
||||
##
|
||||
## Advanced Micro Devices, Inc.
|
||||
##
|
||||
## www.amd.com
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to
|
||||
## deal with the Software without restriction, including without limitation
|
||||
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
## and#or sell copies of the Software, and to permit persons to whom the
|
||||
## Software is furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## - Redistributions of source code must retain the above copyright notice,
|
||||
## this list of conditions and the following disclaimers.
|
||||
## - Redistributions in binary form must reproduce the above copyright
|
||||
## notice, this list of conditions and the following disclaimers in
|
||||
## the documentation and#or other materials provided with the distribution.
|
||||
## - Neither the names of Advanced Micro Devices, Inc,
|
||||
## nor the names of its contributors may be used to endorse or promote
|
||||
## products derived from this Software without specific prior written
|
||||
## permission.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
## DEALINGS WITH THE SOFTWARE.
|
||||
##
|
||||
################################################################################
|
||||
|
||||
## Parses the VERSION_STRING variable and places
|
||||
## the first, second and third number values in
|
||||
## the major, minor and patch variables.
|
||||
function( parse_version VERSION_STRING )
|
||||
|
||||
string ( FIND ${VERSION_STRING} "-" STRING_INDEX )
|
||||
|
||||
if ( ${STRING_INDEX} GREATER -1 )
|
||||
math ( EXPR STRING_INDEX "${STRING_INDEX} + 1" )
|
||||
string ( SUBSTRING ${VERSION_STRING} ${STRING_INDEX} -1 VERSION_BUILD )
|
||||
endif ()
|
||||
|
||||
string ( REGEX MATCHALL "[0123456789]+" VERSIONS ${VERSION_STRING} )
|
||||
list ( LENGTH VERSIONS VERSION_COUNT )
|
||||
|
||||
if ( ${VERSION_COUNT} GREATER 0)
|
||||
list ( GET VERSIONS 0 MAJOR )
|
||||
set ( VERSION_MAJOR ${MAJOR} PARENT_SCOPE )
|
||||
set ( TEMP_VERSION_STRING "${MAJOR}" )
|
||||
endif ()
|
||||
|
||||
if ( ${VERSION_COUNT} GREATER 1 )
|
||||
list ( GET VERSIONS 1 MINOR )
|
||||
set ( VERSION_MINOR ${MINOR} PARENT_SCOPE )
|
||||
set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${MINOR}" )
|
||||
endif ()
|
||||
|
||||
if ( ${VERSION_COUNT} GREATER 2 )
|
||||
list ( GET VERSIONS 2 PATCH )
|
||||
set ( VERSION_PATCH ${PATCH} PARENT_SCOPE )
|
||||
set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${PATCH}" )
|
||||
endif ()
|
||||
|
||||
if ( DEFINED VERSION_BUILD )
|
||||
set ( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE )
|
||||
endif ()
|
||||
|
||||
set ( VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE )
|
||||
|
||||
endfunction ()
|
||||
|
||||
## Gets the current version of the repository
|
||||
## using versioning tags and git describe.
|
||||
## Passes back a packaging version string
|
||||
## and a library version string.
|
||||
function ( get_version DEFAULT_VERSION_STRING )
|
||||
|
||||
parse_version ( ${DEFAULT_VERSION_STRING} )
|
||||
|
||||
find_program ( GIT NAMES git )
|
||||
|
||||
if ( GIT )
|
||||
|
||||
execute_process ( COMMAND "git describe --dirty --long --match [0-9]* 2>/dev/null"
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GIT_TAG_STRING
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
RESULT_VARIABLE RESULT )
|
||||
|
||||
if ( ${RESULT} EQUAL 0 )
|
||||
|
||||
parse_version ( ${GIT_TAG_STRING} )
|
||||
|
||||
endif ()
|
||||
|
||||
endif ()
|
||||
|
||||
set( VERSION_STRING "${VERSION_STRING}" PARENT_SCOPE )
|
||||
set( VERSION_MAJOR "${VERSION_MAJOR}" PARENT_SCOPE )
|
||||
set( VERSION_MINOR "${VERSION_MINOR}" PARENT_SCOPE )
|
||||
set( VERSION_PATCH "${VERSION_PATCH}" PARENT_SCOPE )
|
||||
set( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE )
|
||||
|
||||
endfunction()
|
||||
Fichier binaire non affiché.
@@ -0,0 +1,272 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// ROC Profiler API
|
||||
//
|
||||
// The goal of the implementation is to provide a HW specific low-level
|
||||
// performance analysis interface for profiling of GPU compute applications.
|
||||
// The profiling includes HW performance counters (PMC) with complex
|
||||
// performance metrics and thread traces (SQTT). The profiling is supported
|
||||
// by the SQTT, PMC and Callback APIs.
|
||||
//
|
||||
// The library can be used by a tool library loaded by HSA runtime or by
|
||||
// higher level HW independent performance analysis API like PAPI.
|
||||
//
|
||||
// The library is written on C and will be based on AQLprofile AMD specific
|
||||
// HSA extension. The library implementation requires HSA API intercepting and
|
||||
// a profiling queue supporting a submit callback interface.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef INC_ROCPROFILER_H_
|
||||
#define INC_ROCPROFILER_H_
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_api_trace.h>
|
||||
#include <hsa_ven_amd_aqlprofile.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Profiling info
|
||||
//
|
||||
// Profiling info objects have profiling feature info, type, parameters and data
|
||||
// Also profiling data samplaes can be iterated using a callback
|
||||
|
||||
// Profiling feature type
|
||||
typedef enum {
|
||||
ROCPROFILER_TYPE_METRIC = 0,
|
||||
ROCPROFILER_TYPE_TRACE = 1
|
||||
} rocprofiler_type_t;
|
||||
|
||||
// Profiling feture parameter
|
||||
typedef hsa_ven_amd_aqlprofile_parameter_t rocprofiler_parameter_t;
|
||||
|
||||
// Profiling data kind
|
||||
typedef enum {
|
||||
ROCPROFILER_UNINIT = 0,
|
||||
ROCPROFILER_INT32 = 1,
|
||||
ROCPROFILER_INT64 = 2,
|
||||
ROCPROFILER_FLOAT = 3,
|
||||
ROCPROFILER_DOUBLE = 4,
|
||||
ROCPROFILER_BYTES = 5
|
||||
} rocprofiler_metric_kind_t;
|
||||
|
||||
// Profiling data type
|
||||
typedef struct {
|
||||
rocprofiler_metric_kind_t kind; // result kind
|
||||
union {
|
||||
uint32_t result_int32; // 32bit integer result
|
||||
uint64_t result_int64; // 64bit integer result
|
||||
float result_float; // float single-precision result
|
||||
double result_double; // float double-precision result
|
||||
struct {
|
||||
void* ptr;
|
||||
uint32_t size;
|
||||
uint32_t instance_count;
|
||||
bool copy;
|
||||
} result_bytes; // data by ptr and byte size
|
||||
};
|
||||
} rocprofiler_data_t;
|
||||
|
||||
// Profiling feature info
|
||||
typedef struct {
|
||||
rocprofiler_type_t type; // feature type
|
||||
const char* name; // [in] feature name
|
||||
const rocprofiler_parameter_t* parameters; // feature parameters array
|
||||
uint32_t parameter_count; // feature parameters count
|
||||
rocprofiler_data_t data; // [out] profiling data
|
||||
} rocprofiler_info_t;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Profiling context
|
||||
//
|
||||
// Profiling context object accumuate all profiling information
|
||||
|
||||
// Profiling context object
|
||||
typedef void rocprofiler_t;
|
||||
|
||||
// Profiling group object
|
||||
typedef struct {
|
||||
unsigned index; // group index
|
||||
rocprofiler_info_t** info; // profiling info array
|
||||
uint32_t info_count; // profiling info count
|
||||
rocprofiler_t* context; // context object
|
||||
} rocprofiler_group_t;
|
||||
|
||||
// Profiling mode
|
||||
typedef enum {
|
||||
ROCPROFILER_MODE_STANDALONE = 1,
|
||||
ROCPROFILER_MODE_CREATEQUEUE = 2,
|
||||
} rocprofiler_mode_t;
|
||||
|
||||
// Profiling preperties
|
||||
typedef struct {
|
||||
hsa_queue_t* queue; // queue for STANDALONE mode
|
||||
// the queue is created and returned in CREATEQUEUE mode
|
||||
uint32_t queue_depth; // created queue depth
|
||||
} rocprofiler_properties_t;
|
||||
|
||||
// Create new profiling context
|
||||
hsa_status_t rocprofiler_open(
|
||||
unsigned gpu_index, // GPU index
|
||||
rocprofiler_info_t* info, // [in] profiling info array
|
||||
uint32_t info_count, // profiling info count
|
||||
rocprofiler_t** context, // [out] context object
|
||||
uint32_t mode, // profiling mode mask
|
||||
rocprofiler_properties_t* properties); // profiling properties
|
||||
|
||||
// Delete profiling info
|
||||
hsa_status_t rocprofiler_close(
|
||||
rocprofiler_t* context); // [in] profiling context
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Runtime API observer
|
||||
//
|
||||
// Runtime API observer is called on enter and exit for the API
|
||||
|
||||
// Profiling callback data
|
||||
typedef struct {
|
||||
uint64_t kernel_object;
|
||||
uint64_t queue_index;
|
||||
uint32_t gpu_index;
|
||||
} rocprofiler_callback_data_t;
|
||||
|
||||
// Profiling callback type
|
||||
typedef hsa_status_t (*rocprofiler_callback_t)(
|
||||
const rocprofiler_callback_data_t* callback_data, // [in] callback data union, data depends on
|
||||
// the callback API id
|
||||
void* user_data, // [in/out] user data passed to the callback
|
||||
rocprofiler_group_t** group); // [out] profiling group
|
||||
|
||||
// Provided standard profiling callback
|
||||
static inline hsa_status_t rocprofiler_set_dispatch_callback(
|
||||
const rocprofiler_callback_data_t* callback_data,
|
||||
void* user_data,
|
||||
rocprofiler_group_t** group) {
|
||||
*group = reinterpret_cast<rocprofiler_group_t*>(user_data);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Set/remove kernel dispatch observer
|
||||
hsa_status_t rocprofiler_set_dispatch_observer(
|
||||
rocprofiler_callback_t callback, // observer callback
|
||||
void* data); // [in/out] passed callback data
|
||||
|
||||
hsa_status_t rocprofiler_remove_dispatch_observer();
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Start/stop profiling
|
||||
//
|
||||
// Start/stop the context profiling invocation, have to be as many as
|
||||
// contect.invocations' to collect all profiling data
|
||||
|
||||
// Start profiling
|
||||
hsa_status_t rocprofiler_start(
|
||||
rocprofiler_t* context, // [in/out] profiling context
|
||||
uint32_t group_index = 0); // group index
|
||||
|
||||
// Stop profiling
|
||||
hsa_status_t rocprofiler_stop(
|
||||
rocprofiler_t* context, // [in/out] profiling context
|
||||
uint32_t group_index = 0); // group index
|
||||
|
||||
// Read profiling data
|
||||
hsa_status_t rocprofiler_get_data(
|
||||
rocprofiler_t* context, // [in/out] profiling context
|
||||
uint32_t group_index = 0); // group index
|
||||
|
||||
// Get profiling groups
|
||||
hsa_status_t rocprofiler_get_groups(
|
||||
rocprofiler_t* context, // [in] profiling context
|
||||
rocprofiler_group_t** groups, // [out] profiling groups
|
||||
uint32_t* group_count); // [out] profiling groups count
|
||||
|
||||
// Start profiling
|
||||
hsa_status_t rocprofiler_start_group(
|
||||
rocprofiler_group_t* group); // [in/out] profiling group
|
||||
|
||||
// Stop profiling
|
||||
hsa_status_t rocprofiler_stop_group(
|
||||
rocprofiler_group_t* group); // [in/out] profiling group
|
||||
|
||||
// Get profiling data
|
||||
hsa_status_t rocprofiler_get_group_data(
|
||||
rocprofiler_group_t* group); // [in/out] profiling group
|
||||
|
||||
// Get metrics data
|
||||
hsa_status_t rocprofiler_get_metrics_data(
|
||||
const rocprofiler_t* context); // [in/out] profiling context
|
||||
|
||||
// Definition of output data iterator callback
|
||||
typedef hsa_ven_amd_aqlprofile_data_callback_t rocprofiler_trace_data_callback_t;
|
||||
|
||||
// Method for iterating the events output data
|
||||
hsa_status_t rocprofiler_iterate_trace_data(
|
||||
rocprofiler_t* context, // [in] profiling context
|
||||
rocprofiler_trace_data_callback_t callback, // [in] callback to iterate the output data
|
||||
void* data); // [in/out] callback data
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Returning the error string method
|
||||
|
||||
hsa_status_t rocprofiler_error_string (
|
||||
const char** str); // [out] the API error string pointer returning
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// HSA-runtime tool on-load method
|
||||
bool OnLoad(
|
||||
HsaApiTable* table,
|
||||
uint64_t runtime_version,
|
||||
uint64_t failed_tool_count,
|
||||
const char* const * failed_tool_names);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // INC_ROCPROFILER_H_
|
||||
@@ -0,0 +1,14 @@
|
||||
#
|
||||
# Build dynamic Library object
|
||||
#
|
||||
set ( TARGET_LIB "${TARGET_NAME}" )
|
||||
set ( LIB_SRC
|
||||
${LIB_DIR}/core/rocprofiler.cpp
|
||||
${LIB_DIR}/core/proxy_queue.cpp
|
||||
${LIB_DIR}/core/simple_proxy_queue.cpp
|
||||
${LIB_DIR}/core/intercept_queue.cpp
|
||||
${LIB_DIR}/util/hsa_rsrc_factory.cpp
|
||||
)
|
||||
add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} )
|
||||
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
|
||||
target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++)
|
||||
@@ -0,0 +1,399 @@
|
||||
#ifndef SRC_CORE_CONTEXT_H_
|
||||
#define SRC_CORE_CONTEXT_H_
|
||||
|
||||
#include "inc/rocprofiler.h"
|
||||
|
||||
#include <hsa.h>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "core/metrics.h"
|
||||
#include "core/profile.h"
|
||||
#include "core/queue.h"
|
||||
#include "core/types.h"
|
||||
#include "util/exception.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
struct rocprofiler_contex_t;
|
||||
class Context;
|
||||
|
||||
inline unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); }
|
||||
|
||||
// Block descriptor
|
||||
struct block_des_t {
|
||||
uint32_t id;
|
||||
uint32_t index;
|
||||
};
|
||||
|
||||
// block_des_t less-then functor
|
||||
struct lt_block_des {
|
||||
bool operator()(const block_des_t& a1, const block_des_t& a2) const {
|
||||
return (a1.id < a2.id) || ((a1.id == a2.id) && (a1.index < a2.index));
|
||||
}
|
||||
};
|
||||
|
||||
// Block status
|
||||
struct block_status_t {
|
||||
uint32_t max_counters;
|
||||
uint32_t counter_index;
|
||||
uint32_t group_index;
|
||||
};
|
||||
|
||||
// Metrics arguments
|
||||
template <class Map>
|
||||
class MetricArgs : public xml::args_cache_t {
|
||||
public:
|
||||
MetricArgs(const Map& map) : map_(map) {}
|
||||
bool Lookup(const std::string& name, uint64_t& result) const {
|
||||
rocprofiler_info_t* info = NULL;
|
||||
auto it = map_.find(name);
|
||||
if (it == map_.end()) EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is not found");
|
||||
info = it->second;
|
||||
if (info) {
|
||||
result = info->data.result_int64;
|
||||
if (info->data.kind == ROCPROFILER_UNINIT) EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is uninitialized");
|
||||
if (info->data.kind != ROCPROFILER_INT64) EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is of incompatible type, not INT64");
|
||||
} else EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' info is NULL");
|
||||
return (info != NULL);
|
||||
}
|
||||
private:
|
||||
const Map& map_;
|
||||
};
|
||||
|
||||
// Profiling group
|
||||
class Group {
|
||||
public:
|
||||
Group(const util::AgentInfo* agent_info, Context *context, const uint32_t& index) :
|
||||
pmc_profile_(agent_info),
|
||||
sqtt_profile_(agent_info),
|
||||
context_(context),
|
||||
index_(index)
|
||||
{}
|
||||
|
||||
void Insert(const profile_info_t& info) {
|
||||
const rocprofiler_type_t type = info.rinfo->type;
|
||||
info_vector_.push_back(info.rinfo);
|
||||
switch (type) {
|
||||
case ROCPROFILER_TYPE_METRIC:
|
||||
pmc_profile_.Insert(info);
|
||||
break;
|
||||
case ROCPROFILER_TYPE_TRACE:
|
||||
sqtt_profile_.Insert(info);
|
||||
break;
|
||||
default:
|
||||
EXC_RAISING(HSA_STATUS_ERROR, "bad rocprofiler type (" << type << ")");
|
||||
}
|
||||
}
|
||||
|
||||
hsa_status_t Finalize() {
|
||||
hsa_status_t status = pmc_profile_.Finalize(start_vector_, stop_vector_);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
status = sqtt_profile_.Finalize(start_vector_, stop_vector_);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
void GetProfiles(profile_vector_t& vec) {
|
||||
pmc_profile_.GetProfiles(vec);
|
||||
sqtt_profile_.GetProfiles(vec);
|
||||
}
|
||||
|
||||
void GetTraceProfiles(profile_vector_t& vec) {
|
||||
sqtt_profile_.GetProfiles(vec);
|
||||
}
|
||||
|
||||
info_vector_t& GetInfoVector() { return info_vector_; }
|
||||
const pkt_vector_t& GetStartVector() const { return start_vector_; }
|
||||
const pkt_vector_t& GetStopVector() const { return stop_vector_; }
|
||||
Context* GetContext() { return context_; }
|
||||
uint32_t GetIndex() const { return index_; }
|
||||
|
||||
private:
|
||||
PmcProfile pmc_profile_;
|
||||
SqttProfile sqtt_profile_;
|
||||
info_vector_t info_vector_;
|
||||
pkt_vector_t start_vector_;
|
||||
pkt_vector_t stop_vector_;
|
||||
Context* const context_;
|
||||
const uint32_t index_;
|
||||
};
|
||||
|
||||
// Profiling context
|
||||
class Context {
|
||||
public:
|
||||
typedef std::map<std::string, rocprofiler_info_t*> info_map_t;
|
||||
|
||||
Context(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_info_t* info, const uint32_t info_count) :
|
||||
agent_(agent_info->dev_id),
|
||||
agent_info_(agent_info),
|
||||
queue_(queue),
|
||||
hsa_rsrc_(&util::HsaRsrcFactory::Instance()),
|
||||
api_(hsa_rsrc_->AqlProfileApi()),
|
||||
metrics_(agent_info)
|
||||
{
|
||||
Initialize(info, info_count);
|
||||
Finalize();
|
||||
}
|
||||
|
||||
~Context() {
|
||||
for (const auto& v : info_map_) {
|
||||
const std::string& name = v.first;
|
||||
const rocprofiler_info_t* info = v.second;
|
||||
if ((info->type == ROCPROFILER_TYPE_METRIC) && (metrics_map_.find(name) == metrics_map_.end())) {
|
||||
delete info;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Initialize(rocprofiler_info_t* info_array, const uint32_t info_count) {
|
||||
info_map_t input_map;
|
||||
for (unsigned i = 0; i < info_count; ++i) {
|
||||
rocprofiler_info_t* info = &info_array[i];
|
||||
input_map[info->name] = info;
|
||||
info->data.kind = ROCPROFILER_UNINIT;
|
||||
}
|
||||
|
||||
if (info_count) set_.push_back(Group(agent_info_, this, 0));
|
||||
|
||||
for (unsigned i = 0; i < info_count; ++i) {
|
||||
rocprofiler_info_t* info = &info_array[i];
|
||||
info_map_[info->name] = info;
|
||||
const rocprofiler_type_t type = info->type;
|
||||
const char* name = info->name;
|
||||
|
||||
if (type == ROCPROFILER_TYPE_METRIC) {
|
||||
const Metric* metric = metrics_.Get(name);
|
||||
if (metric == NULL) EXC_RAISING(HSA_STATUS_ERROR, "metric '" << name << "' is not found");
|
||||
auto ret = metrics_map_.insert({name, metric});
|
||||
if (!ret.second) EXC_RAISING(HSA_STATUS_ERROR, "metric '" << name << "' is registered more then once");
|
||||
|
||||
counters_vec_t counters_vec = metric->GetCounters();
|
||||
if (counters_vec.empty()) EXC_RAISING(HSA_STATUS_ERROR, "metric name '" << name << "' is not found");
|
||||
|
||||
for (const counter_t* counter : counters_vec) {
|
||||
if (metric->GetExpr()) {
|
||||
auto it = input_map.find(counter->name);
|
||||
if (it != input_map.end()) {
|
||||
continue;
|
||||
} else {
|
||||
info = NewCounterInfo(counter);
|
||||
info_map_[info->name] = info;
|
||||
}
|
||||
}
|
||||
|
||||
const event_t* event = &(counter->event);
|
||||
const block_des_t block_des = {event->block_name, event->block_index};
|
||||
auto ret = groups_map_.insert({block_des, {}});
|
||||
block_status_t& block_status = ret.first->second;
|
||||
if (block_status.max_counters == 0) {
|
||||
profile_t query = {};
|
||||
query.agent = agent_;
|
||||
query.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC;
|
||||
query.events = event;
|
||||
|
||||
uint32_t block_counters;
|
||||
hsa_status_t status = api_->hsa_ven_amd_aqlprofile_get_info(&query, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS, &block_counters);
|
||||
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "get block_counters info");
|
||||
block_status.max_counters = block_counters;
|
||||
}
|
||||
if (block_status.counter_index >= block_status.max_counters) {
|
||||
block_status.counter_index = 0;
|
||||
block_status.group_index += 1;
|
||||
}
|
||||
if (block_status.group_index >= set_.size()) {
|
||||
set_.push_back(Group(agent_info_, this, block_status.group_index));
|
||||
}
|
||||
const uint32_t group_index = block_status.group_index;
|
||||
set_[group_index].Insert(profile_info_t{event, NULL, 0, info});
|
||||
}
|
||||
} else if (type == ROCPROFILER_TYPE_TRACE) {
|
||||
set_[0].Insert(profile_info_t{NULL, info->parameters, info->parameter_count, info});
|
||||
} else {
|
||||
EXC_RAISING(HSA_STATUS_ERROR, "bad rocprofiler type (" << type << ")");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Finalize() {
|
||||
for (unsigned index = 0; index < set_.size(); ++index) {
|
||||
const hsa_status_t status = set_[index].Finalize();
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "context finalize failed");
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t GetGroupCount() const { return set_.size(); }
|
||||
|
||||
rocprofiler_group_t GetGroupInfo(const uint32_t& index) {
|
||||
rocprofiler::info_vector_t& info_vector = set_[index].GetInfoVector();
|
||||
rocprofiler_group_t group = {};
|
||||
group.info_count = info_vector.size();
|
||||
group.info = &info_vector[0];
|
||||
group.context = reinterpret_cast<rocprofiler_t*>(this);
|
||||
group.index = index;
|
||||
return group;
|
||||
}
|
||||
|
||||
const pkt_vector_t& StartPackets(const uint32_t& group_index) const { return set_[group_index].GetStartVector(); }
|
||||
const pkt_vector_t& StopPackets(const uint32_t& group_index) const { return set_[group_index].GetStopVector(); }
|
||||
|
||||
void Start(const uint32_t& group_index, Queue* const queue = NULL) {
|
||||
const pkt_vector_t& start_packets = StartPackets(group_index);
|
||||
Queue* const submit_queue = (queue != NULL) ? queue : queue_;
|
||||
submit_queue->Submit(&start_packets[0], start_packets.size());
|
||||
}
|
||||
void Stop(const uint32_t& group_index, Queue* const queue = NULL) {
|
||||
const pkt_vector_t& stop_packets = StopPackets(group_index);
|
||||
Queue* const submit_queue = (queue != NULL) ? queue : queue_;
|
||||
submit_queue->Submit(&stop_packets[0], stop_packets.size());
|
||||
}
|
||||
void Submit(const uint32_t& group_index, const packet_t* packet, Queue* const queue = NULL) {
|
||||
Queue* const submit_queue = (queue != NULL) ? queue : queue_;
|
||||
Start(group_index, submit_queue);
|
||||
submit_queue->Submit(packet);
|
||||
Stop(group_index, submit_queue);
|
||||
}
|
||||
|
||||
struct callback_data_t {
|
||||
info_vector_t* info_vector;
|
||||
size_t index;
|
||||
char* ptr;
|
||||
};
|
||||
|
||||
void GetData(const uint32_t& group_index) {
|
||||
const profile_vector_t profile_vector = GetProfiles(group_index);
|
||||
for (auto& tuple : profile_vector) {
|
||||
// Wait for stop packet to complete
|
||||
hsa_signal_wait_scacquire(
|
||||
tuple.completion_signal,
|
||||
HSA_SIGNAL_CONDITION_LT,
|
||||
1,
|
||||
(uint64_t)-1,
|
||||
HSA_WAIT_STATE_BLOCKED);
|
||||
callback_data_t callback_data{tuple.info_vector, tuple.info_vector->size(), NULL};
|
||||
const hsa_status_t status = api_->hsa_ven_amd_aqlprofile_iterate_data(tuple.profile, DataCallback, &callback_data);
|
||||
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "context iterate data failed");
|
||||
}
|
||||
}
|
||||
|
||||
void GetMetricsData() const {
|
||||
const MetricArgs<info_map_t> args(info_map_);
|
||||
for (const auto v : metrics_map_) {
|
||||
const std::string& name = v.first;
|
||||
const Metric* metric = v.second;
|
||||
const xml::Expr* expr = metric->GetExpr();
|
||||
if (expr) {
|
||||
auto it = info_map_.find(name);
|
||||
if (it == info_map_.end()) EXC_RAISING(HSA_STATUS_ERROR, "metric '" << name << "', rocprofiler info is not found");
|
||||
rocprofiler_info_t* info = it->second;
|
||||
info->data.result_int64 = expr->Eval(args);
|
||||
info->data.kind = ROCPROFILER_INT64;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void IterateTraceData(rocprofiler_trace_data_callback_t callback, void *data) {
|
||||
profile_vector_t profile_vector;
|
||||
set_[0].GetTraceProfiles(profile_vector);
|
||||
for (auto& tuple : profile_vector) {
|
||||
const hsa_status_t status = api_->hsa_ven_amd_aqlprofile_iterate_data(tuple.profile, callback, data);
|
||||
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "context iterate data failed");
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Getting profling packets
|
||||
profile_vector_t GetProfiles(const uint32_t& index) {
|
||||
profile_vector_t vec;
|
||||
if (index >= set_.size()) {
|
||||
EXC_RAISING(HSA_STATUS_ERROR, "index exceeding the maximum " << set_.size());
|
||||
}
|
||||
set_[index].GetProfiles(vec);
|
||||
return vec;
|
||||
}
|
||||
|
||||
static hsa_status_t DataCallback(hsa_ven_amd_aqlprofile_info_type_t ainfo_type,
|
||||
hsa_ven_amd_aqlprofile_info_data_t* ainfo_data,
|
||||
void* data) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
callback_data_t* callback_data = reinterpret_cast<callback_data_t*>(data);
|
||||
info_vector_t& info_vector = *(callback_data->info_vector);
|
||||
uint32_t index = callback_data->index;
|
||||
const uint32_t sample_id = ainfo_data->sample_id;
|
||||
if (info_vector.size() == index) {
|
||||
index = 0;
|
||||
} else {
|
||||
if (sample_id == 0) index += 1;
|
||||
}
|
||||
callback_data->index = index;
|
||||
|
||||
if (index < info_vector.size()) {
|
||||
rocprofiler_info_t* rinfo = info_vector[index];
|
||||
if (ainfo_type == HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) {
|
||||
if (ainfo_data->sample_id == 0) rinfo->data.result_int64 = 0;
|
||||
rinfo->data.result_int64 += ainfo_data->pmc_data.result;
|
||||
rinfo->data.kind = ROCPROFILER_INT64;
|
||||
} else if (ainfo_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) {
|
||||
if (rinfo->data.result_bytes.copy) {
|
||||
char* result_bytes_ptr = reinterpret_cast<char*>(rinfo->data.result_bytes.ptr);
|
||||
const char* end = result_bytes_ptr + rinfo->data.result_bytes.size;
|
||||
const char* src = reinterpret_cast<char*>(ainfo_data->sqtt_data.ptr);
|
||||
const uint32_t size = ainfo_data->sqtt_data.size;
|
||||
char* ptr = (sample_id == 0) ? result_bytes_ptr : callback_data->ptr;
|
||||
uint64_t* header = reinterpret_cast<uint64_t*>(ptr);
|
||||
char* dest = ptr + sizeof(*header);
|
||||
|
||||
if ((dest + size) < end) {
|
||||
hsa_status_t status = hsa_memory_copy(dest, src, size);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
*header = size;
|
||||
rinfo->data.kind = ROCPROFILER_BYTES;
|
||||
rinfo->data.result_bytes.instance_count = sample_id + 1;
|
||||
callback_data->ptr = dest + align_size(size, sizeof(uint64_t));
|
||||
}
|
||||
} else status = HSA_STATUS_ERROR;
|
||||
} else {
|
||||
if (sample_id == 0) {
|
||||
rinfo->data.kind = ROCPROFILER_BYTES;
|
||||
rinfo->data.result_bytes.ptr = ainfo_data->sqtt_data.ptr;
|
||||
rinfo->data.result_bytes.instance_count = UINT32_MAX;
|
||||
}
|
||||
rinfo->data.result_bytes.instance_count += 1;
|
||||
}
|
||||
} else status = HSA_STATUS_ERROR;
|
||||
} else status = HSA_STATUS_ERROR;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
rocprofiler_info_t* NewCounterInfo(const counter_t* counter) {
|
||||
rocprofiler_info_t* info = new rocprofiler_info_t{};
|
||||
info->type = ROCPROFILER_TYPE_METRIC;
|
||||
info->name = counter->name.c_str();
|
||||
return info;
|
||||
}
|
||||
|
||||
// GPU handel
|
||||
const hsa_agent_t agent_;
|
||||
const util::AgentInfo* agent_info_;
|
||||
// Profiling queue
|
||||
Queue* queue_;
|
||||
// HSA resources factory
|
||||
util::HsaRsrcFactory* hsa_rsrc_;
|
||||
// aqlprofile API table
|
||||
const pfn_t* api_;
|
||||
// Profile group set
|
||||
std::vector<Group> set_;
|
||||
// Metrics dictionary
|
||||
MetricsDict metrics_;
|
||||
// Groups map
|
||||
std::map<block_des_t, block_status_t, lt_block_des> groups_map_;
|
||||
// Info map
|
||||
info_map_t info_map_;
|
||||
// Metrics map
|
||||
std::map<std::string, const Metric*> metrics_map_;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // SRC_CORE_CONTEXT_H_
|
||||
@@ -0,0 +1,49 @@
|
||||
#ifndef _SRC_CORE_HSA_PROXY_QUEUE_H
|
||||
#define _SRC_CORE_HSA_PROXY_QUEUE_H
|
||||
|
||||
#include <hsa.h>
|
||||
#include <atomic>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
#include "core/proxy_queue.h"
|
||||
#include "util/exception.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
|
||||
extern decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn;
|
||||
extern decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn;
|
||||
|
||||
class HsaProxyQueue : public ProxyQueue {
|
||||
public:
|
||||
hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) {
|
||||
return hsa_amd_queue_intercept_register_fn(queue_, on_submit_cb, data);
|
||||
}
|
||||
|
||||
void Submit(const packet_t* packet) { EXC_RAISING(HSA_STATUS_ERROR, "HsaProxyQueue::Submit() is not supported"); }
|
||||
|
||||
private:
|
||||
hsa_status_t Init(
|
||||
hsa_agent_t agent,
|
||||
uint32_t size,
|
||||
hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
|
||||
void *data,
|
||||
uint32_t private_segment_size,
|
||||
uint32_t group_segment_size,
|
||||
hsa_queue_t **queue)
|
||||
{
|
||||
printf("HsaProxyQueue::Init()\n");
|
||||
const auto status = hsa_amd_queue_intercept_create_fn(agent, size, type, callback, data, private_segment_size, group_segment_size, &queue_);
|
||||
*queue = queue_;
|
||||
return status;
|
||||
}
|
||||
|
||||
hsa_status_t Cleanup() const { return hsa_queue_destroy_fn(queue_); }
|
||||
|
||||
hsa_queue_t* queue_;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // _SRC_CORE_HSA_PROXY_QUEUE_H
|
||||
@@ -0,0 +1,60 @@
|
||||
#ifndef _SRC_CORE_HSA_QUEUE_H
|
||||
#define _SRC_CORE_HSA_QUEUE_H
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "core/queue.h"
|
||||
#include "core/types.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
|
||||
class HsaQueue : public Queue {
|
||||
public:
|
||||
typedef void (HsaQueue::*submit_fptr_t)(const packet_t* packet);
|
||||
enum {
|
||||
LEGACY_SLOT_SIZE_W = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_word_t),
|
||||
LEGACY_SLOT_SIZE_P = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t)
|
||||
};
|
||||
struct slot_pm4_t {
|
||||
packet_word_t words[LEGACY_SLOT_SIZE_W];
|
||||
};
|
||||
|
||||
HsaQueue(const util::AgentInfo* agent_info, hsa_queue_t* queue) :
|
||||
queue_(queue)
|
||||
{}
|
||||
|
||||
void Submit(const packet_t* packet) {
|
||||
// Compute the write index of queue and copy Aql packet into it
|
||||
const uint64_t que_idx = hsa_queue_load_write_index_relaxed(queue_);
|
||||
// Increment the write index
|
||||
hsa_queue_store_write_index_relaxed(queue_, que_idx + 1);
|
||||
|
||||
const uint32_t mask = queue_->size - 1;
|
||||
|
||||
// Copy packet to the queue
|
||||
const packet_word_t* src = reinterpret_cast<const packet_word_t*>(packet);
|
||||
packet_t* slot = reinterpret_cast<packet_t*>(queue_->base_address) + (que_idx & mask);
|
||||
packet_word_t* dst = reinterpret_cast<packet_word_t*>(slot);
|
||||
const uint32_t nwords = sizeof(packet_t) / sizeof(packet_word_t);
|
||||
for (unsigned i = 1; i < nwords; ++i) {
|
||||
dst[i] = src[i];
|
||||
}
|
||||
|
||||
// To maintain global order to ensure the prior copy of the packet contents is made visible
|
||||
// before the header is updated.
|
||||
// With in-order CP it will wait until the first packet in the blob will be valid
|
||||
std::atomic<packet_word_t>* header_atomic_ptr =
|
||||
reinterpret_cast<std::atomic<packet_word_t>*>(&dst[0]);
|
||||
header_atomic_ptr->store(src[0], std::memory_order_release);
|
||||
|
||||
// Doorbell signaling
|
||||
hsa_signal_store_relaxed(queue_->doorbell_signal, que_idx);
|
||||
}
|
||||
|
||||
private:
|
||||
hsa_queue_t* queue_;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // _SRC_CORE_HSA_QUEUE_H
|
||||
@@ -0,0 +1,15 @@
|
||||
#include "core/intercept_queue.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
void InterceptQueue::HsaIntercept(HsaApiTable* table) {
|
||||
table->core_->hsa_queue_create_fn = rocprofiler::InterceptQueue::QueueCreate;
|
||||
table->core_->hsa_queue_destroy_fn = rocprofiler::InterceptQueue::QueueDestroy;
|
||||
}
|
||||
|
||||
InterceptQueue::mutex_t InterceptQueue::mutex_;
|
||||
rocprofiler_callback_t InterceptQueue::on_dispatch_cb_ = NULL;
|
||||
void* InterceptQueue::on_dispatch_cb_data_ = NULL;
|
||||
const char* InterceptQueue::tool_lib_ = NULL;
|
||||
void* InterceptQueue::tool_handle_ = NULL;
|
||||
InterceptQueue::obj_map_t* InterceptQueue::obj_map_ = NULL;
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,161 @@
|
||||
#ifndef _SRC_CORE_INTERCEPT_QUEUE_H
|
||||
#define _SRC_CORE_INTERCEPT_QUEUE_H
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
#include "core/context.h"
|
||||
#include "core/proxy_queue.h"
|
||||
#include "core/types.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
extern decltype(hsa_queue_create)* hsa_queue_create_fn;
|
||||
extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
|
||||
|
||||
class InterceptQueue {
|
||||
public:
|
||||
typedef std::recursive_mutex mutex_t;
|
||||
typedef std::map<uint64_t, InterceptQueue*> obj_map_t;
|
||||
|
||||
static void HsaIntercept(HsaApiTable* table);
|
||||
|
||||
static void SetTool(const char* tool) { tool_lib_ = tool; }
|
||||
|
||||
static void UnloadTool() { if (tool_handle_) dlclose(tool_handle_); }
|
||||
|
||||
static hsa_status_t QueueCreate(
|
||||
hsa_agent_t agent,
|
||||
uint32_t size,
|
||||
hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
|
||||
void *data,
|
||||
uint32_t private_segment_size,
|
||||
uint32_t group_segment_size,
|
||||
hsa_queue_t **queue)
|
||||
{
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
if (tool_lib_) {
|
||||
tool_handle_ = dlopen(tool_lib_, RTLD_NOW);
|
||||
if (tool_handle_ == NULL) {
|
||||
fprintf(stderr, "ROCProfiler: can't load tool library \"%s\"\n", tool_lib_);
|
||||
fprintf(stderr, "%s\n", dlerror());
|
||||
exit(1);
|
||||
}
|
||||
tool_lib_ = NULL;
|
||||
}
|
||||
|
||||
if (!obj_map_) obj_map_ = new obj_map_t;
|
||||
|
||||
ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, &status);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
InterceptQueue* obj = new InterceptQueue(agent, proxy);
|
||||
(*obj_map_)[(uint64_t)(*queue)] = obj;
|
||||
status = proxy->SetInterceptCB(OnSubmitCB, obj);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static hsa_status_t QueueDestroy(hsa_queue_t *queue) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
obj_map_t::iterator it = obj_map_->find((uint64_t)queue);
|
||||
if (it != obj_map_->end()) {
|
||||
const InterceptQueue* obj = it->second;
|
||||
delete obj;
|
||||
obj_map_->erase(it);
|
||||
status = HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void OnSubmitCB(const void* in_packets, uint64_t count, uint64_t user_que_idx, void* data, hsa_amd_queue_intercept_packet_writer writer) {
|
||||
const packet_t* packets_arr = reinterpret_cast<const packet_t*>(in_packets);
|
||||
InterceptQueue* obj = reinterpret_cast<InterceptQueue*>(data);
|
||||
Queue* proxy = obj->proxy_;
|
||||
|
||||
for (uint64_t j = 0; j < count; ++j) {
|
||||
bool to_submit = true;
|
||||
const packet_t* packet = &packets_arr[j];
|
||||
|
||||
if ((GetHeaderType(packet) == HSA_PACKET_TYPE_KERNEL_DISPATCH) && (on_dispatch_cb_ != NULL)) {
|
||||
rocprofiler_group_t* group = NULL;
|
||||
const hsa_kernel_dispatch_packet_t* dispatch_packet = reinterpret_cast<const hsa_kernel_dispatch_packet_t*>(packet);
|
||||
rocprofiler_callback_data_t data = {dispatch_packet->kernel_object, user_que_idx, obj->agent_info_->dev_index};
|
||||
hsa_status_t status = on_dispatch_cb_(&data, on_dispatch_cb_data_, &group);
|
||||
if ((status == HSA_STATUS_SUCCESS) && (group != NULL)) {
|
||||
Context* context = reinterpret_cast<Context*>(group->context);
|
||||
const pkt_vector_t& start_vector = context->StartPackets(group->index);
|
||||
const pkt_vector_t& stop_vector = context->StopPackets(group->index);
|
||||
|
||||
pkt_vector_t packets = start_vector;
|
||||
packets.insert(packets.end(), *packet);
|
||||
packets.insert(packets.end(), stop_vector.begin(), stop_vector.end());
|
||||
if (writer != NULL) {
|
||||
writer(&packets[0], packets.size());
|
||||
} else {
|
||||
proxy->Submit(&packets[0], packets.size());
|
||||
}
|
||||
to_submit = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (to_submit) {
|
||||
if (writer != NULL) {
|
||||
writer(packet, 1);
|
||||
} else {
|
||||
proxy->Submit(packet, 1);
|
||||
}
|
||||
}
|
||||
|
||||
packet += 1;
|
||||
}
|
||||
}
|
||||
|
||||
static void SetDispatchCB(rocprofiler_callback_t on_dispatch_cb, void* data) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
on_dispatch_cb_ = on_dispatch_cb;
|
||||
on_dispatch_cb_data_ = data;
|
||||
}
|
||||
|
||||
static void UnsetDispatchCB() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
on_dispatch_cb_ = NULL;
|
||||
on_dispatch_cb_data_ = NULL;
|
||||
}
|
||||
|
||||
private:
|
||||
InterceptQueue(const hsa_agent_t& agent, ProxyQueue* proxy) : proxy_(proxy) {
|
||||
agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent);
|
||||
}
|
||||
~InterceptQueue() { ProxyQueue::Destroy(proxy_); }
|
||||
|
||||
static packet_word_t GetHeaderType(const packet_t* packet) {
|
||||
const packet_word_t* header = reinterpret_cast<const packet_word_t*>(packet);
|
||||
return (*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask;
|
||||
}
|
||||
|
||||
static mutex_t mutex_;
|
||||
static const packet_word_t header_type_mask = (1ul << HSA_PACKET_HEADER_WIDTH_TYPE) - 1;
|
||||
static rocprofiler_callback_t on_dispatch_cb_;
|
||||
static void* on_dispatch_cb_data_;
|
||||
static const char* tool_lib_;
|
||||
static void* tool_handle_;
|
||||
static obj_map_t* obj_map_;
|
||||
|
||||
ProxyQueue* const proxy_;
|
||||
const util::AgentInfo* agent_info_;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // _SRC_CORE_INTERCEPT_QUEUE_H
|
||||
@@ -0,0 +1,169 @@
|
||||
#ifndef SRC_CORE_METRICS_H_
|
||||
#define SRC_CORE_METRICS_H_
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "core/types.h"
|
||||
#include "util/exception.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "xml/expr.h"
|
||||
#include "xml/xml.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
struct counter_t {
|
||||
std::string name;
|
||||
event_t event;
|
||||
};
|
||||
typedef std::vector<const counter_t*> counters_vec_t;
|
||||
|
||||
class Metric {
|
||||
public:
|
||||
Metric(const std::string& name) : name_(name) {}
|
||||
std::string GetName() const { return name_; }
|
||||
virtual void GetCounters(counters_vec_t &vec) const = 0;
|
||||
counters_vec_t GetCounters() const {
|
||||
counters_vec_t counters;
|
||||
GetCounters(counters);
|
||||
return counters;
|
||||
}
|
||||
virtual const xml::Expr* GetExpr() const = 0;
|
||||
private:
|
||||
std::string name_;
|
||||
};
|
||||
|
||||
class BaseMetric : public Metric {
|
||||
public:
|
||||
BaseMetric(const std::string& name, const counter_t& counter) : Metric(name), counter_(counter) {}
|
||||
void GetCounters(counters_vec_t &vec) const { vec.push_back(&counter_); }
|
||||
const xml::Expr* GetExpr() const { return NULL; }
|
||||
private:
|
||||
const counter_t counter_;
|
||||
};
|
||||
|
||||
class ExprMetric : public Metric {
|
||||
public:
|
||||
ExprMetric(const std::string& name, const counters_vec_t& counters, const xml::Expr* expr) : Metric(name), counters_(counters), expr_(expr) {}
|
||||
void GetCounters(counters_vec_t& vec) const { vec.insert(vec.end(), counters_.begin(), counters_.end()); }
|
||||
const xml::Expr* GetExpr() const { return expr_; }
|
||||
private:
|
||||
const counters_vec_t counters_;
|
||||
const xml::Expr* expr_;
|
||||
};
|
||||
|
||||
|
||||
class MetricsDict {
|
||||
public:
|
||||
typedef std::map<std::string, const Metric*> cache_t;
|
||||
typedef cache_t::const_iterator const_iterator_t;
|
||||
|
||||
class ExprCache : public xml::expr_cache_t {
|
||||
public:
|
||||
ExprCache(const cache_t* cache) : cache_(cache) {}
|
||||
bool Lookup(const std::string& name, std::string& result) const {
|
||||
bool ret = false;
|
||||
auto it = cache_->find(name);
|
||||
if (it != cache_->end()) {
|
||||
ret = true;
|
||||
const rocprofiler::ExprMetric* expr_metric = dynamic_cast<const rocprofiler::ExprMetric*>(it->second);
|
||||
if (expr_metric) result = expr_metric->GetExpr()->GetStr();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
private:
|
||||
const cache_t* const cache_;
|
||||
};
|
||||
|
||||
MetricsDict(const util::AgentInfo* agent_info) : xml_(NULL) {
|
||||
const char* xml_name = getenv("ROCP_METRICS");
|
||||
if (xml_name != NULL) {
|
||||
xml_ = new xml::Xml(xml_name);
|
||||
std::cout << "ROCProfiler: importing metrics from '" << xml_name << "':" << std::endl;
|
||||
ImportMetrics(agent_info, agent_info->gfxip);
|
||||
ImportMetrics(agent_info, "global");
|
||||
}
|
||||
}
|
||||
|
||||
const Metric* Get(const std::string& name) const {
|
||||
const Metric* metric = NULL;
|
||||
auto it = cache_.find(name);
|
||||
if (it != cache_.end()) metric = it->second;
|
||||
return metric;
|
||||
}
|
||||
|
||||
private:
|
||||
void ImportMetrics(const util::AgentInfo* agent_info, const char* scope) {
|
||||
auto scope_list = xml_->GetNodes("top." + std::string(scope) + ".metric");
|
||||
if (!scope_list.empty()) {
|
||||
std::cout << " " << scope_list.size() << " " << scope << " metrics found" << std::endl;
|
||||
|
||||
for (auto node : scope_list) {
|
||||
const std::string name = node->opts["name"];
|
||||
if (cache_.find(name) != cache_.end()) EXC_RAISING(HSA_STATUS_ERROR, "ImportMetrics: metrics redefined '" << name << "'");
|
||||
|
||||
const std::string expr_str = node->opts["expr"];
|
||||
if (expr_str.empty()) {
|
||||
const std::string block_name = node->opts["block"];
|
||||
const uint32_t event_id = atoi(node->opts["event"].c_str());
|
||||
|
||||
hsa_ven_amd_aqlprofile_profile_t profile;
|
||||
profile.agent = agent_info->dev_id;
|
||||
hsa_ven_amd_aqlprofile_id_query_t query = {block_name.c_str(), 0, 0};
|
||||
hsa_status_t status = util::HsaRsrcFactory::Instance().AqlProfileApi()->
|
||||
hsa_ven_amd_aqlprofile_get_info(&profile, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID, &query);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
const hsa_ven_amd_aqlprofile_block_name_t block_id = (hsa_ven_amd_aqlprofile_block_name_t)query.id;
|
||||
if (query.instance_count > 1) {
|
||||
for (unsigned block_index = 0; block_index < query.instance_count; ++block_index) {
|
||||
std::ostringstream os;
|
||||
os << name << '[' << block_index << ']';
|
||||
const std::string full_name = os.str();
|
||||
const counter_t counter = {full_name, {block_id, block_index, event_id}};
|
||||
cache_[full_name] = new BaseMetric(full_name, counter);
|
||||
}
|
||||
} else {
|
||||
const counter_t counter = {name, {block_id, 0, event_id}};
|
||||
cache_[name] = new BaseMetric(name, counter);
|
||||
}
|
||||
} else AQL_EXC_RAISING(HSA_STATUS_ERROR, "ImportMetrics: bad block name '" << block_name << "'");
|
||||
} else {
|
||||
xml::Expr* expr_obj = new xml::Expr(expr_str, new ExprCache(&cache_));
|
||||
std::cout << " " << name << " = " << expr_obj->String() << std::endl;
|
||||
counters_vec_t counters_vec;
|
||||
for (const std::string var : expr_obj->GetVars()) {
|
||||
auto it = cache_.find(var);
|
||||
if (it == cache_.end()) EXC_RAISING(HSA_STATUS_ERROR, "Bad metric '" << name << "', var '" << var << "' is not found");
|
||||
it->second->GetCounters(counters_vec);
|
||||
}
|
||||
cache_[name] = new ExprMetric(name, counters_vec, expr_obj);
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
for (auto& v : cache_) {
|
||||
const Metric* metric = v.second;
|
||||
counters_vec_t counters_vec;
|
||||
printf("> Metric '%s'\n", metric->GetName().c_str());
|
||||
metric->GetCounters(counters_vec);
|
||||
for (auto c : counters_vec) {
|
||||
printf(" counter %s, b(%u), i (%u), e (%u)\n", c->name.c_str(), c->event.block_name, c->event.block_index, c->event.counter_id);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Metrics map
|
||||
xml::Xml* xml_;
|
||||
cache_t cache_;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // SRC_CORE_METRICS_H_
|
||||
@@ -0,0 +1,206 @@
|
||||
#ifndef SRC_CORE_PROFILE_H_
|
||||
#define SRC_CORE_PROFILE_H_
|
||||
|
||||
#include "inc/rocprofiler.h"
|
||||
|
||||
#include <hsa.h>
|
||||
#include <vector>
|
||||
|
||||
#include "core/types.h"
|
||||
#include "util/exception.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
struct profile_info_t {
|
||||
const event_t* event;
|
||||
const parameter_t* parameters;
|
||||
uint32_t parameter_count;
|
||||
rocprofiler_info_t* rinfo;
|
||||
};
|
||||
typedef std::vector<rocprofiler_info_t*> info_vector_t;
|
||||
typedef std::vector<packet_t> pkt_vector_t;
|
||||
struct profile_tuple_t {
|
||||
const profile_t* profile;
|
||||
info_vector_t* info_vector;
|
||||
hsa_signal_t completion_signal;
|
||||
};
|
||||
typedef std::vector<profile_tuple_t> profile_vector_t;
|
||||
|
||||
template<class Item> class ConfigBase {};
|
||||
|
||||
template<> class ConfigBase<event_t> {
|
||||
public:
|
||||
ConfigBase(profile_t *profile) : profile_(profile) {}
|
||||
|
||||
protected:
|
||||
void* Array() { return const_cast<event_t*>(profile_->events); }
|
||||
unsigned Count() const { return profile_->event_count; }
|
||||
void Set(event_t* events, const unsigned& count) {
|
||||
profile_->events = events;
|
||||
profile_->event_count = count;
|
||||
}
|
||||
profile_t* profile_;
|
||||
};
|
||||
|
||||
template<> class ConfigBase<parameter_t> {
|
||||
public:
|
||||
ConfigBase(profile_t *profile) : profile_(profile) {}
|
||||
|
||||
protected:
|
||||
void* Array() { return const_cast<parameter_t*>(profile_->parameters); }
|
||||
unsigned Count() const { return profile_->parameter_count; }
|
||||
void Set(parameter_t* parameters, const unsigned& count) {
|
||||
profile_->parameters = parameters;
|
||||
profile_->parameter_count = count;
|
||||
}
|
||||
profile_t* profile_;
|
||||
};
|
||||
|
||||
template<class Item>
|
||||
class Config : protected ConfigBase<Item> {
|
||||
typedef ConfigBase<Item> Parent;
|
||||
public:
|
||||
Config(profile_t *profile) : Parent(profile) {}
|
||||
void Insert(const Item& item) {
|
||||
auto count = Parent::Count();
|
||||
count += 1;
|
||||
Item* array = reinterpret_cast<Item*>(realloc(const_cast<void*>(Parent::Array()), count * sizeof(Item)));
|
||||
array[count - 1] = item;
|
||||
Parent::Set(array, count);
|
||||
}
|
||||
};
|
||||
|
||||
class Profile {
|
||||
public:
|
||||
static const uint32_t LEGACY_SLOT_SIZE_PKT = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t);
|
||||
|
||||
Profile(const util::AgentInfo* agent_info) : agent_info_(agent_info) {
|
||||
profile_ = {};
|
||||
profile_.agent = agent_info->dev_id;
|
||||
is_legacy_ = (strncmp(agent_info->name, "gfx8", 4) == 0);
|
||||
}
|
||||
virtual ~Profile() {
|
||||
hsa_memory_free(profile_.command_buffer.ptr);
|
||||
hsa_memory_free(profile_.output_buffer.ptr);
|
||||
free(const_cast<event_t*>(profile_.events));
|
||||
free(const_cast<parameter_t*>(profile_.parameters));
|
||||
}
|
||||
|
||||
virtual void Insert(const profile_info_t& info) {
|
||||
info_vector_.push_back(info.rinfo);
|
||||
}
|
||||
|
||||
hsa_status_t Finalize(pkt_vector_t& start_vector, pkt_vector_t& stop_vector) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
|
||||
if (!info_vector_.empty()) {
|
||||
util::HsaRsrcFactory* rsrc = &util::HsaRsrcFactory::Instance();
|
||||
const pfn_t* api = rsrc->AqlProfileApi();
|
||||
packet_t start{};
|
||||
packet_t stop{};
|
||||
|
||||
// Check the profile buffer sizes
|
||||
status = api->hsa_ven_amd_aqlprofile_start(&profile_, NULL);
|
||||
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_start(NULL)");
|
||||
Allocate(rsrc);
|
||||
// Generate start/stop profiling packets
|
||||
status = api->hsa_ven_amd_aqlprofile_start(&profile_, &start);
|
||||
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_start");
|
||||
status = api->hsa_ven_amd_aqlprofile_stop(&profile_, &stop);
|
||||
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_stop");
|
||||
// Set completion signals
|
||||
hsa_signal_t dummy_signal{};
|
||||
dummy_signal.handle = 0;
|
||||
start.completion_signal = dummy_signal;
|
||||
hsa_signal_t post_signal;
|
||||
status = hsa_signal_create(1, 0, NULL, &post_signal);
|
||||
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_signal_create");
|
||||
stop.completion_signal = post_signal;
|
||||
completion_signal_ = post_signal;
|
||||
|
||||
if (is_legacy_) {
|
||||
const uint32_t start_index = start_vector.size();
|
||||
const uint32_t stop_index = stop_vector.size();
|
||||
|
||||
start_vector.insert(start_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{});
|
||||
stop_vector.insert(stop_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{});
|
||||
status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4(&start, reinterpret_cast<void*>(&start_vector[start_index]));
|
||||
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
|
||||
status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4(&stop, reinterpret_cast<void*>(&stop_vector[stop_index]));
|
||||
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
|
||||
} else {
|
||||
start_vector.push_back(start);
|
||||
stop_vector.push_back(stop);
|
||||
}
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void GetProfiles(profile_vector_t& vec) {
|
||||
if (!info_vector_.empty()) {
|
||||
vec.push_back(profile_tuple_t{&profile_, &info_vector_, completion_signal_});
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) = 0;
|
||||
|
||||
const util::AgentInfo* const agent_info_;
|
||||
bool is_legacy_;
|
||||
profile_t profile_;
|
||||
info_vector_t info_vector_;
|
||||
hsa_signal_t completion_signal_;
|
||||
};
|
||||
|
||||
class PmcProfile : public Profile {
|
||||
public:
|
||||
PmcProfile(const util::AgentInfo* agent_info) : Profile(agent_info) {
|
||||
profile_.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC;
|
||||
}
|
||||
|
||||
void Insert(const profile_info_t& info) {
|
||||
Profile::Insert(info);
|
||||
Config<event_t>(&profile_).Insert(*(info.event));
|
||||
}
|
||||
|
||||
hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) {
|
||||
profile_.command_buffer.ptr = rsrc->AllocateSysMemory(agent_info_, profile_.command_buffer.size);
|
||||
profile_.output_buffer.ptr = rsrc->AllocateSysMemory(agent_info_, profile_.output_buffer.size);
|
||||
return (profile_.command_buffer.ptr && profile_.output_buffer.ptr) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
|
||||
}
|
||||
};
|
||||
|
||||
class SqttProfile : public Profile {
|
||||
public:
|
||||
static const uint32_t output_buffer_size = 0x2000000; // 32M
|
||||
|
||||
SqttProfile(const util::AgentInfo* agent_info) : Profile(agent_info) {
|
||||
profile_.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT;
|
||||
}
|
||||
|
||||
void Insert(const profile_info_t& info) {
|
||||
Profile::Insert(info);
|
||||
for (unsigned j = 0; j < info.parameter_count; ++j) {
|
||||
Config<parameter_t>(&profile_).Insert(info.parameters[j]);
|
||||
}
|
||||
|
||||
info.rinfo->data.result_bytes.size = output_buffer_size;
|
||||
if (info.rinfo->data.result_bytes.copy) {
|
||||
const uint32_t output_buffer_size64 = output_buffer_size / sizeof(uint64_t);
|
||||
info.rinfo->data.result_bytes.ptr = calloc(output_buffer_size64, sizeof(uint64_t));
|
||||
memset(info.rinfo->data.result_bytes.ptr, 0, output_buffer_size);
|
||||
}
|
||||
}
|
||||
|
||||
hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) {
|
||||
profile_.output_buffer.size = output_buffer_size;
|
||||
profile_.command_buffer.ptr = rsrc->AllocateSysMemory(agent_info_, profile_.command_buffer.size);
|
||||
profile_.output_buffer.ptr = rsrc->AllocateLocalMemory(agent_info_, profile_.output_buffer.size);
|
||||
return (profile_.command_buffer.ptr && profile_.output_buffer.ptr) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // SRC_CORE_PROFILE_H_
|
||||
@@ -0,0 +1,48 @@
|
||||
#include "core/proxy_queue.h"
|
||||
|
||||
#ifdef ROCP_HSA_PROXY
|
||||
#include "core/hsa_proxy_queue.h"
|
||||
#endif
|
||||
#include "core/simple_proxy_queue.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
void ProxyQueue::HsaIntercept(HsaApiTable* table) {
|
||||
if (rocp_type_) SimpleProxyQueue::HsaIntercept(table);
|
||||
}
|
||||
|
||||
ProxyQueue* ProxyQueue::Create(
|
||||
hsa_agent_t agent,
|
||||
uint32_t size,
|
||||
hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
|
||||
void *data,
|
||||
uint32_t private_segment_size,
|
||||
uint32_t group_segment_size,
|
||||
hsa_queue_t **queue,
|
||||
hsa_status_t* status)
|
||||
{
|
||||
hsa_status_t suc = HSA_STATUS_ERROR;
|
||||
#ifdef ROCP_HSA_PROXY
|
||||
ProxyQueue* instance = (rocp_type_) ? (ProxyQueue*) new SimpleProxyQueue() : (ProxyQueue*) new HsaProxyQueue();
|
||||
#else
|
||||
ProxyQueue* instance = new SimpleProxyQueue();
|
||||
#endif
|
||||
if (instance != NULL) {
|
||||
const auto suc = instance->Init(agent, size, type, callback, data, private_segment_size, group_segment_size, queue);
|
||||
if (suc != HSA_STATUS_SUCCESS) {
|
||||
delete instance;
|
||||
instance = NULL;
|
||||
}
|
||||
}
|
||||
*status = suc;
|
||||
return instance;
|
||||
}
|
||||
|
||||
hsa_status_t ProxyQueue::Destroy(const ProxyQueue* obj) {
|
||||
auto suc = obj->Cleanup();
|
||||
delete obj;
|
||||
return suc;
|
||||
}
|
||||
|
||||
bool ProxyQueue::rocp_type_ = false;
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,70 @@
|
||||
#ifndef _SRC_CORE_PROXY_QUEUE_H
|
||||
#define _SRC_CORE_PROXY_QUEUE_H
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_api_trace.h>
|
||||
#include <atomic>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
#include "core/queue.h"
|
||||
#include "core/types.h"
|
||||
|
||||
struct HsaApiTable;
|
||||
|
||||
namespace rocprofiler {
|
||||
typedef void (*hsa_amd_queue_intercept_packet_writer)(const void* packets, uint64_t count);
|
||||
typedef void (*on_submit_cb_t)(const void* packet, uint64_t count, uint64_t que_idx, void* data, hsa_amd_queue_intercept_packet_writer writer);
|
||||
|
||||
class ProxyQueue : public Queue {
|
||||
public:
|
||||
static void InitFactory() {
|
||||
#ifdef ROCP_HSA_PROXY
|
||||
const char* type = getenv("ROCP_PROXY_QUEUE");
|
||||
if (type != NULL) {
|
||||
if (strncmp(type, "rocp", 4) == 0) rocp_type_ = true;
|
||||
}
|
||||
#else
|
||||
rocp_type_ = true;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void HsaIntercept(HsaApiTable* table);
|
||||
|
||||
static ProxyQueue* Create(
|
||||
hsa_agent_t agent,
|
||||
uint32_t size,
|
||||
hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
|
||||
void *data,
|
||||
uint32_t private_segment_size,
|
||||
uint32_t group_segment_size,
|
||||
hsa_queue_t **queue,
|
||||
hsa_status_t* status);
|
||||
|
||||
static hsa_status_t Destroy(const ProxyQueue* obj);
|
||||
|
||||
virtual hsa_status_t Init(
|
||||
hsa_agent_t agent,
|
||||
uint32_t size,
|
||||
hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
|
||||
void *data,
|
||||
uint32_t private_segment_size,
|
||||
uint32_t group_segment_size,
|
||||
hsa_queue_t **queue
|
||||
) = 0;
|
||||
virtual hsa_status_t Cleanup() const = 0;
|
||||
virtual hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) = 0;
|
||||
virtual void Submit(const packet_t* packet) = 0;
|
||||
|
||||
protected:
|
||||
virtual ~ProxyQueue() {};
|
||||
|
||||
private:
|
||||
static bool rocp_type_;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // _SRC_CORE_PROXY_QUEUE_H
|
||||
@@ -0,0 +1,20 @@
|
||||
#ifndef _SRC_CORE_QUEUE_H
|
||||
#define _SRC_CORE_QUEUE_H
|
||||
|
||||
#include "core/types.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
|
||||
class Queue {
|
||||
public:
|
||||
Queue() {}
|
||||
virtual ~Queue() {}
|
||||
virtual void Submit(const packet_t* packet) = 0;
|
||||
virtual void Submit(const packet_t* packet, const size_t& count) {
|
||||
for (const packet_t* p = packet; p < packet + count; ++p) Submit(p);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // _SRC_CORE_QUEUE_H
|
||||
@@ -0,0 +1,251 @@
|
||||
#include "inc/rocprofiler.h"
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_api_trace.h>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
|
||||
#include "core/context.h"
|
||||
#include "core/hsa_queue.h"
|
||||
#include "core/intercept_queue.h"
|
||||
#include "core/proxy_queue.h"
|
||||
#include "core/simple_proxy_queue.h"
|
||||
#include "util/exception.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "util/logger.h"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
#define DESTRUCTOR_API __attribute__((destructor))
|
||||
|
||||
#define API_METHOD_PREFIX \
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS; \
|
||||
try {
|
||||
|
||||
#define API_METHOD_SUFFIX \
|
||||
} catch (std::exception& e) { \
|
||||
ERR_LOGGING(__FUNCTION__ << "(), " << e.what()); \
|
||||
status = rocprofiler::GetExcStatus(e); \
|
||||
} \
|
||||
return status;
|
||||
|
||||
namespace rocprofiler {
|
||||
decltype(hsa_queue_create)* hsa_queue_create_fn;
|
||||
decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
|
||||
decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
|
||||
decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
|
||||
#ifdef ROCP_HSA_PROXY
|
||||
decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn;
|
||||
decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn;
|
||||
#endif
|
||||
|
||||
::HsaApiTable* kHsaApiTable;
|
||||
|
||||
void SaveHsaApi(::HsaApiTable* table) {
|
||||
kHsaApiTable = table;
|
||||
hsa_queue_create_fn = table->core_->hsa_queue_create_fn;
|
||||
hsa_queue_destroy_fn = table->core_->hsa_queue_destroy_fn;
|
||||
hsa_signal_store_relaxed_fn = table->core_->hsa_signal_store_relaxed_fn;
|
||||
hsa_queue_load_write_index_relaxed_fn = table->core_->hsa_queue_load_write_index_relaxed_fn;
|
||||
hsa_queue_store_write_index_relaxed_fn = table->core_->hsa_queue_store_write_index_relaxed_fn;
|
||||
#ifdef ROCP_HSA_PROXY
|
||||
hsa_amd_queue_intercept_create_fn = table->amd_ext_->hsa_amd_queue_intercept_create_fn;
|
||||
hsa_amd_queue_intercept_register_fn = table->amd_ext_->hsa_amd_queue_intercept_register_fn;
|
||||
#endif
|
||||
}
|
||||
|
||||
void RestoreHsaApi() {
|
||||
::HsaApiTable* table = kHsaApiTable;
|
||||
table->core_->hsa_queue_create_fn = hsa_queue_create_fn;
|
||||
table->core_->hsa_queue_destroy_fn = hsa_queue_destroy_fn;
|
||||
table->core_->hsa_signal_store_relaxed_fn = hsa_signal_store_relaxed_fn;
|
||||
table->core_->hsa_queue_load_write_index_relaxed_fn = hsa_queue_load_write_index_relaxed_fn;
|
||||
table->core_->hsa_queue_store_write_index_relaxed_fn = hsa_queue_store_write_index_relaxed_fn;
|
||||
#ifdef ROCP_HSA_PROXY
|
||||
table->amd_ext_->hsa_amd_queue_intercept_create_fn = hsa_amd_queue_intercept_create_fn;
|
||||
table->amd_ext_->hsa_amd_queue_intercept_register_fn = hsa_amd_queue_intercept_register_fn;
|
||||
#endif
|
||||
}
|
||||
|
||||
CONSTRUCTOR_API void constructor() {
|
||||
util::Logger::Create();
|
||||
util::HsaRsrcFactory::Create();
|
||||
}
|
||||
|
||||
DESTRUCTOR_API void destructor() {
|
||||
util::HsaRsrcFactory::Destroy();
|
||||
util::Logger::Destroy();
|
||||
}
|
||||
|
||||
hsa_status_t GetExcStatus(const std::exception& e) {
|
||||
const util::exception* rocprofiler_exc_ptr = dynamic_cast<const util::exception*>(&e);
|
||||
return (rocprofiler_exc_ptr) ? static_cast<hsa_status_t>(rocprofiler_exc_ptr->status()) : HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
util::Logger::mutex_t util::Logger::mutex_;
|
||||
util::Logger* util::Logger::instance_ = NULL;
|
||||
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Returns the last error message
|
||||
PUBLIC_API hsa_status_t rocprofiler_error_string(const char** str) {
|
||||
API_METHOD_PREFIX
|
||||
*str = rocprofiler::util::Logger::LastMessage().c_str();
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Create new profiling context
|
||||
PUBLIC_API hsa_status_t rocprofiler_open(
|
||||
unsigned agent_id,
|
||||
rocprofiler_info_t* info,
|
||||
uint32_t info_count,
|
||||
rocprofiler_t** handle,
|
||||
uint32_t mode,
|
||||
rocprofiler_properties_t* properties)
|
||||
{
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance();
|
||||
const rocprofiler::util::AgentInfo* agent_info;
|
||||
if (!hsa_rsrc->GetGpuAgentInfo(agent_id, &agent_info)) {
|
||||
EXC_RAISING(HSA_STATUS_ERROR, "agent[" << agent_id << "] is not found");
|
||||
}
|
||||
|
||||
rocprofiler::Queue* queue = NULL;
|
||||
if (mode != 0) {
|
||||
if (mode & ROCPROFILER_MODE_STANDALONE) {
|
||||
if (mode & ROCPROFILER_MODE_CREATEQUEUE) {
|
||||
if (hsa_rsrc->CreateQueue(agent_info, properties->queue_depth, &(properties->queue)) == false) {
|
||||
EXC_RAISING(HSA_STATUS_ERROR, "CreateQueue() failed");
|
||||
}
|
||||
}
|
||||
queue = new rocprofiler::HsaQueue(agent_info, properties->queue);
|
||||
} else {
|
||||
EXC_RAISING(HSA_STATUS_ERROR, "invalid mode (" << mode << ")");
|
||||
}
|
||||
}
|
||||
|
||||
*handle = (void*) new rocprofiler::Context(agent_info, queue, info, info_count);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Delete profiling info
|
||||
PUBLIC_API hsa_status_t rocprofiler_close(rocprofiler_t* handle)
|
||||
{
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(handle);
|
||||
if (context) delete context;
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Get profiling groups
|
||||
PUBLIC_API hsa_status_t rocprofiler_get_groups(rocprofiler_t* handle, rocprofiler_group_t** group_array, uint32_t* group_count) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(handle);
|
||||
const uint32_t count = context->GetGroupCount();
|
||||
rocprofiler_group_t* groups = (rocprofiler_group_t*) calloc(count, sizeof(rocprofiler_group_t));
|
||||
for (unsigned i = 0; i < count; ++i) groups[i] = context->GetGroupInfo(i);
|
||||
*group_array = groups;
|
||||
*group_count = count;
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Start profiling
|
||||
PUBLIC_API hsa_status_t rocprofiler_start(rocprofiler_t* handle, uint32_t group_index) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(handle);
|
||||
context->Start(group_index);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Stop profiling
|
||||
PUBLIC_API hsa_status_t rocprofiler_stop(rocprofiler_t* handle, uint32_t group_index) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(handle);
|
||||
context->Stop(group_index);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Get profiling data
|
||||
PUBLIC_API hsa_status_t rocprofiler_get_data(rocprofiler_t* handle, uint32_t group_index) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(handle);
|
||||
context->GetData(group_index);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Start profiling
|
||||
PUBLIC_API hsa_status_t rocprofiler_group_start(rocprofiler_group_t* group) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler_start(group->context, group->index);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Stop profiling
|
||||
PUBLIC_API hsa_status_t rocprofiler_group_stop(rocprofiler_group_t* group) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler_stop(group->context, group->index);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Get profiling data
|
||||
PUBLIC_API hsa_status_t rocprofiler_get_group_data(rocprofiler_group_t* group) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(group->context);
|
||||
context->GetData(group->index);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Get metrics data
|
||||
PUBLIC_API hsa_status_t rocprofiler_get_metrics_data(const rocprofiler_t* handle) {
|
||||
API_METHOD_PREFIX
|
||||
const rocprofiler::Context* context = reinterpret_cast<const rocprofiler::Context*>(handle);
|
||||
context->GetMetricsData();
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Set kernel dispatch observer
|
||||
PUBLIC_API hsa_status_t rocprofiler_set_dispatch_observer(rocprofiler_callback_t callback, void* data) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::InterceptQueue::SetDispatchCB(callback, data);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Set kernel dispatch observer
|
||||
PUBLIC_API hsa_status_t rocprofiler_remove_dispatch_observer() {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::InterceptQueue::UnsetDispatchCB();
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Method for iterating the events output data
|
||||
PUBLIC_API hsa_status_t rocprofiler_iterate_trace_data(rocprofiler_t* handle, hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) {
|
||||
API_METHOD_PREFIX
|
||||
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(handle);
|
||||
context->IterateTraceData(callback, data);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
PUBLIC_API bool OnLoad(
|
||||
HsaApiTable* table,
|
||||
uint64_t runtime_version,
|
||||
uint64_t failed_tool_count,
|
||||
const char* const * failed_tool_names) {
|
||||
rocprofiler::SaveHsaApi(table);
|
||||
rocprofiler::ProxyQueue::InitFactory();
|
||||
rocprofiler::InterceptQueue::SetTool(getenv("ROCP_TOOL_LIB"));
|
||||
// HSA intercepting
|
||||
if (getenv("ROCP_HSA_INTERCEPT") != NULL) {
|
||||
rocprofiler::InterceptQueue::HsaIntercept(table);
|
||||
rocprofiler::ProxyQueue::HsaIntercept(table);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
PUBLIC_API void OnUnload() {
|
||||
rocprofiler::RestoreHsaApi();
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
@@ -0,0 +1,11 @@
|
||||
#include "core/simple_proxy_queue.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
void SimpleProxyQueue::HsaIntercept(HsaApiTable* table) {
|
||||
table->core_->hsa_signal_store_relaxed_fn = rocprofiler::SimpleProxyQueue::SignalStore;
|
||||
table->core_->hsa_queue_load_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::LoadIndex;
|
||||
table->core_->hsa_queue_store_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::StoreIndex;
|
||||
}
|
||||
|
||||
std::map<signal_handle_t, SimpleProxyQueue*> SimpleProxyQueue::queue_map_;
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,187 @@
|
||||
#ifndef _SRC_CORE_SIMPLE_PROXY_QUEUE_H
|
||||
#define _SRC_CORE_SIMPLE_PROXY_QUEUE_H
|
||||
|
||||
#include <hsa.h>
|
||||
#include <atomic>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
#include "core/proxy_queue.h"
|
||||
#include "core/types.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
extern decltype(hsa_queue_create)* hsa_queue_create_fn;
|
||||
extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
|
||||
extern decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
|
||||
extern decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
|
||||
extern decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
|
||||
typedef decltype(hsa_signal_t::handle) signal_handle_t;
|
||||
|
||||
|
||||
class SimpleProxyQueue : public ProxyQueue {
|
||||
public:
|
||||
static void HsaIntercept(HsaApiTable* table);
|
||||
|
||||
static void SignalStore(
|
||||
hsa_signal_t signal,
|
||||
hsa_signal_value_t que_idx)
|
||||
{
|
||||
auto it = queue_map_.find(signal.handle);
|
||||
if (it != queue_map_.end()) {
|
||||
SimpleProxyQueue* instance = it->second;
|
||||
const uint64_t begin = instance->submit_index_;
|
||||
const uint64_t end = que_idx + 1;
|
||||
instance->submit_index_ = end;
|
||||
for (uint64_t j = begin; j < end; ++j) {
|
||||
// Submited packet
|
||||
const uint32_t idx = j & instance->queue_mask_;
|
||||
packet_t* packet = reinterpret_cast<packet_t*>(instance->queue_->base_address) + idx;
|
||||
if (instance->on_submit_cb_ != NULL) instance->on_submit_cb_(packet, 1, j, instance->on_submit_cb_data_, NULL);
|
||||
else instance->Submit(packet);
|
||||
}
|
||||
} else {
|
||||
hsa_signal_store_relaxed_fn(signal, que_idx);
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t LoadIndex(
|
||||
const hsa_queue_t *queue)
|
||||
{
|
||||
uint64_t index = 0;
|
||||
auto it = queue_map_.find(queue->doorbell_signal.handle);
|
||||
if (it != queue_map_.end()) {
|
||||
SimpleProxyQueue* instance = it->second;
|
||||
instance->mutex_.lock();
|
||||
index = instance->queue_index_;
|
||||
} else {
|
||||
index = hsa_queue_load_write_index_relaxed_fn(queue);
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
static void StoreIndex(
|
||||
const hsa_queue_t *queue,
|
||||
uint64_t value)
|
||||
{
|
||||
auto it = queue_map_.find(queue->doorbell_signal.handle);
|
||||
if (it != queue_map_.end()) {
|
||||
SimpleProxyQueue* instance = it->second;
|
||||
instance->queue_index_ = value;
|
||||
instance->mutex_.unlock();
|
||||
} else {
|
||||
hsa_queue_store_write_index_relaxed_fn(queue, value);
|
||||
}
|
||||
}
|
||||
|
||||
hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) {
|
||||
on_submit_cb_ = on_submit_cb;
|
||||
on_submit_cb_data_ = data;
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
void Submit(const packet_t* packet) {
|
||||
// Compute the write index of queue and copy Aql packet into it
|
||||
const uint64_t que_idx = hsa_queue_load_write_index_relaxed_fn(queue_);
|
||||
// Increment the write index and ring the doorbell to submit the packet.
|
||||
hsa_queue_store_write_index_relaxed_fn(queue_, que_idx + 1);
|
||||
|
||||
const uint32_t mask = queue_->size - 1;
|
||||
const uint32_t idx = que_idx & mask;
|
||||
|
||||
// Copy packet to the queue
|
||||
const packet_word_t* src = reinterpret_cast<const packet_word_t*>(packet);
|
||||
packet_word_t* dst = reinterpret_cast<packet_word_t*>(base_address_ + idx);
|
||||
for (unsigned i = 1; i < sizeof(packet_t) / sizeof(packet_word_t); ++i) {
|
||||
dst[i] = src[i];
|
||||
}
|
||||
|
||||
// To maintain global order to ensure the prior copy of the packet contents is made visible
|
||||
// before the header is updated.
|
||||
// With in-order CP it will wait until the first packet in the blob will be valid
|
||||
std::atomic<packet_word_t>* header_atomic_ptr =
|
||||
reinterpret_cast<std::atomic<packet_word_t>*>(&dst[0]);
|
||||
header_atomic_ptr->store(src[0], std::memory_order_release);
|
||||
|
||||
// Doorbell signaling
|
||||
hsa_signal_store_relaxed_fn(doorbell_signal_, que_idx);
|
||||
}
|
||||
|
||||
SimpleProxyQueue() :
|
||||
agent_info_(NULL),
|
||||
queue_(NULL),
|
||||
base_address_(NULL),
|
||||
doorbell_signal_({}),
|
||||
queue_index_(0),
|
||||
queue_mask_(0),
|
||||
submit_index_(0),
|
||||
on_submit_cb_(0),
|
||||
on_submit_cb_data_(0)
|
||||
{}
|
||||
|
||||
~SimpleProxyQueue() {}
|
||||
|
||||
private:
|
||||
hsa_status_t Init(
|
||||
hsa_agent_t agent,
|
||||
uint32_t size,
|
||||
hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
|
||||
void *data,
|
||||
uint32_t private_segment_size,
|
||||
uint32_t group_segment_size,
|
||||
hsa_queue_t **queue)
|
||||
{
|
||||
auto status = Init(agent, size);
|
||||
*queue = queue_;
|
||||
return status;
|
||||
}
|
||||
|
||||
hsa_status_t Init(hsa_agent_t agent, uint32_t size) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent);
|
||||
if (agent_info_ != NULL) {
|
||||
if (agent_info_->dev_type == HSA_DEVICE_TYPE_GPU) {
|
||||
status = hsa_queue_create_fn(agent, size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue_);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
base_address_ = reinterpret_cast<packet_t*>(queue_->base_address);
|
||||
doorbell_signal_ = queue_->doorbell_signal;
|
||||
data_array_ = calloc(size + 1, sizeof(packet_t));
|
||||
uintptr_t addr = (uintptr_t)data_array_;
|
||||
queue_->base_address = (void*) ((addr + align_mask_) & ~align_mask_);
|
||||
status = hsa_signal_create(1, 0, NULL, &(queue_->doorbell_signal));
|
||||
queue_mask_ = size - 1;
|
||||
queue_map_[queue_->doorbell_signal.handle] = this;
|
||||
}
|
||||
}
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
hsa_status_t Cleanup() const {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
queue_->base_address = base_address_;
|
||||
queue_->doorbell_signal = doorbell_signal_;
|
||||
status = hsa_queue_destroy_fn(queue_);
|
||||
free(data_array_);
|
||||
return status;
|
||||
}
|
||||
|
||||
static std::map<signal_handle_t, SimpleProxyQueue*> queue_map_;
|
||||
const util::AgentInfo* agent_info_;
|
||||
hsa_queue_t* queue_;
|
||||
static const uintptr_t align_mask_ = sizeof(packet_t) - 1;
|
||||
packet_t* base_address_;
|
||||
hsa_signal_t doorbell_signal_;
|
||||
uint64_t queue_index_;
|
||||
uint64_t queue_mask_;
|
||||
uint64_t submit_index_;
|
||||
std::mutex mutex_;
|
||||
on_submit_cb_t on_submit_cb_;
|
||||
void* on_submit_cb_data_;
|
||||
void* data_array_;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // _SRC_CORE_SIMPLE_PROXY_QUEUE_H
|
||||
@@ -0,0 +1,15 @@
|
||||
#ifndef SRC_CORE_TYPES_H_
|
||||
#define SRC_CORE_TYPES_H_
|
||||
|
||||
#include <hsa_ven_amd_aqlprofile.h>
|
||||
|
||||
namespace rocprofiler {
|
||||
typedef hsa_ven_amd_aqlprofile_1_00_pfn_t pfn_t;
|
||||
typedef hsa_ven_amd_aqlprofile_event_t event_t;
|
||||
typedef hsa_ven_amd_aqlprofile_parameter_t parameter_t;
|
||||
typedef hsa_ven_amd_aqlprofile_profile_t profile_t;
|
||||
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
|
||||
typedef uint32_t packet_word_t;
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // SRC_CORE_TYPES_H_
|
||||
@@ -0,0 +1,39 @@
|
||||
#ifndef SRC_UTIL_EXCEPTION_H_
|
||||
#define SRC_UTIL_EXCEPTION_H_
|
||||
|
||||
#include <exception>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
#include <hsa_ven_amd_aqlprofile.h>
|
||||
|
||||
#define EXC_RAISING(error, stream) { \
|
||||
std::ostringstream oss; oss << __FUNCTION__ << "(), " << stream; \
|
||||
throw rocprofiler::util::exception(error, oss.str()); \
|
||||
}
|
||||
|
||||
#define AQL_EXC_RAISING(error, stream) { \
|
||||
const char* error_string = NULL; \
|
||||
const rocprofiler::pfn_t* api = util::HsaRsrcFactory::Instance().AqlProfileApi(); \
|
||||
api->hsa_ven_amd_aqlprofile_error_string(&error_string); \
|
||||
EXC_RAISING(error, stream << ", " << error_string); \
|
||||
}
|
||||
|
||||
namespace rocprofiler {
|
||||
namespace util {
|
||||
|
||||
class exception : public std::exception {
|
||||
public:
|
||||
explicit exception(const uint32_t &status, const std::string& msg) : status_(status), str_(msg) {}
|
||||
const char* what() const throw() { return str_.c_str(); }
|
||||
uint32_t status() const throw() { return status_; }
|
||||
|
||||
protected:
|
||||
const uint32_t status_;
|
||||
const std::string str_;
|
||||
};
|
||||
|
||||
} // namespace util
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // SRC_UTIL_EXCEPTION_H_
|
||||
@@ -0,0 +1,392 @@
|
||||
/**********************************************************************
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
provided that the following conditions are met:
|
||||
|
||||
<95> Redistributions of source code must retain the above copyright notice, this list of
|
||||
conditions and the following disclaimer.
|
||||
<95> Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
********************************************************************/
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_ext_finalize.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofiler {
|
||||
namespace util {
|
||||
|
||||
// Callback function to get available in the system agents
|
||||
hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
HsaRsrcFactory* hsa_rsrc = reinterpret_cast<HsaRsrcFactory*>(data);
|
||||
const AgentInfo* agent_info = hsa_rsrc->AddAgentInfo(agent);
|
||||
if (agent_info != NULL) status = HSA_STATUS_SUCCESS;
|
||||
return status;
|
||||
}
|
||||
|
||||
// Callback function to find and bind kernarg region of an agent
|
||||
hsa_status_t HsaRsrcFactory::FindMemRegionsCallback(hsa_region_t region, void* data) {
|
||||
hsa_region_global_flag_t flags;
|
||||
hsa_region_segment_t segment_id;
|
||||
|
||||
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id);
|
||||
if (segment_id != HSA_REGION_SEGMENT_GLOBAL) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
AgentInfo* agent_info = (AgentInfo*)data;
|
||||
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
|
||||
if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) {
|
||||
agent_info->coarse_region = region;
|
||||
}
|
||||
|
||||
if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) {
|
||||
agent_info->kernarg_region = region;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Constructor of the class
|
||||
HsaRsrcFactory::HsaRsrcFactory() {
|
||||
// Initialize the Hsa Runtime
|
||||
hsa_status_t status = hsa_init();
|
||||
CHECK_STATUS("Error in hsa_init", status);
|
||||
|
||||
// Discover the set of Gpu devices available on the platform
|
||||
status = hsa_iterate_agents(GetHsaAgentsCallback, this);
|
||||
CHECK_STATUS("Error Calling hsa_iterate_agents", status);
|
||||
|
||||
// Get AqlProfile API table
|
||||
status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &aqlprofile_api_);
|
||||
CHECK_STATUS("aqlprofile API table query failed", status);
|
||||
}
|
||||
|
||||
// Destructor of the class
|
||||
HsaRsrcFactory::~HsaRsrcFactory() {
|
||||
hsa_status_t status = hsa_shut_down();
|
||||
CHECK_STATUS("Error in hsa_shut_down", status);
|
||||
}
|
||||
|
||||
// Add system agent info
|
||||
const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
|
||||
// Determine if device is a Gpu agent
|
||||
hsa_status_t status;
|
||||
AgentInfo* agent_info = NULL;
|
||||
|
||||
hsa_device_type_t type;
|
||||
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
|
||||
CHECK_STATUS("Error Calling hsa_agent_get_info", status);
|
||||
|
||||
if (type == HSA_DEVICE_TYPE_CPU) {
|
||||
agent_info = new AgentInfo{};
|
||||
agent_info->dev_id = agent;
|
||||
agent_info->dev_type = HSA_DEVICE_TYPE_CPU;
|
||||
agent_info->dev_index = cpu_list_.size();
|
||||
cpu_list_.push_back(agent_info);
|
||||
}
|
||||
|
||||
if (type == HSA_DEVICE_TYPE_GPU) {
|
||||
agent_info = new AgentInfo{};
|
||||
agent_info->dev_id = agent;
|
||||
agent_info->dev_type = HSA_DEVICE_TYPE_GPU;
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name);
|
||||
strncpy(agent_info->gfxip, agent_info->name, 4);
|
||||
agent_info->gfxip[4] = '\0';
|
||||
agent_info->max_wave_size = 0;
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size);
|
||||
agent_info->max_queue_size = 0;
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);
|
||||
agent_info->profile = hsa_profile_t(108);
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile);
|
||||
|
||||
// Initialize memory regions to zero
|
||||
agent_info->kernarg_region.handle = 0;
|
||||
agent_info->coarse_region.handle = 0;
|
||||
|
||||
// Find and Bind Memory regions of the Gpu agent
|
||||
hsa_agent_iterate_regions(agent, FindMemRegionsCallback, agent_info);
|
||||
|
||||
// Set GPU index
|
||||
agent_info->dev_index = gpu_list_.size();
|
||||
gpu_list_.push_back(agent_info);
|
||||
}
|
||||
|
||||
if (agent_info) agent_map_[agent.handle] = agent_info;
|
||||
|
||||
return agent_info;
|
||||
}
|
||||
|
||||
// Return systen agent info
|
||||
const AgentInfo* HsaRsrcFactory::GetAgentInfo(const hsa_agent_t agent) {
|
||||
const AgentInfo* agent_info = NULL;
|
||||
auto it = agent_map_.find(agent.handle);
|
||||
if (it != agent_map_.end()) {
|
||||
agent_info = it->second;
|
||||
}
|
||||
return agent_info;
|
||||
}
|
||||
|
||||
// Get the count of Hsa Gpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Gpu agents on platform
|
||||
//
|
||||
uint32_t HsaRsrcFactory::GetCountOfGpuAgents() { return uint32_t(gpu_list_.size()); }
|
||||
|
||||
// Get the count of Hsa Cpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Cpu agents on platform
|
||||
//
|
||||
uint32_t HsaRsrcFactory::GetCountOfCpuAgents() { return uint32_t(cpu_list_.size()); }
|
||||
|
||||
// Get the AgentInfo handle of a Gpu device
|
||||
//
|
||||
// @param idx Gpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
|
||||
// Determine if request is valid
|
||||
uint32_t size = uint32_t(gpu_list_.size());
|
||||
if (idx >= size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Copy AgentInfo from specified index
|
||||
*agent_info = gpu_list_[idx];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Get the AgentInfo handle of a Cpu device
|
||||
//
|
||||
// @param idx Cpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
|
||||
// Determine if request is valid
|
||||
uint32_t size = uint32_t(cpu_list_.size());
|
||||
if (idx >= size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Copy AgentInfo from specified index
|
||||
*agent_info = cpu_list_[idx];
|
||||
return true;
|
||||
}
|
||||
|
||||
// Create a Queue object and return its handle. The queue object is expected
|
||||
// to support user requested number of Aql dispatch packets.
|
||||
//
|
||||
// @param agent_info Gpu Agent on which to create a queue object
|
||||
//
|
||||
// @param num_Pkts Number of packets to be held by queue
|
||||
//
|
||||
// @param queue Output parameter updated with handle of queue object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue) {
|
||||
hsa_status_t status;
|
||||
status = hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
|
||||
UINT32_MAX, UINT32_MAX, queue);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Create a Signal object and return its handle.
|
||||
//
|
||||
// @param value Initial value of signal object
|
||||
//
|
||||
// @param signal Output parameter updated with handle of signal object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
|
||||
hsa_status_t status;
|
||||
status = hsa_signal_create(value, 0, NULL, signal);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Allocate memory for use by a kernel of specified size in specified
|
||||
// agent's memory region. Currently supports Global segment whose Kernarg
|
||||
// flag set.
|
||||
//
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
//
|
||||
// @param size Size of memory in terms of bytes
|
||||
//
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
//
|
||||
uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) {
|
||||
hsa_status_t status;
|
||||
uint8_t* buffer = NULL;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
|
||||
if (agent_info->coarse_region.handle != 0) {
|
||||
// Allocate in local memory if it is available
|
||||
status = hsa_memory_allocate(agent_info->coarse_region, size, (void**)&buffer);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
status = hsa_memory_assign_agent(buffer, agent_info->dev_id, HSA_ACCESS_PERMISSION_RW);
|
||||
}
|
||||
} else {
|
||||
// Allocate in system memory if local memory is not available
|
||||
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
|
||||
}
|
||||
|
||||
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
}
|
||||
|
||||
// Allocate memory tp pass kernel parameters.
|
||||
//
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
//
|
||||
// @param size Size of memory in terms of bytes
|
||||
//
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
//
|
||||
uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) {
|
||||
hsa_status_t status;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
|
||||
uint8_t* buffer = NULL;
|
||||
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
|
||||
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
}
|
||||
|
||||
// Transfer data method
|
||||
bool HsaRsrcFactory::TransferData(void* dest_buff, void* src_buff, uint32_t length,
|
||||
bool host_to_dev) {
|
||||
hsa_status_t status;
|
||||
status = hsa_memory_copy(dest_buff, src_buff, length);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
//
|
||||
// @param agent_info Gpu device for which to finalize
|
||||
//
|
||||
// @param brig_path File path of the Assembled Brig file
|
||||
//
|
||||
// @param kernel_name Name of the kernel to finalize
|
||||
//
|
||||
// @param code_desc Handle of finalized Code Descriptor that could
|
||||
// be used to submit for execution
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path,
|
||||
char* kernel_name, hsa_executable_symbol_t* code_desc) {
|
||||
// Finalize the Hsail object into code object
|
||||
hsa_status_t status;
|
||||
hsa_code_object_t code_object;
|
||||
|
||||
// Build the code object filename
|
||||
std::string filename(brig_path);
|
||||
std::clog << "Code object filename: " << filename << std::endl;
|
||||
|
||||
// Open the file containing code object
|
||||
std::ifstream codeStream(filename.c_str(), std::ios::binary | std::ios::ate);
|
||||
if (!codeStream) {
|
||||
std::cerr << "Error: failed to load " << filename << std::endl;
|
||||
assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Allocate memory to read in code object from file
|
||||
size_t size = std::string::size_type(codeStream.tellg());
|
||||
char* codeBuff = (char*)AllocateSysMemory(agent_info, size);
|
||||
if (!codeBuff) {
|
||||
std::cerr << "Error: failed to allocate memory for code object." << std::endl;
|
||||
assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read the code object into allocated memory
|
||||
codeStream.seekg(0, std::ios::beg);
|
||||
std::copy(std::istreambuf_iterator<char>(codeStream), std::istreambuf_iterator<char>(), codeBuff);
|
||||
|
||||
// De-Serialize the code object that has been read into memory
|
||||
status = hsa_code_object_deserialize(codeBuff, size, NULL, &code_object);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
std::cerr << "Failed to deserialize code object" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create executable.
|
||||
hsa_executable_t hsaExecutable;
|
||||
status =
|
||||
hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, "", &hsaExecutable);
|
||||
CHECK_STATUS("Error in creating executable object", status);
|
||||
|
||||
// Load code object.
|
||||
status = hsa_executable_load_code_object(hsaExecutable, agent_info->dev_id, code_object, "");
|
||||
CHECK_STATUS("Error in loading executable object", status);
|
||||
|
||||
// Freeze executable.
|
||||
status = hsa_executable_freeze(hsaExecutable, "");
|
||||
CHECK_STATUS("Error in freezing executable object", status);
|
||||
|
||||
// Get symbol handle.
|
||||
hsa_executable_symbol_t kernelSymbol;
|
||||
status = hsa_executable_get_symbol(hsaExecutable, NULL, kernel_name, agent_info->dev_id, 0,
|
||||
&kernelSymbol);
|
||||
CHECK_STATUS("Error in looking up kernel symbol", status);
|
||||
|
||||
// Update output parameter
|
||||
*code_desc = kernelSymbol;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Print the various fields of Hsa Gpu Agents
|
||||
bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) {
|
||||
std::clog << header << " :" << std::endl;
|
||||
|
||||
const AgentInfo* agent_info;
|
||||
int size = uint32_t(gpu_list_.size());
|
||||
for (int idx = 0; idx < size; idx++) {
|
||||
agent_info = gpu_list_[idx];
|
||||
|
||||
std::clog << "> agent[" << idx << "] :" << std::endl;
|
||||
std::clog << ">> Name : " << agent_info->name << std::endl;
|
||||
std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl;
|
||||
std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl;
|
||||
std::clog << ">> Kernarg Region Id : " << agent_info->coarse_region.handle << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
HsaRsrcFactory* HsaRsrcFactory::instance_ = NULL;
|
||||
HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_;
|
||||
|
||||
} // namespace util
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,262 @@
|
||||
/**********************************************************************
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
provided that the following conditions are met:
|
||||
|
||||
<95> Redistributions of source code must retain the above copyright notice, this list of
|
||||
conditions and the following disclaimer.
|
||||
<95> Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
********************************************************************/
|
||||
|
||||
#ifndef SRC_UTIL_HSA_RSRC_FACTORY_H_
|
||||
#define SRC_UTIL_HSA_RSRC_FACTORY_H_
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_ext_finalize.h>
|
||||
#include <hsa_ven_amd_aqlprofile.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#define HSA_ARGUMENT_ALIGN_BYTES 16
|
||||
#define HSA_QUEUE_ALIGN_BYTES 64
|
||||
#define HSA_PACKET_ALIGN_BYTES 64
|
||||
|
||||
#define CHECK_STATUS(msg, status) \
|
||||
if (status != HSA_STATUS_SUCCESS) { \
|
||||
const char* emsg = 0; \
|
||||
hsa_status_string(status, &emsg); \
|
||||
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
namespace rocprofiler {
|
||||
namespace util {
|
||||
static const unsigned MEM_PAGE_BYTES = 0x1000;
|
||||
static const unsigned MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
|
||||
typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t;
|
||||
|
||||
// Encapsulates information about a Hsa Agent such as its
|
||||
// handle, name, max queue size, max wavefront size, etc.
|
||||
struct AgentInfo {
|
||||
// Handle of Agent
|
||||
hsa_agent_t dev_id;
|
||||
|
||||
// Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
|
||||
uint32_t dev_type;
|
||||
|
||||
// Agent system index
|
||||
uint32_t dev_index;
|
||||
|
||||
// GFXIP name
|
||||
char gfxip[64];
|
||||
|
||||
// Name of Agent whose length is less than 64
|
||||
char name[64];
|
||||
|
||||
// Max size of Wavefront size
|
||||
uint32_t max_wave_size;
|
||||
|
||||
// Max size of Queue buffer
|
||||
uint32_t max_queue_size;
|
||||
|
||||
// Hsail profile supported by agent
|
||||
hsa_profile_t profile;
|
||||
|
||||
// Memory region supporting kernel parameters
|
||||
hsa_region_t coarse_region;
|
||||
|
||||
// Memory region supporting kernel arguments
|
||||
hsa_region_t kernarg_region;
|
||||
};
|
||||
|
||||
class HsaRsrcFactory {
|
||||
public:
|
||||
typedef std::recursive_mutex mutex_t;
|
||||
|
||||
static HsaRsrcFactory* Create() { return NULL; }
|
||||
|
||||
static HsaRsrcFactory* CreateInstance() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (instance_ == NULL) {
|
||||
instance_ = new HsaRsrcFactory();
|
||||
}
|
||||
return instance_;
|
||||
}
|
||||
|
||||
static HsaRsrcFactory& Instance() {
|
||||
CreateInstance();
|
||||
hsa_status_t status = (instance_ != NULL) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
|
||||
CHECK_STATUS("HsaRsrcFactory::Instance() is not found", status);
|
||||
return *instance_;
|
||||
}
|
||||
|
||||
static void Destroy() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (instance_) delete instance_;
|
||||
instance_ = NULL;
|
||||
}
|
||||
|
||||
// Return system agent info
|
||||
const AgentInfo* GetAgentInfo(const hsa_agent_t agent);
|
||||
|
||||
// Get the count of Hsa Gpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Gpu agents on platform
|
||||
//
|
||||
uint32_t GetCountOfGpuAgents();
|
||||
|
||||
// Get the count of Hsa Cpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Cpu agents on platform
|
||||
//
|
||||
uint32_t GetCountOfCpuAgents();
|
||||
|
||||
// Get the AgentInfo handle of a Gpu device
|
||||
//
|
||||
// @param idx Gpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
|
||||
|
||||
// Get the AgentInfo handle of a Cpu device
|
||||
//
|
||||
// @param idx Cpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
|
||||
|
||||
// Create a Queue object and return its handle. The queue object is expected
|
||||
// to support user requested number of Aql dispatch packets.
|
||||
//
|
||||
// @param agent_info Gpu Agent on which to create a queue object
|
||||
//
|
||||
// @param num_Pkts Number of packets to be held by queue
|
||||
//
|
||||
// @param queue Output parameter updated with handle of queue object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue);
|
||||
|
||||
// Create a Signal object and return its handle.
|
||||
//
|
||||
// @param value Initial value of signal object
|
||||
//
|
||||
// @param signal Output parameter updated with handle of signal object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool CreateSignal(uint32_t value, hsa_signal_t* signal);
|
||||
|
||||
// Allocate memory for use by a kernel of specified size in specified
|
||||
// agent's memory region. Currently supports Global segment whose Kernarg
|
||||
// flag set.
|
||||
//
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
//
|
||||
// @param size Size of memory in terms of bytes
|
||||
//
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
//
|
||||
uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Allocate memory tp pass kernel parameters.
|
||||
//
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
//
|
||||
// @param size Size of memory in terms of bytes
|
||||
//
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
//
|
||||
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Transfer data method
|
||||
bool TransferData(void* dest_buff, void* src_buff, uint32_t length, bool host_to_dev);
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
//
|
||||
// @param agent_info Gpu device for which to finalize
|
||||
//
|
||||
// @param brig_path File path of the Assembled Brig file
|
||||
//
|
||||
// @param kernel_name Name of the kernel to finalize
|
||||
//
|
||||
// @param code_desc Handle of finalized Code Descriptor that could
|
||||
// be used to submit for execution
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, char* kernel_name,
|
||||
hsa_executable_symbol_t* code_desc);
|
||||
|
||||
// Print the various fields of Hsa Gpu Agents
|
||||
bool PrintGpuAgents(const std::string& header);
|
||||
|
||||
// Return AqlProfile API table
|
||||
const hsa_ven_amd_aqlprofile_1_00_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; }
|
||||
|
||||
private:
|
||||
// System agents iterating callback
|
||||
static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data);
|
||||
|
||||
// Callback function to find and bind kernarg region of an agent
|
||||
static hsa_status_t FindMemRegionsCallback(hsa_region_t region, void* data);
|
||||
|
||||
// Constructor of the class. Will initialize the Hsa Runtime and
|
||||
// query the system topology to get the list of Cpu and Gpu devices
|
||||
HsaRsrcFactory();
|
||||
|
||||
// Destructor of the class
|
||||
~HsaRsrcFactory();
|
||||
|
||||
// Add an instance of AgentInfo representing a Hsa Gpu agent
|
||||
const AgentInfo* AddAgentInfo(const hsa_agent_t agent);
|
||||
|
||||
static HsaRsrcFactory* instance_;
|
||||
static mutex_t mutex_;
|
||||
|
||||
// Used to maintain a list of Hsa Gpu Agent Info
|
||||
std::vector<const AgentInfo*> gpu_list_;
|
||||
|
||||
// Used to maintain a list of Hsa Cpu Agent Info
|
||||
std::vector<const AgentInfo*> cpu_list_;
|
||||
|
||||
// System agents map
|
||||
std::map<hsa_agent_handle_t, const AgentInfo*> agent_map_;
|
||||
|
||||
// AqlProfile API table
|
||||
hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_api_;
|
||||
};
|
||||
|
||||
} // namespace util
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // SRC_UTIL_HSA_RSRC_FACTORY_H_
|
||||
@@ -0,0 +1,163 @@
|
||||
#ifndef SRC_UTIL_LOGGER_H_
|
||||
#define SRC_UTIL_LOGGER_H_
|
||||
|
||||
#include <time.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/file.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <exception>
|
||||
#include <mutex>
|
||||
#include <map>
|
||||
|
||||
namespace rocprofiler {
|
||||
namespace util {
|
||||
|
||||
class Logger {
|
||||
public:
|
||||
typedef std::recursive_mutex mutex_t;
|
||||
|
||||
template <typename T> Logger& operator<<(const T& m) {
|
||||
std::ostringstream oss;
|
||||
oss << m;
|
||||
if (!streaming_)
|
||||
Log(oss.str());
|
||||
else
|
||||
Put(oss.str());
|
||||
streaming_ = true;
|
||||
return *this;
|
||||
}
|
||||
|
||||
typedef void (*manip_t)();
|
||||
Logger& operator<<(manip_t f) {
|
||||
f();
|
||||
return *this;
|
||||
}
|
||||
|
||||
static void begm() { Instance().ResetStreaming(true); }
|
||||
static void endl() { Instance().ResetStreaming(false); }
|
||||
|
||||
static const std::string& LastMessage() {
|
||||
Logger& logger = Instance();
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
return logger.message_[GetTid()];
|
||||
}
|
||||
|
||||
static Logger* Create() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (instance_ == NULL) instance_ = new Logger();
|
||||
return instance_;
|
||||
}
|
||||
|
||||
static void Destroy() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (instance_ != NULL) delete instance_;
|
||||
instance_ = NULL;
|
||||
}
|
||||
|
||||
static Logger& Instance() {
|
||||
Create();
|
||||
return *instance_;
|
||||
}
|
||||
|
||||
private:
|
||||
static uint32_t GetPid() { return syscall(__NR_getpid); }
|
||||
static uint32_t GetTid() { return syscall(__NR_gettid); }
|
||||
|
||||
Logger() : file_(NULL), dirty_(false), streaming_(false), messaging_(false) {
|
||||
const char* path = getenv("ROCPROFILER_LOG");
|
||||
if (path != NULL) {
|
||||
file_ = fopen("/tmp/rocprofiler_log.txt", "a");
|
||||
}
|
||||
ResetStreaming(false);
|
||||
}
|
||||
|
||||
~Logger() {
|
||||
if (file_ != NULL) {
|
||||
if (dirty_) Put("\n");
|
||||
fclose(file_);
|
||||
}
|
||||
}
|
||||
|
||||
void ResetStreaming(const bool messaging) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (messaging) {
|
||||
message_[GetTid()] = "";
|
||||
}
|
||||
messaging_ = messaging;
|
||||
streaming_ = messaging;
|
||||
}
|
||||
|
||||
void Put(const std::string& m) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (messaging_) {
|
||||
message_[GetTid()] += m;
|
||||
}
|
||||
if (file_ != NULL) {
|
||||
dirty_ = true;
|
||||
flock(fileno(file_), LOCK_EX);
|
||||
fprintf(file_, "%s", m.c_str());
|
||||
fflush(file_);
|
||||
flock(fileno(file_), LOCK_UN);
|
||||
}
|
||||
}
|
||||
|
||||
void Log(const std::string& m) {
|
||||
const time_t rawtime = time(NULL);
|
||||
tm tm_info;
|
||||
localtime_r(&rawtime, &tm_info);
|
||||
char tm_str[26];
|
||||
strftime(tm_str, 26, "%Y-%m-%d %H:%M:%S", &tm_info);
|
||||
std::ostringstream oss;
|
||||
oss << "<" << tm_str << std::dec << " pid" << GetPid() << " tid" << GetTid() << "> " << m;
|
||||
Put(oss.str());
|
||||
}
|
||||
|
||||
FILE* file_;
|
||||
bool dirty_;
|
||||
bool streaming_;
|
||||
bool messaging_;
|
||||
|
||||
static mutex_t mutex_;
|
||||
static Logger* instance_;
|
||||
std::map<uint32_t, std::string> message_;
|
||||
};
|
||||
|
||||
} // namespace util
|
||||
} // namespace rocprofiler
|
||||
|
||||
#define ERR_LOGGING(stream) { \
|
||||
rocprofiler::util::Logger::Instance() \
|
||||
<< "error: " \
|
||||
<< rocprofiler::util::Logger::begm \
|
||||
<< stream \
|
||||
<< rocprofiler::util::Logger::endl; \
|
||||
}
|
||||
|
||||
#define INFO_LOGGING(stream) { \
|
||||
rocprofiler::util::Logger::Instance() \
|
||||
<< "info: " \
|
||||
<< rocprofiler::util::Logger::begm \
|
||||
<< stream \
|
||||
<< rocprofiler::util::Logger::endl; \
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
# define DBG_LOGGING(stream) { \
|
||||
rocprofiler::util::Logger::Instance() << rocprofiler::util::Logger::begm \
|
||||
<< "debug: \"" << stream << "\"" << \
|
||||
<< " in " << __FUNCTION__ \
|
||||
<< " at " << __FILE__ << " line " << __LINE__ \
|
||||
<< rocprofiler::util::Logger::endl; \
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // SRC_UTIL_LOGGER_H_
|
||||
@@ -0,0 +1,279 @@
|
||||
#ifndef _SRC_XML_EXPR_H
|
||||
#define _SRC_XML_EXPR_H
|
||||
|
||||
#include <exception>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string.h>
|
||||
|
||||
namespace xml {
|
||||
class exception_t : public std::exception {
|
||||
public:
|
||||
explicit exception_t(const std::string& msg) : str_(msg) {}
|
||||
const char* what() const throw() { return str_.c_str(); }
|
||||
protected:
|
||||
const std::string str_;
|
||||
};
|
||||
|
||||
typedef uint64_t args_t;
|
||||
typedef std::map<std::string, args_t> args_map_t;
|
||||
class Expr;
|
||||
|
||||
template <class T>
|
||||
class any_cache_t {
|
||||
public:
|
||||
virtual ~any_cache_t() {}
|
||||
virtual bool Lookup(const std::string& name, T& result) const = 0;
|
||||
};
|
||||
|
||||
typedef any_cache_t<std::string> expr_cache_t;
|
||||
typedef any_cache_t<args_t> args_cache_t;
|
||||
|
||||
class bin_expr_t {
|
||||
public:
|
||||
static const bin_expr_t* CreateExpr(const bin_expr_t* arg1, const bin_expr_t* arg2, const char op);
|
||||
static const bin_expr_t* CreateArg(Expr *obj, const std::string str);
|
||||
|
||||
bin_expr_t() : arg1_(NULL), arg2_(NULL) {}
|
||||
bin_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : arg1_(arg1), arg2_(arg2) {}
|
||||
|
||||
virtual args_t Eval(const args_cache_t& args) const = 0;
|
||||
virtual std::string Symbol() const = 0;
|
||||
|
||||
std::string String() const {
|
||||
std::string str;
|
||||
if (arg1_) { str = "(" + arg1_->String() + " " + Symbol() + " " + arg2_->String() + ")"; }
|
||||
else str = Symbol();
|
||||
return str;
|
||||
}
|
||||
|
||||
protected:
|
||||
const bin_expr_t* arg1_;
|
||||
const bin_expr_t* arg2_;
|
||||
};
|
||||
|
||||
class Expr {
|
||||
public:
|
||||
explicit Expr(const std::string& expr, const expr_cache_t* cache) :
|
||||
expr_(expr),
|
||||
pos_(0),
|
||||
sub_count_(0),
|
||||
cache_(cache)
|
||||
{
|
||||
sub_vec_ = new std::vector<const Expr*>;
|
||||
var_vec_ = new std::vector<std::string>;
|
||||
tree_ = ParseExpr();
|
||||
}
|
||||
|
||||
explicit Expr(const std::string& expr, const Expr* obj) :
|
||||
expr_(expr),
|
||||
pos_(0),
|
||||
sub_count_(0),
|
||||
cache_(obj->cache_),
|
||||
sub_vec_(obj->sub_vec_),
|
||||
var_vec_(obj->var_vec_)
|
||||
{
|
||||
sub_vec_->push_back(this);
|
||||
tree_ = ParseExpr();
|
||||
if (!SubCheck()) throw exception_t("expr '" + expr_ + "', bad parenthesis count");
|
||||
}
|
||||
|
||||
~Expr() {
|
||||
delete cache_;
|
||||
for (auto it : *sub_vec_) delete it;
|
||||
delete sub_vec_;
|
||||
delete var_vec_;
|
||||
}
|
||||
|
||||
std::string GetStr() const { return expr_; }
|
||||
const expr_cache_t* GetCache() const { return cache_; }
|
||||
const bin_expr_t* GetTree() const { return tree_; }
|
||||
|
||||
args_t Eval(const args_cache_t& args) const { return tree_->Eval(args); }
|
||||
|
||||
std::string Lookup(const std::string& str) const {
|
||||
std::string result;
|
||||
if (cache_ && !(cache_->Lookup(str, result))) throw exception_t("expr '" + expr_ + "', lookup '" + str + "' failed");
|
||||
return result;
|
||||
}
|
||||
|
||||
void AddVar(const std::string& str) {
|
||||
bool found = false;
|
||||
for (std::string s : *var_vec_) if (s == str) found = true;
|
||||
if (!found) var_vec_->push_back(str);
|
||||
}
|
||||
|
||||
const std::vector<std::string>& GetVars() const { return *var_vec_; }
|
||||
|
||||
std::string String() const { return tree_->String(); }
|
||||
|
||||
private:
|
||||
const bin_expr_t* ParseExpr() {
|
||||
const bin_expr_t* expr = ParseArg();
|
||||
while (!IsEnd()) {
|
||||
const char op = Symb();
|
||||
const bin_expr_t* second_arg = NULL;
|
||||
if (IsSymb(')')) {
|
||||
Next();
|
||||
SubClose();
|
||||
break;
|
||||
} if (IsSymb('*') || IsSymb('/')) {
|
||||
Next();
|
||||
second_arg = ParseArg();
|
||||
expr = bin_expr_t::CreateExpr(expr, second_arg, op);
|
||||
} else if (IsSymb('+') || IsSymb('-')) {
|
||||
Next();
|
||||
second_arg = ParseExpr();
|
||||
expr = bin_expr_t::CreateExpr(expr, second_arg, op);
|
||||
break;
|
||||
} else {
|
||||
throw exception_t("expr '" + expr_ + "', bad operator '" + op + "'");
|
||||
}
|
||||
}
|
||||
return expr;
|
||||
}
|
||||
|
||||
const bin_expr_t* ParseArg() {
|
||||
const bin_expr_t* arg = NULL;
|
||||
if (IsSymb('(')) {
|
||||
Next();
|
||||
SubOpen();
|
||||
arg = ParseExpr();
|
||||
} else {
|
||||
const unsigned pos = FindOp();
|
||||
const std::string str = CutTill(pos);
|
||||
arg = bin_expr_t::CreateArg(this, str);
|
||||
if (arg == NULL) throw exception_t("expr '" + expr_ + "', bad argument '" + str + "'");
|
||||
}
|
||||
return arg;
|
||||
}
|
||||
|
||||
char Symb() const { return Symb(pos_); }
|
||||
char Symb(const unsigned ind) const { return expr_[ind]; }
|
||||
bool IsEnd() const { return (pos_ >= expr_.length()); }
|
||||
bool IsSymb(const char c) const { return IsSymb(pos_, c); }
|
||||
bool IsSymb(const unsigned ind, const char c) const { return (expr_[ind] == c); }
|
||||
void Next() { ++pos_; }
|
||||
void SubOpen() { ++sub_count_; }
|
||||
void SubClose() { --sub_count_; }
|
||||
bool SubCheck() const { return (sub_count_ == 0); }
|
||||
unsigned FindOp() const {
|
||||
unsigned i = pos_;
|
||||
while (i < expr_.length()) {
|
||||
switch (Symb(i)) {
|
||||
case '*':
|
||||
case '/':
|
||||
case '+':
|
||||
case '-':
|
||||
case '(':
|
||||
case ')':
|
||||
goto end;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
end:
|
||||
return i;
|
||||
}
|
||||
std::string CutTill(const unsigned pos) {
|
||||
const std::string str = (pos > pos_) ? expr_.substr(pos_, pos - pos_) : "";
|
||||
pos_ = pos;
|
||||
return str;
|
||||
}
|
||||
|
||||
const std::string expr_;
|
||||
unsigned pos_;
|
||||
unsigned sub_count_;
|
||||
const bin_expr_t* tree_;
|
||||
const expr_cache_t* const cache_;
|
||||
std::vector<const Expr*>* sub_vec_;
|
||||
std::vector<std::string>* var_vec_;
|
||||
};
|
||||
|
||||
class add_expr_t : public bin_expr_t {
|
||||
public:
|
||||
add_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {}
|
||||
args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) + arg2_->Eval(args)); }
|
||||
std::string Symbol() const { return "+"; }
|
||||
};
|
||||
class sub_expr_t : public bin_expr_t {
|
||||
public:
|
||||
sub_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {}
|
||||
args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) - arg2_->Eval(args)); }
|
||||
std::string Symbol() const { return "-"; }
|
||||
};
|
||||
class mul_expr_t : public bin_expr_t {
|
||||
public:
|
||||
mul_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {}
|
||||
args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) * arg2_->Eval(args)); }
|
||||
std::string Symbol() const { return "*"; }
|
||||
};
|
||||
class div_expr_t : public bin_expr_t {
|
||||
public:
|
||||
div_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {}
|
||||
args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) / arg2_->Eval(args)); }
|
||||
std::string Symbol() const { return "/"; }
|
||||
};
|
||||
class const_expr_t : public bin_expr_t {
|
||||
public:
|
||||
const_expr_t(const args_t value) : value_(value) {}
|
||||
args_t Eval(const args_cache_t&) const { return value_; }
|
||||
std::string Symbol() const { std::ostringstream os; os << value_; return os.str(); }
|
||||
private:
|
||||
const args_t value_;
|
||||
};
|
||||
class var_expr_t : public bin_expr_t {
|
||||
public:
|
||||
var_expr_t(const std::string name) : name_(name) {}
|
||||
args_t Eval(const args_cache_t& args) const {
|
||||
args_t result = 0;
|
||||
if (!args.Lookup(name_, result)) throw exception_t("expr arg lookup '" + name_ + "' failed");
|
||||
return result;
|
||||
}
|
||||
std::string Symbol() const { return name_; }
|
||||
private:
|
||||
const std::string name_;
|
||||
};
|
||||
|
||||
inline const bin_expr_t* bin_expr_t::CreateExpr(const bin_expr_t* arg1, const bin_expr_t* arg2, const char op) {
|
||||
const bin_expr_t* expr = NULL;
|
||||
switch (op) {
|
||||
case '+':
|
||||
expr = new add_expr_t(arg1, arg2);
|
||||
break;
|
||||
case '-':
|
||||
expr = new sub_expr_t(arg1, arg2);
|
||||
break;
|
||||
case '*':
|
||||
expr = new mul_expr_t(arg1, arg2);
|
||||
break;
|
||||
case '/':
|
||||
expr = new div_expr_t(arg1, arg2);
|
||||
break;
|
||||
}
|
||||
return expr;
|
||||
}
|
||||
|
||||
inline const bin_expr_t* bin_expr_t::CreateArg(Expr* obj, const std::string str) {
|
||||
const bin_expr_t* arg = NULL;
|
||||
const unsigned i = strspn(str.c_str(), "1234567890");
|
||||
if (i == str.length()) {
|
||||
const unsigned value = atoi(str.c_str());
|
||||
arg = new const_expr_t(value);
|
||||
} else {
|
||||
const std::string sub_expr = obj->Lookup(str);
|
||||
if (sub_expr.empty()) {
|
||||
arg = new var_expr_t(str);
|
||||
obj->AddVar(str);
|
||||
} else {
|
||||
const Expr* expr = new Expr(sub_expr, obj);
|
||||
arg = expr->GetTree();
|
||||
}
|
||||
}
|
||||
return arg;
|
||||
}
|
||||
|
||||
} // namespace xml
|
||||
|
||||
#endif // _SRC_XML_EXPR_H
|
||||
@@ -0,0 +1,221 @@
|
||||
#ifndef SRC_XML_XML_H_
|
||||
#define SRC_XML_XML_H_
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace xml {
|
||||
|
||||
class Xml {
|
||||
public:
|
||||
typedef std::vector<char> token_t;
|
||||
struct level_t {
|
||||
std::string tag;
|
||||
std::vector<level_t*> nodes;
|
||||
std::map<std::string, std::string> opts;
|
||||
};
|
||||
typedef std::vector<level_t*> nodes_vec_t;
|
||||
|
||||
enum {
|
||||
DECL_STATE,
|
||||
BODY_STATE
|
||||
};
|
||||
|
||||
Xml(const char* file_name) :
|
||||
file_name_(file_name),
|
||||
file_line_(0),
|
||||
data_size_(0),
|
||||
index_(0),
|
||||
state_(BODY_STATE),
|
||||
level_(NULL),
|
||||
comment_(false)
|
||||
{
|
||||
AddLevel("top");
|
||||
|
||||
fd_ = open(file_name, O_RDONLY);
|
||||
if (fd_ == -1) {
|
||||
std::cout << "XML file not found: " << file_name << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
token_t remainder;
|
||||
while (1) {
|
||||
token_t token = (remainder.size()) ? remainder : NextToken();
|
||||
remainder.clear();
|
||||
// End of file
|
||||
if (token.size() == 0) break;
|
||||
|
||||
// token_t token1 = token;
|
||||
// token1.push_back('\0');
|
||||
// std::cout << "> " << &token1[0] << std::endl;
|
||||
|
||||
switch(state_) {
|
||||
case BODY_STATE:
|
||||
if (token[0] == '<') {
|
||||
bool node_begin = true;
|
||||
unsigned ind = 1;
|
||||
if (token[1] == '/') {
|
||||
node_begin = false;
|
||||
++ind;
|
||||
}
|
||||
|
||||
unsigned i = ind;
|
||||
while (i < token.size()) { if (token[i] == '>') break; ++i; }
|
||||
for (unsigned j = i + 1; j < token.size(); ++j) remainder.push_back(token[j]);
|
||||
|
||||
if (i == token.size()) {
|
||||
if (node_begin) state_ = DECL_STATE;
|
||||
else BadFormat(token);
|
||||
token.push_back('\0');
|
||||
} else token[i] = '\0';
|
||||
|
||||
const char* tag = strdup(&token[ind]);
|
||||
if (node_begin) {
|
||||
AddLevel(tag);
|
||||
} else {
|
||||
if (strncmp(CurrentLevel().c_str(), tag, strlen(tag))) {
|
||||
token.back() = '>';
|
||||
BadFormat(token);
|
||||
}
|
||||
UpLevel();
|
||||
}
|
||||
} else BadFormat(token);
|
||||
break;
|
||||
case DECL_STATE:
|
||||
if (token[0] == '>') {
|
||||
state_ = BODY_STATE;
|
||||
for (unsigned j = 1; j < token.size(); ++j) remainder.push_back(token[j]);
|
||||
continue;
|
||||
} else {
|
||||
token.push_back('\0');
|
||||
unsigned j = 0;
|
||||
for (j = 0; j < token.size(); ++j) if (token[j] == '=') break;
|
||||
if (j == token.size()) BadFormat(token);
|
||||
token[j] = '\0';
|
||||
const char* key = &token[0];
|
||||
const char* value = &token[j + 1];
|
||||
AddOption(key, value);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::cout << "Wrong state: " << state_ << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<level_t*> GetNodes(std::string global_tag) {
|
||||
return map_[global_tag];
|
||||
}
|
||||
|
||||
void Print() const {
|
||||
for(auto& elem : map_) {
|
||||
for (auto node : elem.second) {
|
||||
if (node->opts.size()) {
|
||||
std::cout << elem.first << ":" << std::endl;
|
||||
for (auto& opt : node->opts) {
|
||||
std::cout << " " << opt.first << " = " << opt.second << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
bool LineEndCheck() {
|
||||
bool found = false;
|
||||
if (buffer_[index_] == '\n') {
|
||||
buffer_[index_] = ' ';
|
||||
++file_line_;
|
||||
found = true;
|
||||
comment_ = false;
|
||||
} else if (comment_ || (buffer_[index_] == '#')) {
|
||||
found = true;
|
||||
comment_ = true;
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
token_t NextToken() {
|
||||
token_t token;
|
||||
|
||||
while (1) {
|
||||
if (data_size_ == 0) {
|
||||
data_size_ = read(fd_, buffer_, buf_size_);
|
||||
if (data_size_ <= 0) break;
|
||||
}
|
||||
if (token.empty()) while ((index_ < data_size_) && ((buffer_[index_] == ' ') || LineEndCheck())) {
|
||||
++index_;
|
||||
}
|
||||
while ((index_ < data_size_) && (buffer_[index_] != ' ') && !LineEndCheck()) {
|
||||
token.push_back(buffer_[index_++]);
|
||||
}
|
||||
if (index_ == data_size_) {
|
||||
index_ = 0;
|
||||
data_size_ = 0;
|
||||
} else break;
|
||||
}
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
void BadFormat(token_t token) {
|
||||
token.push_back('\0');
|
||||
std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '" << &token[0] << "'" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
void AddLevel(const std::string& tag) {
|
||||
level_t* level = new level_t;
|
||||
level->tag = tag;
|
||||
if (level_) {
|
||||
level_->nodes.push_back(level);
|
||||
stack_.push_back(level_);
|
||||
}
|
||||
level_ = level;
|
||||
|
||||
std::string global_tag;
|
||||
for (level_t* level : stack_) { global_tag += level->tag + "."; }
|
||||
global_tag += tag;
|
||||
map_[global_tag].push_back(level_);
|
||||
}
|
||||
|
||||
void UpLevel() {
|
||||
level_ = stack_.back();
|
||||
stack_.pop_back();
|
||||
}
|
||||
|
||||
std::string CurrentLevel() const {
|
||||
return level_->tag;
|
||||
}
|
||||
|
||||
void AddOption(const std::string& key, const std::string& value) {
|
||||
level_->opts[key] = value;
|
||||
}
|
||||
|
||||
const char* file_name_;
|
||||
unsigned file_line_;
|
||||
int fd_;
|
||||
static const unsigned buf_size_ = 256;
|
||||
char buffer_[buf_size_];
|
||||
unsigned data_size_;
|
||||
unsigned index_;
|
||||
unsigned state_;
|
||||
level_t* level_;
|
||||
std::vector<level_t*> stack_;
|
||||
std::map<std::string, nodes_vec_t> map_;
|
||||
bool comment_;
|
||||
};
|
||||
|
||||
} // namespace xml
|
||||
|
||||
#endif // SRC_XML_XML_H_
|
||||
@@ -0,0 +1,41 @@
|
||||
cmake_minimum_required ( VERSION 3.5.0 )
|
||||
set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE )
|
||||
|
||||
set ( EXE_NAME "ctrl" )
|
||||
|
||||
if ( NOT DEFINED TEST_DIR )
|
||||
set ( TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
|
||||
project ( ${EXE_NAME} )
|
||||
## Set build environment
|
||||
include ( env )
|
||||
set ( ROCPROFILER_TARGET "rocprofiler64" )
|
||||
endif ()
|
||||
|
||||
## Util sources
|
||||
file( GLOB UTIL_SRC "${TEST_DIR}/util/*.cpp" )
|
||||
|
||||
## Test control sources
|
||||
set ( CTRL_SRC
|
||||
${TEST_DIR}/ctrl/test.cpp
|
||||
${TEST_DIR}/ctrl/test_hsa.cpp
|
||||
)
|
||||
|
||||
## Test kernels sources
|
||||
set ( TEST_NAME simple_convolution )
|
||||
set ( KERN_SRC ${TEST_DIR}/${TEST_NAME}/${TEST_NAME}.cpp )
|
||||
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${TEST_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" )
|
||||
|
||||
## Building test executable
|
||||
add_executable ( ${EXE_NAME} ${KERN_SRC} ${CTRL_SRC} ${UTIL_SRC} )
|
||||
target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
|
||||
target_link_libraries( ${EXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} c stdc++ dl pthread rt atomic )
|
||||
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" )
|
||||
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/*.xml ${PROJECT_BINARY_DIR}" )
|
||||
|
||||
## Build test library
|
||||
set ( TEST_LIB "tool" )
|
||||
set ( TEST_LIB_SRC ${TEST_DIR}/ctrl/tool.cpp )
|
||||
add_library ( ${TEST_LIB} SHARED ${TEST_LIB_SRC} )
|
||||
target_include_directories ( ${TEST_LIB} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
|
||||
target_link_libraries( ${TEST_LIB} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} c stdc++ dl pthread rt atomic )
|
||||
|
||||
@@ -0,0 +1,85 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef TEST_CTRL_RUN_KERNEL_H_
|
||||
#define TEST_CTRL_RUN_KERNEL_H_
|
||||
|
||||
#include "ctrl/test_hsa.h"
|
||||
#include "util/test_assert.h"
|
||||
|
||||
template <class Kernel, class Test> bool RunKernel(int argc, char* argv[]) {
|
||||
bool ret_val = false;
|
||||
|
||||
// Create test kernel object
|
||||
Kernel test_kernel;
|
||||
TestAql* test_aql = new TestHsa(&test_kernel);
|
||||
test_aql = new Test(test_aql);
|
||||
TEST_ASSERT(test_aql != NULL);
|
||||
if (test_aql == NULL) return 1;
|
||||
|
||||
// Initialization of Hsa Runtime
|
||||
ret_val = test_aql->Initialize(argc, argv);
|
||||
if (ret_val == false) {
|
||||
std::cerr << "Error in the test initialization" << std::endl;
|
||||
// TEST_ASSERT(ret_val);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Setup Hsa resources needed for execution
|
||||
ret_val = test_aql->Setup();
|
||||
if (ret_val == false) {
|
||||
std::cerr << "Error in creating hsa resources" << std::endl;
|
||||
TEST_ASSERT(ret_val);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Run test kernel
|
||||
ret_val = test_aql->Run();
|
||||
if (ret_val == false) {
|
||||
std::cerr << "Error in running the test kernel" << std::endl;
|
||||
TEST_ASSERT(ret_val);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Verify the results of the execution
|
||||
ret_val = test_aql->VerifyResults();
|
||||
if (ret_val) {
|
||||
std::clog << "Test : Passed" << std::endl;
|
||||
} else {
|
||||
std::clog << "Test : Failed" << std::endl;
|
||||
}
|
||||
|
||||
// Print time taken by sample
|
||||
test_aql->PrintTime();
|
||||
|
||||
test_aql->Cleanup();
|
||||
delete test_aql;
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
#endif // TEST_CTRL_RUN_KERNEL_H_
|
||||
@@ -0,0 +1,126 @@
|
||||
#include <hsa.h>
|
||||
#include <string.h>
|
||||
#include <iostream>
|
||||
|
||||
#include "ctrl/run_kernel.h"
|
||||
#include "ctrl/test_aql.h"
|
||||
#include "ctrl/test_hsa.h"
|
||||
#include "inc/rocprofiler.h"
|
||||
#include "simple_convolution/simple_convolution.h"
|
||||
#include "util/test_assert.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
bool ret_val = false;
|
||||
// HSA status
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
// Profiling context
|
||||
rocprofiler_t* context = NULL;
|
||||
// Profiling properties
|
||||
rocprofiler_properties_t properties;
|
||||
// Number of context invocation
|
||||
uint32_t invocation = 0;
|
||||
|
||||
#if 0
|
||||
// Profiling info objects
|
||||
const unsigned info_count = 1;
|
||||
rocprofiler_info_t info[info_count];
|
||||
// PMC events
|
||||
memset(info, 0, sizeof(info));
|
||||
info[0].type = ROCPROFILER_TYPE_METRIC;
|
||||
info[0].name = "SQ_WAVES";
|
||||
#else
|
||||
// Profiling info objects
|
||||
const unsigned info_count = 3;
|
||||
rocprofiler_info_t info[info_count];
|
||||
// PMC events
|
||||
memset(info, 0, sizeof(info));
|
||||
info[0].type = ROCPROFILER_TYPE_METRIC;
|
||||
info[0].name = "SQ_WAVES";
|
||||
info[1].type = ROCPROFILER_TYPE_METRIC;
|
||||
info[1].name = "SQ_ITEMS";
|
||||
// Tracing parameters
|
||||
const unsigned parameter_count = 2;
|
||||
rocprofiler_parameter_t parameters[parameter_count];
|
||||
info[2].name = "THREAD_TRACE";
|
||||
info[2].type = ROCPROFILER_TYPE_TRACE;
|
||||
info[2].parameters = parameters;
|
||||
info[2].parameter_count = parameter_count;
|
||||
parameters[0].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK;
|
||||
parameters[0].value = 0;
|
||||
parameters[1].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK;
|
||||
parameters[1].value = 0;
|
||||
#endif
|
||||
|
||||
// Creating profiling context
|
||||
properties = {};
|
||||
properties.queue_depth = 128;
|
||||
status = rocprofiler_open(TestHsa::HsaAgentId(), info, info_count, &context, ROCPROFILER_MODE_STANDALONE|ROCPROFILER_MODE_OWNQUEUE, &properties);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
TestHsa::SetQueue(properties.queue);
|
||||
|
||||
// Adding dispatch observer
|
||||
status = rocprofiler_dispatch_observer(rocprofiler_dispatch_callback, context);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
// Querying the number of context invocation
|
||||
status = rocprofiler_invocation(context, &invocation);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
// Dispatching profiled kernel n-times to collect all counter groups data
|
||||
unsigned n = 0;
|
||||
while(1) {
|
||||
std::cout << "> " << n << "/" << invocation << std::endl;
|
||||
#if 0
|
||||
status = rocprofiler_start(context);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
ret_val = RunKernel<SimpleConvolution, TestAql>(argc, argv);
|
||||
status = rocprofiler_stop(context);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
#else
|
||||
ret_val = RunKernel<SimpleConvolution, TestAql>(argc, argv);
|
||||
#endif
|
||||
status = rocprofiler_sample(context);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
for (rocprofiler_info_t* p = info; p < info + info_count; ++p) {
|
||||
std::cout << (p - info) << ": " << p->name;
|
||||
switch (p->data.kind) {
|
||||
case ROCPROFILER_INT64:
|
||||
std::cout << std::dec << " result64 (" << p->data.result64 << ")" << std::endl;
|
||||
break;
|
||||
case ROCPROFILER_BYTES: {
|
||||
const char* ptr = reinterpret_cast<const char*>(p->data.result_bytes.ptr);
|
||||
uint64_t size = 0;
|
||||
for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) {
|
||||
size = *reinterpret_cast<const uint64_t*>(ptr);
|
||||
const char* data = ptr + sizeof(size);
|
||||
std::cout << std::endl;
|
||||
std::cout << std::hex << " data (" << (void*)data << ")" << std::endl;
|
||||
std::cout << std::dec << " size (" << size << ")" << std::endl;
|
||||
ptr = data + size;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cout << "result kind (" << p->data.kind << ")" << std::endl;
|
||||
TEST_ASSERT(false);
|
||||
}
|
||||
}
|
||||
|
||||
++n;
|
||||
if (n < invocation) {
|
||||
status = rocprofiler_next(context);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Finishing cleanup
|
||||
// Deleting profiling context will delete all allocated resources
|
||||
status = rocprofiler_close(context);
|
||||
TEST_STATUS(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
return (ret_val) ? 0 : 1;
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
#include <hsa.h>
|
||||
#include <string.h>
|
||||
#include <iostream>
|
||||
|
||||
#include "ctrl/run_kernel.h"
|
||||
#include "ctrl/test_aql.h"
|
||||
#include "simple_convolution/simple_convolution.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
TestHsa::HsaInstantiate();
|
||||
for (int i = 0; i < 3; ++i) RunKernel<SimpleConvolution, TestAql>(argc, argv);
|
||||
TestHsa::HsaShutdown();
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,80 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef TEST_CTRL_TEST_AQL_H_
|
||||
#define TEST_CTRL_TEST_AQL_H_
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_ven_amd_aqlprofile.h>
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
// Test AQL interface
|
||||
class TestAql {
|
||||
public:
|
||||
explicit TestAql(TestAql* t = 0) : test_(t) {}
|
||||
virtual ~TestAql() { if (test_) delete test_; }
|
||||
|
||||
TestAql* Test() { return test_; }
|
||||
virtual AgentInfo* GetAgentInfo() { return (test_) ? test_->GetAgentInfo() : 0; }
|
||||
virtual hsa_queue_t* GetQueue() { return (test_) ? test_->GetQueue() : 0; }
|
||||
virtual HsaRsrcFactory* GetRsrcFactory() { return (test_) ? test_->GetRsrcFactory() : 0; }
|
||||
|
||||
// Initialize application environment including setting
|
||||
// up of various configuration parameters based on
|
||||
// command line arguments
|
||||
// @return bool true on success and false on failure
|
||||
virtual bool Initialize(int argc, char** argv) {
|
||||
return (test_) ? test_->Initialize(argc, argv) : true;
|
||||
}
|
||||
|
||||
// Setup application parameters for exectuion
|
||||
// @return bool true on success and false on failure
|
||||
virtual bool Setup() { return (test_) ? test_->Setup() : true; }
|
||||
|
||||
// Run the kernel
|
||||
// @return bool true on success and false on failure
|
||||
virtual bool Run() { return (test_) ? test_->Run() : true; }
|
||||
|
||||
// Verify results
|
||||
// @return bool true on success and false on failure
|
||||
virtual bool VerifyResults() { return (test_) ? test_->VerifyResults() : true; }
|
||||
|
||||
// Print to console the time taken to execute kernel
|
||||
virtual void PrintTime() {
|
||||
if (test_) test_->PrintTime();
|
||||
}
|
||||
|
||||
// Release resources e.g. memory allocations
|
||||
// @return bool true on success and false on failure
|
||||
virtual bool Cleanup() { return (test_) ? test_->Cleanup() : true; }
|
||||
|
||||
private:
|
||||
TestAql* const test_;
|
||||
};
|
||||
|
||||
#endif // TEST_CTRL_TEST_AQL_H_
|
||||
@@ -0,0 +1,252 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "ctrl/test_hsa.h"
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "util/test_assert.h"
|
||||
#include "util/helper_funcs.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
HsaRsrcFactory* TestHsa::hsa_rsrc_ = NULL;
|
||||
AgentInfo* TestHsa::agent_info_ = NULL;
|
||||
hsa_queue_t* TestHsa::hsa_queue_ = NULL;
|
||||
uint32_t TestHsa::agent_id_ = 0;
|
||||
|
||||
HsaRsrcFactory* TestHsa::HsaInstantiate(const uint32_t agent_ind) {
|
||||
// Instantiate an instance of Hsa Resources Factory
|
||||
if (hsa_rsrc_ == NULL) {
|
||||
agent_id_ = agent_ind;
|
||||
|
||||
hsa_rsrc_ = HsaRsrcFactory::Create();
|
||||
|
||||
// Print properties of the agents
|
||||
hsa_rsrc_->PrintGpuAgents("> GPU agents");
|
||||
|
||||
// Create an instance of Gpu agent
|
||||
if (!hsa_rsrc_->GetGpuAgentInfo(agent_ind, &agent_info_)) {
|
||||
agent_info_ = NULL;
|
||||
std::cerr << "> error: agent[" << agent_ind << "] is not found" << std::endl;
|
||||
return NULL;
|
||||
}
|
||||
std::clog << "> Using agent[" << agent_ind << "] : " << agent_info_->name << std::endl;
|
||||
|
||||
// Create an instance of Aql Queue
|
||||
if (hsa_queue_ == NULL) {
|
||||
uint32_t num_pkts = 128;
|
||||
if(hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) {
|
||||
hsa_queue_ = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
return hsa_rsrc_;
|
||||
}
|
||||
|
||||
void TestHsa::HsaShutdown() { if (hsa_rsrc_) hsa_rsrc_->Destroy(); }
|
||||
|
||||
bool TestHsa::Initialize(int arg_cnt, char** arg_list) {
|
||||
std::clog << "TestHsa::Initialize :" << std::endl;
|
||||
|
||||
// Instantiate a Timer object
|
||||
setup_timer_idx_ = hsa_timer_.CreateTimer();
|
||||
dispatch_timer_idx_ = hsa_timer_.CreateTimer();
|
||||
|
||||
hsa_rsrc_ = HsaInstantiate(agent_id_);
|
||||
if (hsa_rsrc_ == NULL) {
|
||||
TEST_ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Obtain handle of signal
|
||||
hsa_rsrc_->CreateSignal(1, &hsa_signal_);
|
||||
|
||||
// Obtain the code object file name
|
||||
std::string agentName(agent_info_->name);
|
||||
if (agentName.compare(0, 4, "gfx8") == 0) {
|
||||
brig_path_obj_.append("gfx8");
|
||||
} else if (agentName.compare(0, 4, "gfx9") == 0) {
|
||||
brig_path_obj_.append("gfx9");
|
||||
} else {
|
||||
TEST_ASSERT(false);
|
||||
return false;
|
||||
}
|
||||
brig_path_obj_.append("_" + name_ + ".hsaco");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TestHsa::Setup() {
|
||||
std::clog << "TestHsa::setup :" << std::endl;
|
||||
|
||||
// Start the timer object
|
||||
hsa_timer_.StartTimer(setup_timer_idx_);
|
||||
|
||||
mem_map_t& mem_map = test_->GetMemMap();
|
||||
for (mem_it_t it = mem_map.begin(); it != mem_map.end(); ++it) {
|
||||
mem_descr_t& des = it->second;
|
||||
void* ptr = (des.local) ? hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size)
|
||||
: hsa_rsrc_->AllocateSysMemory(agent_info_, des.size);
|
||||
des.ptr = ptr;
|
||||
TEST_ASSERT(ptr != NULL);
|
||||
if (ptr == NULL) return false;
|
||||
}
|
||||
test_->Init();
|
||||
|
||||
// Load and Finalize Kernel Code Descriptor
|
||||
char* brig_path = (char*)brig_path_obj_.c_str();
|
||||
const bool ret_val =
|
||||
hsa_rsrc_->LoadAndFinalize(agent_info_, brig_path, strdup(name_.c_str()), &kernel_code_desc_);
|
||||
if (ret_val == false) {
|
||||
std::cerr << "Error in loading and finalizing Kernel" << std::endl;
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
// Stop the timer object
|
||||
hsa_timer_.StopTimer(setup_timer_idx_);
|
||||
setup_time_taken_ = hsa_timer_.ReadTimer(setup_timer_idx_);
|
||||
total_time_taken_ = setup_time_taken_;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TestHsa::Run() {
|
||||
std::clog << "TestHsa::run :" << std::endl;
|
||||
|
||||
const uint32_t work_group_size = 64;
|
||||
const uint32_t work_grid_size = test_->GetGridSize();
|
||||
uint32_t group_segment_size = 0;
|
||||
uint32_t private_segment_size = 0;
|
||||
const size_t kernarg_segment_size = test_->GetKernargSize();
|
||||
uint64_t code_handle = 0;
|
||||
|
||||
// Retrieve the amount of group memory needed
|
||||
hsa_executable_symbol_get_info(
|
||||
kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &group_segment_size);
|
||||
|
||||
// Retrieve the amount of private memory needed
|
||||
hsa_executable_symbol_get_info(kernel_code_desc_,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
|
||||
&private_segment_size);
|
||||
|
||||
// Check the kernel args size
|
||||
size_t size_info = 0;
|
||||
hsa_executable_symbol_get_info(
|
||||
kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &size_info);
|
||||
TEST_ASSERT(kernarg_segment_size == size_info);
|
||||
if (kernarg_segment_size != size_info) return false;
|
||||
|
||||
// Retrieve handle of the code block
|
||||
hsa_executable_symbol_get_info(kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
|
||||
&code_handle);
|
||||
|
||||
// Initialize the dispatch packet.
|
||||
hsa_kernel_dispatch_packet_t aql;
|
||||
memset(&aql, 0, sizeof(aql));
|
||||
// Set the packet's type, barrier bit, acquire and release fences
|
||||
aql.header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
|
||||
aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE;
|
||||
aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE;
|
||||
// Populate Aql packet with default values
|
||||
aql.setup = 1;
|
||||
aql.grid_size_x = work_grid_size;
|
||||
aql.grid_size_y = 1;
|
||||
aql.grid_size_z = 1;
|
||||
aql.workgroup_size_x = work_group_size;
|
||||
aql.workgroup_size_y = 1;
|
||||
aql.workgroup_size_z = 1;
|
||||
// Bind the kernel code descriptor and arguments
|
||||
aql.kernel_object = code_handle;
|
||||
aql.kernarg_address = test_->GetKernargPtr();
|
||||
aql.group_segment_size = group_segment_size;
|
||||
aql.private_segment_size = private_segment_size;
|
||||
// Initialize Aql packet with handle of signal
|
||||
aql.completion_signal = hsa_signal_;
|
||||
|
||||
// Compute the write index of queue and copy Aql packet into it
|
||||
const uint64_t que_idx = hsa_queue_load_write_index_relaxed(hsa_queue_);
|
||||
const uint32_t mask = hsa_queue_->size - 1;
|
||||
|
||||
std::clog << "> Executing kernel: \"" << name_ << "\"" << std::endl;
|
||||
|
||||
// Start the timer object
|
||||
hsa_timer_.StartTimer(dispatch_timer_idx_);
|
||||
|
||||
// Disable packet so that submission to HW is complete
|
||||
const auto header = aql.header;
|
||||
aql.header = HSA_PACKET_TYPE_INVALID << HSA_PACKET_HEADER_TYPE;
|
||||
|
||||
// Copy Aql packet into queue buffer
|
||||
((hsa_kernel_dispatch_packet_t*)(hsa_queue_->base_address))[que_idx & mask] = aql;
|
||||
|
||||
// After AQL packet is fully copied into queue buffer
|
||||
// update packet header from invalid state to valid state
|
||||
std::atomic_thread_fence(std::memory_order_release);
|
||||
((hsa_kernel_dispatch_packet_t*)(hsa_queue_->base_address))[que_idx & mask].header = header;
|
||||
|
||||
// Increment the write index and ring the doorbell to dispatch the kernel.
|
||||
hsa_queue_store_write_index_relaxed(hsa_queue_, (que_idx + 1));
|
||||
hsa_signal_store_relaxed(hsa_queue_->doorbell_signal, que_idx);
|
||||
|
||||
std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl;
|
||||
|
||||
// Wait on the dispatch signal until the kernel is finished.
|
||||
// Update wait condition to HSA_WAIT_STATE_ACTIVE for Polling
|
||||
hsa_signal_wait_acquire(hsa_signal_, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
|
||||
HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
// Stop the timer object
|
||||
hsa_timer_.StopTimer(dispatch_timer_idx_);
|
||||
dispatch_time_taken_ = hsa_timer_.ReadTimer(dispatch_timer_idx_);
|
||||
total_time_taken_ += dispatch_time_taken_;
|
||||
|
||||
// Copy kernel buffers from local memory into system memory
|
||||
hsa_rsrc_->TransferData(test_->GetOutputPtr(), test_->GetLocalPtr(), test_->GetOutputSize(),
|
||||
false);
|
||||
test_->PrintOutput();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TestHsa::VerifyResults() {
|
||||
// Compare the results and see if they match
|
||||
const void* const refout_ptr = test_->GetRefoutPtr();
|
||||
const int32_t cmp_val =
|
||||
(refout_ptr != NULL) ? memcmp(test_->GetOutputPtr(), refout_ptr, test_->GetOutputSize()) : 0;
|
||||
return (cmp_val == 0);
|
||||
}
|
||||
|
||||
void TestHsa::PrintTime() {
|
||||
std::clog << "Time taken for Setup by " << this->name_ << " : " << this->setup_time_taken_
|
||||
<< std::endl;
|
||||
std::clog << "Time taken for Dispatch by " << this->name_ << " : " << this->dispatch_time_taken_
|
||||
<< std::endl;
|
||||
std::clog << "Time taken in Total by " << this->name_ << " : " << this->total_time_taken_
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
bool TestHsa::Cleanup() { return true; }
|
||||
@@ -0,0 +1,125 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef TEST_CTRL_TEST_HSA_H_
|
||||
#define TEST_CTRL_TEST_HSA_H_
|
||||
|
||||
#include "ctrl/test_aql.h"
|
||||
#include "ctrl/test_kernel.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "util/perf_timer.h"
|
||||
|
||||
// Class implements HSA test
|
||||
class TestHsa : public TestAql {
|
||||
public:
|
||||
// Instantiate HSA resources
|
||||
static HsaRsrcFactory* HsaInstantiate(const uint32_t agent_ind = agent_id_);
|
||||
static void HsaShutdown();
|
||||
static void SetQueue(hsa_queue_t* queue) { hsa_queue_ = queue; }
|
||||
static uint32_t HsaAgentId() { return agent_id_; }
|
||||
|
||||
// Constructor
|
||||
explicit TestHsa(TestKernel* test) : test_(test), name_(test->Name()) {
|
||||
total_time_taken_ = 0;
|
||||
setup_time_taken_ = 0;
|
||||
dispatch_time_taken_ = 0;
|
||||
}
|
||||
|
||||
// Get methods for Agent Info, HAS queue, HSA Resourcse Manager
|
||||
AgentInfo* GetAgentInfo() { return agent_info_; }
|
||||
hsa_queue_t* GetQueue() { return hsa_queue_; }
|
||||
HsaRsrcFactory* GetRsrcFactory() { return hsa_rsrc_; }
|
||||
|
||||
// Initialize application environment including setting
|
||||
// up of various configuration parameters based on
|
||||
// command line arguments
|
||||
// @return bool true on success and false on failure
|
||||
bool Initialize(int argc, char** argv);
|
||||
|
||||
// Setup application parameters for exectuion
|
||||
// @return bool true on success and false on failure
|
||||
bool Setup();
|
||||
|
||||
// Run the BinarySearch kernel
|
||||
// @return bool true on success and false on failure
|
||||
bool Run();
|
||||
|
||||
// Verify against reference implementation
|
||||
// @return bool true on success and false on failure
|
||||
bool VerifyResults();
|
||||
|
||||
// Print to console the time taken to execute kernel
|
||||
void PrintTime();
|
||||
|
||||
// Release resources e.g. memory allocations
|
||||
// @return bool true on success and false on failure
|
||||
bool Cleanup();
|
||||
|
||||
private:
|
||||
typedef TestKernel::mem_descr_t mem_descr_t;
|
||||
typedef TestKernel::mem_map_t mem_map_t;
|
||||
typedef TestKernel::mem_it_t mem_it_t;
|
||||
|
||||
// Test object
|
||||
TestKernel* test_;
|
||||
|
||||
// Path of Brig file
|
||||
std::string brig_path_obj_;
|
||||
|
||||
// Used to track time taken to run the sample
|
||||
double total_time_taken_;
|
||||
double setup_time_taken_;
|
||||
double dispatch_time_taken_;
|
||||
|
||||
// Handle of signal
|
||||
hsa_signal_t hsa_signal_;
|
||||
|
||||
// Handle of Kernel Code Descriptor
|
||||
hsa_executable_symbol_t kernel_code_desc_;
|
||||
|
||||
// Instance of timer object
|
||||
uint32_t setup_timer_idx_;
|
||||
uint32_t dispatch_timer_idx_;
|
||||
PerfTimer hsa_timer_;
|
||||
|
||||
// Instance of Hsa Resources Factory
|
||||
static HsaRsrcFactory* hsa_rsrc_;
|
||||
|
||||
// GPU id
|
||||
static uint32_t agent_id_;
|
||||
|
||||
// Handle to an Hsa Gpu Agent
|
||||
static AgentInfo* agent_info_;
|
||||
|
||||
// Handle to an Hsa Queue
|
||||
static hsa_queue_t* hsa_queue_;
|
||||
|
||||
// Test kernel name
|
||||
std::string name_;
|
||||
};
|
||||
|
||||
#endif // TEST_CTRL_TEST_HSA_H_
|
||||
@@ -0,0 +1,107 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef TEST_CTRL_TEST_KERNEL_H_
|
||||
#define TEST_CTRL_TEST_KERNEL_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <map>
|
||||
|
||||
// Class implements kernel test
|
||||
class TestKernel {
|
||||
public:
|
||||
// Memory descriptors IDs
|
||||
enum { INPUT_DES_ID, OUTPUT_DES_ID, LOCAL_DES_ID, MASK_DES_ID, KERNARG_DES_ID, REFOUT_DES_ID };
|
||||
|
||||
// Memory descriptors vector declaration
|
||||
struct mem_descr_t {
|
||||
void* ptr;
|
||||
uint32_t size;
|
||||
bool local;
|
||||
};
|
||||
|
||||
// Memory map declaration
|
||||
typedef std::map<uint32_t, mem_descr_t> mem_map_t;
|
||||
typedef mem_map_t::iterator mem_it_t;
|
||||
typedef mem_map_t::const_iterator mem_const_it_t;
|
||||
|
||||
virtual ~TestKernel() {}
|
||||
|
||||
// Initialize method
|
||||
virtual void Init() = 0;
|
||||
|
||||
// Return kernel memory map
|
||||
mem_map_t& GetMemMap() { return mem_map_; }
|
||||
|
||||
// Return NULL descriptor
|
||||
static mem_descr_t NullDescriptor() { return {NULL, 0, 0}; }
|
||||
|
||||
// Methods to get the kernel attributes
|
||||
void* GetKernargPtr() const { return GetDescr(KERNARG_DES_ID).ptr; }
|
||||
uint32_t GetKernargSize() const { return GetDescr(KERNARG_DES_ID).size; }
|
||||
void* GetOutputPtr() const { return GetDescr(OUTPUT_DES_ID).ptr; }
|
||||
uint32_t GetOutputSize() const { return GetDescr(OUTPUT_DES_ID).size; }
|
||||
void* GetLocalPtr() const { return GetDescr(LOCAL_DES_ID).ptr; }
|
||||
void* GetRefoutPtr() const { return GetDescr(REFOUT_DES_ID).ptr; }
|
||||
virtual uint32_t GetGridSize() const = 0;
|
||||
|
||||
// Print output
|
||||
virtual void PrintOutput() const = 0;
|
||||
|
||||
// Return name
|
||||
virtual std::string Name() const = 0;
|
||||
|
||||
protected:
|
||||
// Set system memory descriptor
|
||||
bool SetSysDescr(const uint32_t& id, const uint32_t& size) {
|
||||
return SetMemDescr(id, size, false);
|
||||
}
|
||||
|
||||
// Set local memory descriptor
|
||||
bool SetLocalDescr(const uint32_t& id, const uint32_t& size) {
|
||||
return SetMemDescr(id, size, true);
|
||||
}
|
||||
|
||||
// Get memory descriptor
|
||||
mem_descr_t GetDescr(const uint32_t& id) const {
|
||||
mem_const_it_t it = mem_map_.find(id);
|
||||
return (it != mem_map_.end()) ? it->second : NullDescriptor();
|
||||
}
|
||||
|
||||
private:
|
||||
// Set memory descriptor
|
||||
bool SetMemDescr(const uint32_t& id, const uint32_t& size, const bool& local) {
|
||||
const mem_descr_t des = {NULL, size, local};
|
||||
auto ret = mem_map_.insert(mem_map_t::value_type(id, des));
|
||||
return ret.second;
|
||||
}
|
||||
|
||||
// Kernel memory map object
|
||||
mem_map_t mem_map_;
|
||||
};
|
||||
|
||||
#endif // TEST_CTRL_TEST_KERNEL_H_
|
||||
@@ -0,0 +1,45 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef TEST_CTRL_TEST_PGEN_H_
|
||||
#define TEST_CTRL_TEST_PGEN_H_
|
||||
|
||||
#include "ctrl/test_pmgr.h"
|
||||
|
||||
// SimpleConvolution: Class implements OpenCL SimpleConvolution sample
|
||||
class TestPGen : public TestPMgr {
|
||||
protected:
|
||||
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
|
||||
|
||||
packet_t* PrePacket() { return reinterpret_cast<packet_t*>(&pre_packet_); }
|
||||
packet_t* PostPacket() { return reinterpret_cast<packet_t*>(&post_packet_); }
|
||||
|
||||
public:
|
||||
explicit TestPGen(TestAql* t) : TestPMgr(t) {}
|
||||
};
|
||||
|
||||
#endif // TEST_CTRL_TEST_PGEN_H_
|
||||
@@ -0,0 +1,78 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef TEST_CTRL_TEST_PGEN_ROCP_H_
|
||||
#define TEST_CTRL_TEST_PGEN_ROCP_H_
|
||||
|
||||
#include <list>
|
||||
#include <vector>
|
||||
|
||||
#include "ctrl/test_pgen.h"
|
||||
#include "util/test_assert.h"
|
||||
|
||||
hsa_status_t TestPGenRocpCallback(hsa_ven_amd_aqlprofile_info_type_t info_type,
|
||||
hsa_ven_amd_aqlprofile_info_data_t* info_data,
|
||||
void* callback_data) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> passed_data_t;
|
||||
reinterpret_cast<passed_data_t*>(callback_data)->push_back(*info_data);
|
||||
return status;
|
||||
}
|
||||
|
||||
// Class implements PMC profiling
|
||||
class TestPGenRocp : public TestPGen {
|
||||
public:
|
||||
explicit TestPGenRocp(TestAql* t) : TestPGen(t) { std::clog << "Test: PGen ROCP" << std::endl; }
|
||||
|
||||
bool Initialize(int /*arg_cnt*/, char** /*arg_list*/) {
|
||||
status = rocprofiler_on_dispatch(&profile_, PrePacket(), PostPacket());
|
||||
TEST_STATUS(status != HSA_STATUS_SUCCESS);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
private:
|
||||
bool BuildPackets() { return true; }
|
||||
|
||||
bool DumpData() {
|
||||
std::clog << "TestPGenRocp::DumpData :" << std::endl;
|
||||
|
||||
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> callback_data_t;
|
||||
|
||||
callback_data_t data;
|
||||
api_.hsa_ven_amd_aqlprofile_iterate_data(&profile_, TestPGenRocpCallback, &data);
|
||||
for (callback_data_t::iterator it = data.begin(); it != data.end(); ++it) {
|
||||
std::cout << std::dec << "event(block(" << it->pmc_data.event.block_name << "_"
|
||||
<< it->pmc_data.event.block_index << "), id(" << it->pmc_data.event.counter_id
|
||||
<< ")), sample(" << it->sample_id << "), result(" << it->pmc_data.result << ")"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
#endif // TEST_CTRL_TEST_PGEN_ROCP_H_
|
||||
@@ -0,0 +1,144 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "ctrl/test_pmgr.h"
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "ctrl/test_assert.h"
|
||||
|
||||
bool TestPMgr::AddPacketGfx9(const packet_t* packet) {
|
||||
packet_t aql_packet = *packet;
|
||||
|
||||
// Compute the write index of queue and copy Aql packet into it
|
||||
uint64_t que_idx = hsa_queue_load_write_index_relaxed(GetQueue());
|
||||
const uint32_t mask = GetQueue()->size - 1;
|
||||
packet_t* slot = (reinterpret_cast<packet_t*>(GetQueue()->base_address)) + (que_idx & mask);
|
||||
|
||||
// Disable packet so that submission to HW is complete
|
||||
const auto header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
|
||||
aql_packet.header &= (~((1ul << HSA_PACKET_HEADER_WIDTH_TYPE) - 1)) << HSA_PACKET_HEADER_TYPE;
|
||||
aql_packet.header |= HSA_PACKET_TYPE_INVALID << HSA_PACKET_HEADER_TYPE;
|
||||
|
||||
// Copy Aql packet into queue buffer
|
||||
*slot = aql_packet;
|
||||
// After AQL packet is fully copied into queue buffer
|
||||
// update packet header from invalid state to valid state
|
||||
auto header_atomic_ptr =
|
||||
reinterpret_cast<std::atomic<uint16_t>*>(&slot->header);
|
||||
header_atomic_ptr->store(header, std::memory_order_release);
|
||||
|
||||
// Increment the write index and ring the doorbell to dispatch the kernel.
|
||||
hsa_queue_store_write_index_relaxed(GetQueue(), (que_idx + 1));
|
||||
hsa_signal_store_relaxed(GetQueue()->doorbell_signal, que_idx);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TestPMgr::AddPacketGfx8(const packet_t* packet) {
|
||||
// Create legacy devices PM4 data
|
||||
const hsa_ext_amd_aql_pm4_packet_t* aql_packet = (const hsa_ext_amd_aql_pm4_packet_t*)packet;
|
||||
slot_pm4_t data;
|
||||
api_.hsa_ven_amd_aqlprofile_legacy_get_pm4(aql_packet, reinterpret_cast<void*>(data.words));
|
||||
|
||||
// Compute the write index of queue and copy Aql packet into it
|
||||
uint64_t que_idx = hsa_queue_load_write_index_relaxed(GetQueue());
|
||||
const uint32_t mask = GetQueue()->size - 1;
|
||||
|
||||
// Copy Aql/Pm4 blob into queue buffer
|
||||
packet_t* ptr = (reinterpret_cast<packet_t*>(GetQueue()->base_address)) + (que_idx & mask);
|
||||
slot_pm4_t* slot = reinterpret_cast<slot_pm4_t*>(ptr);
|
||||
for (unsigned i = 1; i < SLOT_PM4_SIZE_DW; ++i) {
|
||||
slot->words[i] = data.words[i];
|
||||
}
|
||||
// To maintain global order to ensure the prior copy of the packet contents is made visible
|
||||
// before the header is updated.
|
||||
// With in-order CP it will wait until the first packet in the blob will be valid
|
||||
std::atomic<uint32_t>* header_atomic_ptr =
|
||||
reinterpret_cast<std::atomic<uint32_t>*>(&slot->words[0]);
|
||||
header_atomic_ptr->store(data.words[0], std::memory_order_release);
|
||||
|
||||
// Increment the write index and ring the doorbell to dispatch the kernel.
|
||||
que_idx += SLOT_PM4_SIZE_AQLP - 1;
|
||||
hsa_queue_store_write_index_relaxed(GetQueue(), (que_idx + 1));
|
||||
hsa_signal_store_relaxed(GetQueue()->doorbell_signal, que_idx);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TestPMgr::AddPacket(const packet_t* packet) {
|
||||
const char* agent_name = GetAgentInfo()->name;
|
||||
return (strncmp(agent_name, "gfx8", 4) == 0) ? AddPacketGfx8(packet) : AddPacketGfx9(packet);
|
||||
}
|
||||
|
||||
bool TestPMgr::Run() {
|
||||
// Build Aql Pkts
|
||||
const bool active = BuildPackets();
|
||||
if (active) {
|
||||
// Submit Pre-Dispatch Aql packet
|
||||
AddPacket(&pre_packet_);
|
||||
}
|
||||
|
||||
Test()->Run();
|
||||
|
||||
if (active) {
|
||||
// Set post packet completion signal
|
||||
post_packet_.completion_signal = post_signal_;
|
||||
|
||||
// Submit Post-Dispatch Aql packet
|
||||
AddPacket(&post_packet_);
|
||||
|
||||
// Wait for Post-Dispatch packet to complete
|
||||
hsa_signal_wait_acquire(post_signal_, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
|
||||
HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
// Dumping profiling data
|
||||
DumpData();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TestPMgr::Initialize(int argc, char** argv) {
|
||||
TestAql::Initialize(argc, argv);
|
||||
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
status = hsa_signal_create(1, 0, NULL, &post_signal_);
|
||||
TEST_ASSERT(status == HSA_STATUS_SUCCESS);
|
||||
status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &api_);
|
||||
TEST_ASSERT(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
TestPMgr::TestPMgr(TestAql* t) : TestAql(t), api_({0}) {
|
||||
memset(&pre_packet_, 0, sizeof(pre_packet_));
|
||||
memset(&post_packet_, 0, sizeof(post_packet_));
|
||||
dummy_signal_.handle = 0;
|
||||
post_signal_ = dummy_signal_;
|
||||
memset(&api_, 0, sizeof(api_));
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef TEST_CTRL_TEST_PMGR_H_
|
||||
#define TEST_CTRL_TEST_PMGR_H_
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_ven_amd_aqlprofile.h>
|
||||
#include <atomic>
|
||||
|
||||
#include "ctrl/test_aql.h"
|
||||
|
||||
// Class implements profiling manager
|
||||
class TestPMgr : public TestAql {
|
||||
public:
|
||||
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
|
||||
explicit TestPMgr(TestAql* t);
|
||||
bool Run();
|
||||
|
||||
protected:
|
||||
packet_t pre_packet_;
|
||||
packet_t post_packet_;
|
||||
hsa_signal_t dummy_signal_;
|
||||
hsa_signal_t post_signal_;
|
||||
|
||||
hsa_ven_amd_aqlprofile_1_00_pfn_t api_;
|
||||
|
||||
virtual bool BuildPackets() { return false; }
|
||||
virtual bool DumpData() { return false; }
|
||||
virtual bool Initialize(int argc, char** argv);
|
||||
|
||||
private:
|
||||
enum {
|
||||
SLOT_PM4_SIZE_DW = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(uint32_t),
|
||||
SLOT_PM4_SIZE_AQLP = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t)
|
||||
};
|
||||
struct slot_pm4_t {
|
||||
uint32_t words[SLOT_PM4_SIZE_DW];
|
||||
};
|
||||
|
||||
bool AddPacket(const packet_t* packet);
|
||||
bool AddPacketGfx8(const packet_t* packet);
|
||||
bool AddPacketGfx9(const packet_t* packet);
|
||||
};
|
||||
|
||||
#endif // TEST_CTRL_TEST_PMGR_H_
|
||||
@@ -0,0 +1,297 @@
|
||||
#include <hsa.h>
|
||||
#include <pthread.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "inc/rocprofiler.h"
|
||||
#include "util/xml.h"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
#define DESTRUCTOR_API __attribute__((destructor))
|
||||
|
||||
// Tool thread
|
||||
pthread_t thread;
|
||||
pthread_attr_t thr_attr;
|
||||
bool thr_stop = false;
|
||||
|
||||
struct dispatch_data_t {
|
||||
rocprofiler_info_t* info;
|
||||
unsigned info_count;
|
||||
unsigned group_index;
|
||||
};
|
||||
|
||||
struct context_entry_t {
|
||||
rocprofiler_group_t* group;
|
||||
rocprofiler_info_t* info;
|
||||
unsigned info_count;
|
||||
rocprofiler_callback_data_t data;
|
||||
};
|
||||
|
||||
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
unsigned context_array_size = 1;
|
||||
context_entry_t* context_array = NULL;
|
||||
unsigned context_array_index = 0;
|
||||
|
||||
const char* file_name;
|
||||
FILE* file_handle = NULL;
|
||||
|
||||
void check_status(hsa_status_t status) {
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
const char* error_string = NULL;
|
||||
rocprofiler_error_string(&error_string);
|
||||
fprintf(stderr, "ERROR: %s\n", error_string);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); }
|
||||
|
||||
void print_info(FILE* file, const rocprofiler_info_t* info, const unsigned info_count, const char* str) {
|
||||
if (str) fprintf(file, "%s:\n", str);
|
||||
for (unsigned i= 0; i < info_count; ++i) {
|
||||
const rocprofiler_info_t* p = &info[i];
|
||||
fprintf(file, " %s ", p->name);
|
||||
switch (p->data.kind) {
|
||||
case ROCPROFILER_INT64:
|
||||
fprintf(file, "(%lu)\n", p->data.result64);
|
||||
break;
|
||||
case ROCPROFILER_BYTES: {
|
||||
fprintf(file, "(\n");
|
||||
const char* ptr = reinterpret_cast<const char*>(p->data.result_bytes.ptr);
|
||||
uint64_t size = 0;
|
||||
for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) {
|
||||
size = *reinterpret_cast<const uint64_t*>(ptr);
|
||||
const char* data = ptr + sizeof(size);
|
||||
fprintf(file, " data (%p), size (%lu)\n", data, size);
|
||||
size = align_size(size, sizeof(uint64_t));
|
||||
ptr = data + size;
|
||||
}
|
||||
fprintf(file, " )\n");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cout << "Bad result kind (" << p->data.kind << ")" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void print_group(FILE* file, const rocprofiler_group_t* group, const char* str) {
|
||||
if (str) fprintf(file, "%s:\n", str);
|
||||
for (unsigned i= 0; i < group->info_count; ++i) {
|
||||
print_info(file, group->info[i], 1, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
void store_context(context_entry_t context_entry) {
|
||||
if(pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
exit(1);
|
||||
}
|
||||
if ((context_array == NULL) || (context_array_index >= context_array_size)) {
|
||||
context_array_size *= 2;
|
||||
context_array = reinterpret_cast<context_entry_t*>(realloc(context_array, context_array_size * sizeof(context_entry_t)));
|
||||
}
|
||||
context_array_index += 1;
|
||||
context_array[context_array_index - 1] = context_entry;
|
||||
if(pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void dump_context(FILE *file, unsigned index) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
if (pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
exit(1);
|
||||
}
|
||||
context_entry_t* entry = &context_array[index];
|
||||
rocprofiler_group_t* group = entry->group;
|
||||
const rocprofiler_info_t* info = entry->info;
|
||||
const unsigned info_count = entry->info_count;
|
||||
fprintf(file, "Dispatch[%u], kernel_object(0x%lx):\n", index, entry->data.kernel_object);
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
status = rocprofiler_get_group_data(group);
|
||||
check_status(status);
|
||||
//print_group(file, group, "Group[0] data");
|
||||
|
||||
status = rocprofiler_get_metrics_data(group->context);
|
||||
check_status(status);
|
||||
print_info(file, info, info_count, NULL);
|
||||
|
||||
// Finishing cleanup
|
||||
// Deleting profiling context will delete all allocated resources
|
||||
rocprofiler_close(group->context);
|
||||
}
|
||||
|
||||
// Provided standard profiling callback
|
||||
hsa_status_t dispatch_callback(
|
||||
const rocprofiler_callback_data_t* callback_data,
|
||||
void* user_data,
|
||||
rocprofiler_group_t** group) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
// Passed tool data
|
||||
dispatch_data_t* tool_data = reinterpret_cast<dispatch_data_t*>(user_data);
|
||||
// Profiling context
|
||||
rocprofiler_t* context = NULL;
|
||||
|
||||
// Open profiling context
|
||||
status = rocprofiler_open(0, tool_data->info, tool_data->info_count, &context, 0, NULL);
|
||||
check_status(status);
|
||||
|
||||
rocprofiler_group_t* groups = NULL;
|
||||
uint32_t group_count = 0;
|
||||
status = rocprofiler_get_groups(context, &groups, &group_count);
|
||||
check_status(status);
|
||||
assert(group_count == 1);
|
||||
|
||||
*group = &groups[0];
|
||||
store_context({*group, tool_data->info, tool_data->info_count, *callback_data});
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void* dumping_data(void*) {
|
||||
unsigned index = 0;
|
||||
do {
|
||||
while (index < context_array_index) {
|
||||
dump_context(file_handle, index);
|
||||
++index;
|
||||
}
|
||||
} while (!thr_stop);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
CONSTRUCTOR_API void constructor() {
|
||||
std::map<std::string, hsa_ven_amd_aqlprofile_parameter_name_t> parameters_dict;
|
||||
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET;
|
||||
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK;
|
||||
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK;
|
||||
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK;
|
||||
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2;
|
||||
|
||||
#ifdef TOOL_THREAD
|
||||
int err = pthread_attr_init(&thr_attr);
|
||||
if (err) { errno = err; perror("pthread_attr_init"); exit(1); }
|
||||
err = pthread_create(&thread, &thr_attr, dumping_data, NULL);
|
||||
if (err) { errno = err; perror("pthread_create"); exit(1); }
|
||||
#endif
|
||||
|
||||
// Set output file
|
||||
file_name = getenv("ROCP_OUTPUT");
|
||||
if (file_name != NULL) {
|
||||
file_handle = fopen(file_name, "w");
|
||||
if (file_handle == NULL) {
|
||||
perror("fopen");
|
||||
exit(1);
|
||||
}
|
||||
} else file_handle = stdout;
|
||||
|
||||
// Getting input
|
||||
const char* xml_name = getenv("ROCP_INPUT");
|
||||
if (xml_name == NULL) {
|
||||
fprintf(stderr, "ROCProfiler: input is not specified, ROCP_INPUT env");
|
||||
exit(1);
|
||||
}
|
||||
printf("ROCProfiler: input from \"%s\"\n", xml_name);
|
||||
xml::Xml* xml = new xml::Xml(xml_name);
|
||||
|
||||
// Getting metrics
|
||||
auto metrics_list = xml->GetNodes("top.metric");
|
||||
std::vector<std::string> metrics_vec;
|
||||
for (auto* entry : metrics_list) {
|
||||
const std::string entry_str = entry->opts["name"];
|
||||
size_t pos1 = 0;
|
||||
while(pos1 < entry_str.length()) {
|
||||
const size_t pos2 = entry_str.find(",", pos1);
|
||||
const std::string metric_name = entry_str.substr(pos1, pos2 - pos1);
|
||||
metrics_vec.push_back(metric_name);
|
||||
if (pos2 == std::string::npos) break;
|
||||
pos1 = pos2 + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Getting traces
|
||||
auto traces_list = xml->GetNodes("top.trace");
|
||||
|
||||
const unsigned info_count = metrics_vec.size() + traces_list.size();
|
||||
rocprofiler_info_t* info= new rocprofiler_info_t[info_count];
|
||||
memset(info, 0, info_count * sizeof(rocprofiler_info_t));
|
||||
|
||||
printf(" %d metrics\n", (int) metrics_vec.size());
|
||||
for (unsigned i = 0; i < metrics_vec.size(); ++i) {
|
||||
const std::string& name = metrics_vec[i];
|
||||
printf("%s%s", (i == 0) ? " " : ", ", name.c_str());
|
||||
info[i] = {};
|
||||
info[i].type = ROCPROFILER_TYPE_METRIC;
|
||||
info[i].name = strdup(name.c_str());
|
||||
}
|
||||
if (metrics_vec.size()) printf("\n");
|
||||
|
||||
printf(" %d traces\n", (int) traces_list.size());
|
||||
unsigned index = metrics_vec.size();
|
||||
for (auto* entry : traces_list) {
|
||||
auto params_list = xml->GetNodes("top.trace.parameters");
|
||||
if (params_list.size() != 1) {
|
||||
fprintf(stderr, "ROCProfiler: Single input 'parameters' section is supported\n");
|
||||
exit(1);
|
||||
}
|
||||
const std::string& name = entry->opts["name"];
|
||||
printf(" %s (\n", name.c_str());
|
||||
info[index] = {};
|
||||
info[index].type = ROCPROFILER_TYPE_TRACE;
|
||||
info[index].name = strdup(name.c_str());
|
||||
|
||||
for (auto* params : params_list) {
|
||||
const unsigned parameter_count = params->opts.size();
|
||||
rocprofiler_parameter_t *parameters = new rocprofiler_parameter_t[parameter_count];
|
||||
unsigned p_index = 0;
|
||||
for (auto& v : params->opts) {
|
||||
const std::string parameter_name = v.first;
|
||||
if (parameters_dict.find(parameter_name) == parameters_dict.end()) {
|
||||
fprintf(stderr, "ROCProfiler: unknown trace parameter %s\n", parameter_name.c_str());
|
||||
exit(1);
|
||||
}
|
||||
const uint32_t value = strtol(v.second.c_str(), NULL, 0);
|
||||
printf(" %s = 0x%x\n", parameter_name.c_str(), value);
|
||||
parameters[p_index] = {};
|
||||
parameters[p_index].parameter_name = parameters_dict[parameter_name];
|
||||
parameters[p_index].value = value;
|
||||
++p_index;
|
||||
}
|
||||
|
||||
info[index].parameters = parameters;
|
||||
info[index].parameter_count = parameter_count;
|
||||
}
|
||||
printf(" )\n");
|
||||
++index;
|
||||
}
|
||||
|
||||
if (info_count) {
|
||||
// Adding dispatch observer
|
||||
dispatch_data_t* dispatch_data = new dispatch_data_t{};
|
||||
dispatch_data->info = info;
|
||||
dispatch_data->info_count = info_count;
|
||||
dispatch_data->group_index = 0;
|
||||
rocprofiler_dispatch_observer(dispatch_callback, dispatch_data);
|
||||
}
|
||||
}
|
||||
|
||||
DESTRUCTOR_API void destructor() {
|
||||
printf("\nROCPRofiler: %u contexts collected", context_array_index);
|
||||
thr_stop = true;
|
||||
#ifdef TOOL_THREAD
|
||||
pthread_join(thread, NULL);
|
||||
#else
|
||||
dumping_data(NULL);
|
||||
#endif
|
||||
}
|
||||
@@ -0,0 +1,313 @@
|
||||
#include <hsa.h>
|
||||
#include <pthread.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "inc/rocprofiler.h"
|
||||
#include "util/xml.h"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
#define DESTRUCTOR_API __attribute__((destructor))
|
||||
|
||||
struct dispatch_data_t {
|
||||
rocprofiler_info_t* info;
|
||||
unsigned info_count;
|
||||
unsigned group_index;
|
||||
};
|
||||
|
||||
struct context_entry_t {
|
||||
rocprofiler_group_t* group;
|
||||
rocprofiler_info_t* info;
|
||||
unsigned info_count;
|
||||
rocprofiler_callback_data_t data;
|
||||
};
|
||||
|
||||
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
unsigned context_array_size = 1;
|
||||
context_entry_t* context_array = NULL;
|
||||
unsigned context_array_index = 0;
|
||||
unsigned dump_index = 0;
|
||||
|
||||
const char* file_name = NULL;
|
||||
FILE* file_handle = NULL;
|
||||
|
||||
void check_status(hsa_status_t status) {
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
const char* error_string = NULL;
|
||||
rocprofiler_error_string(&error_string);
|
||||
fprintf(stderr, "ERROR: %s\n", error_string);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
hsa_status_t trace_data_cb(
|
||||
hsa_ven_amd_aqlprofile_info_type_t info_type,
|
||||
hsa_ven_amd_aqlprofile_info_data_t* info_data,
|
||||
void* data)
|
||||
{
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) {
|
||||
printf(" data ptr (%p), size(%u)\n", info_data->sqtt_data.ptr, info_data->sqtt_data.size);
|
||||
} else status = HSA_STATUS_ERROR;
|
||||
return status;
|
||||
}
|
||||
|
||||
unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); }
|
||||
|
||||
void print_info(FILE* file, const rocprofiler_info_t* info, const unsigned info_count, rocprofiler_t* context, const char* str) {
|
||||
if (str) fprintf(file, "%s:\n", str);
|
||||
for (unsigned i= 0; i < info_count; ++i) {
|
||||
const rocprofiler_info_t* p = &info[i];
|
||||
fprintf(file, " %s ", p->name);
|
||||
switch (p->data.kind) {
|
||||
case ROCPROFILER_INT64:
|
||||
fprintf(file, "(%lu)\n", p->data.result_int64);
|
||||
break;
|
||||
case ROCPROFILER_BYTES: {
|
||||
fprintf(file, "(\n");
|
||||
if (p->data.result_bytes.copy) {
|
||||
fprintf(file, " system memory copy\n");
|
||||
const char* ptr = reinterpret_cast<const char*>(p->data.result_bytes.ptr);
|
||||
uint64_t size = 0;
|
||||
for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) {
|
||||
size = *reinterpret_cast<const uint64_t*>(ptr);
|
||||
const char* data = ptr + sizeof(size);
|
||||
fprintf(file, " data (%p), size (%lu)\n", data, size);
|
||||
size = align_size(size, sizeof(uint64_t));
|
||||
ptr = data + size;
|
||||
}
|
||||
} else {
|
||||
fprintf(file, " local memory buffer\n");
|
||||
rocprofiler_iterate_trace_data(context, trace_data_cb, NULL);
|
||||
}
|
||||
fprintf(file, " )\n");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
std::cout << "Bad result kind (" << p->data.kind << ")" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void print_group(FILE* file, const rocprofiler_group_t* group, const char* str) {
|
||||
if (str) fprintf(file, "%s:\n", str);
|
||||
for (unsigned i= 0; i < group->info_count; ++i) {
|
||||
print_info(file, group->info[i], 1, group->context, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
void store_entry(const context_entry_t& context_entry) {
|
||||
if(pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if ((context_array == NULL) || (context_array_index >= context_array_size)) {
|
||||
context_array_size *= 2;
|
||||
context_array = reinterpret_cast<context_entry_t*>(realloc(context_array, context_array_size * sizeof(context_entry_t)));
|
||||
}
|
||||
context_array[context_array_index] = context_entry;
|
||||
context_array_index += 1;
|
||||
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void dump_context(FILE *file, context_entry_t* entry, unsigned index) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
rocprofiler_group_t* group = entry->group;
|
||||
const rocprofiler_info_t* info = entry->info;
|
||||
const unsigned info_count = entry->info_count;
|
||||
fprintf(file, "Dispatch[%u], kernel_object(0x%lx):\n", index, entry->data.kernel_object);
|
||||
|
||||
status = rocprofiler_get_group_data(group);
|
||||
check_status(status);
|
||||
//print_group(file, group, "Group[0] data");
|
||||
|
||||
status = rocprofiler_get_metrics_data(group->context);
|
||||
check_status(status);
|
||||
print_info(file, info, info_count, group->context, NULL);
|
||||
|
||||
// Finishing cleanup
|
||||
// Deleting profiling context will delete all allocated resources
|
||||
rocprofiler_close(group->context);
|
||||
|
||||
dump_index = index;
|
||||
}
|
||||
|
||||
void dumping_data() {
|
||||
if (pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (unsigned index = 0; index < context_array_index; ++index) {
|
||||
dump_context(file_handle, &context_array[index], index);
|
||||
}
|
||||
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// profiling callback
|
||||
hsa_status_t dispatch_callback(
|
||||
const rocprofiler_callback_data_t* callback_data,
|
||||
void* user_data,
|
||||
rocprofiler_group_t** group) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
// Passed tool data
|
||||
dispatch_data_t* tool_data = reinterpret_cast<dispatch_data_t*>(user_data);
|
||||
// Profiling context
|
||||
rocprofiler_t* context = NULL;
|
||||
// context properties
|
||||
rocprofiler_properties_t properties{};
|
||||
|
||||
// Open profiling context
|
||||
status = rocprofiler_open(0, tool_data->info, tool_data->info_count, &context, 0, &properties);
|
||||
check_status(status);
|
||||
|
||||
rocprofiler_group_t* groups = NULL;
|
||||
uint32_t group_count = 0;
|
||||
status = rocprofiler_get_groups(context, &groups, &group_count);
|
||||
check_status(status);
|
||||
assert(group_count == 1);
|
||||
|
||||
*group = &groups[0];
|
||||
context_entry_t entry;
|
||||
entry.group = *group;
|
||||
entry.info = tool_data->info;
|
||||
entry.info_count = tool_data->info_count;
|
||||
entry.data = *callback_data;
|
||||
store_entry(entry);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
CONSTRUCTOR_API void constructor() {
|
||||
std::map<std::string, hsa_ven_amd_aqlprofile_parameter_name_t> parameters_dict;
|
||||
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET;
|
||||
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK;
|
||||
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK;
|
||||
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK;
|
||||
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2;
|
||||
|
||||
// Set output file
|
||||
file_name = getenv("ROCP_OUTPUT");
|
||||
if (file_name != NULL) {
|
||||
file_handle = fopen(file_name, "w");
|
||||
if (file_handle == NULL) {
|
||||
perror("fopen");
|
||||
exit(1);
|
||||
}
|
||||
} else file_handle = stdout;
|
||||
|
||||
// Getting input
|
||||
const char* xml_name = getenv("ROCP_INPUT");
|
||||
if (xml_name == NULL) {
|
||||
fprintf(stderr, "ROCProfiler: input is not specified, ROCP_INPUT env");
|
||||
exit(1);
|
||||
}
|
||||
printf("ROCProfiler: input from \"%s\"\n", xml_name);
|
||||
xml::Xml* xml = new xml::Xml(xml_name);
|
||||
|
||||
// Getting metrics
|
||||
auto metrics_list = xml->GetNodes("top.metric");
|
||||
std::vector<std::string> metrics_vec;
|
||||
for (auto* entry : metrics_list) {
|
||||
const std::string entry_str = entry->opts["name"];
|
||||
size_t pos1 = 0;
|
||||
while(pos1 < entry_str.length()) {
|
||||
const size_t pos2 = entry_str.find(",", pos1);
|
||||
const std::string metric_name = entry_str.substr(pos1, pos2 - pos1);
|
||||
metrics_vec.push_back(metric_name);
|
||||
if (pos2 == std::string::npos) break;
|
||||
pos1 = pos2 + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Getting traces
|
||||
auto traces_list = xml->GetNodes("top.trace");
|
||||
|
||||
const unsigned info_count = metrics_vec.size() + traces_list.size();
|
||||
rocprofiler_info_t* info= new rocprofiler_info_t[info_count];
|
||||
memset(info, 0, info_count * sizeof(rocprofiler_info_t));
|
||||
|
||||
printf(" %d metrics\n", (int) metrics_vec.size());
|
||||
for (unsigned i = 0; i < metrics_vec.size(); ++i) {
|
||||
const std::string& name = metrics_vec[i];
|
||||
printf("%s%s", (i == 0) ? " " : ", ", name.c_str());
|
||||
info[i] = {};
|
||||
info[i].type = ROCPROFILER_TYPE_METRIC;
|
||||
info[i].name = strdup(name.c_str());
|
||||
}
|
||||
if (metrics_vec.size()) printf("\n");
|
||||
|
||||
printf(" %d traces\n", (int) traces_list.size());
|
||||
unsigned index = metrics_vec.size();
|
||||
for (auto* entry : traces_list) {
|
||||
auto params_list = xml->GetNodes("top.trace.parameters");
|
||||
if (params_list.size() != 1) {
|
||||
fprintf(stderr, "ROCProfiler: Single input 'parameters' section is supported\n");
|
||||
exit(1);
|
||||
}
|
||||
const std::string& name = entry->opts["name"];
|
||||
const bool to_copy_data = (entry->opts["copy"] == "true");
|
||||
printf(" %s (\n", name.c_str());
|
||||
info[index] = {};
|
||||
info[index].type = ROCPROFILER_TYPE_TRACE;
|
||||
info[index].name = strdup(name.c_str());
|
||||
info[index].data.result_bytes.copy = to_copy_data;
|
||||
|
||||
for (auto* params : params_list) {
|
||||
const unsigned parameter_count = params->opts.size();
|
||||
rocprofiler_parameter_t *parameters = new rocprofiler_parameter_t[parameter_count];
|
||||
unsigned p_index = 0;
|
||||
for (auto& v : params->opts) {
|
||||
const std::string parameter_name = v.first;
|
||||
if (parameters_dict.find(parameter_name) == parameters_dict.end()) {
|
||||
fprintf(stderr, "ROCProfiler: unknown trace parameter %s\n", parameter_name.c_str());
|
||||
exit(1);
|
||||
}
|
||||
const uint32_t value = strtol(v.second.c_str(), NULL, 0);
|
||||
printf(" %s = 0x%x\n", parameter_name.c_str(), value);
|
||||
parameters[p_index] = {};
|
||||
parameters[p_index].parameter_name = parameters_dict[parameter_name];
|
||||
parameters[p_index].value = value;
|
||||
++p_index;
|
||||
}
|
||||
|
||||
info[index].parameters = parameters;
|
||||
info[index].parameter_count = parameter_count;
|
||||
}
|
||||
printf(" )\n");
|
||||
++index;
|
||||
}
|
||||
|
||||
if (info_count) {
|
||||
// Adding dispatch observer
|
||||
dispatch_data_t* dispatch_data = new dispatch_data_t{};
|
||||
dispatch_data->info = info;
|
||||
dispatch_data->info_count = info_count;
|
||||
dispatch_data->group_index = 0;
|
||||
rocprofiler_set_dispatch_observer(dispatch_callback, dispatch_data);
|
||||
}
|
||||
}
|
||||
|
||||
DESTRUCTOR_API void destructor() {
|
||||
printf("\nROCPRofiler: %u contexts collected", context_array_index);
|
||||
if (file_name == NULL) {
|
||||
printf("\n");
|
||||
} else {
|
||||
printf(", dumping to %s\n", file_name);
|
||||
}
|
||||
dumping_data();
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
<metric name=CPC_ME1_STALL_WAIT_ON_RCIU_READ,SQ_WAVES,SQ_WAVE_READY,SQ_CYCLES,SQ_ITEMS,WAVE_STALLS_RATE ></metric>
|
||||
<trace name=SQTT copy=true >
|
||||
<parameters
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK=0xf
|
||||
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK=0xf
|
||||
></parameters>
|
||||
</trace>
|
||||
@@ -0,0 +1,15 @@
|
||||
<gfx8>
|
||||
<metric name=SQ_CYCLES block=SQ event=2 ></metric>
|
||||
<metric name=SQ_WAVES block=SQ event=4 ></metric>
|
||||
<metric name=SQ_ITEMS block=SQ event=14 ></metric>
|
||||
<metric name=SQ_WAVE_READY block=SQ event=47 ></metric>
|
||||
<metric name=TCC_CYCLE block=TCC event=1 ></metric>
|
||||
<metric name=TCC_REQ block=TCC event=3 ></metric>
|
||||
<metric name=TCC_WRITEBACK block=TCC event=22 ></metric>
|
||||
<metric name=CPC_ALWAYS_COUNT block=CPC event=0 ></metric>
|
||||
<metric name=CPC_ME1_STALL_WAIT_ON_RCIU_READ block=CPC event=8 ></metric>
|
||||
</gfx8>
|
||||
|
||||
<global>
|
||||
<metric name=WAVE_STALLS_RATE expr=CPC_ME1_STALL_WAIT_ON_RCIU_READ*(SQ_WAVES+SQ_WAVE_READY)*100/(SQ_CYCLES/SQ_ITEMS) ></metric>
|
||||
</global>
|
||||
Fichier exécutable
+27
@@ -0,0 +1,27 @@
|
||||
#!/bin/sh
|
||||
|
||||
tbin=./test/ctrl
|
||||
|
||||
#export HSA_LIB=/home/evgeny/pkg/compute-psdb-16453/lib
|
||||
export HSA_LIB=/home/evgeny/git/compute/out/ubuntu-16.04/16.04/lib
|
||||
export OCL_LIB=/home/evgeny/pkg/opencl_modified/opencl_x86_64/lib
|
||||
#export OCL_LIB=/home/evgeny/Perforce/eshcherb_opencl/drivers/opencl/dist/linux/debug/lib/x86_64
|
||||
|
||||
export LD_LIBRARY_PATH=$PWD:$HSA_LIB:$OCL_LIB
|
||||
export ROCPROFILER_LOG=1
|
||||
|
||||
export HSA_TOOLS_LIB=librocprofiler64.so
|
||||
export ROCP_TOOL_LIB=test/libtool.so
|
||||
export ROCP_HSA_INTERCEPT=1
|
||||
export ROCP_METRICS=metrics.xml
|
||||
export ROCP_INPUT=input.xml
|
||||
unset ROCP_PROXY_QUEUE
|
||||
|
||||
echo "Run simple profiling test"
|
||||
if [ -n "$1" ] ; then
|
||||
eval "$*"
|
||||
else
|
||||
eval $tbin
|
||||
fi
|
||||
|
||||
exit 0
|
||||
Fichier binaire non affiché.
Fichier binaire non affiché.
@@ -0,0 +1,81 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************/
|
||||
|
||||
/**
|
||||
* SimpleConvolution is where each pixel of the output image
|
||||
* is the weighted sum of the neighborhood pixels of the input image
|
||||
* The neighborhood is defined by the dimensions of the mask and
|
||||
* weight of each neighbor is defined by the mask itself.
|
||||
* @param output Output matrix after performing convolution
|
||||
* @param input Input matrix on which convolution is to be performed
|
||||
* @param mask mask matrix using which convolution was to be performed
|
||||
* @param inputDimensions dimensions of the input matrix
|
||||
* @param maskDimensions dimensions of the mask matrix
|
||||
*/
|
||||
__kernel void SimpleConvolution(__global uint * output,
|
||||
__global uint * input,
|
||||
__global float * mask,
|
||||
const uint2 inputDimensions,
|
||||
const uint2 maskDimensions) {
|
||||
|
||||
uint tid = get_global_id(0);
|
||||
|
||||
uint width = inputDimensions.x;
|
||||
uint height = inputDimensions.y;
|
||||
|
||||
uint x = tid%width;
|
||||
uint y = tid/width;
|
||||
|
||||
uint maskWidth = maskDimensions.x;
|
||||
uint maskHeight = maskDimensions.y;
|
||||
|
||||
uint vstep = (maskWidth -1)/2;
|
||||
uint hstep = (maskHeight -1)/2;
|
||||
|
||||
// find the left, right, top and bottom indices such that
|
||||
// the indices do not go beyond image boundaires
|
||||
uint left = (x < vstep) ? 0 : (x - vstep);
|
||||
uint right = ((x + vstep) >= width) ? width - 1 : (x + vstep);
|
||||
uint top = (y < hstep) ? 0 : (y - hstep);
|
||||
uint bottom = ((y + hstep) >= height)? height - 1: (y + hstep);
|
||||
|
||||
// initializing wighted sum value
|
||||
float sumFX = 0;
|
||||
|
||||
for(uint i = left; i <= right; ++i) {
|
||||
for(uint j = top ; j <= bottom; ++j) {
|
||||
// performing wighted sum within the mask boundaries
|
||||
uint maskIndex = (j - (y - hstep)) * maskWidth + (i - (x - vstep));
|
||||
uint index = j * width + i;
|
||||
sumFX += ((float)input[index] * mask[maskIndex]);
|
||||
}
|
||||
}
|
||||
|
||||
// To round to the nearest integer
|
||||
sumFX += 0.5f;
|
||||
output[tid] = (uint)sumFX;
|
||||
}
|
||||
@@ -0,0 +1,390 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "simple_convolution/simple_convolution.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <iostream>
|
||||
|
||||
#include "util/helper_funcs.h"
|
||||
#include "util/test_assert.h"
|
||||
|
||||
const uint32_t SimpleConvolution::input_data_[]{
|
||||
15, 201, 51, 89, 92, 34, 96, 66, 11, 225, 161, 96, 81, 211, 108, 124, 202, 244, 182,
|
||||
90, 215, 92, 98, 20, 44, 225, 55, 247, 202, 0, 45, 218, 202, 97, 51, 39, 131, 147,
|
||||
105, 143, 116, 11, 239, 198, 222, 92, 67, 169, 81, 250, 3, 40, 86, 101, 60, 131, 70,
|
||||
116, 123, 17, 117, 168, 236, 64, 10, 31, 103, 142, 179, 209, 29, 40, 220, 13, 239, 187,
|
||||
105, 50, 100, 186, 44, 104, 227, 131, 205, 32, 6, 20, 149, 130, 38, 10, 43, 18, 75,
|
||||
53, 50, 178, 195, 230, 132, 225, 14, 96, 238, 253, 27, 88, 48, 128, 18, 92, 232, 246,
|
||||
224, 182, 23, 231, 203, 172, 105, 241, 183, 148, 4, 2, 202, 55, 181, 142, 29, 57, 111,
|
||||
43, 153, 93, 41, 181, 181, 89, 54, 200, 182, 31, 190, 150, 213, 213, 126, 160, 130, 232,
|
||||
146, 57, 125, 151, 59, 71, 206, 240, 213, 236, 42, 68, 24, 195, 162, 65, 121, 87, 155,
|
||||
175, 31, 81, 207, 222, 232, 164, 180, 102, 69, 55, 79, 216, 112, 204, 112, 171, 19, 63,
|
||||
156, 233, 43, 198, 46, 67, 138, 208, 132, 4, 39, 32, 180, 71, 113, 131, 38, 90, 40,
|
||||
219, 193, 109, 18, 16, 70, 131, 220, 182, 46, 240, 245, 203, 217, 32, 146, 7, 100, 28,
|
||||
216, 233, 32, 255, 9, 213, 71, 123, 88, 110, 213, 128, 74, 150, 238, 93, 166, 52, 224,
|
||||
131, 234, 15, 115, 224, 218, 76, 1, 108, 84, 101, 137, 44, 79, 170, 44, 88, 127, 116,
|
||||
211, 216, 226, 168, 88, 45, 63, 70, 138, 230, 123, 107, 105, 101, 122, 220, 70, 84, 41,
|
||||
71, 193, 125, 173, 75, 169, 252, 245, 213, 84, 117, 73, 40, 77, 44, 209, 166, 90, 16,
|
||||
237, 229, 246, 104, 80, 95, 206, 202, 60, 20, 31, 101, 92, 225, 226, 9, 44, 140, 5,
|
||||
34, 97, 89, 151, 171, 129, 229, 216, 82, 139, 51, 99, 120, 24, 89, 225, 104, 185, 175,
|
||||
50, 246, 196, 82, 91, 32, 51, 62, 42, 96, 202, 47, 130, 44, 137, 26, 215, 10, 255,
|
||||
176, 93, 138, 227, 193, 3, 251, 27, 229, 100, 212, 149, 151, 202, 89, 233, 38, 122, 29,
|
||||
100, 164, 125, 46, 212, 0, 90, 93, 26, 50, 103, 25, 226, 197, 164, 198, 135, 168, 194,
|
||||
162, 141, 38, 119, 34, 190, 66, 124, 167, 104, 247, 197, 204, 156, 67, 251, 112, 67, 85,
|
||||
205, 93, 135, 53, 119, 106, 251, 28, 49, 130, 196, 243, 36, 82, 26, 155, 117, 216, 221,
|
||||
241, 128, 70, 233, 70, 18, 133, 137, 14, 245, 204, 99, 195, 42, 235, 248, 161, 86, 243,
|
||||
190, 135, 118, 130, 123, 154, 213, 150, 54, 74, 111, 20, 60, 240, 90, 37, 54, 109, 171,
|
||||
191, 123, 161, 140, 222, 100, 182, 202, 93, 88, 32, 80, 23, 168, 198, 153, 36, 97, 111,
|
||||
187, 151, 185, 43, 172, 245, 27, 6, 27, 82, 115, 199, 18, 239, 104, 158, 206, 205, 85,
|
||||
152, 42, 174, 185, 123, 197, 98, 65, 95, 135, 163, 206, 66, 59, 136, 109, 231, 125, 137,
|
||||
237, 153, 219, 97, 96, 237, 81, 201, 140, 31, 150, 226, 183, 192, 144, 113, 59, 86, 212,
|
||||
125, 182, 91, 33, 132, 158, 92, 12, 12, 68, 138, 149, 50, 36, 113, 147, 133, 95, 229,
|
||||
78, 235, 4, 228, 206, 188, 165, 95, 45, 225, 181, 1, 94, 107, 93, 128, 240, 251, 220,
|
||||
252, 7, 32, 135, 156, 83, 171, 14, 230, 48, 109, 203, 126, 89, 208, 99, 39, 140, 9,
|
||||
134, 185, 234, 60, 187, 73, 167, 24, 201, 152, 20, 166, 148, 27, 199, 28, 184, 26, 199,
|
||||
198, 0, 248, 52, 204, 119, 141, 157, 218, 181, 41, 227, 59, 227, 206, 119, 159, 23, 31,
|
||||
184, 224, 183, 204, 134, 76, 231, 77, 105, 160, 103, 48, 103, 104, 41, 155, 53, 160, 41,
|
||||
210, 123, 222, 252, 95, 26, 223, 45, 146, 126, 68, 177, 54, 37, 105, 3, 171, 182, 235,
|
||||
249, 31, 139, 97, 80, 243, 202, 121, 143, 0, 26, 184, 210, 149, 151, 207, 244, 177, 174,
|
||||
34, 67, 45, 102, 245, 100, 140, 95, 104, 55, 21, 83, 49, 53, 223, 147, 134, 210, 93,
|
||||
0, 97, 93, 26, 26, 48, 175, 178, 255, 164, 99, 174, 198, 167, 220, 45, 156, 64, 185,
|
||||
252, 168, 241, 18, 252, 35, 71, 219, 182, 205, 173, 19, 206, 15, 113, 232, 42, 161, 152,
|
||||
220, 160, 60, 64, 79, 3, 231, 43, 49, 132, 108, 235, 128, 21, 220, 146, 17, 255, 218,
|
||||
236, 182, 168, 154, 201, 118, 170, 58, 94, 212, 220, 246, 177, 125, 51, 241, 204, 55, 216,
|
||||
248, 104, 92, 100, 83, 221, 121, 48, 111, 138, 47, 73, 119, 230, 241, 17, 175, 103, 187,
|
||||
234, 198, 144, 199, 188, 65, 68, 240, 51, 17, 39, 11, 9, 143, 104, 109, 227, 70, 231,
|
||||
19, 181, 113, 66, 255, 233, 41, 241, 250, 217, 89, 182, 196, 31, 71, 139, 220, 137, 208,
|
||||
204, 188, 225, 243, 200, 234, 131, 48, 88, 102, 119, 63, 121, 44, 177, 188, 44, 154, 229,
|
||||
29, 149, 190, 118, 76, 130, 150, 147, 14, 114, 28, 222, 62, 217, 191, 50, 161, 170, 181,
|
||||
210, 2, 28, 73, 66, 149, 117, 243, 81, 162, 141, 55, 191, 35, 245, 54, 111, 120, 204,
|
||||
2, 134, 62, 31, 100, 125, 248, 36, 175, 153, 206, 101, 107, 209, 129, 181, 19, 22, 43,
|
||||
7, 104, 205, 149, 159, 140, 184, 149, 195, 39, 14, 143, 42, 148, 205, 73, 249, 74, 66,
|
||||
30, 250, 219, 237, 96, 71, 190, 225, 253, 210, 248, 40, 218, 96, 245, 111, 0, 130, 39,
|
||||
150, 69, 79, 165, 212, 122, 57, 162, 195, 51, 237, 6, 82, 231, 225, 63, 71, 41, 253,
|
||||
41, 38, 208, 33, 78, 170, 130, 68, 26, 131, 198, 66, 26, 12, 145, 191, 224, 11, 249,
|
||||
130, 207, 44, 112, 213, 126, 88, 183, 190, 160, 225, 187, 201, 8, 140, 235, 87, 55, 109,
|
||||
155, 81, 241, 98, 147, 11, 110, 37, 202, 79, 49, 195, 210, 0, 240, 66, 214, 110, 154,
|
||||
142, 44, 58, 111, 232, 4, 119, 117, 239, 207, 172, 93, 106, 254, 78, 205, 145, 89, 59,
|
||||
183, 35, 138, 232, 230, 92, 233, 214, 159, 191, 69, 58, 78, 114, 116, 189, 91, 121, 53,
|
||||
208, 104, 4, 125, 198, 111, 123, 20, 60, 13, 109, 120, 196, 145, 3, 172, 119, 95, 150,
|
||||
78, 255, 85, 147, 57, 163, 6, 174, 97, 97, 39, 151, 50, 144, 155, 175, 86, 11, 43,
|
||||
107, 71, 56, 216, 191, 253, 105, 194, 170, 225, 34, 64, 47, 34, 150, 195, 91, 58, 201,
|
||||
10, 155, 43, 49, 50, 93, 194, 206, 13, 25, 217, 56, 132, 33, 112, 92, 225, 109, 198,
|
||||
164, 23, 167, 199, 88, 215, 234, 238, 155, 69, 40, 100, 80, 196, 144, 129, 246, 237, 68,
|
||||
197, 250, 93, 159, 51, 225, 193, 163, 62, 163, 17, 4, 71, 41, 172, 15, 130, 132, 249,
|
||||
112, 31, 63, 152, 132, 143, 92, 20, 17, 83, 1, 86, 25, 252, 179, 185, 47, 149, 122,
|
||||
211, 211, 29, 229, 216, 101, 15, 133, 117, 145, 9, 111, 1, 40, 175, 154, 173, 62, 247,
|
||||
193, 80, 75, 194, 166, 100, 191, 90, 29, 239, 239, 152, 194, 195, 182, 168, 156, 27, 183,
|
||||
33, 145, 73, 43, 0, 75, 83, 175, 229, 0, 238, 221, 194, 63, 40, 133, 230, 140, 68,
|
||||
64, 170, 51, 48, 66, 246, 243, 248, 159, 144, 20, 87, 177, 165, 160, 220, 166, 235, 48,
|
||||
86, 209, 49, 68, 174, 243, 132, 214, 120, 106, 99, 189, 170, 13, 241, 219, 80, 232, 207,
|
||||
72, 135, 95, 92, 223, 16, 2, 127, 237, 169, 107, 29, 255, 61, 79, 68, 236, 67, 200,
|
||||
194, 188, 50, 38, 121, 221, 52, 107, 184, 132, 84, 136, 204, 219, 231, 41, 186, 248, 44,
|
||||
58, 229, 213, 166, 3, 212, 227, 82, 25, 207, 150, 225, 146, 82, 20, 185, 204, 242, 237,
|
||||
55, 170, 113, 139, 50, 62, 103, 26, 103, 34, 18, 148, 93, 247, 105, 3, 251, 62, 231,
|
||||
77, 87, 182, 227, 57, 73, 54, 77, 2, 2, 63, 239, 57, 234, 97, 197, 29, 159, 44,
|
||||
55, 7, 79, 74, 155, 172, 66, 5, 175, 61, 67, 150, 139, 155, 77, 111, 212, 151, 165,
|
||||
34, 153, 167, 98, 137, 225, 77, 234, 166, 107, 138, 211, 163, 145, 34, 237, 45, 206, 47,
|
||||
50, 126, 108, 117, 21, 248, 17, 98, 103, 230, 249, 12, 9, 147, 179, 107, 29, 149, 185,
|
||||
7, 59, 37, 146, 14, 200, 35, 49, 182, 80, 0, 230, 130, 126, 83, 248, 148, 75, 9,
|
||||
247, 178, 240, 240, 190, 249, 132, 114, 101, 161, 7, 30, 169, 67, 68, 59, 82, 12, 95,
|
||||
131, 195, 176, 131, 169, 51, 2, 252, 44, 150, 72, 54, 141, 250, 38, 126, 185, 31, 3,
|
||||
44, 132, 165, 52, 163, 78, 120, 231, 138, 202, 244, 234, 77, 183, 155, 209, 97, 207, 212,
|
||||
94, 251, 107, 166, 49, 249, 161, 88, 120, 91, 120, 123, 135, 253, 33, 188, 160, 112, 52,
|
||||
136, 250, 254, 125, 229, 76, 53, 128, 30, 150, 79, 243, 244, 75, 95, 155, 125, 88, 60,
|
||||
213, 209, 152, 78, 77, 32, 75, 110, 220, 236, 222, 17, 117, 217, 15, 242, 190, 92, 39,
|
||||
63, 123, 190, 143, 111, 178, 219, 206, 78, 88, 38, 138, 46, 247, 34, 124, 69, 66, 199,
|
||||
179, 31, 179, 145, 48, 41, 106, 64, 27, 41, 157, 67, 105, 24, 1, 249, 135, 179, 212,
|
||||
86, 1, 44, 124, 140, 91, 116, 175, 215, 185, 242, 159, 108, 17, 83, 254, 66, 124, 105,
|
||||
131, 151, 146, 32, 218, 252, 57, 219, 245, 193, 143, 201, 23, 145, 246, 148, 30, 82, 8,
|
||||
206, 41, 194, 192, 201, 47, 210, 28, 46, 20, 152, 151, 151, 48, 42, 184, 11, 38, 241,
|
||||
231, 28, 179, 119, 230, 202, 8, 220, 94, 39, 46, 103, 245, 88, 42, 181, 33, 90, 136,
|
||||
62, 136, 156, 214, 31, 52, 7, 74, 237, 19, 113, 223, 250, 141, 146, 113, 115, 92, 122,
|
||||
80, 187, 161, 126, 35, 150, 215, 78, 76, 249, 168, 212, 55, 48, 113, 14, 80, 166, 21,
|
||||
154, 147, 40, 12, 114, 35, 153, 5, 148, 12, 98, 15, 92, 29, 176, 219, 65, 71, 179,
|
||||
143, 147, 172, 56, 104, 227, 104, 218, 241, 185, 128, 7, 84, 20, 47, 96, 135, 82, 249,
|
||||
140, 231, 6, 238, 246, 99, 12, 167, 63, 77, 238, 242, 221, 130, 158, 21, 235, 129, 126,
|
||||
197, 114, 56, 69, 121, 140, 90, 169, 237, 225, 252, 231, 109, 228, 237, 91, 219, 81, 104,
|
||||
130, 144, 181, 113, 130, 147, 244, 32, 169, 223, 162, 39, 164, 21, 95, 234, 143, 236, 68,
|
||||
57, 217, 37, 53, 192, 147, 25, 174, 239, 245, 0, 87, 119, 144, 13, 232, 19, 160, 220,
|
||||
51, 73, 188, 214, 113, 96, 235, 209, 75, 122, 190, 144, 179, 151, 181, 233, 88, 73, 3,
|
||||
7, 56, 248, 7, 143, 112, 152, 156, 89, 171, 61, 53, 223, 135, 242, 181, 248, 83, 161,
|
||||
202, 158, 28, 136, 46, 208, 32, 228, 186, 121, 45, 189, 128, 102, 182, 136, 246, 38, 32,
|
||||
147, 127, 204, 208, 181, 171, 87, 167, 97, 80, 250, 2, 26, 153, 31, 163, 200, 239, 195,
|
||||
172, 169, 60, 218, 103, 188, 65, 30, 69, 55, 68, 102, 202, 196, 50, 154, 121, 221, 242,
|
||||
33, 63, 67, 28, 66, 93, 181, 97, 0, 126, 81, 196, 43, 251, 0, 5, 98, 189, 70,
|
||||
128, 3, 126, 197, 105, 72, 137, 155, 227, 3, 121, 214, 36, 184, 25, 65, 250, 118, 247,
|
||||
91, 119, 117, 173, 60, 160, 168, 60, 166, 10, 250, 237, 139, 253, 107, 80, 102, 180, 217,
|
||||
2, 151, 221, 123, 109, 1, 52, 134, 66, 46, 253, 57, 138, 117, 175, 55, 178, 79, 223,
|
||||
239, 245, 234, 233, 226, 117, 231, 78, 198, 78, 2, 159, 80, 154, 124, 204, 7, 126, 0,
|
||||
142, 193, 47, 140, 251, 185, 2, 170, 241, 180, 249, 208, 163, 239, 186, 141, 210, 48, 116,
|
||||
32, 246, 195, 34, 150, 19, 188, 19, 224, 196, 146, 224, 83, 83, 15, 224, 78, 201, 226,
|
||||
249, 186, 151, 243, 139, 58, 226, 70, 199, 181, 118, 60, 213, 109, 255, 248, 3, 19, 181,
|
||||
23, 243, 122, 169, 212, 205, 252, 228, 173, 75, 173, 144, 68, 104, 39, 55, 243, 98, 26,
|
||||
57, 41, 207, 175, 102, 165, 29, 102, 158, 32, 121, 83, 56, 109, 205, 225, 66, 155, 222,
|
||||
38, 73, 42, 212, 218, 110, 60, 1, 166, 48, 99, 193, 105, 141, 145, 25, 244, 54, 54,
|
||||
90, 213, 87, 212, 40, 143, 66, 246, 112, 132, 146, 79, 171, 220, 121, 128, 182, 232, 189,
|
||||
184, 143, 237, 27, 80, 86, 169, 226, 112, 158, 25, 166, 248, 238, 253, 204, 23, 141, 15,
|
||||
13, 254, 147, 160, 77, 63, 124, 199, 191, 50, 175, 124, 234, 62, 105, 6, 143, 192, 176,
|
||||
113, 48, 78, 139, 215, 71, 121, 213, 20, 144, 98, 35, 158, 96, 183, 62, 174, 246, 187,
|
||||
117, 182, 237, 37, 50, 216, 99, 156, 223, 243, 93, 143, 101, 142, 222, 240, 101, 37, 106,
|
||||
58, 57, 250, 157, 93, 153, 254, 20, 216, 172, 10, 147, 34, 192, 129, 71, 243, 90, 171,
|
||||
144, 57, 159, 238, 201, 4, 124, 167, 244, 225, 205, 95, 28, 7, 89, 185, 100, 243, 184,
|
||||
121, 203, 100, 131, 95, 135, 68, 224, 207, 56, 58, 122, 201, 115, 25, 183, 61, 30, 51,
|
||||
229, 18, 21, 178, 113, 49, 186, 203, 235, 31, 191, 163, 152, 138, 8, 28, 233, 143, 97,
|
||||
202, 95, 153, 4, 217, 98, 120, 243, 26, 182, 17, 77, 155, 36, 99, 78, 150, 149, 8,
|
||||
98, 128, 39, 33, 36, 192, 172, 45, 220, 149, 189, 61, 96, 28, 215, 100, 246, 58, 221,
|
||||
233, 84, 147, 251, 162, 47, 31, 5, 125, 181, 154, 134, 23, 27, 174, 57, 64, 110, 229,
|
||||
109, 75, 123, 43, 136, 219, 71, 95, 64, 61, 154, 29, 39, 238, 177, 34, 145, 225, 65,
|
||||
150, 94, 247, 49, 229, 15, 77, 147, 72, 141, 2, 45, 251, 77, 169, 38, 213, 132, 110,
|
||||
53, 196, 172, 207, 226, 212, 190, 148, 246, 79, 117, 56, 230, 212, 48, 23, 185, 63, 100,
|
||||
76, 136, 242, 78, 181, 237, 156, 95, 20, 113, 227, 131, 167, 168, 47, 119, 139, 3, 53,
|
||||
31, 250, 133, 149, 50, 107, 105, 99, 130, 34, 162, 231, 111, 42, 217, 190, 224, 199, 90,
|
||||
63, 220, 204, 35, 95, 115, 203, 143, 234, 86, 147, 32, 118, 141, 165, 11, 192, 16, 117,
|
||||
35, 147, 152, 198, 123, 7, 240, 84, 198, 209, 28, 33, 17, 248, 237, 52, 88, 97, 255,
|
||||
231, 76, 86, 122, 109, 204, 8, 18, 216, 201, 35, 77, 237, 183, 229, 179, 50, 237, 164,
|
||||
135, 179, 118, 164, 213, 135, 157, 195, 187, 245, 36, 187, 220, 113, 18, 87, 222, 222, 96,
|
||||
241, 183, 42, 21, 4, 23, 205, 233, 203, 0, 214, 112, 136, 138, 230, 44, 95, 110, 201,
|
||||
34, 41, 191, 71, 229, 155, 185, 247, 243, 151, 214, 84, 137, 141, 126, 159, 146, 149, 108,
|
||||
124, 97, 109, 82, 209, 245, 221, 183, 34, 60, 37, 236, 95, 79, 171, 167, 53, 71, 96,
|
||||
45, 58, 248, 3, 142, 129, 145, 12, 33, 36, 162, 142, 160, 3, 251, 243, 213, 240, 208,
|
||||
141, 19, 13, 178, 255, 109, 2, 170, 20, 55, 241, 116, 101, 44, 108, 105, 186, 238, 251,
|
||||
199, 15, 31, 106, 157, 191, 110, 152, 178, 67, 137, 131, 208, 156, 144, 131, 155, 253, 134,
|
||||
70, 18, 190, 55, 134, 35, 99, 243, 140, 30, 225, 135, 230, 240, 166, 81, 142, 102, 191,
|
||||
39, 25, 3, 177, 156, 211, 77, 45, 87, 233, 43, 221, 48, 61, 155, 103, 195, 191, 203,
|
||||
182, 75, 233, 152, 211, 208, 136, 121, 33, 23, 224, 224, 62, 249, 227, 239, 149, 183, 61,
|
||||
195, 15, 39, 238, 236, 87, 43, 136, 191, 239, 71, 138, 166, 147, 116, 62, 102, 68, 199,
|
||||
224, 101, 223, 193, 70, 29, 186, 42, 13, 80, 225, 75, 19, 241, 115, 1, 221, 202, 45,
|
||||
102, 137, 29, 174, 20, 195, 66, 136, 2, 168, 205, 201, 137, 50, 168, 74, 121, 198, 4,
|
||||
163, 212, 85, 133, 31, 105, 118, 146, 106, 84, 93, 152, 187, 231, 181, 105, 251, 121, 171,
|
||||
132, 123, 84, 81, 69, 221, 132, 238, 40, 253, 181, 45, 161, 137, 130, 39, 169, 235, 158,
|
||||
59, 86, 242, 153, 239, 173, 128, 165, 23, 123, 30, 195, 0, 154, 23, 81, 224, 245, 214,
|
||||
206, 30, 212, 131, 75, 117, 12, 206, 157, 181, 186, 59, 241, 17, 45, 138, 0, 219, 11,
|
||||
165, 243, 135, 196, 182, 135, 95, 205, 217, 63, 195, 175, 14, 225, 131, 145, 45, 249, 158,
|
||||
251, 150, 84, 182, 209, 70, 199, 255, 209, 199, 219, 220, 109, 206, 99, 50, 132, 234, 146,
|
||||
82, 195, 209, 22, 114, 223, 247, 246, 113, 37, 239, 16, 33, 134, 100, 215, 88, 170, 158,
|
||||
87, 123, 102, 50, 88, 211, 1, 187, 6, 134, 165, 152, 216, 105, 106, 239, 220, 74, 231,
|
||||
210, 187, 12, 194, 204, 45, 72, 49, 4, 160, 219, 162, 248, 87, 8, 43, 176, 220, 44,
|
||||
107, 227, 178, 17, 124, 139, 122, 230, 122, 87, 48, 97, 42, 236, 110, 236, 185, 155, 53,
|
||||
234, 159, 214, 198, 66, 206, 30, 75, 249, 206, 40, 38, 57, 11, 217, 74, 136, 100, 197,
|
||||
110, 223, 29, 159, 65, 71, 140, 175, 51, 69, 74, 105, 48, 234, 63, 246, 45, 13, 20,
|
||||
121, 7, 226, 161, 46, 28, 173, 7, 103, 53, 108, 45, 164, 76, 74, 68, 141, 145, 208,
|
||||
61, 197, 22, 136, 46, 70, 115, 110, 60, 161, 124, 81, 26, 132, 51, 188, 178, 79, 106,
|
||||
186, 183, 160, 39, 228, 68, 115, 46, 136, 1, 192, 89, 62, 133, 112, 198, 180, 182, 58,
|
||||
34, 243, 219, 158, 69, 245, 34, 120, 178, 213, 200, 28, 143, 128, 188, 182, 100, 1, 41,
|
||||
146, 137, 43, 82, 227, 105, 216, 83, 48, 140, 10, 106, 175, 254, 70, 77, 67, 59, 112,
|
||||
188, 237, 69, 133, 10, 212, 5, 198, 138, 105, 199, 180, 252, 81, 223, 79, 53, 73, 39,
|
||||
137, 121, 180, 148, 228, 99, 146, 42, 177, 214, 102, 33, 147, 84, 102, 25, 94, 59, 31,
|
||||
37, 197, 137, 237, 122, 133, 63, 90, 213, 116, 163, 253, 253, 29, 177, 145, 2, 21, 36,
|
||||
45, 198, 251, 147, 231, 143, 232, 78, 168, 71, 137, 199, 108, 79, 80, 90, 201, 214, 153,
|
||||
35, 172, 13, 199, 169, 11, 228, 91, 157, 231, 112, 193, 20, 54, 189, 167, 30, 77, 144,
|
||||
108, 245, 215, 246, 189, 68, 69, 14, 158, 14, 228, 55, 50, 145, 69, 249, 58, 80, 222,
|
||||
149, 237, 198, 5, 175, 218, 60, 109, 130, 91, 186, 18, 200, 175, 234, 190, 109, 46, 3,
|
||||
123, 204, 18, 96, 4, 68, 241, 73, 62, 44, 154, 29, 193, 136, 227, 199, 55, 189, 4,
|
||||
164, 64, 95, 95, 82, 39, 15, 60, 230, 124, 107, 233, 248, 55, 251, 89, 60, 63, 75,
|
||||
134, 126, 119, 32, 156, 57, 168, 127, 0, 224, 61, 5, 133, 125, 100, 228, 208, 140, 243,
|
||||
12, 114, 111, 119, 92, 104, 175, 87, 193, 236, 151, 13, 114, 21, 132, 146, 177, 189, 59,
|
||||
49, 190, 27, 110, 195, 160, 236, 40, 132, 188, 181, 120, 201, 40, 232, 65, 132, 80, 241,
|
||||
220, 18, 221, 115, 31, 79, 137, 164, 226, 58, 98, 29, 108, 32, 57, 219, 228, 218, 199,
|
||||
13, 95, 132, 195, 215, 77, 235, 191, 143, 112, 16, 128, 76, 35, 93, 191, 66, 173, 73,
|
||||
231, 143, 132, 73, 173, 240, 106, 231, 203, 78, 193, 147, 92, 33, 23, 31, 248, 100, 11,
|
||||
184, 243, 123, 201, 115, 200, 236, 209, 135, 47, 126, 209, 22, 14, 85, 95, 188, 69, 202,
|
||||
163, 17, 24, 101, 164, 117, 134, 187, 148, 127, 31, 159, 55, 19, 27, 1, 135, 227, 237,
|
||||
89, 107, 28, 216, 60, 51, 230, 145, 147, 163, 215, 93, 70, 232, 118, 172, 140, 235, 50,
|
||||
71, 128, 177, 103, 32, 233, 123, 60, 234, 2, 31, 216, 91, 139, 244, 52, 200, 40, 26,
|
||||
90, 188, 189, 49, 25, 4, 25, 144, 176, 166, 124, 227, 237, 252, 148, 85, 29, 125, 208,
|
||||
89, 104, 210, 121, 64, 46, 4, 53, 99, 204, 93, 125, 38, 25, 59, 88, 51, 64, 113,
|
||||
195, 241, 23, 64, 212, 5, 60, 104, 90, 90, 230, 42, 179, 78, 253, 44, 143, 44, 49,
|
||||
196, 143, 254, 34, 13, 36, 60, 73, 125, 112, 137, 239, 52, 122, 7, 116, 79, 12, 177,
|
||||
183, 103, 11, 158, 146, 190, 237, 143, 235, 124, 188, 28, 65, 76, 26, 100, 89, 63, 160,
|
||||
163, 188, 17, 44, 172, 69, 167, 179, 185, 246, 191, 107, 174, 38, 118, 76, 184, 53, 58,
|
||||
72, 32, 182, 5, 61, 248, 81, 88, 92, 170, 152, 253, 77, 84, 14, 122, 1, 83, 34,
|
||||
180, 13, 25, 115, 120, 199, 154, 238, 20, 83, 36, 79, 155, 68, 5, 160, 130, 254, 242,
|
||||
218, 90, 156, 114, 87, 234, 199, 101, 101, 200, 185, 135, 124, 198, 160, 240, 62, 104, 138,
|
||||
45, 125, 222, 81, 204, 122, 150, 210, 26, 24, 208, 12, 242, 42, 169, 101, 130, 148, 44,
|
||||
232, 249, 245, 161, 128, 113, 103, 33, 98, 166, 137, 236, 212, 7, 202, 38, 211, 69, 188,
|
||||
165, 95, 212, 118, 108, 199, 161, 22, 45, 35, 170, 90, 11, 163, 79, 173, 36, 193, 20,
|
||||
69, 35, 187, 207, 16, 144, 214, 219, 182, 170, 32, 114, 79, 128, 71, 198, 237, 15, 103,
|
||||
4, 60, 139, 175, 150, 151, 82, 230, 68, 119, 168, 89, 188, 204, 20, 140, 220, 165, 98,
|
||||
184, 91, 12, 217, 205, 92, 90, 20, 35, 71, 36, 138, 76, 96, 22, 251, 247, 173, 78,
|
||||
222, 241, 197, 134, 75, 130, 83, 96, 14, 47, 5, 113, 232, 96, 126, 193, 45, 218, 28,
|
||||
66, 253, 99, 103, 136, 176, 200, 158, 171, 191, 76, 249, 158, 62, 190, 37, 137, 65, 120,
|
||||
233, 80, 168, 238, 193, 145, 79, 63, 82, 125, 26, 111, 191, 24, 210, 39, 161, 131, 239,
|
||||
64, 46, 175, 140, 39, 77, 202, 230, 115, 84, 40, 235, 62, 120, 148, 45, 57, 37, 124,
|
||||
121, 120, 249, 148, 231, 185, 172, 186, 224, 77, 61, 207, 141, 107, 126, 26, 147, 204, 229,
|
||||
121, 63, 58, 161, 43, 120, 25, 191, 165, 83, 228, 34, 205, 92, 27, 97, 67, 213, 13,
|
||||
253, 182, 91, 59, 133, 233, 166, 4, 4, 57, 209, 233, 179, 16, 35, 85, 59, 155, 111,
|
||||
250, 65, 194, 223, 99, 144, 59, 127, 241, 127, 85, 255, 125, 11, 90, 184, 145, 68, 95,
|
||||
150, 72, 153, 103, 49, 76, 120, 85, 161, 179, 241, 16, 174, 51, 211, 142, 150, 99, 201,
|
||||
22, 85, 73, 108, 84, 199, 120, 175, 128, 9, 243, 223, 160, 59, 120, 8, 109, 197, 128,
|
||||
194, 103, 52, 180, 119, 227, 231, 75, 113, 126, 175, 59, 148, 4, 132, 1, 89, 75, 121,
|
||||
8, 204, 131, 251, 171, 36, 55, 36, 44, 165, 233, 172, 103, 80, 224, 28, 200, 195, 3,
|
||||
20, 53, 129, 195, 112, 22, 200, 244, 23, 34, 64, 145, 42, 12, 20, 38, 184, 56, 94,
|
||||
220, 101, 3, 198, 17, 107, 22, 242, 135, 222, 182, 138, 243, 235, 11, 182, 91, 34, 127,
|
||||
80, 58, 161, 145, 203, 204, 158, 224, 242, 86, 24, 81, 51, 126, 84, 249, 143, 191, 15,
|
||||
130, 70, 238, 57, 209, 225, 36, 221, 152, 128, 255, 24, 208, 57, 186, 97, 4, 134, 255,
|
||||
229, 121, 86, 254, 202, 137, 124, 31, 130, 12, 222, 146, 142, 37, 129, 199, 247, 98, 236,
|
||||
212, 251, 108, 211, 20, 60, 13, 206, 158, 18, 84};
|
||||
|
||||
SimpleConvolution::SimpleConvolution() {
|
||||
width_ = 64;
|
||||
height_ = 64;
|
||||
mask_width_ = 3;
|
||||
mask_height_ = mask_width_;
|
||||
randomize_seed_ = 0;
|
||||
|
||||
if (!IsPowerOf2(width_)) {
|
||||
width_ = RoundToPowerOf2(width_);
|
||||
}
|
||||
|
||||
if (!IsPowerOf2(height_)) {
|
||||
height_ = RoundToPowerOf2(height_);
|
||||
}
|
||||
|
||||
if (!(mask_width_ % 2)) {
|
||||
mask_width_++;
|
||||
}
|
||||
|
||||
if (!(mask_height_ % 2)) {
|
||||
mask_height_++;
|
||||
}
|
||||
|
||||
if (width_ * height_ < 256) {
|
||||
width_ = 64;
|
||||
height_ = 64;
|
||||
}
|
||||
|
||||
const uint32_t input_size_bytes = width_ * height_ * sizeof(uint32_t);
|
||||
const uint32_t mask_size_bytes = mask_width_ * mask_height_ * sizeof(float);
|
||||
|
||||
SetSysDescr(KERNARG_DES_ID, sizeof(kernel_args_t));
|
||||
SetSysDescr(INPUT_DES_ID, input_size_bytes);
|
||||
SetSysDescr(OUTPUT_DES_ID, input_size_bytes);
|
||||
SetLocalDescr(LOCAL_DES_ID, input_size_bytes);
|
||||
SetSysDescr(MASK_DES_ID, mask_size_bytes);
|
||||
SetSysDescr(REFOUT_DES_ID, input_size_bytes);
|
||||
|
||||
if (!randomize_seed_) TEST_ASSERT(sizeof(input_data_) <= input_size_bytes);
|
||||
}
|
||||
|
||||
void SimpleConvolution::Init() {
|
||||
std::clog << "SimpleConvolution::init :" << std::endl;
|
||||
|
||||
mem_descr_t input_des = GetDescr(INPUT_DES_ID);
|
||||
mem_descr_t local_des = GetDescr(LOCAL_DES_ID);
|
||||
mem_descr_t mask_des = GetDescr(MASK_DES_ID);
|
||||
mem_descr_t refout_des = GetDescr(REFOUT_DES_ID);
|
||||
mem_descr_t kernarg_des = GetDescr(KERNARG_DES_ID);
|
||||
|
||||
uint32_t* input = (uint32_t*)input_des.ptr;
|
||||
uint32_t* output_local = (uint32_t*)local_des.ptr;
|
||||
float* mask = (float*)mask_des.ptr;
|
||||
kernel_args_t* kernel_args = (kernel_args_t*)kernarg_des.ptr;
|
||||
|
||||
if (randomize_seed_) {
|
||||
// random initialisation of input
|
||||
FillRandom<uint32_t>(input, width_, height_, 0, 255, randomize_seed_);
|
||||
} else {
|
||||
// initialization with preset values
|
||||
memcpy(input, input_data_, width_ * height_ * sizeof(uint32_t));
|
||||
}
|
||||
|
||||
// Fill a blurr filter or some other filter of your choice
|
||||
const float val = 1.0f / (mask_width_ * 2.0f - 1.0f);
|
||||
for (uint32_t i = 0; i < (mask_width_ * mask_height_); i++) {
|
||||
mask[i] = 0;
|
||||
}
|
||||
for (uint32_t i = 0; i < mask_width_; i++) {
|
||||
uint32_t y = mask_height_ / 2;
|
||||
mask[y * mask_width_ + i] = val;
|
||||
}
|
||||
for (uint32_t i = 0; i < mask_height_; i++) {
|
||||
uint32_t x = mask_width_ / 2;
|
||||
mask[i * mask_width_ + x] = val;
|
||||
}
|
||||
|
||||
// Print the INPUT array.
|
||||
std::clog << std::dec;
|
||||
PrintArray<uint32_t>("> Input[0]", input, width_, 1);
|
||||
PrintArray<float>("> Mask", mask, mask_width_, mask_height_);
|
||||
|
||||
// Fill the kernel args
|
||||
kernel_args->arg1 = output_local;
|
||||
kernel_args->arg2 = input;
|
||||
kernel_args->arg3 = mask;
|
||||
kernel_args->arg4 = width_;
|
||||
kernel_args->arg41 = height_;
|
||||
kernel_args->arg5 = mask_width_;
|
||||
kernel_args->arg51 = mask_height_;
|
||||
|
||||
// Calculate the reference output
|
||||
memset(refout_des.ptr, 0, refout_des.size);
|
||||
ReferenceImplementation(reinterpret_cast<uint32_t*>(refout_des.ptr), input, mask, width_, height_,
|
||||
mask_width_, mask_height_);
|
||||
}
|
||||
|
||||
void SimpleConvolution::PrintOutput() const {
|
||||
PrintArray<uint32_t>("> Output[0]", reinterpret_cast<uint32_t*>(GetOutputPtr()), width_, 1);
|
||||
}
|
||||
|
||||
bool SimpleConvolution::ReferenceImplementation(uint32_t* output, const uint32_t* input,
|
||||
const float* mask, const uint32_t width,
|
||||
const uint32_t height, const uint32_t mask_width,
|
||||
const uint32_t mask_height) {
|
||||
const uint32_t vstep = (mask_width - 1) / 2;
|
||||
const uint32_t hstep = (mask_height - 1) / 2;
|
||||
|
||||
// for each pixel in the input
|
||||
for (uint32_t x = 0; x < width; x++) {
|
||||
for (uint32_t y = 0; y < height; y++) {
|
||||
// find the left, right, top and bottom indices such that
|
||||
// the indices do not go beyond image boundaires
|
||||
const uint32_t left = (x < vstep) ? 0 : (x - vstep);
|
||||
const uint32_t right = ((x + vstep) >= width) ? width - 1 : (x + vstep);
|
||||
const uint32_t top = (y < hstep) ? 0 : (y - hstep);
|
||||
const uint32_t bottom = ((y + hstep) >= height) ? height - 1 : (y + hstep);
|
||||
|
||||
// initializing wighted sum value
|
||||
float sum_fx = 0;
|
||||
for (uint32_t i = left; i <= right; ++i) {
|
||||
for (uint32_t j = top; j <= bottom; ++j) {
|
||||
// performing wighted sum within the mask boundaries
|
||||
uint32_t mask_idx = (j - (y - hstep)) * mask_width + (i - (x - vstep));
|
||||
uint32_t index = j * width + i;
|
||||
|
||||
// to round to the nearest integer
|
||||
sum_fx += ((float)input[index] * mask[mask_idx]);
|
||||
}
|
||||
}
|
||||
sum_fx += 0.5f;
|
||||
output[y * width + x] = uint32_t(sum_fx);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -0,0 +1,96 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_
|
||||
#define TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "ctrl/test_kernel.h"
|
||||
|
||||
// Class implements SimpleConvolution kernel parameters
|
||||
class SimpleConvolution : public TestKernel {
|
||||
public:
|
||||
// Constructor
|
||||
SimpleConvolution();
|
||||
|
||||
// Initialize method
|
||||
void Init();
|
||||
|
||||
// Return compute grid size
|
||||
uint32_t GetGridSize() const { return width_ * height_; }
|
||||
|
||||
// Print output
|
||||
void PrintOutput() const;
|
||||
|
||||
// Return name
|
||||
std::string Name() const { return std::string("SimpleConvolution"); }
|
||||
|
||||
private:
|
||||
// Local kernel arguments declaration
|
||||
struct kernel_args_t {
|
||||
void* arg1;
|
||||
void* arg2;
|
||||
void* arg3;
|
||||
uint32_t arg4;
|
||||
uint32_t arg41;
|
||||
uint32_t arg5;
|
||||
uint32_t arg51;
|
||||
};
|
||||
|
||||
// Reference CPU implementation of Simple Convolution
|
||||
// @param output Output matrix after performing convolution
|
||||
// @param input Input matrix on which convolution is to be performed
|
||||
// @param mask mask matrix using which convolution was to be performed
|
||||
// @param input_dimensions dimensions of the input matrix
|
||||
// @param mask_dimensions dimensions of the mask matrix
|
||||
// @return bool true on success and false on failure
|
||||
bool ReferenceImplementation(uint32_t* output, const uint32_t* input, const float* mask,
|
||||
const uint32_t width, const uint32_t height,
|
||||
const uint32_t maskWidth, const uint32_t maskHeight);
|
||||
|
||||
// Width of the Input array
|
||||
uint32_t width_;
|
||||
|
||||
// Height of the Input array
|
||||
uint32_t height_;
|
||||
|
||||
// Mask dimensions
|
||||
uint32_t mask_width_;
|
||||
|
||||
// Mask dimensions
|
||||
uint32_t mask_height_;
|
||||
|
||||
// Randomize input data
|
||||
unsigned randomize_seed_;
|
||||
|
||||
// Input data
|
||||
static const uint32_t input_data_[];
|
||||
};
|
||||
|
||||
#endif // TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_
|
||||
@@ -0,0 +1,154 @@
|
||||
module &m:1:0:$full:$large:$default;
|
||||
extension "amd:gcn";
|
||||
extension "IMAGE";
|
||||
|
||||
decl prog function &abort()();
|
||||
|
||||
prog kernel &__OpenCL_SimpleConvolution(kernarg_u64 %__global_offset_0,
|
||||
kernarg_u64 %output,
|
||||
kernarg_u64 %input,
|
||||
kernarg_u64 %mask,
|
||||
kernarg_u32 %inputDimensions[2],
|
||||
kernarg_u32 %maskDimensions[2]) {
|
||||
|
||||
pragma "AMD RTI", "ARGSTART:__OpenCL_SimpleConvolution";
|
||||
pragma "AMD RTI", "version:3:1:104";
|
||||
pragma "AMD RTI", "device:generic";
|
||||
pragma "AMD RTI", "uniqueid:1024";
|
||||
pragma "AMD RTI", "memory:private:0";
|
||||
pragma "AMD RTI", "memory:region:0";
|
||||
pragma "AMD RTI", "memory:local:0";
|
||||
pragma "AMD RTI", "value:__global_offset_0:u64:1:1:0";
|
||||
pragma "AMD RTI", "pointer:output:u32:1:1:96:uav:7:4:RW:0:0:0";
|
||||
pragma "AMD RTI", "pointer:input:u32:1:1:112:uav:7:4:RW:0:0:0";
|
||||
pragma "AMD RTI", "pointer:mask:float:1:1:128:uav:7:4:RW:0:0:0";
|
||||
pragma "AMD RTI", "value:inputDimensions:u32:2:1:144";
|
||||
pragma "AMD RTI", "constarg:4:inputDimensions";
|
||||
pragma "AMD RTI", "value:maskDimensions:u32:2:1:160";
|
||||
pragma "AMD RTI", "constarg:5:maskDimensions";
|
||||
pragma "AMD RTI", "function:1:0";
|
||||
pragma "AMD RTI", "memory:64bitABI";
|
||||
pragma "AMD RTI", "privateid:8";
|
||||
pragma "AMD RTI", "enqueue_kernel:0";
|
||||
pragma "AMD RTI", "kernel_index:0";
|
||||
pragma "AMD RTI", "reflection:0:size_t";
|
||||
pragma "AMD RTI", "reflection:1:uint*";
|
||||
pragma "AMD RTI", "reflection:2:uint*";
|
||||
pragma "AMD RTI", "reflection:3:float*";
|
||||
pragma "AMD RTI", "reflection:4:uint2";
|
||||
pragma "AMD RTI", "reflection:5:uint2";
|
||||
pragma "AMD RTI", "ARGEND:__OpenCL_SimpleConvolution";
|
||||
|
||||
@__OpenCL_SimpleConvolution_Entry:
|
||||
|
||||
// BB#0: // %entry
|
||||
|
||||
workitemabsid_u32 $s6, 0;
|
||||
cvt_u64_u32 $d0, $s6;
|
||||
ld_kernarg_align(8)_width(all)_u64 $d4, [%__global_offset_0];
|
||||
add_u64 $d0, $d0, $d4;
|
||||
cvt_u32_u64 $s5, $d0;
|
||||
ld_v2_kernarg_align(4)_width(all)_u32 ($s0, $s4), [%inputDimensions];
|
||||
ld_v2_kernarg_align(4)_width(all)_u32 ($s1, $s9), [%maskDimensions];
|
||||
rem_u32 $s7, $s5, $s0;
|
||||
add_u32 $s2, $s1, 4294967295;
|
||||
shr_u32 $s8, $s2, 1;
|
||||
add_u32 $s2, $s7, $s8;
|
||||
add_u32 $s3, $s0, 4294967295;
|
||||
cmp_ge_b1_u32 $c0, $s2, $s0;
|
||||
cmov_b32 $s2, $c0, $s3, $s2;
|
||||
sub_u32 $s3, $s7, $s8;
|
||||
cmp_lt_b1_u32 $c0, $s7, $s8;
|
||||
cmov_b32 $s3, $c0, 0, $s3;
|
||||
ld_kernarg_align(8)_width(all)_u64 $d1, [%output];
|
||||
cmp_le_b1_u32 $c0, $s3, $s2;
|
||||
cbr_b1 $c0, @BB0_2;
|
||||
|
||||
// BB#1:
|
||||
|
||||
mov_b32 $s6, 0;
|
||||
br @BB0_6;
|
||||
|
||||
// @BB0_2: // %for.cond32.preheader.lr.ph
|
||||
|
||||
@BB0_2:
|
||||
|
||||
div_u32 $s5, $s5, $s0;
|
||||
add_u32 $s9, $s9, 4294967295;
|
||||
shr_u32 $s9, $s9, 1;
|
||||
add_u32 $s10, $s5, $s9;
|
||||
add_u32 $s11, $s4, 4294967295;
|
||||
cmp_ge_b1_u32 $c0, $s10, $s4;
|
||||
cmov_b32 $s4, $c0, $s11, $s10;
|
||||
sub_u32 $s10, $s5, $s9;
|
||||
cmp_lt_b1_u32 $c0, $s5, $s9;
|
||||
cmov_b32 $s5, $c0, 0, $s10;
|
||||
ld_kernarg_align(8)_width(all)_u64 $d2, [%mask];
|
||||
ld_kernarg_align(8)_width(all)_u64 $d3, [%input];
|
||||
cvt_u64_u32 $d5, $s6;
|
||||
add_u64 $d4, $d4, $d5;
|
||||
cvt_u32_u64 $s6, $d4;
|
||||
div_u32 $s6, $s6, $s0;
|
||||
max_u32 $s10, $s9, $s6;
|
||||
sub_u32 $s12, $s10, $s6;
|
||||
max_u32 $s11, $s7, $s8;
|
||||
mov_b32 $s6, 0;
|
||||
mad_u32 $s12, $s1, $s12, $s11;
|
||||
sub_u32 $s7, $s12, $s7;
|
||||
sub_u32 $s9, $s10, $s9;
|
||||
mad_u32 $s9, $s0, $s9, $s11;
|
||||
sub_u32 $s8, $s9, $s8;
|
||||
|
||||
// @BB0_3: // %for.cond32.preheader
|
||||
|
||||
@BB0_3:
|
||||
|
||||
cmp_gt_b1_u32 $c0, $s5, $s4;
|
||||
mov_b32 $s9, $s7;
|
||||
mov_b32 $s10, $s8;
|
||||
mov_b32 $s11, $s5;
|
||||
cbr_b1 $c0, @BB0_5;
|
||||
|
||||
// @BB0_4: // %for.body35
|
||||
|
||||
@BB0_4:
|
||||
|
||||
cvt_u64_u32 $d4, $s9;
|
||||
shl_u64 $d4, $d4, 2;
|
||||
add_u64 $d4, $d2, $d4;
|
||||
ld_global_align(4)_f32 $s12, [$d4];
|
||||
cvt_u64_u32 $d4, $s10;
|
||||
shl_u64 $d4, $d4, 2;
|
||||
add_u64 $d4, $d3, $d4;
|
||||
ld_global_align(4)_u32 $s13, [$d4];
|
||||
cvt_f32_u32 $s13, $s13;
|
||||
mul_ftz_f32 $s12, $s13, $s12;
|
||||
add_u32 $s9, $s9, $s1;
|
||||
add_u32 $s10, $s10, $s0;
|
||||
add_u32 $s11, $s11, 1;
|
||||
add_ftz_f32 $s6, $s6, $s12;
|
||||
cmp_le_b1_u32 $c0, $s11, $s4;
|
||||
cbr_b1 $c0, @BB0_4;
|
||||
|
||||
// @BB0_5: // %for.inc48
|
||||
|
||||
@BB0_5:
|
||||
|
||||
add_u32 $s7, $s7, 1;
|
||||
add_u32 $s8, $s8, 1;
|
||||
add_u32 $s3, $s3, 1;
|
||||
cmp_le_b1_u32 $c0, $s3, $s2;
|
||||
cbr_b1 $c0, @BB0_3;
|
||||
|
||||
// @BB0_6: // %for.end50
|
||||
|
||||
@BB0_6:
|
||||
|
||||
and_b64 $d0, $d0, 4294967295;
|
||||
shl_u64 $d0, $d0, 2;
|
||||
add_u64 $d0, $d1, $d0;
|
||||
add_ftz_f32 $s0, $s6, 0F3f000000;
|
||||
cvt_ftz_u32_f32 $s0, $s0;
|
||||
st_global_align(4)_u32 $s0, [$d0];
|
||||
ret;
|
||||
};
|
||||
@@ -0,0 +1,87 @@
|
||||
/**********************************************************************
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
provided that the following conditions are met:
|
||||
|
||||
• Redistributions of source code must retain the above copyright notice, this list of
|
||||
conditions and the following disclaimer.
|
||||
• Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
********************************************************************/
|
||||
#ifndef TEST_UTIL_HELPER_FUNCS_H_
|
||||
#define TEST_UTIL_HELPER_FUNCS_H_
|
||||
|
||||
#include <time.h>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
static inline void Error(std::string error_msg) {
|
||||
std::cerr << "Error: " << error_msg << std::endl;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void PrintArray(const std::string header, const T* data, const int width, const int height) {
|
||||
std::clog << header << " :\n";
|
||||
for (int i = 0; i < height; i++) {
|
||||
std::clog << "> ";
|
||||
for (int j = 0; j < width; j++) {
|
||||
std::clog << data[i * width + j] << " ";
|
||||
}
|
||||
std::clog << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool FillRandom(T* array_ptr, const int width, const int height, const T range_min,
|
||||
const T range_max, unsigned int seed = 123) {
|
||||
if (!array_ptr) {
|
||||
Error("Cannot fill array. NULL pointer.");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!seed) seed = (unsigned int)time(NULL);
|
||||
|
||||
srand(seed);
|
||||
double range = double(range_max - range_min) + 1.0;
|
||||
|
||||
/* random initialisation of input */
|
||||
for (int i = 0; i < height; i++)
|
||||
for (int j = 0; j < width; j++) {
|
||||
int index = i * width + j;
|
||||
array_ptr[index] = range_min + T(range * rand() / (RAND_MAX + 1.0));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T> T RoundToPowerOf2(T val) {
|
||||
int bytes = sizeof(T);
|
||||
|
||||
val--;
|
||||
for (int i = 0; i < bytes; i++) val |= val >> (1 << i);
|
||||
val++;
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
template <typename T> bool IsPowerOf2(T val) {
|
||||
long long long_val = val;
|
||||
return (((long_val & (-long_val)) - long_val == 0) && (long_val != 0));
|
||||
}
|
||||
|
||||
#endif // TEST_UTIL_HELPER_FUNCS_H_
|
||||
@@ -0,0 +1,372 @@
|
||||
/**********************************************************************
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
provided that the following conditions are met:
|
||||
|
||||
<95> Redistributions of source code must retain the above copyright notice, this list of
|
||||
conditions and the following disclaimer.
|
||||
<95> Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
********************************************************************/
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_ext_finalize.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Callback function to find and bind kernarg region of an agent
|
||||
static hsa_status_t FindMemRegionsCallback(hsa_region_t region, void* data) {
|
||||
hsa_region_global_flag_t flags;
|
||||
hsa_region_segment_t segment_id;
|
||||
|
||||
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id);
|
||||
if (segment_id != HSA_REGION_SEGMENT_GLOBAL) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
AgentInfo* agent_info = (AgentInfo*)data;
|
||||
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
|
||||
if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) {
|
||||
agent_info->coarse_region = region;
|
||||
}
|
||||
|
||||
if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) {
|
||||
agent_info->kernarg_region = region;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Callback function to get the number of agents
|
||||
static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data) {
|
||||
// Copy handle of agent and increment number of agents reported
|
||||
HsaRsrcFactory* rsrcFactory = reinterpret_cast<HsaRsrcFactory*>(data);
|
||||
|
||||
// Determine if device is a Gpu agent
|
||||
hsa_status_t status;
|
||||
hsa_device_type_t type;
|
||||
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
|
||||
CHECK_STATUS("Error Calling hsa_agent_get_info", status);
|
||||
if (type == HSA_DEVICE_TYPE_DSP) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
if (type == HSA_DEVICE_TYPE_CPU) {
|
||||
AgentInfo* agent_info = reinterpret_cast<AgentInfo*>(malloc(sizeof(AgentInfo)));
|
||||
agent_info->dev_id = agent;
|
||||
agent_info->dev_type = HSA_DEVICE_TYPE_CPU;
|
||||
rsrcFactory->AddAgentInfo(agent_info, false);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Device is a Gpu agent, build an instance of AgentInfo
|
||||
AgentInfo* agent_info = reinterpret_cast<AgentInfo*>(malloc(sizeof(AgentInfo)));
|
||||
agent_info->dev_id = agent;
|
||||
agent_info->dev_type = HSA_DEVICE_TYPE_GPU;
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name);
|
||||
agent_info->max_wave_size = 0;
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size);
|
||||
agent_info->max_queue_size = 0;
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);
|
||||
agent_info->profile = hsa_profile_t(108);
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile);
|
||||
|
||||
// Initialize memory regions to zero
|
||||
agent_info->kernarg_region.handle = 0;
|
||||
agent_info->coarse_region.handle = 0;
|
||||
|
||||
// Find and Bind Memory regions of the Gpu agent
|
||||
hsa_agent_iterate_regions(agent, FindMemRegionsCallback, agent_info);
|
||||
|
||||
// Save the instance of AgentInfo
|
||||
rsrcFactory->AddAgentInfo(agent_info, true);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Constructor of the class
|
||||
HsaRsrcFactory::HsaRsrcFactory() {
|
||||
// Initialize the Hsa Runtime
|
||||
hsa_status_t status = hsa_init();
|
||||
CHECK_STATUS("Error in hsa_init", status);
|
||||
|
||||
// Discover the set of Gpu devices available on the platform
|
||||
status = hsa_iterate_agents(GetHsaAgentsCallback, this);
|
||||
CHECK_STATUS("Error Calling hsa_iterate_agents", status);
|
||||
}
|
||||
|
||||
// Destructor of the class
|
||||
HsaRsrcFactory::~HsaRsrcFactory() {
|
||||
hsa_status_t status = hsa_shut_down();
|
||||
CHECK_STATUS("Error in hsa_shut_down", status);
|
||||
}
|
||||
|
||||
// Get the count of Hsa Gpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Gpu agents on platform
|
||||
//
|
||||
uint32_t HsaRsrcFactory::GetCountOfGpuAgents() { return uint32_t(gpu_list_.size()); }
|
||||
|
||||
// Get the count of Hsa Cpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Cpu agents on platform
|
||||
//
|
||||
uint32_t HsaRsrcFactory::GetCountOfCpuAgents() { return uint32_t(cpu_list_.size()); }
|
||||
|
||||
// Get the AgentInfo handle of a Gpu device
|
||||
//
|
||||
// @param idx Gpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, AgentInfo** agent_info) {
|
||||
// Determine if request is valid
|
||||
uint32_t size = uint32_t(gpu_list_.size());
|
||||
if (idx >= size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Copy AgentInfo from specified index
|
||||
*agent_info = gpu_list_[idx];
|
||||
return true;
|
||||
}
|
||||
|
||||
// Get the AgentInfo handle of a Cpu device
|
||||
//
|
||||
// @param idx Cpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, AgentInfo** agent_info) {
|
||||
// Determine if request is valid
|
||||
uint32_t size = uint32_t(cpu_list_.size());
|
||||
if (idx >= size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Copy AgentInfo from specified index
|
||||
*agent_info = cpu_list_[idx];
|
||||
return true;
|
||||
}
|
||||
|
||||
// Create a Queue object and return its handle. The queue object is expected
|
||||
// to support user requested number of Aql dispatch packets.
|
||||
//
|
||||
// @param agent_info Gpu Agent on which to create a queue object
|
||||
//
|
||||
// @param num_Pkts Number of packets to be held by queue
|
||||
//
|
||||
// @param queue Output parameter updated with handle of queue object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::CreateQueue(AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue) {
|
||||
hsa_status_t status;
|
||||
status = hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
|
||||
UINT32_MAX, UINT32_MAX, queue);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Create a Signal object and return its handle.
|
||||
//
|
||||
// @param value Initial value of signal object
|
||||
//
|
||||
// @param signal Output parameter updated with handle of signal object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
|
||||
hsa_status_t status;
|
||||
status = hsa_signal_create(value, 0, NULL, signal);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Allocate memory for use by a kernel of specified size in specified
|
||||
// agent's memory region. Currently supports Global segment whose Kernarg
|
||||
// flag set.
|
||||
//
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
//
|
||||
// @param size Size of memory in terms of bytes
|
||||
//
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
//
|
||||
uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) {
|
||||
hsa_status_t status;
|
||||
uint8_t* buffer = NULL;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
|
||||
if (agent_info->coarse_region.handle != 0) {
|
||||
// Allocate in local memory if it is available
|
||||
status = hsa_memory_allocate(agent_info->coarse_region, size, (void**)&buffer);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
status = hsa_memory_assign_agent(buffer, agent_info->dev_id, HSA_ACCESS_PERMISSION_RW);
|
||||
}
|
||||
} else {
|
||||
// Allocate in system memory if local memory is not available
|
||||
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
|
||||
}
|
||||
|
||||
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
}
|
||||
|
||||
// Allocate memory tp pass kernel parameters.
|
||||
//
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
//
|
||||
// @param size Size of memory in terms of bytes
|
||||
//
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
//
|
||||
uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) {
|
||||
hsa_status_t status;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
|
||||
uint8_t* buffer = NULL;
|
||||
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
|
||||
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
}
|
||||
|
||||
// Transfer data method
|
||||
bool HsaRsrcFactory::TransferData(void* dest_buff, void* src_buff, uint32_t length,
|
||||
bool host_to_dev) {
|
||||
hsa_status_t status;
|
||||
status = hsa_memory_copy(dest_buff, src_buff, length);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
//
|
||||
// @param agent_info Gpu device for which to finalize
|
||||
//
|
||||
// @param brig_path File path of the Assembled Brig file
|
||||
//
|
||||
// @param kernel_name Name of the kernel to finalize
|
||||
//
|
||||
// @param code_desc Handle of finalized Code Descriptor that could
|
||||
// be used to submit for execution
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool HsaRsrcFactory::LoadAndFinalize(AgentInfo* agent_info, const char* brig_path,
|
||||
char* kernel_name, hsa_executable_symbol_t* code_desc) {
|
||||
// Finalize the Hsail object into code object
|
||||
hsa_status_t status;
|
||||
hsa_code_object_t code_object;
|
||||
|
||||
// Build the code object filename
|
||||
std::string filename(brig_path);
|
||||
std::clog << "Code object filename: " << filename << std::endl;
|
||||
|
||||
// Open the file containing code object
|
||||
std::ifstream codeStream(filename.c_str(), std::ios::binary | std::ios::ate);
|
||||
if (!codeStream) {
|
||||
std::cerr << "Error: failed to load " << filename << std::endl;
|
||||
assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Allocate memory to read in code object from file
|
||||
size_t size = std::string::size_type(codeStream.tellg());
|
||||
char* codeBuff = (char*)AllocateSysMemory(agent_info, size);
|
||||
if (!codeBuff) {
|
||||
std::cerr << "Error: failed to allocate memory for code object." << std::endl;
|
||||
assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read the code object into allocated memory
|
||||
codeStream.seekg(0, std::ios::beg);
|
||||
std::copy(std::istreambuf_iterator<char>(codeStream), std::istreambuf_iterator<char>(), codeBuff);
|
||||
|
||||
// De-Serialize the code object that has been read into memory
|
||||
status = hsa_code_object_deserialize(codeBuff, size, NULL, &code_object);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
std::cerr << "Failed to deserialize code object" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create executable.
|
||||
hsa_executable_t hsaExecutable;
|
||||
status =
|
||||
hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, "", &hsaExecutable);
|
||||
CHECK_STATUS("Error in creating executable object", status);
|
||||
|
||||
// Load code object.
|
||||
status = hsa_executable_load_code_object(hsaExecutable, agent_info->dev_id, code_object, "");
|
||||
CHECK_STATUS("Error in loading executable object", status);
|
||||
|
||||
// Freeze executable.
|
||||
status = hsa_executable_freeze(hsaExecutable, "");
|
||||
CHECK_STATUS("Error in freezing executable object", status);
|
||||
|
||||
// Get symbol handle.
|
||||
hsa_executable_symbol_t kernelSymbol;
|
||||
status = hsa_executable_get_symbol(hsaExecutable, NULL, kernel_name, agent_info->dev_id, 0,
|
||||
&kernelSymbol);
|
||||
CHECK_STATUS("Error in looking up kernel symbol", status);
|
||||
|
||||
// Update output parameter
|
||||
*code_desc = kernelSymbol;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Add an instance of AgentInfo representing a Hsa Gpu agent
|
||||
void HsaRsrcFactory::AddAgentInfo(AgentInfo* agent_info, bool gpu) {
|
||||
// Add input to Gpu list
|
||||
if (gpu) {
|
||||
gpu_list_.push_back(agent_info);
|
||||
return;
|
||||
}
|
||||
|
||||
// Add input to Cpu list
|
||||
cpu_list_.push_back(agent_info);
|
||||
}
|
||||
|
||||
// Print the various fields of Hsa Gpu Agents
|
||||
bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) {
|
||||
std::clog << header << " :" << std::endl;
|
||||
|
||||
AgentInfo* agent_info;
|
||||
int size = uint32_t(gpu_list_.size());
|
||||
for (int idx = 0; idx < size; idx++) {
|
||||
agent_info = gpu_list_[idx];
|
||||
|
||||
std::clog << "> agent[" << idx << "] :" << std::endl;
|
||||
std::clog << ">> Name : " << agent_info->name << std::endl;
|
||||
std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl;
|
||||
std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl;
|
||||
std::clog << ">> Kernarg Region Id : " << agent_info->coarse_region.handle << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
HsaRsrcFactory* HsaRsrcFactory::instance_ = NULL;
|
||||
HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_;
|
||||
@@ -0,0 +1,234 @@
|
||||
/**********************************************************************
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
provided that the following conditions are met:
|
||||
|
||||
<95> Redistributions of source code must retain the above copyright notice, this list of
|
||||
conditions and the following disclaimer.
|
||||
<95> Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
********************************************************************/
|
||||
|
||||
#ifndef TEST_UTIL_HSA_RSRC_FACTORY_H_
|
||||
#define TEST_UTIL_HSA_RSRC_FACTORY_H_
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_ext_finalize.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#define HSA_ARGUMENT_ALIGN_BYTES 16
|
||||
#define HSA_QUEUE_ALIGN_BYTES 64
|
||||
#define HSA_PACKET_ALIGN_BYTES 64
|
||||
|
||||
#define CHECK_STATUS(msg, status) \
|
||||
if (status != HSA_STATUS_SUCCESS) { \
|
||||
const char* emsg = 0; \
|
||||
hsa_status_string(status, &emsg); \
|
||||
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
|
||||
exit(1); \
|
||||
}
|
||||
|
||||
static const unsigned MEM_PAGE_BYTES = 0x1000;
|
||||
static const unsigned MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
|
||||
|
||||
// Encapsulates information about a Hsa Agent such as its
|
||||
// handle, name, max queue size, max wavefront size, etc.
|
||||
typedef struct {
|
||||
// Handle of Agent
|
||||
hsa_agent_t dev_id;
|
||||
|
||||
// Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
|
||||
uint32_t dev_type;
|
||||
|
||||
// Name of Agent whose length is less than 64
|
||||
char name[64];
|
||||
|
||||
// Max size of Wavefront size
|
||||
uint32_t max_wave_size;
|
||||
|
||||
// Max size of Queue buffer
|
||||
uint32_t max_queue_size;
|
||||
|
||||
// Hsail profile supported by agent
|
||||
hsa_profile_t profile;
|
||||
|
||||
// Memory region supporting kernel parameters
|
||||
hsa_region_t coarse_region;
|
||||
|
||||
// Memory region supporting kernel arguments
|
||||
hsa_region_t kernarg_region;
|
||||
|
||||
} AgentInfo;
|
||||
|
||||
class HsaRsrcFactory {
|
||||
public:
|
||||
typedef std::recursive_mutex mutex_t;
|
||||
|
||||
static HsaRsrcFactory* Create() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (HsaRsrcFactory::instance_ == NULL) {
|
||||
HsaRsrcFactory::instance_ = new HsaRsrcFactory();
|
||||
}
|
||||
return instance_;
|
||||
}
|
||||
|
||||
static void Destroy() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (instance_) delete instance_;
|
||||
instance_ = NULL;
|
||||
}
|
||||
|
||||
static HsaRsrcFactory& Instance() {
|
||||
hsa_status_t status = (instance_ != NULL) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
|
||||
CHECK_STATUS("HsaRsrcFactory::Instance()", status);
|
||||
return *instance_;
|
||||
}
|
||||
|
||||
// Get the count of Hsa Gpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Gpu agents on platform
|
||||
//
|
||||
uint32_t GetCountOfGpuAgents();
|
||||
|
||||
// Get the count of Hsa Cpu Agents available on the platform
|
||||
//
|
||||
// @return uint32_t Number of Cpu agents on platform
|
||||
//
|
||||
uint32_t GetCountOfCpuAgents();
|
||||
|
||||
// Get the AgentInfo handle of a Gpu device
|
||||
//
|
||||
// @param idx Gpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool GetGpuAgentInfo(uint32_t idx, AgentInfo** agent_info);
|
||||
|
||||
// Get the AgentInfo handle of a Cpu device
|
||||
//
|
||||
// @param idx Cpu Agent at specified index
|
||||
//
|
||||
// @param agent_info Output parameter updated with AgentInfo
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool GetCpuAgentInfo(uint32_t idx, AgentInfo** agent_info);
|
||||
|
||||
// Create a Queue object and return its handle. The queue object is expected
|
||||
// to support user requested number of Aql dispatch packets.
|
||||
//
|
||||
// @param agent_info Gpu Agent on which to create a queue object
|
||||
//
|
||||
// @param num_Pkts Number of packets to be held by queue
|
||||
//
|
||||
// @param queue Output parameter updated with handle of queue object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool CreateQueue(AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue);
|
||||
|
||||
// Create a Signal object and return its handle.
|
||||
//
|
||||
// @param value Initial value of signal object
|
||||
//
|
||||
// @param signal Output parameter updated with handle of signal object
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool CreateSignal(uint32_t value, hsa_signal_t* signal);
|
||||
|
||||
// Allocate memory for use by a kernel of specified size in specified
|
||||
// agent's memory region. Currently supports Global segment whose Kernarg
|
||||
// flag set.
|
||||
//
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
//
|
||||
// @param size Size of memory in terms of bytes
|
||||
//
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
//
|
||||
uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Allocate memory tp pass kernel parameters.
|
||||
//
|
||||
// @param agent_info Agent from whose memory region to allocate
|
||||
//
|
||||
// @param size Size of memory in terms of bytes
|
||||
//
|
||||
// @return uint8_t* Pointer to buffer, null if allocation fails.
|
||||
//
|
||||
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Transfer data method
|
||||
bool TransferData(void* dest_buff, void* src_buff, uint32_t length, bool host_to_dev);
|
||||
|
||||
// Loads an Assembled Brig file and Finalizes it into Device Isa
|
||||
//
|
||||
// @param agent_info Gpu device for which to finalize
|
||||
//
|
||||
// @param brig_path File path of the Assembled Brig file
|
||||
//
|
||||
// @param kernel_name Name of the kernel to finalize
|
||||
//
|
||||
// @param code_desc Handle of finalized Code Descriptor that could
|
||||
// be used to submit for execution
|
||||
//
|
||||
// @return bool true if successful, false otherwise
|
||||
//
|
||||
bool LoadAndFinalize(AgentInfo* agent_info, const char* brig_path, char* kernel_name,
|
||||
hsa_executable_symbol_t* code_desc);
|
||||
|
||||
// Add an instance of AgentInfo representing a Hsa Gpu agent
|
||||
void AddAgentInfo(AgentInfo* agent_info, bool gpu);
|
||||
|
||||
// Print the various fields of Hsa Gpu Agents
|
||||
bool PrintGpuAgents(const std::string& header);
|
||||
|
||||
private:
|
||||
// Constructor of the class. Will initialize the Hsa Runtime and
|
||||
// query the system topology to get the list of Cpu and Gpu devices
|
||||
HsaRsrcFactory();
|
||||
|
||||
// Destructor of the class
|
||||
~HsaRsrcFactory();
|
||||
|
||||
static HsaRsrcFactory* instance_;
|
||||
static mutex_t mutex_;
|
||||
|
||||
// Used to maintain a list of Hsa Queue handles
|
||||
std::vector<hsa_queue_t*> queue_list_;
|
||||
|
||||
// Used to maintain a list of Hsa Signal handles
|
||||
std::vector<hsa_signal_t*> signal_list_;
|
||||
|
||||
// Used to maintain a list of Hsa Gpu Agent Info
|
||||
std::vector<AgentInfo*> gpu_list_;
|
||||
|
||||
// Used to maintain a list of Hsa Cpu Agent Info
|
||||
std::vector<AgentInfo*> cpu_list_;
|
||||
};
|
||||
|
||||
#endif // TEST_UTIL_HSA_RSRC_FACTORY_H_
|
||||
@@ -0,0 +1,181 @@
|
||||
/**********************************************************************
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
provided that the following conditions are met:
|
||||
|
||||
<95> Redistributions of source code must retain the above copyright notice, this list of
|
||||
conditions and the following disclaimer.
|
||||
<95> Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
********************************************************************/
|
||||
|
||||
#include "util/perf_timer.h"
|
||||
|
||||
PerfTimer::PerfTimer() { freq_in_100mhz_ = MeasureTSCFreqHz(); }
|
||||
|
||||
PerfTimer::~PerfTimer() {
|
||||
while (!timers_.empty()) {
|
||||
Timer* temp = timers_.back();
|
||||
timers_.pop_back();
|
||||
delete temp;
|
||||
}
|
||||
}
|
||||
|
||||
// New cretaed timer instantance index will be returned
|
||||
int PerfTimer::CreateTimer() {
|
||||
Timer* newTimer = new Timer;
|
||||
newTimer->start = 0;
|
||||
newTimer->clocks = 0;
|
||||
|
||||
#ifdef _WIN32
|
||||
QueryPerformanceFrequency((LARGE_INTEGER*)&newTimer->freq);
|
||||
#else
|
||||
newTimer->freq = (long long)1.0E3;
|
||||
#endif
|
||||
|
||||
/* Push back the address of new Timer instance created */
|
||||
timers_.push_back(newTimer);
|
||||
return (int)(timers_.size() - 1);
|
||||
}
|
||||
|
||||
int PerfTimer::StartTimer(int index) {
|
||||
if (index >= (int)timers_.size()) {
|
||||
Error("Cannot reset timer. Invalid handle.");
|
||||
return FAILURE;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
// General Windows timing method
|
||||
#ifndef _AMD
|
||||
long long tmpStart;
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&(tmpStart));
|
||||
timers_[index]->start = (double)tmpStart;
|
||||
#else
|
||||
// AMD Windows timing method
|
||||
#endif
|
||||
#else
|
||||
// General Linux timing method
|
||||
#ifndef _AMD
|
||||
struct timeval s;
|
||||
gettimeofday(&s, 0);
|
||||
timers_[index]->start = s.tv_sec * 1.0E3 + ((double)(s.tv_usec / 1.0E3));
|
||||
#else
|
||||
// AMD timing method
|
||||
unsigned int unused;
|
||||
timers_[index]->start = __rdtscp(&unused);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int PerfTimer::StopTimer(int index) {
|
||||
double n = 0;
|
||||
if (index >= (int)timers_.size()) {
|
||||
Error("Cannot reset timer. Invalid handle.");
|
||||
return FAILURE;
|
||||
}
|
||||
#ifdef _WIN32
|
||||
#ifndef _AMD
|
||||
long long n1;
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&(n1));
|
||||
n = (double)n1;
|
||||
#else
|
||||
// AMD Window Timing
|
||||
#endif
|
||||
|
||||
#else
|
||||
// General Linux timing method
|
||||
#ifndef _AMD
|
||||
struct timeval s;
|
||||
gettimeofday(&s, 0);
|
||||
n = s.tv_sec * 1.0E3 + (double)(s.tv_usec / 1.0E3);
|
||||
#else
|
||||
// AMD Linux timing
|
||||
unsigned int unused;
|
||||
n = __rdtscp(&unused);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
n -= timers_[index]->start;
|
||||
timers_[index]->start = 0;
|
||||
|
||||
#ifndef _AMD
|
||||
timers_[index]->clocks += n;
|
||||
#else
|
||||
// timers_[index]->clocks += 10 * n / freq_in_100mhz_; // unit is ns
|
||||
timers_[index]->clocks += 1.0E-6 * 10 * n / freq_in_100mhz_; // convert to ms
|
||||
#endif
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
void PerfTimer::Error(std::string str) { std::cout << str << std::endl; }
|
||||
|
||||
|
||||
double PerfTimer::ReadTimer(int index) {
|
||||
if (index >= (int)timers_.size()) {
|
||||
Error("Cannot read timer. Invalid handle.");
|
||||
return FAILURE;
|
||||
}
|
||||
|
||||
double reading = double(timers_[index]->clocks);
|
||||
|
||||
reading = double(reading / timers_[index]->freq);
|
||||
|
||||
return reading;
|
||||
}
|
||||
|
||||
|
||||
uint64_t PerfTimer::CoarseTimestampUs() {
|
||||
#ifdef _WIN32
|
||||
uint64_t freqHz, ticks;
|
||||
QueryPerformanceFrequency((LARGE_INTEGER*)&freqHz);
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&ticks);
|
||||
|
||||
// Scale numerator and divisor until (ticks * 1000000) fits in uint64_t.
|
||||
while (ticks > (1ULL << 44)) {
|
||||
ticks /= 16;
|
||||
freqHz /= 16;
|
||||
}
|
||||
|
||||
return (ticks * 1000000) / freqHz;
|
||||
#else
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
|
||||
return uint64_t(ts.tv_sec) * 1000000 + ts.tv_nsec / 1000;
|
||||
#endif
|
||||
}
|
||||
|
||||
uint64_t PerfTimer::MeasureTSCFreqHz() {
|
||||
// Make a coarse interval measurement of TSC ticks for 1 gigacycles.
|
||||
unsigned int unused;
|
||||
uint64_t tscTicksEnd;
|
||||
|
||||
uint64_t coarseBeginUs = CoarseTimestampUs();
|
||||
uint64_t tscTicksBegin = __rdtscp(&unused);
|
||||
do {
|
||||
tscTicksEnd = __rdtscp(&unused);
|
||||
} while (tscTicksEnd - tscTicksBegin < 1000000000);
|
||||
|
||||
uint64_t coarseEndUs = CoarseTimestampUs();
|
||||
|
||||
// Compute the TSC frequency and round to nearest 100MHz.
|
||||
uint64_t coarseIntervalNs = (coarseEndUs - coarseBeginUs) * 1000;
|
||||
uint64_t tscIntervalTicks = tscTicksEnd - tscTicksBegin;
|
||||
return (tscIntervalTicks * 10 + (coarseIntervalNs / 2)) / coarseIntervalNs;
|
||||
}
|
||||
@@ -0,0 +1,85 @@
|
||||
/**********************************************************************
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
provided that the following conditions are met:
|
||||
|
||||
<95> Redistributions of source code must retain the above copyright notice, this list of
|
||||
conditions and the following disclaimer.
|
||||
<95> Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
********************************************************************/
|
||||
|
||||
#ifndef TEST_UTIL_PERF_TIMER_H_
|
||||
#define TEST_UTIL_PERF_TIMER_H_
|
||||
|
||||
// Will use AMD timer or general Linux timer based on compilation flag
|
||||
// Need to consider platform is Windows or Linux
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#include <time.h>
|
||||
#include <windows.h>
|
||||
#else
|
||||
#if defined(__GNUC__)
|
||||
#include <sys/time.h>
|
||||
#include <x86intrin.h>
|
||||
#endif // __GNUC__
|
||||
#endif // _MSC_VER
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
class PerfTimer {
|
||||
public:
|
||||
enum { SUCCESS = 0, FAILURE = 1 };
|
||||
|
||||
PerfTimer();
|
||||
~PerfTimer();
|
||||
|
||||
// General Linux timing method
|
||||
int CreateTimer();
|
||||
int StartTimer(int index);
|
||||
int StopTimer(int index);
|
||||
|
||||
// retrieve time
|
||||
double ReadTimer(int index);
|
||||
// write into a file
|
||||
double WriteTimer(int index);
|
||||
|
||||
private:
|
||||
struct Timer {
|
||||
std::string name; /* name of time object */
|
||||
long long freq; /* frequency */
|
||||
double clocks; /* number of ticks at end */
|
||||
double start; /* start point ticks */
|
||||
};
|
||||
|
||||
std::vector<Timer*> timers_; /* vector to Timer objects */
|
||||
double freq_in_100mhz_;
|
||||
|
||||
// AMD timing method
|
||||
uint64_t CoarseTimestampUs();
|
||||
uint64_t MeasureTSCFreqHz();
|
||||
|
||||
void Error(std::string str);
|
||||
};
|
||||
|
||||
#endif // TEST_UTIL_PERF_TIMER_H_
|
||||
@@ -0,0 +1,52 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef TEST_CTRL_TEST_ASSERT_H_
|
||||
#define TEST_CTRL_TEST_ASSERT_H_
|
||||
|
||||
#define TEST_ASSERT(cond) \
|
||||
{ \
|
||||
if (!(cond)) { \
|
||||
std::cerr << "Assert failed(" << #cond << ") at " << __FILE__ << ", line " << __LINE__ \
|
||||
<< std::endl; \
|
||||
exit(-1); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define TEST_STATUS(cond) \
|
||||
{ \
|
||||
if (!(cond)) { \
|
||||
std::cerr << "Test error at " << __FILE__ << ", line " << __LINE__ \
|
||||
<< std::endl; \
|
||||
const char* message; \
|
||||
rocprofiler_error_string(&message); \
|
||||
std::cerr << "ERROR: " << message << std::endl; \
|
||||
exit(-1); \
|
||||
} \
|
||||
}
|
||||
|
||||
#endif // TEST_CTRL_TEST_ASSERT_H_
|
||||
@@ -0,0 +1,221 @@
|
||||
#ifndef TEST_UTIL_XML_H_
|
||||
#define TEST_UTIL_XML_H_
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace xml {
|
||||
|
||||
class Xml {
|
||||
public:
|
||||
typedef std::vector<char> token_t;
|
||||
struct level_t {
|
||||
std::string tag;
|
||||
std::vector<level_t*> nodes;
|
||||
std::map<std::string, std::string> opts;
|
||||
};
|
||||
typedef std::vector<level_t*> nodes_vec_t;
|
||||
|
||||
enum {
|
||||
DECL_STATE,
|
||||
BODY_STATE
|
||||
};
|
||||
|
||||
Xml(const char* file_name) :
|
||||
file_name_(file_name),
|
||||
file_line_(0),
|
||||
data_size_(0),
|
||||
index_(0),
|
||||
state_(BODY_STATE),
|
||||
level_(NULL),
|
||||
comment_(false)
|
||||
{
|
||||
AddLevel("top");
|
||||
|
||||
fd_ = open(file_name, O_RDONLY);
|
||||
if (fd_ == -1) {
|
||||
std::cout << "XML file not found: " << file_name << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
token_t remainder;
|
||||
while (1) {
|
||||
token_t token = (remainder.size()) ? remainder : NextToken();
|
||||
remainder.clear();
|
||||
// End of file
|
||||
if (token.size() == 0) break;
|
||||
|
||||
// token_t token1 = token;
|
||||
// token1.push_back('\0');
|
||||
// std::cout << "> " << &token1[0] << std::endl;
|
||||
|
||||
switch(state_) {
|
||||
case BODY_STATE:
|
||||
if (token[0] == '<') {
|
||||
bool node_begin = true;
|
||||
unsigned ind = 1;
|
||||
if (token[1] == '/') {
|
||||
node_begin = false;
|
||||
++ind;
|
||||
}
|
||||
|
||||
unsigned i = ind;
|
||||
while (i < token.size()) { if (token[i] == '>') break; ++i; }
|
||||
for (unsigned j = i + 1; j < token.size(); ++j) remainder.push_back(token[j]);
|
||||
|
||||
if (i == token.size()) {
|
||||
if (node_begin) state_ = DECL_STATE;
|
||||
else BadFormat(token);
|
||||
token.push_back('\0');
|
||||
} else token[i] = '\0';
|
||||
|
||||
const char* tag = strdup(&token[ind]);
|
||||
if (node_begin) {
|
||||
AddLevel(tag);
|
||||
} else {
|
||||
if (strncmp(CurrentLevel().c_str(), tag, strlen(tag))) {
|
||||
token.back() = '>';
|
||||
BadFormat(token);
|
||||
}
|
||||
UpLevel();
|
||||
}
|
||||
} else BadFormat(token);
|
||||
break;
|
||||
case DECL_STATE:
|
||||
if (token[0] == '>') {
|
||||
state_ = BODY_STATE;
|
||||
for (unsigned j = 1; j < token.size(); ++j) remainder.push_back(token[j]);
|
||||
continue;
|
||||
} else {
|
||||
token.push_back('\0');
|
||||
unsigned j = 0;
|
||||
for (j = 0; j < token.size(); ++j) if (token[j] == '=') break;
|
||||
if (j == token.size()) BadFormat(token);
|
||||
token[j] = '\0';
|
||||
const char* key = &token[0];
|
||||
const char* value = &token[j + 1];
|
||||
AddOption(key, value);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::cout << "Wrong state: " << state_ << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<level_t*> GetNodes(std::string global_tag) {
|
||||
return map_[global_tag];
|
||||
}
|
||||
|
||||
void Print() const {
|
||||
for(auto& elem : map_) {
|
||||
for (auto node : elem.second) {
|
||||
if (node->opts.size()) {
|
||||
std::cout << elem.first << ":" << std::endl;
|
||||
for (auto& opt : node->opts) {
|
||||
std::cout << " " << opt.first << " = " << opt.second << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
bool LineEndCheck() {
|
||||
bool found = false;
|
||||
if (buffer_[index_] == '\n') {
|
||||
buffer_[index_] = ' ';
|
||||
++file_line_;
|
||||
found = true;
|
||||
comment_ = false;
|
||||
} else if (comment_ || (buffer_[index_] == '#')) {
|
||||
found = true;
|
||||
comment_ = true;
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
token_t NextToken() {
|
||||
token_t token;
|
||||
|
||||
while (1) {
|
||||
if (data_size_ == 0) {
|
||||
data_size_ = read(fd_, buffer_, buf_size_);
|
||||
if (data_size_ <= 0) break;
|
||||
}
|
||||
if (token.empty()) while ((index_ < data_size_) && ((buffer_[index_] == ' ') || LineEndCheck())) {
|
||||
++index_;
|
||||
}
|
||||
while ((index_ < data_size_) && (buffer_[index_] != ' ') && !LineEndCheck()) {
|
||||
token.push_back(buffer_[index_++]);
|
||||
}
|
||||
if (index_ == data_size_) {
|
||||
index_ = 0;
|
||||
data_size_ = 0;
|
||||
} else break;
|
||||
}
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
void BadFormat(token_t token) {
|
||||
token.push_back('\0');
|
||||
std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '" << &token[0] << "'" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
void AddLevel(const std::string& tag) {
|
||||
level_t* level = new level_t;
|
||||
level->tag = tag;
|
||||
if (level_) {
|
||||
level_->nodes.push_back(level);
|
||||
stack_.push_back(level_);
|
||||
}
|
||||
level_ = level;
|
||||
|
||||
std::string global_tag;
|
||||
for (level_t* level : stack_) { global_tag += level->tag + "."; }
|
||||
global_tag += tag;
|
||||
map_[global_tag].push_back(level_);
|
||||
}
|
||||
|
||||
void UpLevel() {
|
||||
level_ = stack_.back();
|
||||
stack_.pop_back();
|
||||
}
|
||||
|
||||
std::string CurrentLevel() const {
|
||||
return level_->tag;
|
||||
}
|
||||
|
||||
void AddOption(const std::string& key, const std::string& value) {
|
||||
level_->opts[key] = value;
|
||||
}
|
||||
|
||||
const char* file_name_;
|
||||
unsigned file_line_;
|
||||
int fd_;
|
||||
static const unsigned buf_size_ = 256;
|
||||
char buffer_[buf_size_];
|
||||
unsigned data_size_;
|
||||
unsigned index_;
|
||||
unsigned state_;
|
||||
level_t* level_;
|
||||
std::vector<level_t*> stack_;
|
||||
std::map<std::string, nodes_vec_t> map_;
|
||||
bool comment_;
|
||||
};
|
||||
|
||||
} // namespace xml
|
||||
|
||||
#endif // TEST_UTIL_XML_H_
|
||||
Référencer dans un nouveau ticket
Bloquer un utilisateur