ROC profiler prototype sources importing

Cette révision appartient à :
Evgeny
2017-11-09 17:26:19 -06:00
Parent 54156e1953
révision 85278f08a0
63 fichiers modifiés avec 7598 ajouts et 0 suppressions
+100
Voir le fichier
@@ -0,0 +1,100 @@
################################################################################
##
## Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved.
##
## MIT LICENSE:
## Permission is hereby granted, free of charge, to any person obtaining a copy of
## this software and associated documentation files (the "Software"), to deal in
## the Software without restriction, including without limitation the rights to
## use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
## of the Software, and to permit persons to whom the Software is furnished to do
## so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in all
## copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
## SOFTWARE.
##
################################################################################
cmake_minimum_required ( VERSION 3.5.0 )
## Verbose output.
set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE )
## Set module name and project name.
set ( ROCPROFILER_NAME "rocprofiler" )
set ( ROCPROFILER_TARGET "${ROCPROFILER_NAME}64" )
set ( ROCPROFILER_LIBRARY "lib${ROCPROFILER_TARGET}" )
project ( ${ROCPROFILER_TARGET} )
## Adding default path cmake modules
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules" )
## Include common cmake modules
include ( utils )
## Set build environment
include ( env )
## Setup the package version.
get_version ( "1.0.0" )
message ( "-- LIB-VERSION: ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}" )
set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} )
set ( BUILD_VERSION_MINOR ${VERSION_MINOR} )
set ( BUILD_VERSION_PATCH ${VERSION_PATCH} )
set ( LIB_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )
if ( DEFINED VERSION_BUILD AND NOT ${VERSION_BUILD} STREQUAL "" )
message ( "VERSION BUILD DEFINED ${VERSION_BUILD}" )
set ( BUILD_VERSION_PATCH "${BUILD_VERSION_PATCH}-${VERSION_BUILD}" )
endif ()
set ( BUILD_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )
## Set target and root/lib/test directory
set ( TARGET_NAME "${ROCPROFILER_TARGET}" )
set ( ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" )
set ( LIB_DIR "${ROOT_DIR}/src" )
set ( TEST_DIR "${ROOT_DIR}/test" )
## Build library
include ( ${LIB_DIR}/CMakeLists.txt )
## Set the VERSION and SOVERSION values
set_property ( TARGET ${TARGET_NAME} PROPERTY VERSION "${LIB_VERSION_STRING}" )
set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${BUILD_VERSION_MAJOR}" )
## If the library is a release, strip the target library
if ( "${CMAKE_BUILD_TYPE}" STREQUAL release )
add_custom_command ( TARGET ${ROCPROFILER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so )
endif ()
## Build tests
add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test )
## Add the install directives for the runtime library.
install ( TARGETS ${ROCPROFILER_TARGET} LIBRARY DESTINATION ${ROCPROFILER_NAME}/lib COMPONENT ${ROCPROFILER_LIBRARY} )
## Add the packaging directives for the runtime library.
set ( CPACK_PACKAGE_NAME ${ROCPROFILER_NAME} )
set ( CPACK_PACKAGE_VENDOR "AMD" )
set ( CPACK_PACKAGE_VERSION_MAJOR ${BUILD_VERSION_MAJOR} )
set ( CPACK_PACKAGE_VERSION_MINOR ${BUILD_VERSION_MINOR} )
set ( CPACK_PACKAGE_VERSION_PATCH ${BUILD_VERSION_PATCH} )
set ( CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc." )
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "ROCPROFILER library for AMD HSA runtime API extension support" )
set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md" )
## Debian package specific variables
set ( CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/RadeonOpenCompute/HSA-RocProfiler" )
set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst;${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm" )
## RPM package specific variables
set ( CPACK_RPM_PRE_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" )
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun" )
include ( CPack )
+19
Voir le fichier
@@ -0,0 +1,19 @@
#/bin/bash
set -e
do_ldconfig() {
echo /opt/rocm/librocprofiler/lib > /etc/ld.so.conf.d/libhsa-rocprofiler64.conf && ldconfig
}
case "$1" in
configure)
do_ldconfig
;;
abort-upgrade|abort-remove|abort-deconfigure)
echo "$1"
;;
*)
exit 0
;;
esac
+18
Voir le fichier
@@ -0,0 +1,18 @@
#!/bin/bash
set -e
rm_ldconfig() {
rm -f /etc/ld.so.conf.d/libhsa-rocprofiler64.conf && ldconfig
}
case "$1" in
remove)
rm_ldconfig
;;
purge)
;;
*)
exit 0
;;
esac
+39
Voir le fichier
@@ -0,0 +1,39 @@
HSA ROC Profiler LICENSE
The University of Illinois/NCSA
Open Source License (NCSA)
Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved.
Developed by:
AMD Research and AMD HSA Software Development
Advanced Micro Devices, Inc.
www.amd.com
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal with the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimers.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimers in
the documentation and/or other materials provided with the distribution.
- Neither the names of Advanced Micro Devices, Inc,
nor the names of its contributors may be used to endorse or promote
products derived from this Software without specific prior written
permission.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS WITH THE SOFTWARE.
+1
Voir le fichier
@@ -0,0 +1 @@
echo /opt/rocm/librocprofiler/lib > /etc/ld.so.conf.d/libhsa-rocprofiler64.conf && ldconfig
+1
Voir le fichier
@@ -0,0 +1 @@
rm -f /etc/ld.so.conf.d/libhsa-rocprofiler64.conf && ldconfig
+50
Voir le fichier
@@ -0,0 +1,50 @@
ROC Profiler library.
Profiling with metrics and traces based perfcounters (PMC) and SQ threadtraces (SQTT)
and AqlProfile HSA extension.
Library supports GFX8/GFX9 APIs.
The library source tree:
- doc - Documantation
- inc/rocprofiler.h - Library public API
- src - Library sources
- core - Library API sources
- util - Library utils sources
- xml - XML parser
- test - Library test suite
- ctrl - Test controll
- util - Test utils
- simple_convolution - Simple convolution test kernel
Build environment:
$ export CMAKE_PREFIX_PATH="<path to hsa-runtime includes>;<path to hsa-runtime library>"
$ export CMAKE_BUILD_TYPE=<debug|release> # release by default
$ export CMAKE_DEBUG_TRACE=1 # 1 to enable debug tracing
To build with the current installed ROCM:
$ cd .../rocprofiler
$ mkdir build
$ cd build
$ export CMAKE_PREFIX_PATH=/home/evgeny/git/compute/out/ubuntu-16.04/16.04
$ cmake ..
$ make
$ cmake -DCMAKE_PREFIX_PATH=/opt/rocm ..
$ make
To run the test:
$ cd .../rocprofiler/build
$ export LD_LIBRARY_PATH=$PWD
$ export HSA_TOOLS_LIB=librocprofiler64.so
$ run.sh
To enabled error messages logging to '/tmp/rocprofiler_log.txt':
$ export ROCPROFILER_LOG=1
To enable verbose tracing:
$ export ROCPROFILER_TRACE=1
+60
Voir le fichier
@@ -0,0 +1,60 @@
---
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
ConstructorInitializerIndentWidth: 4
AlignEscapedNewlinesLeft: false
AlignTrailingComments: true
AlignConsecutiveAssignments: false
AlignOperands: false
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AlwaysBreakAfterDefinitionReturnType: false
AlwaysBreakTemplateDeclarations: false
AlwaysBreakBeforeMultilineStrings: true
BreakBeforeBinaryOperators: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BinPackParameters: true
ColumnLimit: 100
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ExperimentalAutoDetectBinPacking: false
IndentCaseLabels: true
IndentWrappedFunctionNames: false
IndentFunctionDeclarationAfterType: false
MaxEmptyLinesToKeep: 2
KeepEmptyLinesAtTheStartOfBlocks: false
NamespaceIndentation: None
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakString: 1000
PenaltyBreakFirstLessLess: 120
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
DerivePointerAlignment: false
PointerAlignment: Left
SpacesBeforeTrailingComments: 2
Cpp11BracedListStyle: true
Standard: Auto
IndentWidth: 2
TabWidth: 8
UseTab: Never
BreakBeforeBraces: Attach
SpacesInParentheses: false
SpacesInAngles: false
SpaceInEmptyParentheses: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: true
SpaceBeforeAssignmentOperators: true
ContinuationIndentWidth: 4
CommentPragmas: '^ IWYU pragma:'
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
SpaceBeforeParens: ControlStatements
DisableFormat: false
SortIncludes: false
...
+112
Voir le fichier
@@ -0,0 +1,112 @@
## Build is not supported on Windows plaform
if ( WIN32 )
message ( FATAL_ERROR "Windows build is not supported." )
endif ()
## Compiler Preprocessor definitions.
add_definitions ( -D__linux__ )
add_definitions ( -DUNIX_OS )
add_definitions ( -DLINUX )
add_definitions ( -D__AMD64__ )
add_definitions ( -D__x86_64__ )
add_definitions ( -DAMD_INTERNAL_BUILD )
add_definitions ( -DLITTLEENDIAN_CPU=1 )
add_definitions ( -DHSA_LARGE_MODEL= )
add_definitions ( -DHSA_DEPRECATED= )
## Linux Compiler options
set ( CMAKE_CXX_FLAGS "-std=c++11")
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=return-type" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC" )
set ( CMAKE_SHARED_LINKER_FLAGS "-Wl,-Bdynamic -Wl,-z,noexecstack" )
set ( CMAKE_SKIP_BUILD_RPATH TRUE )
## CLANG options
if ( "$ENV{CXX}" STREQUAL "/usr/bin/clang++" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ferror-limit=1000000" )
endif()
## Enable debug trace
if ( DEFINED ENV{CMAKE_DEBUG_TRACE} )
add_definitions ( -DDEBUG_TRACE=1 )
endif()
## Enable HSA APIs intersepting
if ( NOT "$ENV{CMAKE_HSA_INTERCEPT}" STREQUAL "no" )
add_definitions ( -DROCP_HSA_INTERCEPT=1 )
endif()
## Enable HSA proxy queue
if ( NOT "$ENV{CMAKE_HSA_PROXY}" STREQUAL "no" )
add_definitions ( -DROCP_HSA_PROXY=1 )
endif()
## Check env vars
if ( NOT DEFINED CMAKE_BUILD_TYPE OR "${CMAKE_BUILD_TYPE}" STREQUAL "" )
if ( DEFINED ENV{CMAKE_BUILD_TYPE} )
set ( CMAKE_BUILD_TYPE $ENV{CMAKE_BUILD_TYPE} )
endif()
endif()
if ( NOT DEFINED CMAKE_PREFIX_PATH AND DEFINED ENV{CMAKE_PREFIX_PATH} )
set ( CMAKE_PREFIX_PATH $ENV{CMAKE_PREFIX_PATH} )
endif()
## Extend Compiler flags based on build type
string ( TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE )
if ( "${CMAKE_BUILD_TYPE}" STREQUAL debug )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb" )
set ( CMAKE_BUILD_TYPE "debug" )
else ()
set ( CMAKE_BUILD_TYPE "release" )
endif ()
## Extend Compiler flags based on Processor architecture
if ( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64" )
set ( NBIT 64 )
set ( NBITSTR "64" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2" )
elseif ( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86" )
set ( NBIT 32 )
set ( NBITSTR "" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" )
endif ()
## Find hsa-runtime headers/lib
find_file ( HSA_RUNTIME_INC "hsa/hsa.h" )
get_filename_component ( HSA_RUNTIME_INC_PATH ${HSA_RUNTIME_INC} DIRECTORY )
if ( "${HSA_RUNTIME_INC_PATH}" STREQUAL "" )
find_file ( HSA_RUNTIME_INC "hsa.h" )
get_filename_component ( HSA_RUNTIME_INC_PATH ${HSA_RUNTIME_INC} DIRECTORY )
endif()
set ( HSA_RUNTIME_NAME "hsa-runtime64" )
find_library ( HSA_RUNTIME_LIB "lib${HSA_RUNTIME_NAME}.so" )
get_filename_component ( HSA_RUNTIME_LIB_PATH ${HSA_RUNTIME_LIB} DIRECTORY )
set ( HSA_KMT_NAME "hsakmt" )
find_library ( HSA_KMT_LIB "lib${HSA_KMT_NAME}.so" )
get_filename_component ( HSA_KMT_LIB_PATH ${HSA_KMT_LIB} DIRECTORY )
set ( API_PATH ${HSA_RUNTIME_INC_PATH} )
## Basic Tool Chain Information
message ( "----------------NBIT: ${NBIT}" )
message ( "-----------BuildType: ${CMAKE_BUILD_TYPE}" )
message ( "------------Compiler: ${CMAKE_CXX_COMPILER}" )
message ( "----Compiler-Version: ${CMAKE_CXX_COMPILER_VERSION}" )
message ( "-----HSA-Runtime-Inc: ${HSA_RUNTIME_INC_PATH}" )
message ( "-----HSA-Runtime-Lib: ${HSA_RUNTIME_LIB_PATH}" )
message ( "------------API-path: ${API_PATH}" )
message ( "-----CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}" )
message ( "---CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}" )
+116
Voir le fichier
@@ -0,0 +1,116 @@
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
## AMD Research and AMD HSA Software Development
##
## Advanced Micro Devices, Inc.
##
## www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and#or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
## - Redistributions of source code must retain the above copyright notice,
## this list of conditions and the following disclaimers.
## - Redistributions in binary form must reproduce the above copyright
## notice, this list of conditions and the following disclaimers in
## the documentation and#or other materials provided with the distribution.
## - Neither the names of Advanced Micro Devices, Inc,
## nor the names of its contributors may be used to endorse or promote
## products derived from this Software without specific prior written
## permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################
## Parses the VERSION_STRING variable and places
## the first, second and third number values in
## the major, minor and patch variables.
function( parse_version VERSION_STRING )
string ( FIND ${VERSION_STRING} "-" STRING_INDEX )
if ( ${STRING_INDEX} GREATER -1 )
math ( EXPR STRING_INDEX "${STRING_INDEX} + 1" )
string ( SUBSTRING ${VERSION_STRING} ${STRING_INDEX} -1 VERSION_BUILD )
endif ()
string ( REGEX MATCHALL "[0123456789]+" VERSIONS ${VERSION_STRING} )
list ( LENGTH VERSIONS VERSION_COUNT )
if ( ${VERSION_COUNT} GREATER 0)
list ( GET VERSIONS 0 MAJOR )
set ( VERSION_MAJOR ${MAJOR} PARENT_SCOPE )
set ( TEMP_VERSION_STRING "${MAJOR}" )
endif ()
if ( ${VERSION_COUNT} GREATER 1 )
list ( GET VERSIONS 1 MINOR )
set ( VERSION_MINOR ${MINOR} PARENT_SCOPE )
set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${MINOR}" )
endif ()
if ( ${VERSION_COUNT} GREATER 2 )
list ( GET VERSIONS 2 PATCH )
set ( VERSION_PATCH ${PATCH} PARENT_SCOPE )
set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${PATCH}" )
endif ()
if ( DEFINED VERSION_BUILD )
set ( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE )
endif ()
set ( VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE )
endfunction ()
## Gets the current version of the repository
## using versioning tags and git describe.
## Passes back a packaging version string
## and a library version string.
function ( get_version DEFAULT_VERSION_STRING )
parse_version ( ${DEFAULT_VERSION_STRING} )
find_program ( GIT NAMES git )
if ( GIT )
execute_process ( COMMAND "git describe --dirty --long --match [0-9]* 2>/dev/null"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
OUTPUT_VARIABLE GIT_TAG_STRING
OUTPUT_STRIP_TRAILING_WHITESPACE
RESULT_VARIABLE RESULT )
if ( ${RESULT} EQUAL 0 )
parse_version ( ${GIT_TAG_STRING} )
endif ()
endif ()
set( VERSION_STRING "${VERSION_STRING}" PARENT_SCOPE )
set( VERSION_MAJOR "${VERSION_MAJOR}" PARENT_SCOPE )
set( VERSION_MINOR "${VERSION_MINOR}" PARENT_SCOPE )
set( VERSION_PATCH "${VERSION_PATCH}" PARENT_SCOPE )
set( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE )
endfunction()
Fichier binaire non affiché.
+272
Voir le fichier
@@ -0,0 +1,272 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//
// ROC Profiler API
//
// The goal of the implementation is to provide a HW specific low-level
// performance analysis interface for profiling of GPU compute applications.
// The profiling includes HW performance counters (PMC) with complex
// performance metrics and thread traces (SQTT). The profiling is supported
// by the SQTT, PMC and Callback APIs.
//
// The library can be used by a tool library loaded by HSA runtime or by
// higher level HW independent performance analysis API like PAPI.
//
// The library is written on C and will be based on AQLprofile AMD specific
// HSA extension. The library implementation requires HSA API intercepting and
// a profiling queue supporting a submit callback interface.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef INC_ROCPROFILER_H_
#define INC_ROCPROFILER_H_
#include <hsa.h>
#include <hsa_api_trace.h>
#include <hsa_ven_amd_aqlprofile.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
////////////////////////////////////////////////////////////////////////////////
// Profiling info
//
// Profiling info objects have profiling feature info, type, parameters and data
// Also profiling data samplaes can be iterated using a callback
// Profiling feature type
typedef enum {
ROCPROFILER_TYPE_METRIC = 0,
ROCPROFILER_TYPE_TRACE = 1
} rocprofiler_type_t;
// Profiling feture parameter
typedef hsa_ven_amd_aqlprofile_parameter_t rocprofiler_parameter_t;
// Profiling data kind
typedef enum {
ROCPROFILER_UNINIT = 0,
ROCPROFILER_INT32 = 1,
ROCPROFILER_INT64 = 2,
ROCPROFILER_FLOAT = 3,
ROCPROFILER_DOUBLE = 4,
ROCPROFILER_BYTES = 5
} rocprofiler_metric_kind_t;
// Profiling data type
typedef struct {
rocprofiler_metric_kind_t kind; // result kind
union {
uint32_t result_int32; // 32bit integer result
uint64_t result_int64; // 64bit integer result
float result_float; // float single-precision result
double result_double; // float double-precision result
struct {
void* ptr;
uint32_t size;
uint32_t instance_count;
bool copy;
} result_bytes; // data by ptr and byte size
};
} rocprofiler_data_t;
// Profiling feature info
typedef struct {
rocprofiler_type_t type; // feature type
const char* name; // [in] feature name
const rocprofiler_parameter_t* parameters; // feature parameters array
uint32_t parameter_count; // feature parameters count
rocprofiler_data_t data; // [out] profiling data
} rocprofiler_info_t;
////////////////////////////////////////////////////////////////////////////////
// Profiling context
//
// Profiling context object accumuate all profiling information
// Profiling context object
typedef void rocprofiler_t;
// Profiling group object
typedef struct {
unsigned index; // group index
rocprofiler_info_t** info; // profiling info array
uint32_t info_count; // profiling info count
rocprofiler_t* context; // context object
} rocprofiler_group_t;
// Profiling mode
typedef enum {
ROCPROFILER_MODE_STANDALONE = 1,
ROCPROFILER_MODE_CREATEQUEUE = 2,
} rocprofiler_mode_t;
// Profiling preperties
typedef struct {
hsa_queue_t* queue; // queue for STANDALONE mode
// the queue is created and returned in CREATEQUEUE mode
uint32_t queue_depth; // created queue depth
} rocprofiler_properties_t;
// Create new profiling context
hsa_status_t rocprofiler_open(
unsigned gpu_index, // GPU index
rocprofiler_info_t* info, // [in] profiling info array
uint32_t info_count, // profiling info count
rocprofiler_t** context, // [out] context object
uint32_t mode, // profiling mode mask
rocprofiler_properties_t* properties); // profiling properties
// Delete profiling info
hsa_status_t rocprofiler_close(
rocprofiler_t* context); // [in] profiling context
////////////////////////////////////////////////////////////////////////////////
// Runtime API observer
//
// Runtime API observer is called on enter and exit for the API
// Profiling callback data
typedef struct {
uint64_t kernel_object;
uint64_t queue_index;
uint32_t gpu_index;
} rocprofiler_callback_data_t;
// Profiling callback type
typedef hsa_status_t (*rocprofiler_callback_t)(
const rocprofiler_callback_data_t* callback_data, // [in] callback data union, data depends on
// the callback API id
void* user_data, // [in/out] user data passed to the callback
rocprofiler_group_t** group); // [out] profiling group
// Provided standard profiling callback
static inline hsa_status_t rocprofiler_set_dispatch_callback(
const rocprofiler_callback_data_t* callback_data,
void* user_data,
rocprofiler_group_t** group) {
*group = reinterpret_cast<rocprofiler_group_t*>(user_data);
return HSA_STATUS_SUCCESS;
}
// Set/remove kernel dispatch observer
hsa_status_t rocprofiler_set_dispatch_observer(
rocprofiler_callback_t callback, // observer callback
void* data); // [in/out] passed callback data
hsa_status_t rocprofiler_remove_dispatch_observer();
////////////////////////////////////////////////////////////////////////////////
// Start/stop profiling
//
// Start/stop the context profiling invocation, have to be as many as
// contect.invocations' to collect all profiling data
// Start profiling
hsa_status_t rocprofiler_start(
rocprofiler_t* context, // [in/out] profiling context
uint32_t group_index = 0); // group index
// Stop profiling
hsa_status_t rocprofiler_stop(
rocprofiler_t* context, // [in/out] profiling context
uint32_t group_index = 0); // group index
// Read profiling data
hsa_status_t rocprofiler_get_data(
rocprofiler_t* context, // [in/out] profiling context
uint32_t group_index = 0); // group index
// Get profiling groups
hsa_status_t rocprofiler_get_groups(
rocprofiler_t* context, // [in] profiling context
rocprofiler_group_t** groups, // [out] profiling groups
uint32_t* group_count); // [out] profiling groups count
// Start profiling
hsa_status_t rocprofiler_start_group(
rocprofiler_group_t* group); // [in/out] profiling group
// Stop profiling
hsa_status_t rocprofiler_stop_group(
rocprofiler_group_t* group); // [in/out] profiling group
// Get profiling data
hsa_status_t rocprofiler_get_group_data(
rocprofiler_group_t* group); // [in/out] profiling group
// Get metrics data
hsa_status_t rocprofiler_get_metrics_data(
const rocprofiler_t* context); // [in/out] profiling context
// Definition of output data iterator callback
typedef hsa_ven_amd_aqlprofile_data_callback_t rocprofiler_trace_data_callback_t;
// Method for iterating the events output data
hsa_status_t rocprofiler_iterate_trace_data(
rocprofiler_t* context, // [in] profiling context
rocprofiler_trace_data_callback_t callback, // [in] callback to iterate the output data
void* data); // [in/out] callback data
////////////////////////////////////////////////////////////////////////////////
// Returning the error string method
hsa_status_t rocprofiler_error_string (
const char** str); // [out] the API error string pointer returning
////////////////////////////////////////////////////////////////////////////////
// HSA-runtime tool on-load method
bool OnLoad(
HsaApiTable* table,
uint64_t runtime_version,
uint64_t failed_tool_count,
const char* const * failed_tool_names);
#ifdef __cplusplus
} // extern "C" block
#endif // __cplusplus
#endif // INC_ROCPROFILER_H_
+14
Voir le fichier
@@ -0,0 +1,14 @@
#
# Build dynamic Library object
#
set ( TARGET_LIB "${TARGET_NAME}" )
set ( LIB_SRC
${LIB_DIR}/core/rocprofiler.cpp
${LIB_DIR}/core/proxy_queue.cpp
${LIB_DIR}/core/simple_proxy_queue.cpp
${LIB_DIR}/core/intercept_queue.cpp
${LIB_DIR}/util/hsa_rsrc_factory.cpp
)
add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} )
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++)
+399
Voir le fichier
@@ -0,0 +1,399 @@
#ifndef SRC_CORE_CONTEXT_H_
#define SRC_CORE_CONTEXT_H_
#include "inc/rocprofiler.h"
#include <hsa.h>
#include <map>
#include <vector>
#include "core/metrics.h"
#include "core/profile.h"
#include "core/queue.h"
#include "core/types.h"
#include "util/exception.h"
#include "util/hsa_rsrc_factory.h"
namespace rocprofiler {
struct rocprofiler_contex_t;
class Context;
inline unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); }
// Block descriptor
struct block_des_t {
uint32_t id;
uint32_t index;
};
// block_des_t less-then functor
struct lt_block_des {
bool operator()(const block_des_t& a1, const block_des_t& a2) const {
return (a1.id < a2.id) || ((a1.id == a2.id) && (a1.index < a2.index));
}
};
// Block status
struct block_status_t {
uint32_t max_counters;
uint32_t counter_index;
uint32_t group_index;
};
// Metrics arguments
template <class Map>
class MetricArgs : public xml::args_cache_t {
public:
MetricArgs(const Map& map) : map_(map) {}
bool Lookup(const std::string& name, uint64_t& result) const {
rocprofiler_info_t* info = NULL;
auto it = map_.find(name);
if (it == map_.end()) EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is not found");
info = it->second;
if (info) {
result = info->data.result_int64;
if (info->data.kind == ROCPROFILER_UNINIT) EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is uninitialized");
if (info->data.kind != ROCPROFILER_INT64) EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is of incompatible type, not INT64");
} else EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' info is NULL");
return (info != NULL);
}
private:
const Map& map_;
};
// Profiling group
class Group {
public:
Group(const util::AgentInfo* agent_info, Context *context, const uint32_t& index) :
pmc_profile_(agent_info),
sqtt_profile_(agent_info),
context_(context),
index_(index)
{}
void Insert(const profile_info_t& info) {
const rocprofiler_type_t type = info.rinfo->type;
info_vector_.push_back(info.rinfo);
switch (type) {
case ROCPROFILER_TYPE_METRIC:
pmc_profile_.Insert(info);
break;
case ROCPROFILER_TYPE_TRACE:
sqtt_profile_.Insert(info);
break;
default:
EXC_RAISING(HSA_STATUS_ERROR, "bad rocprofiler type (" << type << ")");
}
}
hsa_status_t Finalize() {
hsa_status_t status = pmc_profile_.Finalize(start_vector_, stop_vector_);
if (status == HSA_STATUS_SUCCESS) {
status = sqtt_profile_.Finalize(start_vector_, stop_vector_);
}
return status;
}
void GetProfiles(profile_vector_t& vec) {
pmc_profile_.GetProfiles(vec);
sqtt_profile_.GetProfiles(vec);
}
void GetTraceProfiles(profile_vector_t& vec) {
sqtt_profile_.GetProfiles(vec);
}
info_vector_t& GetInfoVector() { return info_vector_; }
const pkt_vector_t& GetStartVector() const { return start_vector_; }
const pkt_vector_t& GetStopVector() const { return stop_vector_; }
Context* GetContext() { return context_; }
uint32_t GetIndex() const { return index_; }
private:
PmcProfile pmc_profile_;
SqttProfile sqtt_profile_;
info_vector_t info_vector_;
pkt_vector_t start_vector_;
pkt_vector_t stop_vector_;
Context* const context_;
const uint32_t index_;
};
// Profiling context
class Context {
public:
typedef std::map<std::string, rocprofiler_info_t*> info_map_t;
Context(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_info_t* info, const uint32_t info_count) :
agent_(agent_info->dev_id),
agent_info_(agent_info),
queue_(queue),
hsa_rsrc_(&util::HsaRsrcFactory::Instance()),
api_(hsa_rsrc_->AqlProfileApi()),
metrics_(agent_info)
{
Initialize(info, info_count);
Finalize();
}
~Context() {
for (const auto& v : info_map_) {
const std::string& name = v.first;
const rocprofiler_info_t* info = v.second;
if ((info->type == ROCPROFILER_TYPE_METRIC) && (metrics_map_.find(name) == metrics_map_.end())) {
delete info;
}
}
}
void Initialize(rocprofiler_info_t* info_array, const uint32_t info_count) {
info_map_t input_map;
for (unsigned i = 0; i < info_count; ++i) {
rocprofiler_info_t* info = &info_array[i];
input_map[info->name] = info;
info->data.kind = ROCPROFILER_UNINIT;
}
if (info_count) set_.push_back(Group(agent_info_, this, 0));
for (unsigned i = 0; i < info_count; ++i) {
rocprofiler_info_t* info = &info_array[i];
info_map_[info->name] = info;
const rocprofiler_type_t type = info->type;
const char* name = info->name;
if (type == ROCPROFILER_TYPE_METRIC) {
const Metric* metric = metrics_.Get(name);
if (metric == NULL) EXC_RAISING(HSA_STATUS_ERROR, "metric '" << name << "' is not found");
auto ret = metrics_map_.insert({name, metric});
if (!ret.second) EXC_RAISING(HSA_STATUS_ERROR, "metric '" << name << "' is registered more then once");
counters_vec_t counters_vec = metric->GetCounters();
if (counters_vec.empty()) EXC_RAISING(HSA_STATUS_ERROR, "metric name '" << name << "' is not found");
for (const counter_t* counter : counters_vec) {
if (metric->GetExpr()) {
auto it = input_map.find(counter->name);
if (it != input_map.end()) {
continue;
} else {
info = NewCounterInfo(counter);
info_map_[info->name] = info;
}
}
const event_t* event = &(counter->event);
const block_des_t block_des = {event->block_name, event->block_index};
auto ret = groups_map_.insert({block_des, {}});
block_status_t& block_status = ret.first->second;
if (block_status.max_counters == 0) {
profile_t query = {};
query.agent = agent_;
query.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC;
query.events = event;
uint32_t block_counters;
hsa_status_t status = api_->hsa_ven_amd_aqlprofile_get_info(&query, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS, &block_counters);
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "get block_counters info");
block_status.max_counters = block_counters;
}
if (block_status.counter_index >= block_status.max_counters) {
block_status.counter_index = 0;
block_status.group_index += 1;
}
if (block_status.group_index >= set_.size()) {
set_.push_back(Group(agent_info_, this, block_status.group_index));
}
const uint32_t group_index = block_status.group_index;
set_[group_index].Insert(profile_info_t{event, NULL, 0, info});
}
} else if (type == ROCPROFILER_TYPE_TRACE) {
set_[0].Insert(profile_info_t{NULL, info->parameters, info->parameter_count, info});
} else {
EXC_RAISING(HSA_STATUS_ERROR, "bad rocprofiler type (" << type << ")");
}
}
}
void Finalize() {
for (unsigned index = 0; index < set_.size(); ++index) {
const hsa_status_t status = set_[index].Finalize();
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "context finalize failed");
}
}
uint32_t GetGroupCount() const { return set_.size(); }
rocprofiler_group_t GetGroupInfo(const uint32_t& index) {
rocprofiler::info_vector_t& info_vector = set_[index].GetInfoVector();
rocprofiler_group_t group = {};
group.info_count = info_vector.size();
group.info = &info_vector[0];
group.context = reinterpret_cast<rocprofiler_t*>(this);
group.index = index;
return group;
}
const pkt_vector_t& StartPackets(const uint32_t& group_index) const { return set_[group_index].GetStartVector(); }
const pkt_vector_t& StopPackets(const uint32_t& group_index) const { return set_[group_index].GetStopVector(); }
void Start(const uint32_t& group_index, Queue* const queue = NULL) {
const pkt_vector_t& start_packets = StartPackets(group_index);
Queue* const submit_queue = (queue != NULL) ? queue : queue_;
submit_queue->Submit(&start_packets[0], start_packets.size());
}
void Stop(const uint32_t& group_index, Queue* const queue = NULL) {
const pkt_vector_t& stop_packets = StopPackets(group_index);
Queue* const submit_queue = (queue != NULL) ? queue : queue_;
submit_queue->Submit(&stop_packets[0], stop_packets.size());
}
void Submit(const uint32_t& group_index, const packet_t* packet, Queue* const queue = NULL) {
Queue* const submit_queue = (queue != NULL) ? queue : queue_;
Start(group_index, submit_queue);
submit_queue->Submit(packet);
Stop(group_index, submit_queue);
}
struct callback_data_t {
info_vector_t* info_vector;
size_t index;
char* ptr;
};
void GetData(const uint32_t& group_index) {
const profile_vector_t profile_vector = GetProfiles(group_index);
for (auto& tuple : profile_vector) {
// Wait for stop packet to complete
hsa_signal_wait_scacquire(
tuple.completion_signal,
HSA_SIGNAL_CONDITION_LT,
1,
(uint64_t)-1,
HSA_WAIT_STATE_BLOCKED);
callback_data_t callback_data{tuple.info_vector, tuple.info_vector->size(), NULL};
const hsa_status_t status = api_->hsa_ven_amd_aqlprofile_iterate_data(tuple.profile, DataCallback, &callback_data);
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "context iterate data failed");
}
}
void GetMetricsData() const {
const MetricArgs<info_map_t> args(info_map_);
for (const auto v : metrics_map_) {
const std::string& name = v.first;
const Metric* metric = v.second;
const xml::Expr* expr = metric->GetExpr();
if (expr) {
auto it = info_map_.find(name);
if (it == info_map_.end()) EXC_RAISING(HSA_STATUS_ERROR, "metric '" << name << "', rocprofiler info is not found");
rocprofiler_info_t* info = it->second;
info->data.result_int64 = expr->Eval(args);
info->data.kind = ROCPROFILER_INT64;
}
}
}
void IterateTraceData(rocprofiler_trace_data_callback_t callback, void *data) {
profile_vector_t profile_vector;
set_[0].GetTraceProfiles(profile_vector);
for (auto& tuple : profile_vector) {
const hsa_status_t status = api_->hsa_ven_amd_aqlprofile_iterate_data(tuple.profile, callback, data);
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "context iterate data failed");
}
}
private:
// Getting profling packets
profile_vector_t GetProfiles(const uint32_t& index) {
profile_vector_t vec;
if (index >= set_.size()) {
EXC_RAISING(HSA_STATUS_ERROR, "index exceeding the maximum " << set_.size());
}
set_[index].GetProfiles(vec);
return vec;
}
static hsa_status_t DataCallback(hsa_ven_amd_aqlprofile_info_type_t ainfo_type,
hsa_ven_amd_aqlprofile_info_data_t* ainfo_data,
void* data) {
hsa_status_t status = HSA_STATUS_SUCCESS;
callback_data_t* callback_data = reinterpret_cast<callback_data_t*>(data);
info_vector_t& info_vector = *(callback_data->info_vector);
uint32_t index = callback_data->index;
const uint32_t sample_id = ainfo_data->sample_id;
if (info_vector.size() == index) {
index = 0;
} else {
if (sample_id == 0) index += 1;
}
callback_data->index = index;
if (index < info_vector.size()) {
rocprofiler_info_t* rinfo = info_vector[index];
if (ainfo_type == HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) {
if (ainfo_data->sample_id == 0) rinfo->data.result_int64 = 0;
rinfo->data.result_int64 += ainfo_data->pmc_data.result;
rinfo->data.kind = ROCPROFILER_INT64;
} else if (ainfo_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) {
if (rinfo->data.result_bytes.copy) {
char* result_bytes_ptr = reinterpret_cast<char*>(rinfo->data.result_bytes.ptr);
const char* end = result_bytes_ptr + rinfo->data.result_bytes.size;
const char* src = reinterpret_cast<char*>(ainfo_data->sqtt_data.ptr);
const uint32_t size = ainfo_data->sqtt_data.size;
char* ptr = (sample_id == 0) ? result_bytes_ptr : callback_data->ptr;
uint64_t* header = reinterpret_cast<uint64_t*>(ptr);
char* dest = ptr + sizeof(*header);
if ((dest + size) < end) {
hsa_status_t status = hsa_memory_copy(dest, src, size);
if (status == HSA_STATUS_SUCCESS) {
*header = size;
rinfo->data.kind = ROCPROFILER_BYTES;
rinfo->data.result_bytes.instance_count = sample_id + 1;
callback_data->ptr = dest + align_size(size, sizeof(uint64_t));
}
} else status = HSA_STATUS_ERROR;
} else {
if (sample_id == 0) {
rinfo->data.kind = ROCPROFILER_BYTES;
rinfo->data.result_bytes.ptr = ainfo_data->sqtt_data.ptr;
rinfo->data.result_bytes.instance_count = UINT32_MAX;
}
rinfo->data.result_bytes.instance_count += 1;
}
} else status = HSA_STATUS_ERROR;
} else status = HSA_STATUS_ERROR;
return status;
}
rocprofiler_info_t* NewCounterInfo(const counter_t* counter) {
rocprofiler_info_t* info = new rocprofiler_info_t{};
info->type = ROCPROFILER_TYPE_METRIC;
info->name = counter->name.c_str();
return info;
}
// GPU handel
const hsa_agent_t agent_;
const util::AgentInfo* agent_info_;
// Profiling queue
Queue* queue_;
// HSA resources factory
util::HsaRsrcFactory* hsa_rsrc_;
// aqlprofile API table
const pfn_t* api_;
// Profile group set
std::vector<Group> set_;
// Metrics dictionary
MetricsDict metrics_;
// Groups map
std::map<block_des_t, block_status_t, lt_block_des> groups_map_;
// Info map
info_map_t info_map_;
// Metrics map
std::map<std::string, const Metric*> metrics_map_;
};
} // namespace rocprofiler
#endif // SRC_CORE_CONTEXT_H_
+49
Voir le fichier
@@ -0,0 +1,49 @@
#ifndef _SRC_CORE_HSA_PROXY_QUEUE_H
#define _SRC_CORE_HSA_PROXY_QUEUE_H
#include <hsa.h>
#include <atomic>
#include <map>
#include <mutex>
#include "core/proxy_queue.h"
#include "util/exception.h"
namespace rocprofiler {
extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
extern decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn;
extern decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn;
class HsaProxyQueue : public ProxyQueue {
public:
hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) {
return hsa_amd_queue_intercept_register_fn(queue_, on_submit_cb, data);
}
void Submit(const packet_t* packet) { EXC_RAISING(HSA_STATUS_ERROR, "HsaProxyQueue::Submit() is not supported"); }
private:
hsa_status_t Init(
hsa_agent_t agent,
uint32_t size,
hsa_queue_type32_t type,
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
void *data,
uint32_t private_segment_size,
uint32_t group_segment_size,
hsa_queue_t **queue)
{
printf("HsaProxyQueue::Init()\n");
const auto status = hsa_amd_queue_intercept_create_fn(agent, size, type, callback, data, private_segment_size, group_segment_size, &queue_);
*queue = queue_;
return status;
}
hsa_status_t Cleanup() const { return hsa_queue_destroy_fn(queue_); }
hsa_queue_t* queue_;
};
} // namespace rocprofiler
#endif // _SRC_CORE_HSA_PROXY_QUEUE_H
+60
Voir le fichier
@@ -0,0 +1,60 @@
#ifndef _SRC_CORE_HSA_QUEUE_H
#define _SRC_CORE_HSA_QUEUE_H
#include <atomic>
#include "core/queue.h"
#include "core/types.h"
namespace rocprofiler {
class HsaQueue : public Queue {
public:
typedef void (HsaQueue::*submit_fptr_t)(const packet_t* packet);
enum {
LEGACY_SLOT_SIZE_W = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_word_t),
LEGACY_SLOT_SIZE_P = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t)
};
struct slot_pm4_t {
packet_word_t words[LEGACY_SLOT_SIZE_W];
};
HsaQueue(const util::AgentInfo* agent_info, hsa_queue_t* queue) :
queue_(queue)
{}
void Submit(const packet_t* packet) {
// Compute the write index of queue and copy Aql packet into it
const uint64_t que_idx = hsa_queue_load_write_index_relaxed(queue_);
// Increment the write index
hsa_queue_store_write_index_relaxed(queue_, que_idx + 1);
const uint32_t mask = queue_->size - 1;
// Copy packet to the queue
const packet_word_t* src = reinterpret_cast<const packet_word_t*>(packet);
packet_t* slot = reinterpret_cast<packet_t*>(queue_->base_address) + (que_idx & mask);
packet_word_t* dst = reinterpret_cast<packet_word_t*>(slot);
const uint32_t nwords = sizeof(packet_t) / sizeof(packet_word_t);
for (unsigned i = 1; i < nwords; ++i) {
dst[i] = src[i];
}
// To maintain global order to ensure the prior copy of the packet contents is made visible
// before the header is updated.
// With in-order CP it will wait until the first packet in the blob will be valid
std::atomic<packet_word_t>* header_atomic_ptr =
reinterpret_cast<std::atomic<packet_word_t>*>(&dst[0]);
header_atomic_ptr->store(src[0], std::memory_order_release);
// Doorbell signaling
hsa_signal_store_relaxed(queue_->doorbell_signal, que_idx);
}
private:
hsa_queue_t* queue_;
};
} // namespace rocprofiler
#endif // _SRC_CORE_HSA_QUEUE_H
+15
Voir le fichier
@@ -0,0 +1,15 @@
#include "core/intercept_queue.h"
namespace rocprofiler {
void InterceptQueue::HsaIntercept(HsaApiTable* table) {
table->core_->hsa_queue_create_fn = rocprofiler::InterceptQueue::QueueCreate;
table->core_->hsa_queue_destroy_fn = rocprofiler::InterceptQueue::QueueDestroy;
}
InterceptQueue::mutex_t InterceptQueue::mutex_;
rocprofiler_callback_t InterceptQueue::on_dispatch_cb_ = NULL;
void* InterceptQueue::on_dispatch_cb_data_ = NULL;
const char* InterceptQueue::tool_lib_ = NULL;
void* InterceptQueue::tool_handle_ = NULL;
InterceptQueue::obj_map_t* InterceptQueue::obj_map_ = NULL;
} // namespace rocprofiler
+161
Voir le fichier
@@ -0,0 +1,161 @@
#ifndef _SRC_CORE_INTERCEPT_QUEUE_H
#define _SRC_CORE_INTERCEPT_QUEUE_H
#include <dlfcn.h>
#include <atomic>
#include <iostream>
#include <map>
#include <mutex>
#include "core/context.h"
#include "core/proxy_queue.h"
#include "core/types.h"
#include "util/hsa_rsrc_factory.h"
namespace rocprofiler {
extern decltype(hsa_queue_create)* hsa_queue_create_fn;
extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
class InterceptQueue {
public:
typedef std::recursive_mutex mutex_t;
typedef std::map<uint64_t, InterceptQueue*> obj_map_t;
static void HsaIntercept(HsaApiTable* table);
static void SetTool(const char* tool) { tool_lib_ = tool; }
static void UnloadTool() { if (tool_handle_) dlclose(tool_handle_); }
static hsa_status_t QueueCreate(
hsa_agent_t agent,
uint32_t size,
hsa_queue_type32_t type,
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
void *data,
uint32_t private_segment_size,
uint32_t group_segment_size,
hsa_queue_t **queue)
{
std::lock_guard<mutex_t> lck(mutex_);
hsa_status_t status = HSA_STATUS_ERROR;
if (tool_lib_) {
tool_handle_ = dlopen(tool_lib_, RTLD_NOW);
if (tool_handle_ == NULL) {
fprintf(stderr, "ROCProfiler: can't load tool library \"%s\"\n", tool_lib_);
fprintf(stderr, "%s\n", dlerror());
exit(1);
}
tool_lib_ = NULL;
}
if (!obj_map_) obj_map_ = new obj_map_t;
ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, &status);
if (status != HSA_STATUS_SUCCESS) {
InterceptQueue* obj = new InterceptQueue(agent, proxy);
(*obj_map_)[(uint64_t)(*queue)] = obj;
status = proxy->SetInterceptCB(OnSubmitCB, obj);
}
return status;
}
static hsa_status_t QueueDestroy(hsa_queue_t *queue) {
std::lock_guard<mutex_t> lck(mutex_);
hsa_status_t status = HSA_STATUS_ERROR;
obj_map_t::iterator it = obj_map_->find((uint64_t)queue);
if (it != obj_map_->end()) {
const InterceptQueue* obj = it->second;
delete obj;
obj_map_->erase(it);
status = HSA_STATUS_SUCCESS;
}
return status;
}
static void OnSubmitCB(const void* in_packets, uint64_t count, uint64_t user_que_idx, void* data, hsa_amd_queue_intercept_packet_writer writer) {
const packet_t* packets_arr = reinterpret_cast<const packet_t*>(in_packets);
InterceptQueue* obj = reinterpret_cast<InterceptQueue*>(data);
Queue* proxy = obj->proxy_;
for (uint64_t j = 0; j < count; ++j) {
bool to_submit = true;
const packet_t* packet = &packets_arr[j];
if ((GetHeaderType(packet) == HSA_PACKET_TYPE_KERNEL_DISPATCH) && (on_dispatch_cb_ != NULL)) {
rocprofiler_group_t* group = NULL;
const hsa_kernel_dispatch_packet_t* dispatch_packet = reinterpret_cast<const hsa_kernel_dispatch_packet_t*>(packet);
rocprofiler_callback_data_t data = {dispatch_packet->kernel_object, user_que_idx, obj->agent_info_->dev_index};
hsa_status_t status = on_dispatch_cb_(&data, on_dispatch_cb_data_, &group);
if ((status == HSA_STATUS_SUCCESS) && (group != NULL)) {
Context* context = reinterpret_cast<Context*>(group->context);
const pkt_vector_t& start_vector = context->StartPackets(group->index);
const pkt_vector_t& stop_vector = context->StopPackets(group->index);
pkt_vector_t packets = start_vector;
packets.insert(packets.end(), *packet);
packets.insert(packets.end(), stop_vector.begin(), stop_vector.end());
if (writer != NULL) {
writer(&packets[0], packets.size());
} else {
proxy->Submit(&packets[0], packets.size());
}
to_submit = false;
}
}
if (to_submit) {
if (writer != NULL) {
writer(packet, 1);
} else {
proxy->Submit(packet, 1);
}
}
packet += 1;
}
}
static void SetDispatchCB(rocprofiler_callback_t on_dispatch_cb, void* data) {
std::lock_guard<mutex_t> lck(mutex_);
on_dispatch_cb_ = on_dispatch_cb;
on_dispatch_cb_data_ = data;
}
static void UnsetDispatchCB() {
std::lock_guard<mutex_t> lck(mutex_);
on_dispatch_cb_ = NULL;
on_dispatch_cb_data_ = NULL;
}
private:
InterceptQueue(const hsa_agent_t& agent, ProxyQueue* proxy) : proxy_(proxy) {
agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent);
}
~InterceptQueue() { ProxyQueue::Destroy(proxy_); }
static packet_word_t GetHeaderType(const packet_t* packet) {
const packet_word_t* header = reinterpret_cast<const packet_word_t*>(packet);
return (*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask;
}
static mutex_t mutex_;
static const packet_word_t header_type_mask = (1ul << HSA_PACKET_HEADER_WIDTH_TYPE) - 1;
static rocprofiler_callback_t on_dispatch_cb_;
static void* on_dispatch_cb_data_;
static const char* tool_lib_;
static void* tool_handle_;
static obj_map_t* obj_map_;
ProxyQueue* const proxy_;
const util::AgentInfo* agent_info_;
};
} // namespace rocprofiler
#endif // _SRC_CORE_INTERCEPT_QUEUE_H
+169
Voir le fichier
@@ -0,0 +1,169 @@
#ifndef SRC_CORE_METRICS_H_
#define SRC_CORE_METRICS_H_
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fstream>
#include <iostream>
#include <map>
#include <vector>
#include "core/types.h"
#include "util/exception.h"
#include "util/hsa_rsrc_factory.h"
#include "xml/expr.h"
#include "xml/xml.h"
namespace rocprofiler {
struct counter_t {
std::string name;
event_t event;
};
typedef std::vector<const counter_t*> counters_vec_t;
class Metric {
public:
Metric(const std::string& name) : name_(name) {}
std::string GetName() const { return name_; }
virtual void GetCounters(counters_vec_t &vec) const = 0;
counters_vec_t GetCounters() const {
counters_vec_t counters;
GetCounters(counters);
return counters;
}
virtual const xml::Expr* GetExpr() const = 0;
private:
std::string name_;
};
class BaseMetric : public Metric {
public:
BaseMetric(const std::string& name, const counter_t& counter) : Metric(name), counter_(counter) {}
void GetCounters(counters_vec_t &vec) const { vec.push_back(&counter_); }
const xml::Expr* GetExpr() const { return NULL; }
private:
const counter_t counter_;
};
class ExprMetric : public Metric {
public:
ExprMetric(const std::string& name, const counters_vec_t& counters, const xml::Expr* expr) : Metric(name), counters_(counters), expr_(expr) {}
void GetCounters(counters_vec_t& vec) const { vec.insert(vec.end(), counters_.begin(), counters_.end()); }
const xml::Expr* GetExpr() const { return expr_; }
private:
const counters_vec_t counters_;
const xml::Expr* expr_;
};
class MetricsDict {
public:
typedef std::map<std::string, const Metric*> cache_t;
typedef cache_t::const_iterator const_iterator_t;
class ExprCache : public xml::expr_cache_t {
public:
ExprCache(const cache_t* cache) : cache_(cache) {}
bool Lookup(const std::string& name, std::string& result) const {
bool ret = false;
auto it = cache_->find(name);
if (it != cache_->end()) {
ret = true;
const rocprofiler::ExprMetric* expr_metric = dynamic_cast<const rocprofiler::ExprMetric*>(it->second);
if (expr_metric) result = expr_metric->GetExpr()->GetStr();
}
return ret;
}
private:
const cache_t* const cache_;
};
MetricsDict(const util::AgentInfo* agent_info) : xml_(NULL) {
const char* xml_name = getenv("ROCP_METRICS");
if (xml_name != NULL) {
xml_ = new xml::Xml(xml_name);
std::cout << "ROCProfiler: importing metrics from '" << xml_name << "':" << std::endl;
ImportMetrics(agent_info, agent_info->gfxip);
ImportMetrics(agent_info, "global");
}
}
const Metric* Get(const std::string& name) const {
const Metric* metric = NULL;
auto it = cache_.find(name);
if (it != cache_.end()) metric = it->second;
return metric;
}
private:
void ImportMetrics(const util::AgentInfo* agent_info, const char* scope) {
auto scope_list = xml_->GetNodes("top." + std::string(scope) + ".metric");
if (!scope_list.empty()) {
std::cout << " " << scope_list.size() << " " << scope << " metrics found" << std::endl;
for (auto node : scope_list) {
const std::string name = node->opts["name"];
if (cache_.find(name) != cache_.end()) EXC_RAISING(HSA_STATUS_ERROR, "ImportMetrics: metrics redefined '" << name << "'");
const std::string expr_str = node->opts["expr"];
if (expr_str.empty()) {
const std::string block_name = node->opts["block"];
const uint32_t event_id = atoi(node->opts["event"].c_str());
hsa_ven_amd_aqlprofile_profile_t profile;
profile.agent = agent_info->dev_id;
hsa_ven_amd_aqlprofile_id_query_t query = {block_name.c_str(), 0, 0};
hsa_status_t status = util::HsaRsrcFactory::Instance().AqlProfileApi()->
hsa_ven_amd_aqlprofile_get_info(&profile, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID, &query);
if (status == HSA_STATUS_SUCCESS) {
const hsa_ven_amd_aqlprofile_block_name_t block_id = (hsa_ven_amd_aqlprofile_block_name_t)query.id;
if (query.instance_count > 1) {
for (unsigned block_index = 0; block_index < query.instance_count; ++block_index) {
std::ostringstream os;
os << name << '[' << block_index << ']';
const std::string full_name = os.str();
const counter_t counter = {full_name, {block_id, block_index, event_id}};
cache_[full_name] = new BaseMetric(full_name, counter);
}
} else {
const counter_t counter = {name, {block_id, 0, event_id}};
cache_[name] = new BaseMetric(name, counter);
}
} else AQL_EXC_RAISING(HSA_STATUS_ERROR, "ImportMetrics: bad block name '" << block_name << "'");
} else {
xml::Expr* expr_obj = new xml::Expr(expr_str, new ExprCache(&cache_));
std::cout << " " << name << " = " << expr_obj->String() << std::endl;
counters_vec_t counters_vec;
for (const std::string var : expr_obj->GetVars()) {
auto it = cache_.find(var);
if (it == cache_.end()) EXC_RAISING(HSA_STATUS_ERROR, "Bad metric '" << name << "', var '" << var << "' is not found");
it->second->GetCounters(counters_vec);
}
cache_[name] = new ExprMetric(name, counters_vec, expr_obj);
}
}
#if 0
for (auto& v : cache_) {
const Metric* metric = v.second;
counters_vec_t counters_vec;
printf("> Metric '%s'\n", metric->GetName().c_str());
metric->GetCounters(counters_vec);
for (auto c : counters_vec) {
printf(" counter %s, b(%u), i (%u), e (%u)\n", c->name.c_str(), c->event.block_name, c->event.block_index, c->event.counter_id);
}
}
#endif
}
}
// Metrics map
xml::Xml* xml_;
cache_t cache_;
};
} // namespace rocprofiler
#endif // SRC_CORE_METRICS_H_
+206
Voir le fichier
@@ -0,0 +1,206 @@
#ifndef SRC_CORE_PROFILE_H_
#define SRC_CORE_PROFILE_H_
#include "inc/rocprofiler.h"
#include <hsa.h>
#include <vector>
#include "core/types.h"
#include "util/exception.h"
#include "util/hsa_rsrc_factory.h"
namespace rocprofiler {
struct profile_info_t {
const event_t* event;
const parameter_t* parameters;
uint32_t parameter_count;
rocprofiler_info_t* rinfo;
};
typedef std::vector<rocprofiler_info_t*> info_vector_t;
typedef std::vector<packet_t> pkt_vector_t;
struct profile_tuple_t {
const profile_t* profile;
info_vector_t* info_vector;
hsa_signal_t completion_signal;
};
typedef std::vector<profile_tuple_t> profile_vector_t;
template<class Item> class ConfigBase {};
template<> class ConfigBase<event_t> {
public:
ConfigBase(profile_t *profile) : profile_(profile) {}
protected:
void* Array() { return const_cast<event_t*>(profile_->events); }
unsigned Count() const { return profile_->event_count; }
void Set(event_t* events, const unsigned& count) {
profile_->events = events;
profile_->event_count = count;
}
profile_t* profile_;
};
template<> class ConfigBase<parameter_t> {
public:
ConfigBase(profile_t *profile) : profile_(profile) {}
protected:
void* Array() { return const_cast<parameter_t*>(profile_->parameters); }
unsigned Count() const { return profile_->parameter_count; }
void Set(parameter_t* parameters, const unsigned& count) {
profile_->parameters = parameters;
profile_->parameter_count = count;
}
profile_t* profile_;
};
template<class Item>
class Config : protected ConfigBase<Item> {
typedef ConfigBase<Item> Parent;
public:
Config(profile_t *profile) : Parent(profile) {}
void Insert(const Item& item) {
auto count = Parent::Count();
count += 1;
Item* array = reinterpret_cast<Item*>(realloc(const_cast<void*>(Parent::Array()), count * sizeof(Item)));
array[count - 1] = item;
Parent::Set(array, count);
}
};
class Profile {
public:
static const uint32_t LEGACY_SLOT_SIZE_PKT = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t);
Profile(const util::AgentInfo* agent_info) : agent_info_(agent_info) {
profile_ = {};
profile_.agent = agent_info->dev_id;
is_legacy_ = (strncmp(agent_info->name, "gfx8", 4) == 0);
}
virtual ~Profile() {
hsa_memory_free(profile_.command_buffer.ptr);
hsa_memory_free(profile_.output_buffer.ptr);
free(const_cast<event_t*>(profile_.events));
free(const_cast<parameter_t*>(profile_.parameters));
}
virtual void Insert(const profile_info_t& info) {
info_vector_.push_back(info.rinfo);
}
hsa_status_t Finalize(pkt_vector_t& start_vector, pkt_vector_t& stop_vector) {
hsa_status_t status = HSA_STATUS_SUCCESS;
if (!info_vector_.empty()) {
util::HsaRsrcFactory* rsrc = &util::HsaRsrcFactory::Instance();
const pfn_t* api = rsrc->AqlProfileApi();
packet_t start{};
packet_t stop{};
// Check the profile buffer sizes
status = api->hsa_ven_amd_aqlprofile_start(&profile_, NULL);
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_start(NULL)");
Allocate(rsrc);
// Generate start/stop profiling packets
status = api->hsa_ven_amd_aqlprofile_start(&profile_, &start);
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_start");
status = api->hsa_ven_amd_aqlprofile_stop(&profile_, &stop);
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_stop");
// Set completion signals
hsa_signal_t dummy_signal{};
dummy_signal.handle = 0;
start.completion_signal = dummy_signal;
hsa_signal_t post_signal;
status = hsa_signal_create(1, 0, NULL, &post_signal);
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_signal_create");
stop.completion_signal = post_signal;
completion_signal_ = post_signal;
if (is_legacy_) {
const uint32_t start_index = start_vector.size();
const uint32_t stop_index = stop_vector.size();
start_vector.insert(start_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{});
stop_vector.insert(stop_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{});
status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4(&start, reinterpret_cast<void*>(&start_vector[start_index]));
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4(&stop, reinterpret_cast<void*>(&stop_vector[stop_index]));
if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4");
} else {
start_vector.push_back(start);
stop_vector.push_back(stop);
}
}
return status;
}
void GetProfiles(profile_vector_t& vec) {
if (!info_vector_.empty()) {
vec.push_back(profile_tuple_t{&profile_, &info_vector_, completion_signal_});
}
}
protected:
virtual hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) = 0;
const util::AgentInfo* const agent_info_;
bool is_legacy_;
profile_t profile_;
info_vector_t info_vector_;
hsa_signal_t completion_signal_;
};
class PmcProfile : public Profile {
public:
PmcProfile(const util::AgentInfo* agent_info) : Profile(agent_info) {
profile_.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC;
}
void Insert(const profile_info_t& info) {
Profile::Insert(info);
Config<event_t>(&profile_).Insert(*(info.event));
}
hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) {
profile_.command_buffer.ptr = rsrc->AllocateSysMemory(agent_info_, profile_.command_buffer.size);
profile_.output_buffer.ptr = rsrc->AllocateSysMemory(agent_info_, profile_.output_buffer.size);
return (profile_.command_buffer.ptr && profile_.output_buffer.ptr) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}
};
class SqttProfile : public Profile {
public:
static const uint32_t output_buffer_size = 0x2000000; // 32M
SqttProfile(const util::AgentInfo* agent_info) : Profile(agent_info) {
profile_.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT;
}
void Insert(const profile_info_t& info) {
Profile::Insert(info);
for (unsigned j = 0; j < info.parameter_count; ++j) {
Config<parameter_t>(&profile_).Insert(info.parameters[j]);
}
info.rinfo->data.result_bytes.size = output_buffer_size;
if (info.rinfo->data.result_bytes.copy) {
const uint32_t output_buffer_size64 = output_buffer_size / sizeof(uint64_t);
info.rinfo->data.result_bytes.ptr = calloc(output_buffer_size64, sizeof(uint64_t));
memset(info.rinfo->data.result_bytes.ptr, 0, output_buffer_size);
}
}
hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) {
profile_.output_buffer.size = output_buffer_size;
profile_.command_buffer.ptr = rsrc->AllocateSysMemory(agent_info_, profile_.command_buffer.size);
profile_.output_buffer.ptr = rsrc->AllocateLocalMemory(agent_info_, profile_.output_buffer.size);
return (profile_.command_buffer.ptr && profile_.output_buffer.ptr) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}
};
} // namespace rocprofiler
#endif // SRC_CORE_PROFILE_H_
+48
Voir le fichier
@@ -0,0 +1,48 @@
#include "core/proxy_queue.h"
#ifdef ROCP_HSA_PROXY
#include "core/hsa_proxy_queue.h"
#endif
#include "core/simple_proxy_queue.h"
namespace rocprofiler {
void ProxyQueue::HsaIntercept(HsaApiTable* table) {
if (rocp_type_) SimpleProxyQueue::HsaIntercept(table);
}
ProxyQueue* ProxyQueue::Create(
hsa_agent_t agent,
uint32_t size,
hsa_queue_type32_t type,
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
void *data,
uint32_t private_segment_size,
uint32_t group_segment_size,
hsa_queue_t **queue,
hsa_status_t* status)
{
hsa_status_t suc = HSA_STATUS_ERROR;
#ifdef ROCP_HSA_PROXY
ProxyQueue* instance = (rocp_type_) ? (ProxyQueue*) new SimpleProxyQueue() : (ProxyQueue*) new HsaProxyQueue();
#else
ProxyQueue* instance = new SimpleProxyQueue();
#endif
if (instance != NULL) {
const auto suc = instance->Init(agent, size, type, callback, data, private_segment_size, group_segment_size, queue);
if (suc != HSA_STATUS_SUCCESS) {
delete instance;
instance = NULL;
}
}
*status = suc;
return instance;
}
hsa_status_t ProxyQueue::Destroy(const ProxyQueue* obj) {
auto suc = obj->Cleanup();
delete obj;
return suc;
}
bool ProxyQueue::rocp_type_ = false;
} // namespace rocprofiler
+70
Voir le fichier
@@ -0,0 +1,70 @@
#ifndef _SRC_CORE_PROXY_QUEUE_H
#define _SRC_CORE_PROXY_QUEUE_H
#include <hsa.h>
#include <hsa_api_trace.h>
#include <atomic>
#include <map>
#include <mutex>
#include "core/queue.h"
#include "core/types.h"
struct HsaApiTable;
namespace rocprofiler {
typedef void (*hsa_amd_queue_intercept_packet_writer)(const void* packets, uint64_t count);
typedef void (*on_submit_cb_t)(const void* packet, uint64_t count, uint64_t que_idx, void* data, hsa_amd_queue_intercept_packet_writer writer);
class ProxyQueue : public Queue {
public:
static void InitFactory() {
#ifdef ROCP_HSA_PROXY
const char* type = getenv("ROCP_PROXY_QUEUE");
if (type != NULL) {
if (strncmp(type, "rocp", 4) == 0) rocp_type_ = true;
}
#else
rocp_type_ = true;
#endif
}
static void HsaIntercept(HsaApiTable* table);
static ProxyQueue* Create(
hsa_agent_t agent,
uint32_t size,
hsa_queue_type32_t type,
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
void *data,
uint32_t private_segment_size,
uint32_t group_segment_size,
hsa_queue_t **queue,
hsa_status_t* status);
static hsa_status_t Destroy(const ProxyQueue* obj);
virtual hsa_status_t Init(
hsa_agent_t agent,
uint32_t size,
hsa_queue_type32_t type,
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
void *data,
uint32_t private_segment_size,
uint32_t group_segment_size,
hsa_queue_t **queue
) = 0;
virtual hsa_status_t Cleanup() const = 0;
virtual hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) = 0;
virtual void Submit(const packet_t* packet) = 0;
protected:
virtual ~ProxyQueue() {};
private:
static bool rocp_type_;
};
} // namespace rocprofiler
#endif // _SRC_CORE_PROXY_QUEUE_H
+20
Voir le fichier
@@ -0,0 +1,20 @@
#ifndef _SRC_CORE_QUEUE_H
#define _SRC_CORE_QUEUE_H
#include "core/types.h"
namespace rocprofiler {
class Queue {
public:
Queue() {}
virtual ~Queue() {}
virtual void Submit(const packet_t* packet) = 0;
virtual void Submit(const packet_t* packet, const size_t& count) {
for (const packet_t* p = packet; p < packet + count; ++p) Submit(p);
}
};
} // namespace rocprofiler
#endif // _SRC_CORE_QUEUE_H
+251
Voir le fichier
@@ -0,0 +1,251 @@
#include "inc/rocprofiler.h"
#include <hsa.h>
#include <hsa_api_trace.h>
#include <string.h>
#include <vector>
#include "core/context.h"
#include "core/hsa_queue.h"
#include "core/intercept_queue.h"
#include "core/proxy_queue.h"
#include "core/simple_proxy_queue.h"
#include "util/exception.h"
#include "util/hsa_rsrc_factory.h"
#include "util/logger.h"
#define PUBLIC_API __attribute__((visibility("default")))
#define CONSTRUCTOR_API __attribute__((constructor))
#define DESTRUCTOR_API __attribute__((destructor))
#define API_METHOD_PREFIX \
hsa_status_t status = HSA_STATUS_SUCCESS; \
try {
#define API_METHOD_SUFFIX \
} catch (std::exception& e) { \
ERR_LOGGING(__FUNCTION__ << "(), " << e.what()); \
status = rocprofiler::GetExcStatus(e); \
} \
return status;
namespace rocprofiler {
decltype(hsa_queue_create)* hsa_queue_create_fn;
decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
#ifdef ROCP_HSA_PROXY
decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn;
decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn;
#endif
::HsaApiTable* kHsaApiTable;
void SaveHsaApi(::HsaApiTable* table) {
kHsaApiTable = table;
hsa_queue_create_fn = table->core_->hsa_queue_create_fn;
hsa_queue_destroy_fn = table->core_->hsa_queue_destroy_fn;
hsa_signal_store_relaxed_fn = table->core_->hsa_signal_store_relaxed_fn;
hsa_queue_load_write_index_relaxed_fn = table->core_->hsa_queue_load_write_index_relaxed_fn;
hsa_queue_store_write_index_relaxed_fn = table->core_->hsa_queue_store_write_index_relaxed_fn;
#ifdef ROCP_HSA_PROXY
hsa_amd_queue_intercept_create_fn = table->amd_ext_->hsa_amd_queue_intercept_create_fn;
hsa_amd_queue_intercept_register_fn = table->amd_ext_->hsa_amd_queue_intercept_register_fn;
#endif
}
void RestoreHsaApi() {
::HsaApiTable* table = kHsaApiTable;
table->core_->hsa_queue_create_fn = hsa_queue_create_fn;
table->core_->hsa_queue_destroy_fn = hsa_queue_destroy_fn;
table->core_->hsa_signal_store_relaxed_fn = hsa_signal_store_relaxed_fn;
table->core_->hsa_queue_load_write_index_relaxed_fn = hsa_queue_load_write_index_relaxed_fn;
table->core_->hsa_queue_store_write_index_relaxed_fn = hsa_queue_store_write_index_relaxed_fn;
#ifdef ROCP_HSA_PROXY
table->amd_ext_->hsa_amd_queue_intercept_create_fn = hsa_amd_queue_intercept_create_fn;
table->amd_ext_->hsa_amd_queue_intercept_register_fn = hsa_amd_queue_intercept_register_fn;
#endif
}
CONSTRUCTOR_API void constructor() {
util::Logger::Create();
util::HsaRsrcFactory::Create();
}
DESTRUCTOR_API void destructor() {
util::HsaRsrcFactory::Destroy();
util::Logger::Destroy();
}
hsa_status_t GetExcStatus(const std::exception& e) {
const util::exception* rocprofiler_exc_ptr = dynamic_cast<const util::exception*>(&e);
return (rocprofiler_exc_ptr) ? static_cast<hsa_status_t>(rocprofiler_exc_ptr->status()) : HSA_STATUS_ERROR;
}
util::Logger::mutex_t util::Logger::mutex_;
util::Logger* util::Logger::instance_ = NULL;
}
extern "C" {
// Returns the last error message
PUBLIC_API hsa_status_t rocprofiler_error_string(const char** str) {
API_METHOD_PREFIX
*str = rocprofiler::util::Logger::LastMessage().c_str();
API_METHOD_SUFFIX
}
// Create new profiling context
PUBLIC_API hsa_status_t rocprofiler_open(
unsigned agent_id,
rocprofiler_info_t* info,
uint32_t info_count,
rocprofiler_t** handle,
uint32_t mode,
rocprofiler_properties_t* properties)
{
API_METHOD_PREFIX
rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance();
const rocprofiler::util::AgentInfo* agent_info;
if (!hsa_rsrc->GetGpuAgentInfo(agent_id, &agent_info)) {
EXC_RAISING(HSA_STATUS_ERROR, "agent[" << agent_id << "] is not found");
}
rocprofiler::Queue* queue = NULL;
if (mode != 0) {
if (mode & ROCPROFILER_MODE_STANDALONE) {
if (mode & ROCPROFILER_MODE_CREATEQUEUE) {
if (hsa_rsrc->CreateQueue(agent_info, properties->queue_depth, &(properties->queue)) == false) {
EXC_RAISING(HSA_STATUS_ERROR, "CreateQueue() failed");
}
}
queue = new rocprofiler::HsaQueue(agent_info, properties->queue);
} else {
EXC_RAISING(HSA_STATUS_ERROR, "invalid mode (" << mode << ")");
}
}
*handle = (void*) new rocprofiler::Context(agent_info, queue, info, info_count);
API_METHOD_SUFFIX
}
// Delete profiling info
PUBLIC_API hsa_status_t rocprofiler_close(rocprofiler_t* handle)
{
API_METHOD_PREFIX
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(handle);
if (context) delete context;
API_METHOD_SUFFIX
}
// Get profiling groups
PUBLIC_API hsa_status_t rocprofiler_get_groups(rocprofiler_t* handle, rocprofiler_group_t** group_array, uint32_t* group_count) {
API_METHOD_PREFIX
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(handle);
const uint32_t count = context->GetGroupCount();
rocprofiler_group_t* groups = (rocprofiler_group_t*) calloc(count, sizeof(rocprofiler_group_t));
for (unsigned i = 0; i < count; ++i) groups[i] = context->GetGroupInfo(i);
*group_array = groups;
*group_count = count;
API_METHOD_SUFFIX
}
// Start profiling
PUBLIC_API hsa_status_t rocprofiler_start(rocprofiler_t* handle, uint32_t group_index) {
API_METHOD_PREFIX
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(handle);
context->Start(group_index);
API_METHOD_SUFFIX
}
// Stop profiling
PUBLIC_API hsa_status_t rocprofiler_stop(rocprofiler_t* handle, uint32_t group_index) {
API_METHOD_PREFIX
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(handle);
context->Stop(group_index);
API_METHOD_SUFFIX
}
// Get profiling data
PUBLIC_API hsa_status_t rocprofiler_get_data(rocprofiler_t* handle, uint32_t group_index) {
API_METHOD_PREFIX
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(handle);
context->GetData(group_index);
API_METHOD_SUFFIX
}
// Start profiling
PUBLIC_API hsa_status_t rocprofiler_group_start(rocprofiler_group_t* group) {
API_METHOD_PREFIX
rocprofiler_start(group->context, group->index);
API_METHOD_SUFFIX
}
// Stop profiling
PUBLIC_API hsa_status_t rocprofiler_group_stop(rocprofiler_group_t* group) {
API_METHOD_PREFIX
rocprofiler_stop(group->context, group->index);
API_METHOD_SUFFIX
}
// Get profiling data
PUBLIC_API hsa_status_t rocprofiler_get_group_data(rocprofiler_group_t* group) {
API_METHOD_PREFIX
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(group->context);
context->GetData(group->index);
API_METHOD_SUFFIX
}
// Get metrics data
PUBLIC_API hsa_status_t rocprofiler_get_metrics_data(const rocprofiler_t* handle) {
API_METHOD_PREFIX
const rocprofiler::Context* context = reinterpret_cast<const rocprofiler::Context*>(handle);
context->GetMetricsData();
API_METHOD_SUFFIX
}
// Set kernel dispatch observer
PUBLIC_API hsa_status_t rocprofiler_set_dispatch_observer(rocprofiler_callback_t callback, void* data) {
API_METHOD_PREFIX
rocprofiler::InterceptQueue::SetDispatchCB(callback, data);
API_METHOD_SUFFIX
}
// Set kernel dispatch observer
PUBLIC_API hsa_status_t rocprofiler_remove_dispatch_observer() {
API_METHOD_PREFIX
rocprofiler::InterceptQueue::UnsetDispatchCB();
API_METHOD_SUFFIX
}
// Method for iterating the events output data
PUBLIC_API hsa_status_t rocprofiler_iterate_trace_data(rocprofiler_t* handle, hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) {
API_METHOD_PREFIX
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(handle);
context->IterateTraceData(callback, data);
API_METHOD_SUFFIX
}
PUBLIC_API bool OnLoad(
HsaApiTable* table,
uint64_t runtime_version,
uint64_t failed_tool_count,
const char* const * failed_tool_names) {
rocprofiler::SaveHsaApi(table);
rocprofiler::ProxyQueue::InitFactory();
rocprofiler::InterceptQueue::SetTool(getenv("ROCP_TOOL_LIB"));
// HSA intercepting
if (getenv("ROCP_HSA_INTERCEPT") != NULL) {
rocprofiler::InterceptQueue::HsaIntercept(table);
rocprofiler::ProxyQueue::HsaIntercept(table);
}
return true;
}
PUBLIC_API void OnUnload() {
rocprofiler::RestoreHsaApi();
}
} // extern "C"
+11
Voir le fichier
@@ -0,0 +1,11 @@
#include "core/simple_proxy_queue.h"
namespace rocprofiler {
void SimpleProxyQueue::HsaIntercept(HsaApiTable* table) {
table->core_->hsa_signal_store_relaxed_fn = rocprofiler::SimpleProxyQueue::SignalStore;
table->core_->hsa_queue_load_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::LoadIndex;
table->core_->hsa_queue_store_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::StoreIndex;
}
std::map<signal_handle_t, SimpleProxyQueue*> SimpleProxyQueue::queue_map_;
} // namespace rocprofiler
+187
Voir le fichier
@@ -0,0 +1,187 @@
#ifndef _SRC_CORE_SIMPLE_PROXY_QUEUE_H
#define _SRC_CORE_SIMPLE_PROXY_QUEUE_H
#include <hsa.h>
#include <atomic>
#include <map>
#include <mutex>
#include "core/proxy_queue.h"
#include "core/types.h"
#include "util/hsa_rsrc_factory.h"
namespace rocprofiler {
extern decltype(hsa_queue_create)* hsa_queue_create_fn;
extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
extern decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
extern decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
extern decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
typedef decltype(hsa_signal_t::handle) signal_handle_t;
class SimpleProxyQueue : public ProxyQueue {
public:
static void HsaIntercept(HsaApiTable* table);
static void SignalStore(
hsa_signal_t signal,
hsa_signal_value_t que_idx)
{
auto it = queue_map_.find(signal.handle);
if (it != queue_map_.end()) {
SimpleProxyQueue* instance = it->second;
const uint64_t begin = instance->submit_index_;
const uint64_t end = que_idx + 1;
instance->submit_index_ = end;
for (uint64_t j = begin; j < end; ++j) {
// Submited packet
const uint32_t idx = j & instance->queue_mask_;
packet_t* packet = reinterpret_cast<packet_t*>(instance->queue_->base_address) + idx;
if (instance->on_submit_cb_ != NULL) instance->on_submit_cb_(packet, 1, j, instance->on_submit_cb_data_, NULL);
else instance->Submit(packet);
}
} else {
hsa_signal_store_relaxed_fn(signal, que_idx);
}
}
static uint64_t LoadIndex(
const hsa_queue_t *queue)
{
uint64_t index = 0;
auto it = queue_map_.find(queue->doorbell_signal.handle);
if (it != queue_map_.end()) {
SimpleProxyQueue* instance = it->second;
instance->mutex_.lock();
index = instance->queue_index_;
} else {
index = hsa_queue_load_write_index_relaxed_fn(queue);
}
return index;
}
static void StoreIndex(
const hsa_queue_t *queue,
uint64_t value)
{
auto it = queue_map_.find(queue->doorbell_signal.handle);
if (it != queue_map_.end()) {
SimpleProxyQueue* instance = it->second;
instance->queue_index_ = value;
instance->mutex_.unlock();
} else {
hsa_queue_store_write_index_relaxed_fn(queue, value);
}
}
hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) {
on_submit_cb_ = on_submit_cb;
on_submit_cb_data_ = data;
return HSA_STATUS_SUCCESS;
}
void Submit(const packet_t* packet) {
// Compute the write index of queue and copy Aql packet into it
const uint64_t que_idx = hsa_queue_load_write_index_relaxed_fn(queue_);
// Increment the write index and ring the doorbell to submit the packet.
hsa_queue_store_write_index_relaxed_fn(queue_, que_idx + 1);
const uint32_t mask = queue_->size - 1;
const uint32_t idx = que_idx & mask;
// Copy packet to the queue
const packet_word_t* src = reinterpret_cast<const packet_word_t*>(packet);
packet_word_t* dst = reinterpret_cast<packet_word_t*>(base_address_ + idx);
for (unsigned i = 1; i < sizeof(packet_t) / sizeof(packet_word_t); ++i) {
dst[i] = src[i];
}
// To maintain global order to ensure the prior copy of the packet contents is made visible
// before the header is updated.
// With in-order CP it will wait until the first packet in the blob will be valid
std::atomic<packet_word_t>* header_atomic_ptr =
reinterpret_cast<std::atomic<packet_word_t>*>(&dst[0]);
header_atomic_ptr->store(src[0], std::memory_order_release);
// Doorbell signaling
hsa_signal_store_relaxed_fn(doorbell_signal_, que_idx);
}
SimpleProxyQueue() :
agent_info_(NULL),
queue_(NULL),
base_address_(NULL),
doorbell_signal_({}),
queue_index_(0),
queue_mask_(0),
submit_index_(0),
on_submit_cb_(0),
on_submit_cb_data_(0)
{}
~SimpleProxyQueue() {}
private:
hsa_status_t Init(
hsa_agent_t agent,
uint32_t size,
hsa_queue_type32_t type,
void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
void *data,
uint32_t private_segment_size,
uint32_t group_segment_size,
hsa_queue_t **queue)
{
auto status = Init(agent, size);
*queue = queue_;
return status;
}
hsa_status_t Init(hsa_agent_t agent, uint32_t size) {
hsa_status_t status = HSA_STATUS_ERROR;
agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent);
if (agent_info_ != NULL) {
if (agent_info_->dev_type == HSA_DEVICE_TYPE_GPU) {
status = hsa_queue_create_fn(agent, size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue_);
if (status == HSA_STATUS_SUCCESS) {
base_address_ = reinterpret_cast<packet_t*>(queue_->base_address);
doorbell_signal_ = queue_->doorbell_signal;
data_array_ = calloc(size + 1, sizeof(packet_t));
uintptr_t addr = (uintptr_t)data_array_;
queue_->base_address = (void*) ((addr + align_mask_) & ~align_mask_);
status = hsa_signal_create(1, 0, NULL, &(queue_->doorbell_signal));
queue_mask_ = size - 1;
queue_map_[queue_->doorbell_signal.handle] = this;
}
}
}
return status;
}
hsa_status_t Cleanup() const {
hsa_status_t status = HSA_STATUS_SUCCESS;
queue_->base_address = base_address_;
queue_->doorbell_signal = doorbell_signal_;
status = hsa_queue_destroy_fn(queue_);
free(data_array_);
return status;
}
static std::map<signal_handle_t, SimpleProxyQueue*> queue_map_;
const util::AgentInfo* agent_info_;
hsa_queue_t* queue_;
static const uintptr_t align_mask_ = sizeof(packet_t) - 1;
packet_t* base_address_;
hsa_signal_t doorbell_signal_;
uint64_t queue_index_;
uint64_t queue_mask_;
uint64_t submit_index_;
std::mutex mutex_;
on_submit_cb_t on_submit_cb_;
void* on_submit_cb_data_;
void* data_array_;
};
} // namespace rocprofiler
#endif // _SRC_CORE_SIMPLE_PROXY_QUEUE_H
+15
Voir le fichier
@@ -0,0 +1,15 @@
#ifndef SRC_CORE_TYPES_H_
#define SRC_CORE_TYPES_H_
#include <hsa_ven_amd_aqlprofile.h>
namespace rocprofiler {
typedef hsa_ven_amd_aqlprofile_1_00_pfn_t pfn_t;
typedef hsa_ven_amd_aqlprofile_event_t event_t;
typedef hsa_ven_amd_aqlprofile_parameter_t parameter_t;
typedef hsa_ven_amd_aqlprofile_profile_t profile_t;
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
typedef uint32_t packet_word_t;
} // namespace rocprofiler
#endif // SRC_CORE_TYPES_H_
+39
Voir le fichier
@@ -0,0 +1,39 @@
#ifndef SRC_UTIL_EXCEPTION_H_
#define SRC_UTIL_EXCEPTION_H_
#include <exception>
#include <string>
#include <sstream>
#include <hsa_ven_amd_aqlprofile.h>
#define EXC_RAISING(error, stream) { \
std::ostringstream oss; oss << __FUNCTION__ << "(), " << stream; \
throw rocprofiler::util::exception(error, oss.str()); \
}
#define AQL_EXC_RAISING(error, stream) { \
const char* error_string = NULL; \
const rocprofiler::pfn_t* api = util::HsaRsrcFactory::Instance().AqlProfileApi(); \
api->hsa_ven_amd_aqlprofile_error_string(&error_string); \
EXC_RAISING(error, stream << ", " << error_string); \
}
namespace rocprofiler {
namespace util {
class exception : public std::exception {
public:
explicit exception(const uint32_t &status, const std::string& msg) : status_(status), str_(msg) {}
const char* what() const throw() { return str_.c_str(); }
uint32_t status() const throw() { return status_; }
protected:
const uint32_t status_;
const std::string str_;
};
} // namespace util
} // namespace rocprofiler
#endif // SRC_UTIL_EXCEPTION_H_
+392
Voir le fichier
@@ -0,0 +1,392 @@
/**********************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:
<95> Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
<95> Redistributions in binary form must reproduce the above copyright notice, this list of
conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#include "util/hsa_rsrc_factory.h"
#include <hsa.h>
#include <hsa_ext_finalize.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <cassert>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
namespace rocprofiler {
namespace util {
// Callback function to get available in the system agents
hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) {
hsa_status_t status = HSA_STATUS_ERROR;
HsaRsrcFactory* hsa_rsrc = reinterpret_cast<HsaRsrcFactory*>(data);
const AgentInfo* agent_info = hsa_rsrc->AddAgentInfo(agent);
if (agent_info != NULL) status = HSA_STATUS_SUCCESS;
return status;
}
// Callback function to find and bind kernarg region of an agent
hsa_status_t HsaRsrcFactory::FindMemRegionsCallback(hsa_region_t region, void* data) {
hsa_region_global_flag_t flags;
hsa_region_segment_t segment_id;
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id);
if (segment_id != HSA_REGION_SEGMENT_GLOBAL) {
return HSA_STATUS_SUCCESS;
}
AgentInfo* agent_info = (AgentInfo*)data;
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) {
agent_info->coarse_region = region;
}
if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) {
agent_info->kernarg_region = region;
}
return HSA_STATUS_SUCCESS;
}
// Constructor of the class
HsaRsrcFactory::HsaRsrcFactory() {
// Initialize the Hsa Runtime
hsa_status_t status = hsa_init();
CHECK_STATUS("Error in hsa_init", status);
// Discover the set of Gpu devices available on the platform
status = hsa_iterate_agents(GetHsaAgentsCallback, this);
CHECK_STATUS("Error Calling hsa_iterate_agents", status);
// Get AqlProfile API table
status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &aqlprofile_api_);
CHECK_STATUS("aqlprofile API table query failed", status);
}
// Destructor of the class
HsaRsrcFactory::~HsaRsrcFactory() {
hsa_status_t status = hsa_shut_down();
CHECK_STATUS("Error in hsa_shut_down", status);
}
// Add system agent info
const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
// Determine if device is a Gpu agent
hsa_status_t status;
AgentInfo* agent_info = NULL;
hsa_device_type_t type;
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
CHECK_STATUS("Error Calling hsa_agent_get_info", status);
if (type == HSA_DEVICE_TYPE_CPU) {
agent_info = new AgentInfo{};
agent_info->dev_id = agent;
agent_info->dev_type = HSA_DEVICE_TYPE_CPU;
agent_info->dev_index = cpu_list_.size();
cpu_list_.push_back(agent_info);
}
if (type == HSA_DEVICE_TYPE_GPU) {
agent_info = new AgentInfo{};
agent_info->dev_id = agent;
agent_info->dev_type = HSA_DEVICE_TYPE_GPU;
hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name);
strncpy(agent_info->gfxip, agent_info->name, 4);
agent_info->gfxip[4] = '\0';
agent_info->max_wave_size = 0;
hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size);
agent_info->max_queue_size = 0;
hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);
agent_info->profile = hsa_profile_t(108);
hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile);
// Initialize memory regions to zero
agent_info->kernarg_region.handle = 0;
agent_info->coarse_region.handle = 0;
// Find and Bind Memory regions of the Gpu agent
hsa_agent_iterate_regions(agent, FindMemRegionsCallback, agent_info);
// Set GPU index
agent_info->dev_index = gpu_list_.size();
gpu_list_.push_back(agent_info);
}
if (agent_info) agent_map_[agent.handle] = agent_info;
return agent_info;
}
// Return systen agent info
const AgentInfo* HsaRsrcFactory::GetAgentInfo(const hsa_agent_t agent) {
const AgentInfo* agent_info = NULL;
auto it = agent_map_.find(agent.handle);
if (it != agent_map_.end()) {
agent_info = it->second;
}
return agent_info;
}
// Get the count of Hsa Gpu Agents available on the platform
//
// @return uint32_t Number of Gpu agents on platform
//
uint32_t HsaRsrcFactory::GetCountOfGpuAgents() { return uint32_t(gpu_list_.size()); }
// Get the count of Hsa Cpu Agents available on the platform
//
// @return uint32_t Number of Cpu agents on platform
//
uint32_t HsaRsrcFactory::GetCountOfCpuAgents() { return uint32_t(cpu_list_.size()); }
// Get the AgentInfo handle of a Gpu device
//
// @param idx Gpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
// Determine if request is valid
uint32_t size = uint32_t(gpu_list_.size());
if (idx >= size) {
return false;
}
// Copy AgentInfo from specified index
*agent_info = gpu_list_[idx];
return true;
}
// Get the AgentInfo handle of a Cpu device
//
// @param idx Cpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
// Determine if request is valid
uint32_t size = uint32_t(cpu_list_.size());
if (idx >= size) {
return false;
}
// Copy AgentInfo from specified index
*agent_info = cpu_list_[idx];
return true;
}
// Create a Queue object and return its handle. The queue object is expected
// to support user requested number of Aql dispatch packets.
//
// @param agent_info Gpu Agent on which to create a queue object
//
// @param num_Pkts Number of packets to be held by queue
//
// @param queue Output parameter updated with handle of queue object
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue) {
hsa_status_t status;
status = hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
UINT32_MAX, UINT32_MAX, queue);
return (status == HSA_STATUS_SUCCESS);
}
// Create a Signal object and return its handle.
//
// @param value Initial value of signal object
//
// @param signal Output parameter updated with handle of signal object
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
hsa_status_t status;
status = hsa_signal_create(value, 0, NULL, signal);
return (status == HSA_STATUS_SUCCESS);
}
// Allocate memory for use by a kernel of specified size in specified
// agent's memory region. Currently supports Global segment whose Kernarg
// flag set.
//
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) {
hsa_status_t status;
uint8_t* buffer = NULL;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
if (agent_info->coarse_region.handle != 0) {
// Allocate in local memory if it is available
status = hsa_memory_allocate(agent_info->coarse_region, size, (void**)&buffer);
if (status == HSA_STATUS_SUCCESS) {
status = hsa_memory_assign_agent(buffer, agent_info->dev_id, HSA_ACCESS_PERMISSION_RW);
}
} else {
// Allocate in system memory if local memory is not available
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
}
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
}
// Allocate memory tp pass kernel parameters.
//
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) {
hsa_status_t status;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
uint8_t* buffer = NULL;
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
}
// Transfer data method
bool HsaRsrcFactory::TransferData(void* dest_buff, void* src_buff, uint32_t length,
bool host_to_dev) {
hsa_status_t status;
status = hsa_memory_copy(dest_buff, src_buff, length);
return (status == HSA_STATUS_SUCCESS);
}
// Loads an Assembled Brig file and Finalizes it into Device Isa
//
// @param agent_info Gpu device for which to finalize
//
// @param brig_path File path of the Assembled Brig file
//
// @param kernel_name Name of the kernel to finalize
//
// @param code_desc Handle of finalized Code Descriptor that could
// be used to submit for execution
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path,
char* kernel_name, hsa_executable_symbol_t* code_desc) {
// Finalize the Hsail object into code object
hsa_status_t status;
hsa_code_object_t code_object;
// Build the code object filename
std::string filename(brig_path);
std::clog << "Code object filename: " << filename << std::endl;
// Open the file containing code object
std::ifstream codeStream(filename.c_str(), std::ios::binary | std::ios::ate);
if (!codeStream) {
std::cerr << "Error: failed to load " << filename << std::endl;
assert(false);
return false;
}
// Allocate memory to read in code object from file
size_t size = std::string::size_type(codeStream.tellg());
char* codeBuff = (char*)AllocateSysMemory(agent_info, size);
if (!codeBuff) {
std::cerr << "Error: failed to allocate memory for code object." << std::endl;
assert(false);
return false;
}
// Read the code object into allocated memory
codeStream.seekg(0, std::ios::beg);
std::copy(std::istreambuf_iterator<char>(codeStream), std::istreambuf_iterator<char>(), codeBuff);
// De-Serialize the code object that has been read into memory
status = hsa_code_object_deserialize(codeBuff, size, NULL, &code_object);
if (status != HSA_STATUS_SUCCESS) {
std::cerr << "Failed to deserialize code object" << std::endl;
return false;
}
// Create executable.
hsa_executable_t hsaExecutable;
status =
hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, "", &hsaExecutable);
CHECK_STATUS("Error in creating executable object", status);
// Load code object.
status = hsa_executable_load_code_object(hsaExecutable, agent_info->dev_id, code_object, "");
CHECK_STATUS("Error in loading executable object", status);
// Freeze executable.
status = hsa_executable_freeze(hsaExecutable, "");
CHECK_STATUS("Error in freezing executable object", status);
// Get symbol handle.
hsa_executable_symbol_t kernelSymbol;
status = hsa_executable_get_symbol(hsaExecutable, NULL, kernel_name, agent_info->dev_id, 0,
&kernelSymbol);
CHECK_STATUS("Error in looking up kernel symbol", status);
// Update output parameter
*code_desc = kernelSymbol;
return true;
}
// Print the various fields of Hsa Gpu Agents
bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) {
std::clog << header << " :" << std::endl;
const AgentInfo* agent_info;
int size = uint32_t(gpu_list_.size());
for (int idx = 0; idx < size; idx++) {
agent_info = gpu_list_[idx];
std::clog << "> agent[" << idx << "] :" << std::endl;
std::clog << ">> Name : " << agent_info->name << std::endl;
std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl;
std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl;
std::clog << ">> Kernarg Region Id : " << agent_info->coarse_region.handle << std::endl;
}
return true;
}
HsaRsrcFactory* HsaRsrcFactory::instance_ = NULL;
HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_;
} // namespace util
} // namespace rocprofiler
+262
Voir le fichier
@@ -0,0 +1,262 @@
/**********************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:
<95> Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
<95> Redistributions in binary form must reproduce the above copyright notice, this list of
conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#ifndef SRC_UTIL_HSA_RSRC_FACTORY_H_
#define SRC_UTIL_HSA_RSRC_FACTORY_H_
#include <hsa.h>
#include <hsa_ext_finalize.h>
#include <hsa_ven_amd_aqlprofile.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <mutex>
#include <map>
#include <string>
#include <vector>
#define HSA_ARGUMENT_ALIGN_BYTES 16
#define HSA_QUEUE_ALIGN_BYTES 64
#define HSA_PACKET_ALIGN_BYTES 64
#define CHECK_STATUS(msg, status) \
if (status != HSA_STATUS_SUCCESS) { \
const char* emsg = 0; \
hsa_status_string(status, &emsg); \
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
exit(1); \
}
namespace rocprofiler {
namespace util {
static const unsigned MEM_PAGE_BYTES = 0x1000;
static const unsigned MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t;
// Encapsulates information about a Hsa Agent such as its
// handle, name, max queue size, max wavefront size, etc.
struct AgentInfo {
// Handle of Agent
hsa_agent_t dev_id;
// Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
uint32_t dev_type;
// Agent system index
uint32_t dev_index;
// GFXIP name
char gfxip[64];
// Name of Agent whose length is less than 64
char name[64];
// Max size of Wavefront size
uint32_t max_wave_size;
// Max size of Queue buffer
uint32_t max_queue_size;
// Hsail profile supported by agent
hsa_profile_t profile;
// Memory region supporting kernel parameters
hsa_region_t coarse_region;
// Memory region supporting kernel arguments
hsa_region_t kernarg_region;
};
class HsaRsrcFactory {
public:
typedef std::recursive_mutex mutex_t;
static HsaRsrcFactory* Create() { return NULL; }
static HsaRsrcFactory* CreateInstance() {
std::lock_guard<mutex_t> lck(mutex_);
if (instance_ == NULL) {
instance_ = new HsaRsrcFactory();
}
return instance_;
}
static HsaRsrcFactory& Instance() {
CreateInstance();
hsa_status_t status = (instance_ != NULL) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
CHECK_STATUS("HsaRsrcFactory::Instance() is not found", status);
return *instance_;
}
static void Destroy() {
std::lock_guard<mutex_t> lck(mutex_);
if (instance_) delete instance_;
instance_ = NULL;
}
// Return system agent info
const AgentInfo* GetAgentInfo(const hsa_agent_t agent);
// Get the count of Hsa Gpu Agents available on the platform
//
// @return uint32_t Number of Gpu agents on platform
//
uint32_t GetCountOfGpuAgents();
// Get the count of Hsa Cpu Agents available on the platform
//
// @return uint32_t Number of Cpu agents on platform
//
uint32_t GetCountOfCpuAgents();
// Get the AgentInfo handle of a Gpu device
//
// @param idx Gpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
// Get the AgentInfo handle of a Cpu device
//
// @param idx Cpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info);
// Create a Queue object and return its handle. The queue object is expected
// to support user requested number of Aql dispatch packets.
//
// @param agent_info Gpu Agent on which to create a queue object
//
// @param num_Pkts Number of packets to be held by queue
//
// @param queue Output parameter updated with handle of queue object
//
// @return bool true if successful, false otherwise
//
bool CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue);
// Create a Signal object and return its handle.
//
// @param value Initial value of signal object
//
// @param signal Output parameter updated with handle of signal object
//
// @return bool true if successful, false otherwise
//
bool CreateSignal(uint32_t value, hsa_signal_t* signal);
// Allocate memory for use by a kernel of specified size in specified
// agent's memory region. Currently supports Global segment whose Kernarg
// flag set.
//
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size);
// Allocate memory tp pass kernel parameters.
//
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
// Transfer data method
bool TransferData(void* dest_buff, void* src_buff, uint32_t length, bool host_to_dev);
// Loads an Assembled Brig file and Finalizes it into Device Isa
//
// @param agent_info Gpu device for which to finalize
//
// @param brig_path File path of the Assembled Brig file
//
// @param kernel_name Name of the kernel to finalize
//
// @param code_desc Handle of finalized Code Descriptor that could
// be used to submit for execution
//
// @return bool true if successful, false otherwise
//
bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, char* kernel_name,
hsa_executable_symbol_t* code_desc);
// Print the various fields of Hsa Gpu Agents
bool PrintGpuAgents(const std::string& header);
// Return AqlProfile API table
const hsa_ven_amd_aqlprofile_1_00_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; }
private:
// System agents iterating callback
static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data);
// Callback function to find and bind kernarg region of an agent
static hsa_status_t FindMemRegionsCallback(hsa_region_t region, void* data);
// Constructor of the class. Will initialize the Hsa Runtime and
// query the system topology to get the list of Cpu and Gpu devices
HsaRsrcFactory();
// Destructor of the class
~HsaRsrcFactory();
// Add an instance of AgentInfo representing a Hsa Gpu agent
const AgentInfo* AddAgentInfo(const hsa_agent_t agent);
static HsaRsrcFactory* instance_;
static mutex_t mutex_;
// Used to maintain a list of Hsa Gpu Agent Info
std::vector<const AgentInfo*> gpu_list_;
// Used to maintain a list of Hsa Cpu Agent Info
std::vector<const AgentInfo*> cpu_list_;
// System agents map
std::map<hsa_agent_handle_t, const AgentInfo*> agent_map_;
// AqlProfile API table
hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_api_;
};
} // namespace util
} // namespace rocprofiler
#endif // SRC_UTIL_HSA_RSRC_FACTORY_H_
+163
Voir le fichier
@@ -0,0 +1,163 @@
#ifndef SRC_UTIL_LOGGER_H_
#define SRC_UTIL_LOGGER_H_
#include <time.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/file.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string>
#include <iostream>
#include <sstream>
#include <fstream>
#include <exception>
#include <mutex>
#include <map>
namespace rocprofiler {
namespace util {
class Logger {
public:
typedef std::recursive_mutex mutex_t;
template <typename T> Logger& operator<<(const T& m) {
std::ostringstream oss;
oss << m;
if (!streaming_)
Log(oss.str());
else
Put(oss.str());
streaming_ = true;
return *this;
}
typedef void (*manip_t)();
Logger& operator<<(manip_t f) {
f();
return *this;
}
static void begm() { Instance().ResetStreaming(true); }
static void endl() { Instance().ResetStreaming(false); }
static const std::string& LastMessage() {
Logger& logger = Instance();
std::lock_guard<mutex_t> lck(mutex_);
return logger.message_[GetTid()];
}
static Logger* Create() {
std::lock_guard<mutex_t> lck(mutex_);
if (instance_ == NULL) instance_ = new Logger();
return instance_;
}
static void Destroy() {
std::lock_guard<mutex_t> lck(mutex_);
if (instance_ != NULL) delete instance_;
instance_ = NULL;
}
static Logger& Instance() {
Create();
return *instance_;
}
private:
static uint32_t GetPid() { return syscall(__NR_getpid); }
static uint32_t GetTid() { return syscall(__NR_gettid); }
Logger() : file_(NULL), dirty_(false), streaming_(false), messaging_(false) {
const char* path = getenv("ROCPROFILER_LOG");
if (path != NULL) {
file_ = fopen("/tmp/rocprofiler_log.txt", "a");
}
ResetStreaming(false);
}
~Logger() {
if (file_ != NULL) {
if (dirty_) Put("\n");
fclose(file_);
}
}
void ResetStreaming(const bool messaging) {
std::lock_guard<mutex_t> lck(mutex_);
if (messaging) {
message_[GetTid()] = "";
}
messaging_ = messaging;
streaming_ = messaging;
}
void Put(const std::string& m) {
std::lock_guard<mutex_t> lck(mutex_);
if (messaging_) {
message_[GetTid()] += m;
}
if (file_ != NULL) {
dirty_ = true;
flock(fileno(file_), LOCK_EX);
fprintf(file_, "%s", m.c_str());
fflush(file_);
flock(fileno(file_), LOCK_UN);
}
}
void Log(const std::string& m) {
const time_t rawtime = time(NULL);
tm tm_info;
localtime_r(&rawtime, &tm_info);
char tm_str[26];
strftime(tm_str, 26, "%Y-%m-%d %H:%M:%S", &tm_info);
std::ostringstream oss;
oss << "<" << tm_str << std::dec << " pid" << GetPid() << " tid" << GetTid() << "> " << m;
Put(oss.str());
}
FILE* file_;
bool dirty_;
bool streaming_;
bool messaging_;
static mutex_t mutex_;
static Logger* instance_;
std::map<uint32_t, std::string> message_;
};
} // namespace util
} // namespace rocprofiler
#define ERR_LOGGING(stream) { \
rocprofiler::util::Logger::Instance() \
<< "error: " \
<< rocprofiler::util::Logger::begm \
<< stream \
<< rocprofiler::util::Logger::endl; \
}
#define INFO_LOGGING(stream) { \
rocprofiler::util::Logger::Instance() \
<< "info: " \
<< rocprofiler::util::Logger::begm \
<< stream \
<< rocprofiler::util::Logger::endl; \
}
#ifdef DEBUG
# define DBG_LOGGING(stream) { \
rocprofiler::util::Logger::Instance() << rocprofiler::util::Logger::begm \
<< "debug: \"" << stream << "\"" << \
<< " in " << __FUNCTION__ \
<< " at " << __FILE__ << " line " << __LINE__ \
<< rocprofiler::util::Logger::endl; \
}
#endif
#endif // SRC_UTIL_LOGGER_H_
+279
Voir le fichier
@@ -0,0 +1,279 @@
#ifndef _SRC_XML_EXPR_H
#define _SRC_XML_EXPR_H
#include <exception>
#include <map>
#include <string>
#include <iostream>
#include <sstream>
#include <string.h>
namespace xml {
class exception_t : public std::exception {
public:
explicit exception_t(const std::string& msg) : str_(msg) {}
const char* what() const throw() { return str_.c_str(); }
protected:
const std::string str_;
};
typedef uint64_t args_t;
typedef std::map<std::string, args_t> args_map_t;
class Expr;
template <class T>
class any_cache_t {
public:
virtual ~any_cache_t() {}
virtual bool Lookup(const std::string& name, T& result) const = 0;
};
typedef any_cache_t<std::string> expr_cache_t;
typedef any_cache_t<args_t> args_cache_t;
class bin_expr_t {
public:
static const bin_expr_t* CreateExpr(const bin_expr_t* arg1, const bin_expr_t* arg2, const char op);
static const bin_expr_t* CreateArg(Expr *obj, const std::string str);
bin_expr_t() : arg1_(NULL), arg2_(NULL) {}
bin_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : arg1_(arg1), arg2_(arg2) {}
virtual args_t Eval(const args_cache_t& args) const = 0;
virtual std::string Symbol() const = 0;
std::string String() const {
std::string str;
if (arg1_) { str = "(" + arg1_->String() + " " + Symbol() + " " + arg2_->String() + ")"; }
else str = Symbol();
return str;
}
protected:
const bin_expr_t* arg1_;
const bin_expr_t* arg2_;
};
class Expr {
public:
explicit Expr(const std::string& expr, const expr_cache_t* cache) :
expr_(expr),
pos_(0),
sub_count_(0),
cache_(cache)
{
sub_vec_ = new std::vector<const Expr*>;
var_vec_ = new std::vector<std::string>;
tree_ = ParseExpr();
}
explicit Expr(const std::string& expr, const Expr* obj) :
expr_(expr),
pos_(0),
sub_count_(0),
cache_(obj->cache_),
sub_vec_(obj->sub_vec_),
var_vec_(obj->var_vec_)
{
sub_vec_->push_back(this);
tree_ = ParseExpr();
if (!SubCheck()) throw exception_t("expr '" + expr_ + "', bad parenthesis count");
}
~Expr() {
delete cache_;
for (auto it : *sub_vec_) delete it;
delete sub_vec_;
delete var_vec_;
}
std::string GetStr() const { return expr_; }
const expr_cache_t* GetCache() const { return cache_; }
const bin_expr_t* GetTree() const { return tree_; }
args_t Eval(const args_cache_t& args) const { return tree_->Eval(args); }
std::string Lookup(const std::string& str) const {
std::string result;
if (cache_ && !(cache_->Lookup(str, result))) throw exception_t("expr '" + expr_ + "', lookup '" + str + "' failed");
return result;
}
void AddVar(const std::string& str) {
bool found = false;
for (std::string s : *var_vec_) if (s == str) found = true;
if (!found) var_vec_->push_back(str);
}
const std::vector<std::string>& GetVars() const { return *var_vec_; }
std::string String() const { return tree_->String(); }
private:
const bin_expr_t* ParseExpr() {
const bin_expr_t* expr = ParseArg();
while (!IsEnd()) {
const char op = Symb();
const bin_expr_t* second_arg = NULL;
if (IsSymb(')')) {
Next();
SubClose();
break;
} if (IsSymb('*') || IsSymb('/')) {
Next();
second_arg = ParseArg();
expr = bin_expr_t::CreateExpr(expr, second_arg, op);
} else if (IsSymb('+') || IsSymb('-')) {
Next();
second_arg = ParseExpr();
expr = bin_expr_t::CreateExpr(expr, second_arg, op);
break;
} else {
throw exception_t("expr '" + expr_ + "', bad operator '" + op + "'");
}
}
return expr;
}
const bin_expr_t* ParseArg() {
const bin_expr_t* arg = NULL;
if (IsSymb('(')) {
Next();
SubOpen();
arg = ParseExpr();
} else {
const unsigned pos = FindOp();
const std::string str = CutTill(pos);
arg = bin_expr_t::CreateArg(this, str);
if (arg == NULL) throw exception_t("expr '" + expr_ + "', bad argument '" + str + "'");
}
return arg;
}
char Symb() const { return Symb(pos_); }
char Symb(const unsigned ind) const { return expr_[ind]; }
bool IsEnd() const { return (pos_ >= expr_.length()); }
bool IsSymb(const char c) const { return IsSymb(pos_, c); }
bool IsSymb(const unsigned ind, const char c) const { return (expr_[ind] == c); }
void Next() { ++pos_; }
void SubOpen() { ++sub_count_; }
void SubClose() { --sub_count_; }
bool SubCheck() const { return (sub_count_ == 0); }
unsigned FindOp() const {
unsigned i = pos_;
while (i < expr_.length()) {
switch (Symb(i)) {
case '*':
case '/':
case '+':
case '-':
case '(':
case ')':
goto end;
}
++i;
}
end:
return i;
}
std::string CutTill(const unsigned pos) {
const std::string str = (pos > pos_) ? expr_.substr(pos_, pos - pos_) : "";
pos_ = pos;
return str;
}
const std::string expr_;
unsigned pos_;
unsigned sub_count_;
const bin_expr_t* tree_;
const expr_cache_t* const cache_;
std::vector<const Expr*>* sub_vec_;
std::vector<std::string>* var_vec_;
};
class add_expr_t : public bin_expr_t {
public:
add_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {}
args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) + arg2_->Eval(args)); }
std::string Symbol() const { return "+"; }
};
class sub_expr_t : public bin_expr_t {
public:
sub_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {}
args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) - arg2_->Eval(args)); }
std::string Symbol() const { return "-"; }
};
class mul_expr_t : public bin_expr_t {
public:
mul_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {}
args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) * arg2_->Eval(args)); }
std::string Symbol() const { return "*"; }
};
class div_expr_t : public bin_expr_t {
public:
div_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {}
args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) / arg2_->Eval(args)); }
std::string Symbol() const { return "/"; }
};
class const_expr_t : public bin_expr_t {
public:
const_expr_t(const args_t value) : value_(value) {}
args_t Eval(const args_cache_t&) const { return value_; }
std::string Symbol() const { std::ostringstream os; os << value_; return os.str(); }
private:
const args_t value_;
};
class var_expr_t : public bin_expr_t {
public:
var_expr_t(const std::string name) : name_(name) {}
args_t Eval(const args_cache_t& args) const {
args_t result = 0;
if (!args.Lookup(name_, result)) throw exception_t("expr arg lookup '" + name_ + "' failed");
return result;
}
std::string Symbol() const { return name_; }
private:
const std::string name_;
};
inline const bin_expr_t* bin_expr_t::CreateExpr(const bin_expr_t* arg1, const bin_expr_t* arg2, const char op) {
const bin_expr_t* expr = NULL;
switch (op) {
case '+':
expr = new add_expr_t(arg1, arg2);
break;
case '-':
expr = new sub_expr_t(arg1, arg2);
break;
case '*':
expr = new mul_expr_t(arg1, arg2);
break;
case '/':
expr = new div_expr_t(arg1, arg2);
break;
}
return expr;
}
inline const bin_expr_t* bin_expr_t::CreateArg(Expr* obj, const std::string str) {
const bin_expr_t* arg = NULL;
const unsigned i = strspn(str.c_str(), "1234567890");
if (i == str.length()) {
const unsigned value = atoi(str.c_str());
arg = new const_expr_t(value);
} else {
const std::string sub_expr = obj->Lookup(str);
if (sub_expr.empty()) {
arg = new var_expr_t(str);
obj->AddVar(str);
} else {
const Expr* expr = new Expr(sub_expr, obj);
arg = expr->GetTree();
}
}
return arg;
}
} // namespace xml
#endif // _SRC_XML_EXPR_H
+221
Voir le fichier
@@ -0,0 +1,221 @@
#ifndef SRC_XML_XML_H_
#define SRC_XML_XML_H_
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <fstream>
#include <iostream>
#include <map>
#include <vector>
namespace xml {
class Xml {
public:
typedef std::vector<char> token_t;
struct level_t {
std::string tag;
std::vector<level_t*> nodes;
std::map<std::string, std::string> opts;
};
typedef std::vector<level_t*> nodes_vec_t;
enum {
DECL_STATE,
BODY_STATE
};
Xml(const char* file_name) :
file_name_(file_name),
file_line_(0),
data_size_(0),
index_(0),
state_(BODY_STATE),
level_(NULL),
comment_(false)
{
AddLevel("top");
fd_ = open(file_name, O_RDONLY);
if (fd_ == -1) {
std::cout << "XML file not found: " << file_name << std::endl;
return;
}
token_t remainder;
while (1) {
token_t token = (remainder.size()) ? remainder : NextToken();
remainder.clear();
// End of file
if (token.size() == 0) break;
// token_t token1 = token;
// token1.push_back('\0');
// std::cout << "> " << &token1[0] << std::endl;
switch(state_) {
case BODY_STATE:
if (token[0] == '<') {
bool node_begin = true;
unsigned ind = 1;
if (token[1] == '/') {
node_begin = false;
++ind;
}
unsigned i = ind;
while (i < token.size()) { if (token[i] == '>') break; ++i; }
for (unsigned j = i + 1; j < token.size(); ++j) remainder.push_back(token[j]);
if (i == token.size()) {
if (node_begin) state_ = DECL_STATE;
else BadFormat(token);
token.push_back('\0');
} else token[i] = '\0';
const char* tag = strdup(&token[ind]);
if (node_begin) {
AddLevel(tag);
} else {
if (strncmp(CurrentLevel().c_str(), tag, strlen(tag))) {
token.back() = '>';
BadFormat(token);
}
UpLevel();
}
} else BadFormat(token);
break;
case DECL_STATE:
if (token[0] == '>') {
state_ = BODY_STATE;
for (unsigned j = 1; j < token.size(); ++j) remainder.push_back(token[j]);
continue;
} else {
token.push_back('\0');
unsigned j = 0;
for (j = 0; j < token.size(); ++j) if (token[j] == '=') break;
if (j == token.size()) BadFormat(token);
token[j] = '\0';
const char* key = &token[0];
const char* value = &token[j + 1];
AddOption(key, value);
}
break;
default:
std::cout << "Wrong state: " << state_ << std::endl;
exit(1);
}
}
}
std::vector<level_t*> GetNodes(std::string global_tag) {
return map_[global_tag];
}
void Print() const {
for(auto& elem : map_) {
for (auto node : elem.second) {
if (node->opts.size()) {
std::cout << elem.first << ":" << std::endl;
for (auto& opt : node->opts) {
std::cout << " " << opt.first << " = " << opt.second << std::endl;
}
}
}
}
}
private:
bool LineEndCheck() {
bool found = false;
if (buffer_[index_] == '\n') {
buffer_[index_] = ' ';
++file_line_;
found = true;
comment_ = false;
} else if (comment_ || (buffer_[index_] == '#')) {
found = true;
comment_ = true;
}
return found;
}
token_t NextToken() {
token_t token;
while (1) {
if (data_size_ == 0) {
data_size_ = read(fd_, buffer_, buf_size_);
if (data_size_ <= 0) break;
}
if (token.empty()) while ((index_ < data_size_) && ((buffer_[index_] == ' ') || LineEndCheck())) {
++index_;
}
while ((index_ < data_size_) && (buffer_[index_] != ' ') && !LineEndCheck()) {
token.push_back(buffer_[index_++]);
}
if (index_ == data_size_) {
index_ = 0;
data_size_ = 0;
} else break;
}
return token;
}
void BadFormat(token_t token) {
token.push_back('\0');
std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '" << &token[0] << "'" << std::endl;
exit(1);
}
void AddLevel(const std::string& tag) {
level_t* level = new level_t;
level->tag = tag;
if (level_) {
level_->nodes.push_back(level);
stack_.push_back(level_);
}
level_ = level;
std::string global_tag;
for (level_t* level : stack_) { global_tag += level->tag + "."; }
global_tag += tag;
map_[global_tag].push_back(level_);
}
void UpLevel() {
level_ = stack_.back();
stack_.pop_back();
}
std::string CurrentLevel() const {
return level_->tag;
}
void AddOption(const std::string& key, const std::string& value) {
level_->opts[key] = value;
}
const char* file_name_;
unsigned file_line_;
int fd_;
static const unsigned buf_size_ = 256;
char buffer_[buf_size_];
unsigned data_size_;
unsigned index_;
unsigned state_;
level_t* level_;
std::vector<level_t*> stack_;
std::map<std::string, nodes_vec_t> map_;
bool comment_;
};
} // namespace xml
#endif // SRC_XML_XML_H_
+41
Voir le fichier
@@ -0,0 +1,41 @@
cmake_minimum_required ( VERSION 3.5.0 )
set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE )
set ( EXE_NAME "ctrl" )
if ( NOT DEFINED TEST_DIR )
set ( TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
project ( ${EXE_NAME} )
## Set build environment
include ( env )
set ( ROCPROFILER_TARGET "rocprofiler64" )
endif ()
## Util sources
file( GLOB UTIL_SRC "${TEST_DIR}/util/*.cpp" )
## Test control sources
set ( CTRL_SRC
${TEST_DIR}/ctrl/test.cpp
${TEST_DIR}/ctrl/test_hsa.cpp
)
## Test kernels sources
set ( TEST_NAME simple_convolution )
set ( KERN_SRC ${TEST_DIR}/${TEST_NAME}/${TEST_NAME}.cpp )
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${TEST_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" )
## Building test executable
add_executable ( ${EXE_NAME} ${KERN_SRC} ${CTRL_SRC} ${UTIL_SRC} )
target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
target_link_libraries( ${EXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} c stdc++ dl pthread rt atomic )
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" )
execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/*.xml ${PROJECT_BINARY_DIR}" )
## Build test library
set ( TEST_LIB "tool" )
set ( TEST_LIB_SRC ${TEST_DIR}/ctrl/tool.cpp )
add_library ( ${TEST_LIB} SHARED ${TEST_LIB_SRC} )
target_include_directories ( ${TEST_LIB} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} )
target_link_libraries( ${TEST_LIB} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} c stdc++ dl pthread rt atomic )
+85
Voir le fichier
@@ -0,0 +1,85 @@
/******************************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list
of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#ifndef TEST_CTRL_RUN_KERNEL_H_
#define TEST_CTRL_RUN_KERNEL_H_
#include "ctrl/test_hsa.h"
#include "util/test_assert.h"
template <class Kernel, class Test> bool RunKernel(int argc, char* argv[]) {
bool ret_val = false;
// Create test kernel object
Kernel test_kernel;
TestAql* test_aql = new TestHsa(&test_kernel);
test_aql = new Test(test_aql);
TEST_ASSERT(test_aql != NULL);
if (test_aql == NULL) return 1;
// Initialization of Hsa Runtime
ret_val = test_aql->Initialize(argc, argv);
if (ret_val == false) {
std::cerr << "Error in the test initialization" << std::endl;
// TEST_ASSERT(ret_val);
return false;
}
// Setup Hsa resources needed for execution
ret_val = test_aql->Setup();
if (ret_val == false) {
std::cerr << "Error in creating hsa resources" << std::endl;
TEST_ASSERT(ret_val);
return false;
}
// Run test kernel
ret_val = test_aql->Run();
if (ret_val == false) {
std::cerr << "Error in running the test kernel" << std::endl;
TEST_ASSERT(ret_val);
return false;
}
// Verify the results of the execution
ret_val = test_aql->VerifyResults();
if (ret_val) {
std::clog << "Test : Passed" << std::endl;
} else {
std::clog << "Test : Failed" << std::endl;
}
// Print time taken by sample
test_aql->PrintTime();
test_aql->Cleanup();
delete test_aql;
return ret_val;
}
#endif // TEST_CTRL_RUN_KERNEL_H_
+126
Voir le fichier
@@ -0,0 +1,126 @@
#include <hsa.h>
#include <string.h>
#include <iostream>
#include "ctrl/run_kernel.h"
#include "ctrl/test_aql.h"
#include "ctrl/test_hsa.h"
#include "inc/rocprofiler.h"
#include "simple_convolution/simple_convolution.h"
#include "util/test_assert.h"
int main(int argc, char** argv) {
bool ret_val = false;
// HSA status
hsa_status_t status = HSA_STATUS_ERROR;
// Profiling context
rocprofiler_t* context = NULL;
// Profiling properties
rocprofiler_properties_t properties;
// Number of context invocation
uint32_t invocation = 0;
#if 0
// Profiling info objects
const unsigned info_count = 1;
rocprofiler_info_t info[info_count];
// PMC events
memset(info, 0, sizeof(info));
info[0].type = ROCPROFILER_TYPE_METRIC;
info[0].name = "SQ_WAVES";
#else
// Profiling info objects
const unsigned info_count = 3;
rocprofiler_info_t info[info_count];
// PMC events
memset(info, 0, sizeof(info));
info[0].type = ROCPROFILER_TYPE_METRIC;
info[0].name = "SQ_WAVES";
info[1].type = ROCPROFILER_TYPE_METRIC;
info[1].name = "SQ_ITEMS";
// Tracing parameters
const unsigned parameter_count = 2;
rocprofiler_parameter_t parameters[parameter_count];
info[2].name = "THREAD_TRACE";
info[2].type = ROCPROFILER_TYPE_TRACE;
info[2].parameters = parameters;
info[2].parameter_count = parameter_count;
parameters[0].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK;
parameters[0].value = 0;
parameters[1].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK;
parameters[1].value = 0;
#endif
// Creating profiling context
properties = {};
properties.queue_depth = 128;
status = rocprofiler_open(TestHsa::HsaAgentId(), info, info_count, &context, ROCPROFILER_MODE_STANDALONE|ROCPROFILER_MODE_OWNQUEUE, &properties);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
TestHsa::SetQueue(properties.queue);
// Adding dispatch observer
status = rocprofiler_dispatch_observer(rocprofiler_dispatch_callback, context);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
// Querying the number of context invocation
status = rocprofiler_invocation(context, &invocation);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
// Dispatching profiled kernel n-times to collect all counter groups data
unsigned n = 0;
while(1) {
std::cout << "> " << n << "/" << invocation << std::endl;
#if 0
status = rocprofiler_start(context);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
ret_val = RunKernel<SimpleConvolution, TestAql>(argc, argv);
status = rocprofiler_stop(context);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
#else
ret_val = RunKernel<SimpleConvolution, TestAql>(argc, argv);
#endif
status = rocprofiler_sample(context);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
for (rocprofiler_info_t* p = info; p < info + info_count; ++p) {
std::cout << (p - info) << ": " << p->name;
switch (p->data.kind) {
case ROCPROFILER_INT64:
std::cout << std::dec << " result64 (" << p->data.result64 << ")" << std::endl;
break;
case ROCPROFILER_BYTES: {
const char* ptr = reinterpret_cast<const char*>(p->data.result_bytes.ptr);
uint64_t size = 0;
for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) {
size = *reinterpret_cast<const uint64_t*>(ptr);
const char* data = ptr + sizeof(size);
std::cout << std::endl;
std::cout << std::hex << " data (" << (void*)data << ")" << std::endl;
std::cout << std::dec << " size (" << size << ")" << std::endl;
ptr = data + size;
}
break;
}
default:
std::cout << "result kind (" << p->data.kind << ")" << std::endl;
TEST_ASSERT(false);
}
}
++n;
if (n < invocation) {
status = rocprofiler_next(context);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
continue;
}
break;
}
// Finishing cleanup
// Deleting profiling context will delete all allocated resources
status = rocprofiler_close(context);
TEST_STATUS(status == HSA_STATUS_SUCCESS);
return (ret_val) ? 0 : 1;
}
+14
Voir le fichier
@@ -0,0 +1,14 @@
#include <hsa.h>
#include <string.h>
#include <iostream>
#include "ctrl/run_kernel.h"
#include "ctrl/test_aql.h"
#include "simple_convolution/simple_convolution.h"
int main(int argc, char** argv) {
TestHsa::HsaInstantiate();
for (int i = 0; i < 3; ++i) RunKernel<SimpleConvolution, TestAql>(argc, argv);
TestHsa::HsaShutdown();
return 0;
}
+80
Voir le fichier
@@ -0,0 +1,80 @@
/******************************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list
of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#ifndef TEST_CTRL_TEST_AQL_H_
#define TEST_CTRL_TEST_AQL_H_
#include <hsa.h>
#include <hsa_ven_amd_aqlprofile.h>
#include "util/hsa_rsrc_factory.h"
// Test AQL interface
class TestAql {
public:
explicit TestAql(TestAql* t = 0) : test_(t) {}
virtual ~TestAql() { if (test_) delete test_; }
TestAql* Test() { return test_; }
virtual AgentInfo* GetAgentInfo() { return (test_) ? test_->GetAgentInfo() : 0; }
virtual hsa_queue_t* GetQueue() { return (test_) ? test_->GetQueue() : 0; }
virtual HsaRsrcFactory* GetRsrcFactory() { return (test_) ? test_->GetRsrcFactory() : 0; }
// Initialize application environment including setting
// up of various configuration parameters based on
// command line arguments
// @return bool true on success and false on failure
virtual bool Initialize(int argc, char** argv) {
return (test_) ? test_->Initialize(argc, argv) : true;
}
// Setup application parameters for exectuion
// @return bool true on success and false on failure
virtual bool Setup() { return (test_) ? test_->Setup() : true; }
// Run the kernel
// @return bool true on success and false on failure
virtual bool Run() { return (test_) ? test_->Run() : true; }
// Verify results
// @return bool true on success and false on failure
virtual bool VerifyResults() { return (test_) ? test_->VerifyResults() : true; }
// Print to console the time taken to execute kernel
virtual void PrintTime() {
if (test_) test_->PrintTime();
}
// Release resources e.g. memory allocations
// @return bool true on success and false on failure
virtual bool Cleanup() { return (test_) ? test_->Cleanup() : true; }
private:
TestAql* const test_;
};
#endif // TEST_CTRL_TEST_AQL_H_
+252
Voir le fichier
@@ -0,0 +1,252 @@
/******************************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list
of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#include "ctrl/test_hsa.h"
#include <atomic>
#include "util/test_assert.h"
#include "util/helper_funcs.h"
#include "util/hsa_rsrc_factory.h"
HsaRsrcFactory* TestHsa::hsa_rsrc_ = NULL;
AgentInfo* TestHsa::agent_info_ = NULL;
hsa_queue_t* TestHsa::hsa_queue_ = NULL;
uint32_t TestHsa::agent_id_ = 0;
HsaRsrcFactory* TestHsa::HsaInstantiate(const uint32_t agent_ind) {
// Instantiate an instance of Hsa Resources Factory
if (hsa_rsrc_ == NULL) {
agent_id_ = agent_ind;
hsa_rsrc_ = HsaRsrcFactory::Create();
// Print properties of the agents
hsa_rsrc_->PrintGpuAgents("> GPU agents");
// Create an instance of Gpu agent
if (!hsa_rsrc_->GetGpuAgentInfo(agent_ind, &agent_info_)) {
agent_info_ = NULL;
std::cerr << "> error: agent[" << agent_ind << "] is not found" << std::endl;
return NULL;
}
std::clog << "> Using agent[" << agent_ind << "] : " << agent_info_->name << std::endl;
// Create an instance of Aql Queue
if (hsa_queue_ == NULL) {
uint32_t num_pkts = 128;
if(hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) {
hsa_queue_ = NULL;
}
}
}
return hsa_rsrc_;
}
void TestHsa::HsaShutdown() { if (hsa_rsrc_) hsa_rsrc_->Destroy(); }
bool TestHsa::Initialize(int arg_cnt, char** arg_list) {
std::clog << "TestHsa::Initialize :" << std::endl;
// Instantiate a Timer object
setup_timer_idx_ = hsa_timer_.CreateTimer();
dispatch_timer_idx_ = hsa_timer_.CreateTimer();
hsa_rsrc_ = HsaInstantiate(agent_id_);
if (hsa_rsrc_ == NULL) {
TEST_ASSERT(false);
return false;
}
// Obtain handle of signal
hsa_rsrc_->CreateSignal(1, &hsa_signal_);
// Obtain the code object file name
std::string agentName(agent_info_->name);
if (agentName.compare(0, 4, "gfx8") == 0) {
brig_path_obj_.append("gfx8");
} else if (agentName.compare(0, 4, "gfx9") == 0) {
brig_path_obj_.append("gfx9");
} else {
TEST_ASSERT(false);
return false;
}
brig_path_obj_.append("_" + name_ + ".hsaco");
return true;
}
bool TestHsa::Setup() {
std::clog << "TestHsa::setup :" << std::endl;
// Start the timer object
hsa_timer_.StartTimer(setup_timer_idx_);
mem_map_t& mem_map = test_->GetMemMap();
for (mem_it_t it = mem_map.begin(); it != mem_map.end(); ++it) {
mem_descr_t& des = it->second;
void* ptr = (des.local) ? hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size)
: hsa_rsrc_->AllocateSysMemory(agent_info_, des.size);
des.ptr = ptr;
TEST_ASSERT(ptr != NULL);
if (ptr == NULL) return false;
}
test_->Init();
// Load and Finalize Kernel Code Descriptor
char* brig_path = (char*)brig_path_obj_.c_str();
const bool ret_val =
hsa_rsrc_->LoadAndFinalize(agent_info_, brig_path, strdup(name_.c_str()), &kernel_code_desc_);
if (ret_val == false) {
std::cerr << "Error in loading and finalizing Kernel" << std::endl;
return ret_val;
}
// Stop the timer object
hsa_timer_.StopTimer(setup_timer_idx_);
setup_time_taken_ = hsa_timer_.ReadTimer(setup_timer_idx_);
total_time_taken_ = setup_time_taken_;
return true;
}
bool TestHsa::Run() {
std::clog << "TestHsa::run :" << std::endl;
const uint32_t work_group_size = 64;
const uint32_t work_grid_size = test_->GetGridSize();
uint32_t group_segment_size = 0;
uint32_t private_segment_size = 0;
const size_t kernarg_segment_size = test_->GetKernargSize();
uint64_t code_handle = 0;
// Retrieve the amount of group memory needed
hsa_executable_symbol_get_info(
kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &group_segment_size);
// Retrieve the amount of private memory needed
hsa_executable_symbol_get_info(kernel_code_desc_,
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
&private_segment_size);
// Check the kernel args size
size_t size_info = 0;
hsa_executable_symbol_get_info(
kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &size_info);
TEST_ASSERT(kernarg_segment_size == size_info);
if (kernarg_segment_size != size_info) return false;
// Retrieve handle of the code block
hsa_executable_symbol_get_info(kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
&code_handle);
// Initialize the dispatch packet.
hsa_kernel_dispatch_packet_t aql;
memset(&aql, 0, sizeof(aql));
// Set the packet's type, barrier bit, acquire and release fences
aql.header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE;
aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE;
// Populate Aql packet with default values
aql.setup = 1;
aql.grid_size_x = work_grid_size;
aql.grid_size_y = 1;
aql.grid_size_z = 1;
aql.workgroup_size_x = work_group_size;
aql.workgroup_size_y = 1;
aql.workgroup_size_z = 1;
// Bind the kernel code descriptor and arguments
aql.kernel_object = code_handle;
aql.kernarg_address = test_->GetKernargPtr();
aql.group_segment_size = group_segment_size;
aql.private_segment_size = private_segment_size;
// Initialize Aql packet with handle of signal
aql.completion_signal = hsa_signal_;
// Compute the write index of queue and copy Aql packet into it
const uint64_t que_idx = hsa_queue_load_write_index_relaxed(hsa_queue_);
const uint32_t mask = hsa_queue_->size - 1;
std::clog << "> Executing kernel: \"" << name_ << "\"" << std::endl;
// Start the timer object
hsa_timer_.StartTimer(dispatch_timer_idx_);
// Disable packet so that submission to HW is complete
const auto header = aql.header;
aql.header = HSA_PACKET_TYPE_INVALID << HSA_PACKET_HEADER_TYPE;
// Copy Aql packet into queue buffer
((hsa_kernel_dispatch_packet_t*)(hsa_queue_->base_address))[que_idx & mask] = aql;
// After AQL packet is fully copied into queue buffer
// update packet header from invalid state to valid state
std::atomic_thread_fence(std::memory_order_release);
((hsa_kernel_dispatch_packet_t*)(hsa_queue_->base_address))[que_idx & mask].header = header;
// Increment the write index and ring the doorbell to dispatch the kernel.
hsa_queue_store_write_index_relaxed(hsa_queue_, (que_idx + 1));
hsa_signal_store_relaxed(hsa_queue_->doorbell_signal, que_idx);
std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl;
// Wait on the dispatch signal until the kernel is finished.
// Update wait condition to HSA_WAIT_STATE_ACTIVE for Polling
hsa_signal_wait_acquire(hsa_signal_, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
HSA_WAIT_STATE_BLOCKED);
// Stop the timer object
hsa_timer_.StopTimer(dispatch_timer_idx_);
dispatch_time_taken_ = hsa_timer_.ReadTimer(dispatch_timer_idx_);
total_time_taken_ += dispatch_time_taken_;
// Copy kernel buffers from local memory into system memory
hsa_rsrc_->TransferData(test_->GetOutputPtr(), test_->GetLocalPtr(), test_->GetOutputSize(),
false);
test_->PrintOutput();
return true;
}
bool TestHsa::VerifyResults() {
// Compare the results and see if they match
const void* const refout_ptr = test_->GetRefoutPtr();
const int32_t cmp_val =
(refout_ptr != NULL) ? memcmp(test_->GetOutputPtr(), refout_ptr, test_->GetOutputSize()) : 0;
return (cmp_val == 0);
}
void TestHsa::PrintTime() {
std::clog << "Time taken for Setup by " << this->name_ << " : " << this->setup_time_taken_
<< std::endl;
std::clog << "Time taken for Dispatch by " << this->name_ << " : " << this->dispatch_time_taken_
<< std::endl;
std::clog << "Time taken in Total by " << this->name_ << " : " << this->total_time_taken_
<< std::endl;
}
bool TestHsa::Cleanup() { return true; }
+125
Voir le fichier
@@ -0,0 +1,125 @@
/******************************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list
of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#ifndef TEST_CTRL_TEST_HSA_H_
#define TEST_CTRL_TEST_HSA_H_
#include "ctrl/test_aql.h"
#include "ctrl/test_kernel.h"
#include "util/hsa_rsrc_factory.h"
#include "util/perf_timer.h"
// Class implements HSA test
class TestHsa : public TestAql {
public:
// Instantiate HSA resources
static HsaRsrcFactory* HsaInstantiate(const uint32_t agent_ind = agent_id_);
static void HsaShutdown();
static void SetQueue(hsa_queue_t* queue) { hsa_queue_ = queue; }
static uint32_t HsaAgentId() { return agent_id_; }
// Constructor
explicit TestHsa(TestKernel* test) : test_(test), name_(test->Name()) {
total_time_taken_ = 0;
setup_time_taken_ = 0;
dispatch_time_taken_ = 0;
}
// Get methods for Agent Info, HAS queue, HSA Resourcse Manager
AgentInfo* GetAgentInfo() { return agent_info_; }
hsa_queue_t* GetQueue() { return hsa_queue_; }
HsaRsrcFactory* GetRsrcFactory() { return hsa_rsrc_; }
// Initialize application environment including setting
// up of various configuration parameters based on
// command line arguments
// @return bool true on success and false on failure
bool Initialize(int argc, char** argv);
// Setup application parameters for exectuion
// @return bool true on success and false on failure
bool Setup();
// Run the BinarySearch kernel
// @return bool true on success and false on failure
bool Run();
// Verify against reference implementation
// @return bool true on success and false on failure
bool VerifyResults();
// Print to console the time taken to execute kernel
void PrintTime();
// Release resources e.g. memory allocations
// @return bool true on success and false on failure
bool Cleanup();
private:
typedef TestKernel::mem_descr_t mem_descr_t;
typedef TestKernel::mem_map_t mem_map_t;
typedef TestKernel::mem_it_t mem_it_t;
// Test object
TestKernel* test_;
// Path of Brig file
std::string brig_path_obj_;
// Used to track time taken to run the sample
double total_time_taken_;
double setup_time_taken_;
double dispatch_time_taken_;
// Handle of signal
hsa_signal_t hsa_signal_;
// Handle of Kernel Code Descriptor
hsa_executable_symbol_t kernel_code_desc_;
// Instance of timer object
uint32_t setup_timer_idx_;
uint32_t dispatch_timer_idx_;
PerfTimer hsa_timer_;
// Instance of Hsa Resources Factory
static HsaRsrcFactory* hsa_rsrc_;
// GPU id
static uint32_t agent_id_;
// Handle to an Hsa Gpu Agent
static AgentInfo* agent_info_;
// Handle to an Hsa Queue
static hsa_queue_t* hsa_queue_;
// Test kernel name
std::string name_;
};
#endif // TEST_CTRL_TEST_HSA_H_
+107
Voir le fichier
@@ -0,0 +1,107 @@
/******************************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list
of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#ifndef TEST_CTRL_TEST_KERNEL_H_
#define TEST_CTRL_TEST_KERNEL_H_
#include <stdint.h>
#include <map>
// Class implements kernel test
class TestKernel {
public:
// Memory descriptors IDs
enum { INPUT_DES_ID, OUTPUT_DES_ID, LOCAL_DES_ID, MASK_DES_ID, KERNARG_DES_ID, REFOUT_DES_ID };
// Memory descriptors vector declaration
struct mem_descr_t {
void* ptr;
uint32_t size;
bool local;
};
// Memory map declaration
typedef std::map<uint32_t, mem_descr_t> mem_map_t;
typedef mem_map_t::iterator mem_it_t;
typedef mem_map_t::const_iterator mem_const_it_t;
virtual ~TestKernel() {}
// Initialize method
virtual void Init() = 0;
// Return kernel memory map
mem_map_t& GetMemMap() { return mem_map_; }
// Return NULL descriptor
static mem_descr_t NullDescriptor() { return {NULL, 0, 0}; }
// Methods to get the kernel attributes
void* GetKernargPtr() const { return GetDescr(KERNARG_DES_ID).ptr; }
uint32_t GetKernargSize() const { return GetDescr(KERNARG_DES_ID).size; }
void* GetOutputPtr() const { return GetDescr(OUTPUT_DES_ID).ptr; }
uint32_t GetOutputSize() const { return GetDescr(OUTPUT_DES_ID).size; }
void* GetLocalPtr() const { return GetDescr(LOCAL_DES_ID).ptr; }
void* GetRefoutPtr() const { return GetDescr(REFOUT_DES_ID).ptr; }
virtual uint32_t GetGridSize() const = 0;
// Print output
virtual void PrintOutput() const = 0;
// Return name
virtual std::string Name() const = 0;
protected:
// Set system memory descriptor
bool SetSysDescr(const uint32_t& id, const uint32_t& size) {
return SetMemDescr(id, size, false);
}
// Set local memory descriptor
bool SetLocalDescr(const uint32_t& id, const uint32_t& size) {
return SetMemDescr(id, size, true);
}
// Get memory descriptor
mem_descr_t GetDescr(const uint32_t& id) const {
mem_const_it_t it = mem_map_.find(id);
return (it != mem_map_.end()) ? it->second : NullDescriptor();
}
private:
// Set memory descriptor
bool SetMemDescr(const uint32_t& id, const uint32_t& size, const bool& local) {
const mem_descr_t des = {NULL, size, local};
auto ret = mem_map_.insert(mem_map_t::value_type(id, des));
return ret.second;
}
// Kernel memory map object
mem_map_t mem_map_;
};
#endif // TEST_CTRL_TEST_KERNEL_H_
+45
Voir le fichier
@@ -0,0 +1,45 @@
/******************************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list
of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#ifndef TEST_CTRL_TEST_PGEN_H_
#define TEST_CTRL_TEST_PGEN_H_
#include "ctrl/test_pmgr.h"
// SimpleConvolution: Class implements OpenCL SimpleConvolution sample
class TestPGen : public TestPMgr {
protected:
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
packet_t* PrePacket() { return reinterpret_cast<packet_t*>(&pre_packet_); }
packet_t* PostPacket() { return reinterpret_cast<packet_t*>(&post_packet_); }
public:
explicit TestPGen(TestAql* t) : TestPMgr(t) {}
};
#endif // TEST_CTRL_TEST_PGEN_H_
+78
Voir le fichier
@@ -0,0 +1,78 @@
/******************************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list
of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#ifndef TEST_CTRL_TEST_PGEN_ROCP_H_
#define TEST_CTRL_TEST_PGEN_ROCP_H_
#include <list>
#include <vector>
#include "ctrl/test_pgen.h"
#include "util/test_assert.h"
hsa_status_t TestPGenRocpCallback(hsa_ven_amd_aqlprofile_info_type_t info_type,
hsa_ven_amd_aqlprofile_info_data_t* info_data,
void* callback_data) {
hsa_status_t status = HSA_STATUS_SUCCESS;
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> passed_data_t;
reinterpret_cast<passed_data_t*>(callback_data)->push_back(*info_data);
return status;
}
// Class implements PMC profiling
class TestPGenRocp : public TestPGen {
public:
explicit TestPGenRocp(TestAql* t) : TestPGen(t) { std::clog << "Test: PGen ROCP" << std::endl; }
bool Initialize(int /*arg_cnt*/, char** /*arg_list*/) {
status = rocprofiler_on_dispatch(&profile_, PrePacket(), PostPacket());
TEST_STATUS(status != HSA_STATUS_SUCCESS);
return (status == HSA_STATUS_SUCCESS);
}
private:
bool BuildPackets() { return true; }
bool DumpData() {
std::clog << "TestPGenRocp::DumpData :" << std::endl;
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> callback_data_t;
callback_data_t data;
api_.hsa_ven_amd_aqlprofile_iterate_data(&profile_, TestPGenRocpCallback, &data);
for (callback_data_t::iterator it = data.begin(); it != data.end(); ++it) {
std::cout << std::dec << "event(block(" << it->pmc_data.event.block_name << "_"
<< it->pmc_data.event.block_index << "), id(" << it->pmc_data.event.counter_id
<< ")), sample(" << it->sample_id << "), result(" << it->pmc_data.result << ")"
<< std::endl;
}
return true;
}
};
#endif // TEST_CTRL_TEST_PGEN_ROCP_H_
+144
Voir le fichier
@@ -0,0 +1,144 @@
/******************************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list
of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#include "ctrl/test_pmgr.h"
#include <atomic>
#include "ctrl/test_assert.h"
bool TestPMgr::AddPacketGfx9(const packet_t* packet) {
packet_t aql_packet = *packet;
// Compute the write index of queue and copy Aql packet into it
uint64_t que_idx = hsa_queue_load_write_index_relaxed(GetQueue());
const uint32_t mask = GetQueue()->size - 1;
packet_t* slot = (reinterpret_cast<packet_t*>(GetQueue()->base_address)) + (que_idx & mask);
// Disable packet so that submission to HW is complete
const auto header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
aql_packet.header &= (~((1ul << HSA_PACKET_HEADER_WIDTH_TYPE) - 1)) << HSA_PACKET_HEADER_TYPE;
aql_packet.header |= HSA_PACKET_TYPE_INVALID << HSA_PACKET_HEADER_TYPE;
// Copy Aql packet into queue buffer
*slot = aql_packet;
// After AQL packet is fully copied into queue buffer
// update packet header from invalid state to valid state
auto header_atomic_ptr =
reinterpret_cast<std::atomic<uint16_t>*>(&slot->header);
header_atomic_ptr->store(header, std::memory_order_release);
// Increment the write index and ring the doorbell to dispatch the kernel.
hsa_queue_store_write_index_relaxed(GetQueue(), (que_idx + 1));
hsa_signal_store_relaxed(GetQueue()->doorbell_signal, que_idx);
return true;
}
bool TestPMgr::AddPacketGfx8(const packet_t* packet) {
// Create legacy devices PM4 data
const hsa_ext_amd_aql_pm4_packet_t* aql_packet = (const hsa_ext_amd_aql_pm4_packet_t*)packet;
slot_pm4_t data;
api_.hsa_ven_amd_aqlprofile_legacy_get_pm4(aql_packet, reinterpret_cast<void*>(data.words));
// Compute the write index of queue and copy Aql packet into it
uint64_t que_idx = hsa_queue_load_write_index_relaxed(GetQueue());
const uint32_t mask = GetQueue()->size - 1;
// Copy Aql/Pm4 blob into queue buffer
packet_t* ptr = (reinterpret_cast<packet_t*>(GetQueue()->base_address)) + (que_idx & mask);
slot_pm4_t* slot = reinterpret_cast<slot_pm4_t*>(ptr);
for (unsigned i = 1; i < SLOT_PM4_SIZE_DW; ++i) {
slot->words[i] = data.words[i];
}
// To maintain global order to ensure the prior copy of the packet contents is made visible
// before the header is updated.
// With in-order CP it will wait until the first packet in the blob will be valid
std::atomic<uint32_t>* header_atomic_ptr =
reinterpret_cast<std::atomic<uint32_t>*>(&slot->words[0]);
header_atomic_ptr->store(data.words[0], std::memory_order_release);
// Increment the write index and ring the doorbell to dispatch the kernel.
que_idx += SLOT_PM4_SIZE_AQLP - 1;
hsa_queue_store_write_index_relaxed(GetQueue(), (que_idx + 1));
hsa_signal_store_relaxed(GetQueue()->doorbell_signal, que_idx);
return true;
}
bool TestPMgr::AddPacket(const packet_t* packet) {
const char* agent_name = GetAgentInfo()->name;
return (strncmp(agent_name, "gfx8", 4) == 0) ? AddPacketGfx8(packet) : AddPacketGfx9(packet);
}
bool TestPMgr::Run() {
// Build Aql Pkts
const bool active = BuildPackets();
if (active) {
// Submit Pre-Dispatch Aql packet
AddPacket(&pre_packet_);
}
Test()->Run();
if (active) {
// Set post packet completion signal
post_packet_.completion_signal = post_signal_;
// Submit Post-Dispatch Aql packet
AddPacket(&post_packet_);
// Wait for Post-Dispatch packet to complete
hsa_signal_wait_acquire(post_signal_, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
HSA_WAIT_STATE_BLOCKED);
// Dumping profiling data
DumpData();
}
return true;
}
bool TestPMgr::Initialize(int argc, char** argv) {
TestAql::Initialize(argc, argv);
hsa_status_t status = HSA_STATUS_ERROR;
status = hsa_signal_create(1, 0, NULL, &post_signal_);
TEST_ASSERT(status == HSA_STATUS_SUCCESS);
status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &api_);
TEST_ASSERT(status == HSA_STATUS_SUCCESS);
return true;
}
TestPMgr::TestPMgr(TestAql* t) : TestAql(t), api_({0}) {
memset(&pre_packet_, 0, sizeof(pre_packet_));
memset(&post_packet_, 0, sizeof(post_packet_));
dummy_signal_.handle = 0;
post_signal_ = dummy_signal_;
memset(&api_, 0, sizeof(api_));
}
+70
Voir le fichier
@@ -0,0 +1,70 @@
/******************************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list
of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#ifndef TEST_CTRL_TEST_PMGR_H_
#define TEST_CTRL_TEST_PMGR_H_
#include <hsa.h>
#include <hsa_ven_amd_aqlprofile.h>
#include <atomic>
#include "ctrl/test_aql.h"
// Class implements profiling manager
class TestPMgr : public TestAql {
public:
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
explicit TestPMgr(TestAql* t);
bool Run();
protected:
packet_t pre_packet_;
packet_t post_packet_;
hsa_signal_t dummy_signal_;
hsa_signal_t post_signal_;
hsa_ven_amd_aqlprofile_1_00_pfn_t api_;
virtual bool BuildPackets() { return false; }
virtual bool DumpData() { return false; }
virtual bool Initialize(int argc, char** argv);
private:
enum {
SLOT_PM4_SIZE_DW = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(uint32_t),
SLOT_PM4_SIZE_AQLP = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t)
};
struct slot_pm4_t {
uint32_t words[SLOT_PM4_SIZE_DW];
};
bool AddPacket(const packet_t* packet);
bool AddPacketGfx8(const packet_t* packet);
bool AddPacketGfx9(const packet_t* packet);
};
#endif // TEST_CTRL_TEST_PMGR_H_
+297
Voir le fichier
@@ -0,0 +1,297 @@
#include <hsa.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <map>
#include <vector>
#include "inc/rocprofiler.h"
#include "util/xml.h"
#define PUBLIC_API __attribute__((visibility("default")))
#define CONSTRUCTOR_API __attribute__((constructor))
#define DESTRUCTOR_API __attribute__((destructor))
// Tool thread
pthread_t thread;
pthread_attr_t thr_attr;
bool thr_stop = false;
struct dispatch_data_t {
rocprofiler_info_t* info;
unsigned info_count;
unsigned group_index;
};
struct context_entry_t {
rocprofiler_group_t* group;
rocprofiler_info_t* info;
unsigned info_count;
rocprofiler_callback_data_t data;
};
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
unsigned context_array_size = 1;
context_entry_t* context_array = NULL;
unsigned context_array_index = 0;
const char* file_name;
FILE* file_handle = NULL;
void check_status(hsa_status_t status) {
if (status != HSA_STATUS_SUCCESS) {
const char* error_string = NULL;
rocprofiler_error_string(&error_string);
fprintf(stderr, "ERROR: %s\n", error_string);
exit(1);
}
}
unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); }
void print_info(FILE* file, const rocprofiler_info_t* info, const unsigned info_count, const char* str) {
if (str) fprintf(file, "%s:\n", str);
for (unsigned i= 0; i < info_count; ++i) {
const rocprofiler_info_t* p = &info[i];
fprintf(file, " %s ", p->name);
switch (p->data.kind) {
case ROCPROFILER_INT64:
fprintf(file, "(%lu)\n", p->data.result64);
break;
case ROCPROFILER_BYTES: {
fprintf(file, "(\n");
const char* ptr = reinterpret_cast<const char*>(p->data.result_bytes.ptr);
uint64_t size = 0;
for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) {
size = *reinterpret_cast<const uint64_t*>(ptr);
const char* data = ptr + sizeof(size);
fprintf(file, " data (%p), size (%lu)\n", data, size);
size = align_size(size, sizeof(uint64_t));
ptr = data + size;
}
fprintf(file, " )\n");
break;
}
default:
std::cout << "Bad result kind (" << p->data.kind << ")" << std::endl;
}
}
}
void print_group(FILE* file, const rocprofiler_group_t* group, const char* str) {
if (str) fprintf(file, "%s:\n", str);
for (unsigned i= 0; i < group->info_count; ++i) {
print_info(file, group->info[i], 1, NULL);
}
}
void store_context(context_entry_t context_entry) {
if(pthread_mutex_lock(&mutex) != 0) {
perror("pthread_mutex_lock");
exit(1);
}
if ((context_array == NULL) || (context_array_index >= context_array_size)) {
context_array_size *= 2;
context_array = reinterpret_cast<context_entry_t*>(realloc(context_array, context_array_size * sizeof(context_entry_t)));
}
context_array_index += 1;
context_array[context_array_index - 1] = context_entry;
if(pthread_mutex_unlock(&mutex) != 0) {
perror("pthread_mutex_unlock");
exit(1);
}
}
void dump_context(FILE *file, unsigned index) {
hsa_status_t status = HSA_STATUS_ERROR;
if (pthread_mutex_lock(&mutex) != 0) {
perror("pthread_mutex_lock");
exit(1);
}
context_entry_t* entry = &context_array[index];
rocprofiler_group_t* group = entry->group;
const rocprofiler_info_t* info = entry->info;
const unsigned info_count = entry->info_count;
fprintf(file, "Dispatch[%u], kernel_object(0x%lx):\n", index, entry->data.kernel_object);
if (pthread_mutex_unlock(&mutex) != 0) {
perror("pthread_mutex_unlock");
exit(1);
}
status = rocprofiler_get_group_data(group);
check_status(status);
//print_group(file, group, "Group[0] data");
status = rocprofiler_get_metrics_data(group->context);
check_status(status);
print_info(file, info, info_count, NULL);
// Finishing cleanup
// Deleting profiling context will delete all allocated resources
rocprofiler_close(group->context);
}
// Provided standard profiling callback
hsa_status_t dispatch_callback(
const rocprofiler_callback_data_t* callback_data,
void* user_data,
rocprofiler_group_t** group) {
hsa_status_t status = HSA_STATUS_ERROR;
// Passed tool data
dispatch_data_t* tool_data = reinterpret_cast<dispatch_data_t*>(user_data);
// Profiling context
rocprofiler_t* context = NULL;
// Open profiling context
status = rocprofiler_open(0, tool_data->info, tool_data->info_count, &context, 0, NULL);
check_status(status);
rocprofiler_group_t* groups = NULL;
uint32_t group_count = 0;
status = rocprofiler_get_groups(context, &groups, &group_count);
check_status(status);
assert(group_count == 1);
*group = &groups[0];
store_context({*group, tool_data->info, tool_data->info_count, *callback_data});
return status;
}
void* dumping_data(void*) {
unsigned index = 0;
do {
while (index < context_array_index) {
dump_context(file_handle, index);
++index;
}
} while (!thr_stop);
return NULL;
}
CONSTRUCTOR_API void constructor() {
std::map<std::string, hsa_ven_amd_aqlprofile_parameter_name_t> parameters_dict;
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET;
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK;
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK;
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK;
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2;
#ifdef TOOL_THREAD
int err = pthread_attr_init(&thr_attr);
if (err) { errno = err; perror("pthread_attr_init"); exit(1); }
err = pthread_create(&thread, &thr_attr, dumping_data, NULL);
if (err) { errno = err; perror("pthread_create"); exit(1); }
#endif
// Set output file
file_name = getenv("ROCP_OUTPUT");
if (file_name != NULL) {
file_handle = fopen(file_name, "w");
if (file_handle == NULL) {
perror("fopen");
exit(1);
}
} else file_handle = stdout;
// Getting input
const char* xml_name = getenv("ROCP_INPUT");
if (xml_name == NULL) {
fprintf(stderr, "ROCProfiler: input is not specified, ROCP_INPUT env");
exit(1);
}
printf("ROCProfiler: input from \"%s\"\n", xml_name);
xml::Xml* xml = new xml::Xml(xml_name);
// Getting metrics
auto metrics_list = xml->GetNodes("top.metric");
std::vector<std::string> metrics_vec;
for (auto* entry : metrics_list) {
const std::string entry_str = entry->opts["name"];
size_t pos1 = 0;
while(pos1 < entry_str.length()) {
const size_t pos2 = entry_str.find(",", pos1);
const std::string metric_name = entry_str.substr(pos1, pos2 - pos1);
metrics_vec.push_back(metric_name);
if (pos2 == std::string::npos) break;
pos1 = pos2 + 1;
}
}
// Getting traces
auto traces_list = xml->GetNodes("top.trace");
const unsigned info_count = metrics_vec.size() + traces_list.size();
rocprofiler_info_t* info= new rocprofiler_info_t[info_count];
memset(info, 0, info_count * sizeof(rocprofiler_info_t));
printf(" %d metrics\n", (int) metrics_vec.size());
for (unsigned i = 0; i < metrics_vec.size(); ++i) {
const std::string& name = metrics_vec[i];
printf("%s%s", (i == 0) ? " " : ", ", name.c_str());
info[i] = {};
info[i].type = ROCPROFILER_TYPE_METRIC;
info[i].name = strdup(name.c_str());
}
if (metrics_vec.size()) printf("\n");
printf(" %d traces\n", (int) traces_list.size());
unsigned index = metrics_vec.size();
for (auto* entry : traces_list) {
auto params_list = xml->GetNodes("top.trace.parameters");
if (params_list.size() != 1) {
fprintf(stderr, "ROCProfiler: Single input 'parameters' section is supported\n");
exit(1);
}
const std::string& name = entry->opts["name"];
printf(" %s (\n", name.c_str());
info[index] = {};
info[index].type = ROCPROFILER_TYPE_TRACE;
info[index].name = strdup(name.c_str());
for (auto* params : params_list) {
const unsigned parameter_count = params->opts.size();
rocprofiler_parameter_t *parameters = new rocprofiler_parameter_t[parameter_count];
unsigned p_index = 0;
for (auto& v : params->opts) {
const std::string parameter_name = v.first;
if (parameters_dict.find(parameter_name) == parameters_dict.end()) {
fprintf(stderr, "ROCProfiler: unknown trace parameter %s\n", parameter_name.c_str());
exit(1);
}
const uint32_t value = strtol(v.second.c_str(), NULL, 0);
printf(" %s = 0x%x\n", parameter_name.c_str(), value);
parameters[p_index] = {};
parameters[p_index].parameter_name = parameters_dict[parameter_name];
parameters[p_index].value = value;
++p_index;
}
info[index].parameters = parameters;
info[index].parameter_count = parameter_count;
}
printf(" )\n");
++index;
}
if (info_count) {
// Adding dispatch observer
dispatch_data_t* dispatch_data = new dispatch_data_t{};
dispatch_data->info = info;
dispatch_data->info_count = info_count;
dispatch_data->group_index = 0;
rocprofiler_dispatch_observer(dispatch_callback, dispatch_data);
}
}
DESTRUCTOR_API void destructor() {
printf("\nROCPRofiler: %u contexts collected", context_array_index);
thr_stop = true;
#ifdef TOOL_THREAD
pthread_join(thread, NULL);
#else
dumping_data(NULL);
#endif
}
+313
Voir le fichier
@@ -0,0 +1,313 @@
#include <hsa.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <map>
#include <vector>
#include "inc/rocprofiler.h"
#include "util/xml.h"
#define PUBLIC_API __attribute__((visibility("default")))
#define CONSTRUCTOR_API __attribute__((constructor))
#define DESTRUCTOR_API __attribute__((destructor))
struct dispatch_data_t {
rocprofiler_info_t* info;
unsigned info_count;
unsigned group_index;
};
struct context_entry_t {
rocprofiler_group_t* group;
rocprofiler_info_t* info;
unsigned info_count;
rocprofiler_callback_data_t data;
};
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
unsigned context_array_size = 1;
context_entry_t* context_array = NULL;
unsigned context_array_index = 0;
unsigned dump_index = 0;
const char* file_name = NULL;
FILE* file_handle = NULL;
void check_status(hsa_status_t status) {
if (status != HSA_STATUS_SUCCESS) {
const char* error_string = NULL;
rocprofiler_error_string(&error_string);
fprintf(stderr, "ERROR: %s\n", error_string);
exit(1);
}
}
hsa_status_t trace_data_cb(
hsa_ven_amd_aqlprofile_info_type_t info_type,
hsa_ven_amd_aqlprofile_info_data_t* info_data,
void* data)
{
hsa_status_t status = HSA_STATUS_SUCCESS;
if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) {
printf(" data ptr (%p), size(%u)\n", info_data->sqtt_data.ptr, info_data->sqtt_data.size);
} else status = HSA_STATUS_ERROR;
return status;
}
unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); }
void print_info(FILE* file, const rocprofiler_info_t* info, const unsigned info_count, rocprofiler_t* context, const char* str) {
if (str) fprintf(file, "%s:\n", str);
for (unsigned i= 0; i < info_count; ++i) {
const rocprofiler_info_t* p = &info[i];
fprintf(file, " %s ", p->name);
switch (p->data.kind) {
case ROCPROFILER_INT64:
fprintf(file, "(%lu)\n", p->data.result_int64);
break;
case ROCPROFILER_BYTES: {
fprintf(file, "(\n");
if (p->data.result_bytes.copy) {
fprintf(file, " system memory copy\n");
const char* ptr = reinterpret_cast<const char*>(p->data.result_bytes.ptr);
uint64_t size = 0;
for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) {
size = *reinterpret_cast<const uint64_t*>(ptr);
const char* data = ptr + sizeof(size);
fprintf(file, " data (%p), size (%lu)\n", data, size);
size = align_size(size, sizeof(uint64_t));
ptr = data + size;
}
} else {
fprintf(file, " local memory buffer\n");
rocprofiler_iterate_trace_data(context, trace_data_cb, NULL);
}
fprintf(file, " )\n");
break;
}
default:
std::cout << "Bad result kind (" << p->data.kind << ")" << std::endl;
}
}
}
void print_group(FILE* file, const rocprofiler_group_t* group, const char* str) {
if (str) fprintf(file, "%s:\n", str);
for (unsigned i= 0; i < group->info_count; ++i) {
print_info(file, group->info[i], 1, group->context, NULL);
}
}
void store_entry(const context_entry_t& context_entry) {
if(pthread_mutex_lock(&mutex) != 0) {
perror("pthread_mutex_lock");
exit(1);
}
if ((context_array == NULL) || (context_array_index >= context_array_size)) {
context_array_size *= 2;
context_array = reinterpret_cast<context_entry_t*>(realloc(context_array, context_array_size * sizeof(context_entry_t)));
}
context_array[context_array_index] = context_entry;
context_array_index += 1;
if (pthread_mutex_unlock(&mutex) != 0) {
perror("pthread_mutex_unlock");
exit(1);
}
}
void dump_context(FILE *file, context_entry_t* entry, unsigned index) {
hsa_status_t status = HSA_STATUS_ERROR;
rocprofiler_group_t* group = entry->group;
const rocprofiler_info_t* info = entry->info;
const unsigned info_count = entry->info_count;
fprintf(file, "Dispatch[%u], kernel_object(0x%lx):\n", index, entry->data.kernel_object);
status = rocprofiler_get_group_data(group);
check_status(status);
//print_group(file, group, "Group[0] data");
status = rocprofiler_get_metrics_data(group->context);
check_status(status);
print_info(file, info, info_count, group->context, NULL);
// Finishing cleanup
// Deleting profiling context will delete all allocated resources
rocprofiler_close(group->context);
dump_index = index;
}
void dumping_data() {
if (pthread_mutex_lock(&mutex) != 0) {
perror("pthread_mutex_lock");
exit(1);
}
for (unsigned index = 0; index < context_array_index; ++index) {
dump_context(file_handle, &context_array[index], index);
}
if (pthread_mutex_unlock(&mutex) != 0) {
perror("pthread_mutex_unlock");
exit(1);
}
}
// profiling callback
hsa_status_t dispatch_callback(
const rocprofiler_callback_data_t* callback_data,
void* user_data,
rocprofiler_group_t** group) {
hsa_status_t status = HSA_STATUS_ERROR;
// Passed tool data
dispatch_data_t* tool_data = reinterpret_cast<dispatch_data_t*>(user_data);
// Profiling context
rocprofiler_t* context = NULL;
// context properties
rocprofiler_properties_t properties{};
// Open profiling context
status = rocprofiler_open(0, tool_data->info, tool_data->info_count, &context, 0, &properties);
check_status(status);
rocprofiler_group_t* groups = NULL;
uint32_t group_count = 0;
status = rocprofiler_get_groups(context, &groups, &group_count);
check_status(status);
assert(group_count == 1);
*group = &groups[0];
context_entry_t entry;
entry.group = *group;
entry.info = tool_data->info;
entry.info_count = tool_data->info_count;
entry.data = *callback_data;
store_entry(entry);
return status;
}
CONSTRUCTOR_API void constructor() {
std::map<std::string, hsa_ven_amd_aqlprofile_parameter_name_t> parameters_dict;
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET;
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK;
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK;
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK;
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2;
// Set output file
file_name = getenv("ROCP_OUTPUT");
if (file_name != NULL) {
file_handle = fopen(file_name, "w");
if (file_handle == NULL) {
perror("fopen");
exit(1);
}
} else file_handle = stdout;
// Getting input
const char* xml_name = getenv("ROCP_INPUT");
if (xml_name == NULL) {
fprintf(stderr, "ROCProfiler: input is not specified, ROCP_INPUT env");
exit(1);
}
printf("ROCProfiler: input from \"%s\"\n", xml_name);
xml::Xml* xml = new xml::Xml(xml_name);
// Getting metrics
auto metrics_list = xml->GetNodes("top.metric");
std::vector<std::string> metrics_vec;
for (auto* entry : metrics_list) {
const std::string entry_str = entry->opts["name"];
size_t pos1 = 0;
while(pos1 < entry_str.length()) {
const size_t pos2 = entry_str.find(",", pos1);
const std::string metric_name = entry_str.substr(pos1, pos2 - pos1);
metrics_vec.push_back(metric_name);
if (pos2 == std::string::npos) break;
pos1 = pos2 + 1;
}
}
// Getting traces
auto traces_list = xml->GetNodes("top.trace");
const unsigned info_count = metrics_vec.size() + traces_list.size();
rocprofiler_info_t* info= new rocprofiler_info_t[info_count];
memset(info, 0, info_count * sizeof(rocprofiler_info_t));
printf(" %d metrics\n", (int) metrics_vec.size());
for (unsigned i = 0; i < metrics_vec.size(); ++i) {
const std::string& name = metrics_vec[i];
printf("%s%s", (i == 0) ? " " : ", ", name.c_str());
info[i] = {};
info[i].type = ROCPROFILER_TYPE_METRIC;
info[i].name = strdup(name.c_str());
}
if (metrics_vec.size()) printf("\n");
printf(" %d traces\n", (int) traces_list.size());
unsigned index = metrics_vec.size();
for (auto* entry : traces_list) {
auto params_list = xml->GetNodes("top.trace.parameters");
if (params_list.size() != 1) {
fprintf(stderr, "ROCProfiler: Single input 'parameters' section is supported\n");
exit(1);
}
const std::string& name = entry->opts["name"];
const bool to_copy_data = (entry->opts["copy"] == "true");
printf(" %s (\n", name.c_str());
info[index] = {};
info[index].type = ROCPROFILER_TYPE_TRACE;
info[index].name = strdup(name.c_str());
info[index].data.result_bytes.copy = to_copy_data;
for (auto* params : params_list) {
const unsigned parameter_count = params->opts.size();
rocprofiler_parameter_t *parameters = new rocprofiler_parameter_t[parameter_count];
unsigned p_index = 0;
for (auto& v : params->opts) {
const std::string parameter_name = v.first;
if (parameters_dict.find(parameter_name) == parameters_dict.end()) {
fprintf(stderr, "ROCProfiler: unknown trace parameter %s\n", parameter_name.c_str());
exit(1);
}
const uint32_t value = strtol(v.second.c_str(), NULL, 0);
printf(" %s = 0x%x\n", parameter_name.c_str(), value);
parameters[p_index] = {};
parameters[p_index].parameter_name = parameters_dict[parameter_name];
parameters[p_index].value = value;
++p_index;
}
info[index].parameters = parameters;
info[index].parameter_count = parameter_count;
}
printf(" )\n");
++index;
}
if (info_count) {
// Adding dispatch observer
dispatch_data_t* dispatch_data = new dispatch_data_t{};
dispatch_data->info = info;
dispatch_data->info_count = info_count;
dispatch_data->group_index = 0;
rocprofiler_set_dispatch_observer(dispatch_callback, dispatch_data);
}
}
DESTRUCTOR_API void destructor() {
printf("\nROCPRofiler: %u contexts collected", context_array_index);
if (file_name == NULL) {
printf("\n");
} else {
printf(", dumping to %s\n", file_name);
}
dumping_data();
}
+7
Voir le fichier
@@ -0,0 +1,7 @@
<metric name=CPC_ME1_STALL_WAIT_ON_RCIU_READ,SQ_WAVES,SQ_WAVE_READY,SQ_CYCLES,SQ_ITEMS,WAVE_STALLS_RATE ></metric>
<trace name=SQTT copy=true >
<parameters
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK=0xf
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK=0xf
></parameters>
</trace>
+15
Voir le fichier
@@ -0,0 +1,15 @@
<gfx8>
<metric name=SQ_CYCLES block=SQ event=2 ></metric>
<metric name=SQ_WAVES block=SQ event=4 ></metric>
<metric name=SQ_ITEMS block=SQ event=14 ></metric>
<metric name=SQ_WAVE_READY block=SQ event=47 ></metric>
<metric name=TCC_CYCLE block=TCC event=1 ></metric>
<metric name=TCC_REQ block=TCC event=3 ></metric>
<metric name=TCC_WRITEBACK block=TCC event=22 ></metric>
<metric name=CPC_ALWAYS_COUNT block=CPC event=0 ></metric>
<metric name=CPC_ME1_STALL_WAIT_ON_RCIU_READ block=CPC event=8 ></metric>
</gfx8>
<global>
<metric name=WAVE_STALLS_RATE expr=CPC_ME1_STALL_WAIT_ON_RCIU_READ*(SQ_WAVES+SQ_WAVE_READY)*100/(SQ_CYCLES/SQ_ITEMS) ></metric>
</global>
Fichier exécutable
+27
Voir le fichier
@@ -0,0 +1,27 @@
#!/bin/sh
tbin=./test/ctrl
#export HSA_LIB=/home/evgeny/pkg/compute-psdb-16453/lib
export HSA_LIB=/home/evgeny/git/compute/out/ubuntu-16.04/16.04/lib
export OCL_LIB=/home/evgeny/pkg/opencl_modified/opencl_x86_64/lib
#export OCL_LIB=/home/evgeny/Perforce/eshcherb_opencl/drivers/opencl/dist/linux/debug/lib/x86_64
export LD_LIBRARY_PATH=$PWD:$HSA_LIB:$OCL_LIB
export ROCPROFILER_LOG=1
export HSA_TOOLS_LIB=librocprofiler64.so
export ROCP_TOOL_LIB=test/libtool.so
export ROCP_HSA_INTERCEPT=1
export ROCP_METRICS=metrics.xml
export ROCP_INPUT=input.xml
unset ROCP_PROXY_QUEUE
echo "Run simple profiling test"
if [ -n "$1" ] ; then
eval "$*"
else
eval $tbin
fi
exit 0
Fichier binaire non affiché.
Fichier binaire non affiché.
+81
Voir le fichier
@@ -0,0 +1,81 @@
/******************************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list
of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************/
/**
* SimpleConvolution is where each pixel of the output image
* is the weighted sum of the neighborhood pixels of the input image
* The neighborhood is defined by the dimensions of the mask and
* weight of each neighbor is defined by the mask itself.
* @param output Output matrix after performing convolution
* @param input Input matrix on which convolution is to be performed
* @param mask mask matrix using which convolution was to be performed
* @param inputDimensions dimensions of the input matrix
* @param maskDimensions dimensions of the mask matrix
*/
__kernel void SimpleConvolution(__global uint * output,
__global uint * input,
__global float * mask,
const uint2 inputDimensions,
const uint2 maskDimensions) {
uint tid = get_global_id(0);
uint width = inputDimensions.x;
uint height = inputDimensions.y;
uint x = tid%width;
uint y = tid/width;
uint maskWidth = maskDimensions.x;
uint maskHeight = maskDimensions.y;
uint vstep = (maskWidth -1)/2;
uint hstep = (maskHeight -1)/2;
// find the left, right, top and bottom indices such that
// the indices do not go beyond image boundaires
uint left = (x < vstep) ? 0 : (x - vstep);
uint right = ((x + vstep) >= width) ? width - 1 : (x + vstep);
uint top = (y < hstep) ? 0 : (y - hstep);
uint bottom = ((y + hstep) >= height)? height - 1: (y + hstep);
// initializing wighted sum value
float sumFX = 0;
for(uint i = left; i <= right; ++i) {
for(uint j = top ; j <= bottom; ++j) {
// performing wighted sum within the mask boundaries
uint maskIndex = (j - (y - hstep)) * maskWidth + (i - (x - vstep));
uint index = j * width + i;
sumFX += ((float)input[index] * mask[maskIndex]);
}
}
// To round to the nearest integer
sumFX += 0.5f;
output[tid] = (uint)sumFX;
}
+390
Voir le fichier
@@ -0,0 +1,390 @@
/******************************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list
of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#include "simple_convolution/simple_convolution.h"
#include <string.h>
#include <iostream>
#include "util/helper_funcs.h"
#include "util/test_assert.h"
const uint32_t SimpleConvolution::input_data_[]{
15, 201, 51, 89, 92, 34, 96, 66, 11, 225, 161, 96, 81, 211, 108, 124, 202, 244, 182,
90, 215, 92, 98, 20, 44, 225, 55, 247, 202, 0, 45, 218, 202, 97, 51, 39, 131, 147,
105, 143, 116, 11, 239, 198, 222, 92, 67, 169, 81, 250, 3, 40, 86, 101, 60, 131, 70,
116, 123, 17, 117, 168, 236, 64, 10, 31, 103, 142, 179, 209, 29, 40, 220, 13, 239, 187,
105, 50, 100, 186, 44, 104, 227, 131, 205, 32, 6, 20, 149, 130, 38, 10, 43, 18, 75,
53, 50, 178, 195, 230, 132, 225, 14, 96, 238, 253, 27, 88, 48, 128, 18, 92, 232, 246,
224, 182, 23, 231, 203, 172, 105, 241, 183, 148, 4, 2, 202, 55, 181, 142, 29, 57, 111,
43, 153, 93, 41, 181, 181, 89, 54, 200, 182, 31, 190, 150, 213, 213, 126, 160, 130, 232,
146, 57, 125, 151, 59, 71, 206, 240, 213, 236, 42, 68, 24, 195, 162, 65, 121, 87, 155,
175, 31, 81, 207, 222, 232, 164, 180, 102, 69, 55, 79, 216, 112, 204, 112, 171, 19, 63,
156, 233, 43, 198, 46, 67, 138, 208, 132, 4, 39, 32, 180, 71, 113, 131, 38, 90, 40,
219, 193, 109, 18, 16, 70, 131, 220, 182, 46, 240, 245, 203, 217, 32, 146, 7, 100, 28,
216, 233, 32, 255, 9, 213, 71, 123, 88, 110, 213, 128, 74, 150, 238, 93, 166, 52, 224,
131, 234, 15, 115, 224, 218, 76, 1, 108, 84, 101, 137, 44, 79, 170, 44, 88, 127, 116,
211, 216, 226, 168, 88, 45, 63, 70, 138, 230, 123, 107, 105, 101, 122, 220, 70, 84, 41,
71, 193, 125, 173, 75, 169, 252, 245, 213, 84, 117, 73, 40, 77, 44, 209, 166, 90, 16,
237, 229, 246, 104, 80, 95, 206, 202, 60, 20, 31, 101, 92, 225, 226, 9, 44, 140, 5,
34, 97, 89, 151, 171, 129, 229, 216, 82, 139, 51, 99, 120, 24, 89, 225, 104, 185, 175,
50, 246, 196, 82, 91, 32, 51, 62, 42, 96, 202, 47, 130, 44, 137, 26, 215, 10, 255,
176, 93, 138, 227, 193, 3, 251, 27, 229, 100, 212, 149, 151, 202, 89, 233, 38, 122, 29,
100, 164, 125, 46, 212, 0, 90, 93, 26, 50, 103, 25, 226, 197, 164, 198, 135, 168, 194,
162, 141, 38, 119, 34, 190, 66, 124, 167, 104, 247, 197, 204, 156, 67, 251, 112, 67, 85,
205, 93, 135, 53, 119, 106, 251, 28, 49, 130, 196, 243, 36, 82, 26, 155, 117, 216, 221,
241, 128, 70, 233, 70, 18, 133, 137, 14, 245, 204, 99, 195, 42, 235, 248, 161, 86, 243,
190, 135, 118, 130, 123, 154, 213, 150, 54, 74, 111, 20, 60, 240, 90, 37, 54, 109, 171,
191, 123, 161, 140, 222, 100, 182, 202, 93, 88, 32, 80, 23, 168, 198, 153, 36, 97, 111,
187, 151, 185, 43, 172, 245, 27, 6, 27, 82, 115, 199, 18, 239, 104, 158, 206, 205, 85,
152, 42, 174, 185, 123, 197, 98, 65, 95, 135, 163, 206, 66, 59, 136, 109, 231, 125, 137,
237, 153, 219, 97, 96, 237, 81, 201, 140, 31, 150, 226, 183, 192, 144, 113, 59, 86, 212,
125, 182, 91, 33, 132, 158, 92, 12, 12, 68, 138, 149, 50, 36, 113, 147, 133, 95, 229,
78, 235, 4, 228, 206, 188, 165, 95, 45, 225, 181, 1, 94, 107, 93, 128, 240, 251, 220,
252, 7, 32, 135, 156, 83, 171, 14, 230, 48, 109, 203, 126, 89, 208, 99, 39, 140, 9,
134, 185, 234, 60, 187, 73, 167, 24, 201, 152, 20, 166, 148, 27, 199, 28, 184, 26, 199,
198, 0, 248, 52, 204, 119, 141, 157, 218, 181, 41, 227, 59, 227, 206, 119, 159, 23, 31,
184, 224, 183, 204, 134, 76, 231, 77, 105, 160, 103, 48, 103, 104, 41, 155, 53, 160, 41,
210, 123, 222, 252, 95, 26, 223, 45, 146, 126, 68, 177, 54, 37, 105, 3, 171, 182, 235,
249, 31, 139, 97, 80, 243, 202, 121, 143, 0, 26, 184, 210, 149, 151, 207, 244, 177, 174,
34, 67, 45, 102, 245, 100, 140, 95, 104, 55, 21, 83, 49, 53, 223, 147, 134, 210, 93,
0, 97, 93, 26, 26, 48, 175, 178, 255, 164, 99, 174, 198, 167, 220, 45, 156, 64, 185,
252, 168, 241, 18, 252, 35, 71, 219, 182, 205, 173, 19, 206, 15, 113, 232, 42, 161, 152,
220, 160, 60, 64, 79, 3, 231, 43, 49, 132, 108, 235, 128, 21, 220, 146, 17, 255, 218,
236, 182, 168, 154, 201, 118, 170, 58, 94, 212, 220, 246, 177, 125, 51, 241, 204, 55, 216,
248, 104, 92, 100, 83, 221, 121, 48, 111, 138, 47, 73, 119, 230, 241, 17, 175, 103, 187,
234, 198, 144, 199, 188, 65, 68, 240, 51, 17, 39, 11, 9, 143, 104, 109, 227, 70, 231,
19, 181, 113, 66, 255, 233, 41, 241, 250, 217, 89, 182, 196, 31, 71, 139, 220, 137, 208,
204, 188, 225, 243, 200, 234, 131, 48, 88, 102, 119, 63, 121, 44, 177, 188, 44, 154, 229,
29, 149, 190, 118, 76, 130, 150, 147, 14, 114, 28, 222, 62, 217, 191, 50, 161, 170, 181,
210, 2, 28, 73, 66, 149, 117, 243, 81, 162, 141, 55, 191, 35, 245, 54, 111, 120, 204,
2, 134, 62, 31, 100, 125, 248, 36, 175, 153, 206, 101, 107, 209, 129, 181, 19, 22, 43,
7, 104, 205, 149, 159, 140, 184, 149, 195, 39, 14, 143, 42, 148, 205, 73, 249, 74, 66,
30, 250, 219, 237, 96, 71, 190, 225, 253, 210, 248, 40, 218, 96, 245, 111, 0, 130, 39,
150, 69, 79, 165, 212, 122, 57, 162, 195, 51, 237, 6, 82, 231, 225, 63, 71, 41, 253,
41, 38, 208, 33, 78, 170, 130, 68, 26, 131, 198, 66, 26, 12, 145, 191, 224, 11, 249,
130, 207, 44, 112, 213, 126, 88, 183, 190, 160, 225, 187, 201, 8, 140, 235, 87, 55, 109,
155, 81, 241, 98, 147, 11, 110, 37, 202, 79, 49, 195, 210, 0, 240, 66, 214, 110, 154,
142, 44, 58, 111, 232, 4, 119, 117, 239, 207, 172, 93, 106, 254, 78, 205, 145, 89, 59,
183, 35, 138, 232, 230, 92, 233, 214, 159, 191, 69, 58, 78, 114, 116, 189, 91, 121, 53,
208, 104, 4, 125, 198, 111, 123, 20, 60, 13, 109, 120, 196, 145, 3, 172, 119, 95, 150,
78, 255, 85, 147, 57, 163, 6, 174, 97, 97, 39, 151, 50, 144, 155, 175, 86, 11, 43,
107, 71, 56, 216, 191, 253, 105, 194, 170, 225, 34, 64, 47, 34, 150, 195, 91, 58, 201,
10, 155, 43, 49, 50, 93, 194, 206, 13, 25, 217, 56, 132, 33, 112, 92, 225, 109, 198,
164, 23, 167, 199, 88, 215, 234, 238, 155, 69, 40, 100, 80, 196, 144, 129, 246, 237, 68,
197, 250, 93, 159, 51, 225, 193, 163, 62, 163, 17, 4, 71, 41, 172, 15, 130, 132, 249,
112, 31, 63, 152, 132, 143, 92, 20, 17, 83, 1, 86, 25, 252, 179, 185, 47, 149, 122,
211, 211, 29, 229, 216, 101, 15, 133, 117, 145, 9, 111, 1, 40, 175, 154, 173, 62, 247,
193, 80, 75, 194, 166, 100, 191, 90, 29, 239, 239, 152, 194, 195, 182, 168, 156, 27, 183,
33, 145, 73, 43, 0, 75, 83, 175, 229, 0, 238, 221, 194, 63, 40, 133, 230, 140, 68,
64, 170, 51, 48, 66, 246, 243, 248, 159, 144, 20, 87, 177, 165, 160, 220, 166, 235, 48,
86, 209, 49, 68, 174, 243, 132, 214, 120, 106, 99, 189, 170, 13, 241, 219, 80, 232, 207,
72, 135, 95, 92, 223, 16, 2, 127, 237, 169, 107, 29, 255, 61, 79, 68, 236, 67, 200,
194, 188, 50, 38, 121, 221, 52, 107, 184, 132, 84, 136, 204, 219, 231, 41, 186, 248, 44,
58, 229, 213, 166, 3, 212, 227, 82, 25, 207, 150, 225, 146, 82, 20, 185, 204, 242, 237,
55, 170, 113, 139, 50, 62, 103, 26, 103, 34, 18, 148, 93, 247, 105, 3, 251, 62, 231,
77, 87, 182, 227, 57, 73, 54, 77, 2, 2, 63, 239, 57, 234, 97, 197, 29, 159, 44,
55, 7, 79, 74, 155, 172, 66, 5, 175, 61, 67, 150, 139, 155, 77, 111, 212, 151, 165,
34, 153, 167, 98, 137, 225, 77, 234, 166, 107, 138, 211, 163, 145, 34, 237, 45, 206, 47,
50, 126, 108, 117, 21, 248, 17, 98, 103, 230, 249, 12, 9, 147, 179, 107, 29, 149, 185,
7, 59, 37, 146, 14, 200, 35, 49, 182, 80, 0, 230, 130, 126, 83, 248, 148, 75, 9,
247, 178, 240, 240, 190, 249, 132, 114, 101, 161, 7, 30, 169, 67, 68, 59, 82, 12, 95,
131, 195, 176, 131, 169, 51, 2, 252, 44, 150, 72, 54, 141, 250, 38, 126, 185, 31, 3,
44, 132, 165, 52, 163, 78, 120, 231, 138, 202, 244, 234, 77, 183, 155, 209, 97, 207, 212,
94, 251, 107, 166, 49, 249, 161, 88, 120, 91, 120, 123, 135, 253, 33, 188, 160, 112, 52,
136, 250, 254, 125, 229, 76, 53, 128, 30, 150, 79, 243, 244, 75, 95, 155, 125, 88, 60,
213, 209, 152, 78, 77, 32, 75, 110, 220, 236, 222, 17, 117, 217, 15, 242, 190, 92, 39,
63, 123, 190, 143, 111, 178, 219, 206, 78, 88, 38, 138, 46, 247, 34, 124, 69, 66, 199,
179, 31, 179, 145, 48, 41, 106, 64, 27, 41, 157, 67, 105, 24, 1, 249, 135, 179, 212,
86, 1, 44, 124, 140, 91, 116, 175, 215, 185, 242, 159, 108, 17, 83, 254, 66, 124, 105,
131, 151, 146, 32, 218, 252, 57, 219, 245, 193, 143, 201, 23, 145, 246, 148, 30, 82, 8,
206, 41, 194, 192, 201, 47, 210, 28, 46, 20, 152, 151, 151, 48, 42, 184, 11, 38, 241,
231, 28, 179, 119, 230, 202, 8, 220, 94, 39, 46, 103, 245, 88, 42, 181, 33, 90, 136,
62, 136, 156, 214, 31, 52, 7, 74, 237, 19, 113, 223, 250, 141, 146, 113, 115, 92, 122,
80, 187, 161, 126, 35, 150, 215, 78, 76, 249, 168, 212, 55, 48, 113, 14, 80, 166, 21,
154, 147, 40, 12, 114, 35, 153, 5, 148, 12, 98, 15, 92, 29, 176, 219, 65, 71, 179,
143, 147, 172, 56, 104, 227, 104, 218, 241, 185, 128, 7, 84, 20, 47, 96, 135, 82, 249,
140, 231, 6, 238, 246, 99, 12, 167, 63, 77, 238, 242, 221, 130, 158, 21, 235, 129, 126,
197, 114, 56, 69, 121, 140, 90, 169, 237, 225, 252, 231, 109, 228, 237, 91, 219, 81, 104,
130, 144, 181, 113, 130, 147, 244, 32, 169, 223, 162, 39, 164, 21, 95, 234, 143, 236, 68,
57, 217, 37, 53, 192, 147, 25, 174, 239, 245, 0, 87, 119, 144, 13, 232, 19, 160, 220,
51, 73, 188, 214, 113, 96, 235, 209, 75, 122, 190, 144, 179, 151, 181, 233, 88, 73, 3,
7, 56, 248, 7, 143, 112, 152, 156, 89, 171, 61, 53, 223, 135, 242, 181, 248, 83, 161,
202, 158, 28, 136, 46, 208, 32, 228, 186, 121, 45, 189, 128, 102, 182, 136, 246, 38, 32,
147, 127, 204, 208, 181, 171, 87, 167, 97, 80, 250, 2, 26, 153, 31, 163, 200, 239, 195,
172, 169, 60, 218, 103, 188, 65, 30, 69, 55, 68, 102, 202, 196, 50, 154, 121, 221, 242,
33, 63, 67, 28, 66, 93, 181, 97, 0, 126, 81, 196, 43, 251, 0, 5, 98, 189, 70,
128, 3, 126, 197, 105, 72, 137, 155, 227, 3, 121, 214, 36, 184, 25, 65, 250, 118, 247,
91, 119, 117, 173, 60, 160, 168, 60, 166, 10, 250, 237, 139, 253, 107, 80, 102, 180, 217,
2, 151, 221, 123, 109, 1, 52, 134, 66, 46, 253, 57, 138, 117, 175, 55, 178, 79, 223,
239, 245, 234, 233, 226, 117, 231, 78, 198, 78, 2, 159, 80, 154, 124, 204, 7, 126, 0,
142, 193, 47, 140, 251, 185, 2, 170, 241, 180, 249, 208, 163, 239, 186, 141, 210, 48, 116,
32, 246, 195, 34, 150, 19, 188, 19, 224, 196, 146, 224, 83, 83, 15, 224, 78, 201, 226,
249, 186, 151, 243, 139, 58, 226, 70, 199, 181, 118, 60, 213, 109, 255, 248, 3, 19, 181,
23, 243, 122, 169, 212, 205, 252, 228, 173, 75, 173, 144, 68, 104, 39, 55, 243, 98, 26,
57, 41, 207, 175, 102, 165, 29, 102, 158, 32, 121, 83, 56, 109, 205, 225, 66, 155, 222,
38, 73, 42, 212, 218, 110, 60, 1, 166, 48, 99, 193, 105, 141, 145, 25, 244, 54, 54,
90, 213, 87, 212, 40, 143, 66, 246, 112, 132, 146, 79, 171, 220, 121, 128, 182, 232, 189,
184, 143, 237, 27, 80, 86, 169, 226, 112, 158, 25, 166, 248, 238, 253, 204, 23, 141, 15,
13, 254, 147, 160, 77, 63, 124, 199, 191, 50, 175, 124, 234, 62, 105, 6, 143, 192, 176,
113, 48, 78, 139, 215, 71, 121, 213, 20, 144, 98, 35, 158, 96, 183, 62, 174, 246, 187,
117, 182, 237, 37, 50, 216, 99, 156, 223, 243, 93, 143, 101, 142, 222, 240, 101, 37, 106,
58, 57, 250, 157, 93, 153, 254, 20, 216, 172, 10, 147, 34, 192, 129, 71, 243, 90, 171,
144, 57, 159, 238, 201, 4, 124, 167, 244, 225, 205, 95, 28, 7, 89, 185, 100, 243, 184,
121, 203, 100, 131, 95, 135, 68, 224, 207, 56, 58, 122, 201, 115, 25, 183, 61, 30, 51,
229, 18, 21, 178, 113, 49, 186, 203, 235, 31, 191, 163, 152, 138, 8, 28, 233, 143, 97,
202, 95, 153, 4, 217, 98, 120, 243, 26, 182, 17, 77, 155, 36, 99, 78, 150, 149, 8,
98, 128, 39, 33, 36, 192, 172, 45, 220, 149, 189, 61, 96, 28, 215, 100, 246, 58, 221,
233, 84, 147, 251, 162, 47, 31, 5, 125, 181, 154, 134, 23, 27, 174, 57, 64, 110, 229,
109, 75, 123, 43, 136, 219, 71, 95, 64, 61, 154, 29, 39, 238, 177, 34, 145, 225, 65,
150, 94, 247, 49, 229, 15, 77, 147, 72, 141, 2, 45, 251, 77, 169, 38, 213, 132, 110,
53, 196, 172, 207, 226, 212, 190, 148, 246, 79, 117, 56, 230, 212, 48, 23, 185, 63, 100,
76, 136, 242, 78, 181, 237, 156, 95, 20, 113, 227, 131, 167, 168, 47, 119, 139, 3, 53,
31, 250, 133, 149, 50, 107, 105, 99, 130, 34, 162, 231, 111, 42, 217, 190, 224, 199, 90,
63, 220, 204, 35, 95, 115, 203, 143, 234, 86, 147, 32, 118, 141, 165, 11, 192, 16, 117,
35, 147, 152, 198, 123, 7, 240, 84, 198, 209, 28, 33, 17, 248, 237, 52, 88, 97, 255,
231, 76, 86, 122, 109, 204, 8, 18, 216, 201, 35, 77, 237, 183, 229, 179, 50, 237, 164,
135, 179, 118, 164, 213, 135, 157, 195, 187, 245, 36, 187, 220, 113, 18, 87, 222, 222, 96,
241, 183, 42, 21, 4, 23, 205, 233, 203, 0, 214, 112, 136, 138, 230, 44, 95, 110, 201,
34, 41, 191, 71, 229, 155, 185, 247, 243, 151, 214, 84, 137, 141, 126, 159, 146, 149, 108,
124, 97, 109, 82, 209, 245, 221, 183, 34, 60, 37, 236, 95, 79, 171, 167, 53, 71, 96,
45, 58, 248, 3, 142, 129, 145, 12, 33, 36, 162, 142, 160, 3, 251, 243, 213, 240, 208,
141, 19, 13, 178, 255, 109, 2, 170, 20, 55, 241, 116, 101, 44, 108, 105, 186, 238, 251,
199, 15, 31, 106, 157, 191, 110, 152, 178, 67, 137, 131, 208, 156, 144, 131, 155, 253, 134,
70, 18, 190, 55, 134, 35, 99, 243, 140, 30, 225, 135, 230, 240, 166, 81, 142, 102, 191,
39, 25, 3, 177, 156, 211, 77, 45, 87, 233, 43, 221, 48, 61, 155, 103, 195, 191, 203,
182, 75, 233, 152, 211, 208, 136, 121, 33, 23, 224, 224, 62, 249, 227, 239, 149, 183, 61,
195, 15, 39, 238, 236, 87, 43, 136, 191, 239, 71, 138, 166, 147, 116, 62, 102, 68, 199,
224, 101, 223, 193, 70, 29, 186, 42, 13, 80, 225, 75, 19, 241, 115, 1, 221, 202, 45,
102, 137, 29, 174, 20, 195, 66, 136, 2, 168, 205, 201, 137, 50, 168, 74, 121, 198, 4,
163, 212, 85, 133, 31, 105, 118, 146, 106, 84, 93, 152, 187, 231, 181, 105, 251, 121, 171,
132, 123, 84, 81, 69, 221, 132, 238, 40, 253, 181, 45, 161, 137, 130, 39, 169, 235, 158,
59, 86, 242, 153, 239, 173, 128, 165, 23, 123, 30, 195, 0, 154, 23, 81, 224, 245, 214,
206, 30, 212, 131, 75, 117, 12, 206, 157, 181, 186, 59, 241, 17, 45, 138, 0, 219, 11,
165, 243, 135, 196, 182, 135, 95, 205, 217, 63, 195, 175, 14, 225, 131, 145, 45, 249, 158,
251, 150, 84, 182, 209, 70, 199, 255, 209, 199, 219, 220, 109, 206, 99, 50, 132, 234, 146,
82, 195, 209, 22, 114, 223, 247, 246, 113, 37, 239, 16, 33, 134, 100, 215, 88, 170, 158,
87, 123, 102, 50, 88, 211, 1, 187, 6, 134, 165, 152, 216, 105, 106, 239, 220, 74, 231,
210, 187, 12, 194, 204, 45, 72, 49, 4, 160, 219, 162, 248, 87, 8, 43, 176, 220, 44,
107, 227, 178, 17, 124, 139, 122, 230, 122, 87, 48, 97, 42, 236, 110, 236, 185, 155, 53,
234, 159, 214, 198, 66, 206, 30, 75, 249, 206, 40, 38, 57, 11, 217, 74, 136, 100, 197,
110, 223, 29, 159, 65, 71, 140, 175, 51, 69, 74, 105, 48, 234, 63, 246, 45, 13, 20,
121, 7, 226, 161, 46, 28, 173, 7, 103, 53, 108, 45, 164, 76, 74, 68, 141, 145, 208,
61, 197, 22, 136, 46, 70, 115, 110, 60, 161, 124, 81, 26, 132, 51, 188, 178, 79, 106,
186, 183, 160, 39, 228, 68, 115, 46, 136, 1, 192, 89, 62, 133, 112, 198, 180, 182, 58,
34, 243, 219, 158, 69, 245, 34, 120, 178, 213, 200, 28, 143, 128, 188, 182, 100, 1, 41,
146, 137, 43, 82, 227, 105, 216, 83, 48, 140, 10, 106, 175, 254, 70, 77, 67, 59, 112,
188, 237, 69, 133, 10, 212, 5, 198, 138, 105, 199, 180, 252, 81, 223, 79, 53, 73, 39,
137, 121, 180, 148, 228, 99, 146, 42, 177, 214, 102, 33, 147, 84, 102, 25, 94, 59, 31,
37, 197, 137, 237, 122, 133, 63, 90, 213, 116, 163, 253, 253, 29, 177, 145, 2, 21, 36,
45, 198, 251, 147, 231, 143, 232, 78, 168, 71, 137, 199, 108, 79, 80, 90, 201, 214, 153,
35, 172, 13, 199, 169, 11, 228, 91, 157, 231, 112, 193, 20, 54, 189, 167, 30, 77, 144,
108, 245, 215, 246, 189, 68, 69, 14, 158, 14, 228, 55, 50, 145, 69, 249, 58, 80, 222,
149, 237, 198, 5, 175, 218, 60, 109, 130, 91, 186, 18, 200, 175, 234, 190, 109, 46, 3,
123, 204, 18, 96, 4, 68, 241, 73, 62, 44, 154, 29, 193, 136, 227, 199, 55, 189, 4,
164, 64, 95, 95, 82, 39, 15, 60, 230, 124, 107, 233, 248, 55, 251, 89, 60, 63, 75,
134, 126, 119, 32, 156, 57, 168, 127, 0, 224, 61, 5, 133, 125, 100, 228, 208, 140, 243,
12, 114, 111, 119, 92, 104, 175, 87, 193, 236, 151, 13, 114, 21, 132, 146, 177, 189, 59,
49, 190, 27, 110, 195, 160, 236, 40, 132, 188, 181, 120, 201, 40, 232, 65, 132, 80, 241,
220, 18, 221, 115, 31, 79, 137, 164, 226, 58, 98, 29, 108, 32, 57, 219, 228, 218, 199,
13, 95, 132, 195, 215, 77, 235, 191, 143, 112, 16, 128, 76, 35, 93, 191, 66, 173, 73,
231, 143, 132, 73, 173, 240, 106, 231, 203, 78, 193, 147, 92, 33, 23, 31, 248, 100, 11,
184, 243, 123, 201, 115, 200, 236, 209, 135, 47, 126, 209, 22, 14, 85, 95, 188, 69, 202,
163, 17, 24, 101, 164, 117, 134, 187, 148, 127, 31, 159, 55, 19, 27, 1, 135, 227, 237,
89, 107, 28, 216, 60, 51, 230, 145, 147, 163, 215, 93, 70, 232, 118, 172, 140, 235, 50,
71, 128, 177, 103, 32, 233, 123, 60, 234, 2, 31, 216, 91, 139, 244, 52, 200, 40, 26,
90, 188, 189, 49, 25, 4, 25, 144, 176, 166, 124, 227, 237, 252, 148, 85, 29, 125, 208,
89, 104, 210, 121, 64, 46, 4, 53, 99, 204, 93, 125, 38, 25, 59, 88, 51, 64, 113,
195, 241, 23, 64, 212, 5, 60, 104, 90, 90, 230, 42, 179, 78, 253, 44, 143, 44, 49,
196, 143, 254, 34, 13, 36, 60, 73, 125, 112, 137, 239, 52, 122, 7, 116, 79, 12, 177,
183, 103, 11, 158, 146, 190, 237, 143, 235, 124, 188, 28, 65, 76, 26, 100, 89, 63, 160,
163, 188, 17, 44, 172, 69, 167, 179, 185, 246, 191, 107, 174, 38, 118, 76, 184, 53, 58,
72, 32, 182, 5, 61, 248, 81, 88, 92, 170, 152, 253, 77, 84, 14, 122, 1, 83, 34,
180, 13, 25, 115, 120, 199, 154, 238, 20, 83, 36, 79, 155, 68, 5, 160, 130, 254, 242,
218, 90, 156, 114, 87, 234, 199, 101, 101, 200, 185, 135, 124, 198, 160, 240, 62, 104, 138,
45, 125, 222, 81, 204, 122, 150, 210, 26, 24, 208, 12, 242, 42, 169, 101, 130, 148, 44,
232, 249, 245, 161, 128, 113, 103, 33, 98, 166, 137, 236, 212, 7, 202, 38, 211, 69, 188,
165, 95, 212, 118, 108, 199, 161, 22, 45, 35, 170, 90, 11, 163, 79, 173, 36, 193, 20,
69, 35, 187, 207, 16, 144, 214, 219, 182, 170, 32, 114, 79, 128, 71, 198, 237, 15, 103,
4, 60, 139, 175, 150, 151, 82, 230, 68, 119, 168, 89, 188, 204, 20, 140, 220, 165, 98,
184, 91, 12, 217, 205, 92, 90, 20, 35, 71, 36, 138, 76, 96, 22, 251, 247, 173, 78,
222, 241, 197, 134, 75, 130, 83, 96, 14, 47, 5, 113, 232, 96, 126, 193, 45, 218, 28,
66, 253, 99, 103, 136, 176, 200, 158, 171, 191, 76, 249, 158, 62, 190, 37, 137, 65, 120,
233, 80, 168, 238, 193, 145, 79, 63, 82, 125, 26, 111, 191, 24, 210, 39, 161, 131, 239,
64, 46, 175, 140, 39, 77, 202, 230, 115, 84, 40, 235, 62, 120, 148, 45, 57, 37, 124,
121, 120, 249, 148, 231, 185, 172, 186, 224, 77, 61, 207, 141, 107, 126, 26, 147, 204, 229,
121, 63, 58, 161, 43, 120, 25, 191, 165, 83, 228, 34, 205, 92, 27, 97, 67, 213, 13,
253, 182, 91, 59, 133, 233, 166, 4, 4, 57, 209, 233, 179, 16, 35, 85, 59, 155, 111,
250, 65, 194, 223, 99, 144, 59, 127, 241, 127, 85, 255, 125, 11, 90, 184, 145, 68, 95,
150, 72, 153, 103, 49, 76, 120, 85, 161, 179, 241, 16, 174, 51, 211, 142, 150, 99, 201,
22, 85, 73, 108, 84, 199, 120, 175, 128, 9, 243, 223, 160, 59, 120, 8, 109, 197, 128,
194, 103, 52, 180, 119, 227, 231, 75, 113, 126, 175, 59, 148, 4, 132, 1, 89, 75, 121,
8, 204, 131, 251, 171, 36, 55, 36, 44, 165, 233, 172, 103, 80, 224, 28, 200, 195, 3,
20, 53, 129, 195, 112, 22, 200, 244, 23, 34, 64, 145, 42, 12, 20, 38, 184, 56, 94,
220, 101, 3, 198, 17, 107, 22, 242, 135, 222, 182, 138, 243, 235, 11, 182, 91, 34, 127,
80, 58, 161, 145, 203, 204, 158, 224, 242, 86, 24, 81, 51, 126, 84, 249, 143, 191, 15,
130, 70, 238, 57, 209, 225, 36, 221, 152, 128, 255, 24, 208, 57, 186, 97, 4, 134, 255,
229, 121, 86, 254, 202, 137, 124, 31, 130, 12, 222, 146, 142, 37, 129, 199, 247, 98, 236,
212, 251, 108, 211, 20, 60, 13, 206, 158, 18, 84};
SimpleConvolution::SimpleConvolution() {
width_ = 64;
height_ = 64;
mask_width_ = 3;
mask_height_ = mask_width_;
randomize_seed_ = 0;
if (!IsPowerOf2(width_)) {
width_ = RoundToPowerOf2(width_);
}
if (!IsPowerOf2(height_)) {
height_ = RoundToPowerOf2(height_);
}
if (!(mask_width_ % 2)) {
mask_width_++;
}
if (!(mask_height_ % 2)) {
mask_height_++;
}
if (width_ * height_ < 256) {
width_ = 64;
height_ = 64;
}
const uint32_t input_size_bytes = width_ * height_ * sizeof(uint32_t);
const uint32_t mask_size_bytes = mask_width_ * mask_height_ * sizeof(float);
SetSysDescr(KERNARG_DES_ID, sizeof(kernel_args_t));
SetSysDescr(INPUT_DES_ID, input_size_bytes);
SetSysDescr(OUTPUT_DES_ID, input_size_bytes);
SetLocalDescr(LOCAL_DES_ID, input_size_bytes);
SetSysDescr(MASK_DES_ID, mask_size_bytes);
SetSysDescr(REFOUT_DES_ID, input_size_bytes);
if (!randomize_seed_) TEST_ASSERT(sizeof(input_data_) <= input_size_bytes);
}
void SimpleConvolution::Init() {
std::clog << "SimpleConvolution::init :" << std::endl;
mem_descr_t input_des = GetDescr(INPUT_DES_ID);
mem_descr_t local_des = GetDescr(LOCAL_DES_ID);
mem_descr_t mask_des = GetDescr(MASK_DES_ID);
mem_descr_t refout_des = GetDescr(REFOUT_DES_ID);
mem_descr_t kernarg_des = GetDescr(KERNARG_DES_ID);
uint32_t* input = (uint32_t*)input_des.ptr;
uint32_t* output_local = (uint32_t*)local_des.ptr;
float* mask = (float*)mask_des.ptr;
kernel_args_t* kernel_args = (kernel_args_t*)kernarg_des.ptr;
if (randomize_seed_) {
// random initialisation of input
FillRandom<uint32_t>(input, width_, height_, 0, 255, randomize_seed_);
} else {
// initialization with preset values
memcpy(input, input_data_, width_ * height_ * sizeof(uint32_t));
}
// Fill a blurr filter or some other filter of your choice
const float val = 1.0f / (mask_width_ * 2.0f - 1.0f);
for (uint32_t i = 0; i < (mask_width_ * mask_height_); i++) {
mask[i] = 0;
}
for (uint32_t i = 0; i < mask_width_; i++) {
uint32_t y = mask_height_ / 2;
mask[y * mask_width_ + i] = val;
}
for (uint32_t i = 0; i < mask_height_; i++) {
uint32_t x = mask_width_ / 2;
mask[i * mask_width_ + x] = val;
}
// Print the INPUT array.
std::clog << std::dec;
PrintArray<uint32_t>("> Input[0]", input, width_, 1);
PrintArray<float>("> Mask", mask, mask_width_, mask_height_);
// Fill the kernel args
kernel_args->arg1 = output_local;
kernel_args->arg2 = input;
kernel_args->arg3 = mask;
kernel_args->arg4 = width_;
kernel_args->arg41 = height_;
kernel_args->arg5 = mask_width_;
kernel_args->arg51 = mask_height_;
// Calculate the reference output
memset(refout_des.ptr, 0, refout_des.size);
ReferenceImplementation(reinterpret_cast<uint32_t*>(refout_des.ptr), input, mask, width_, height_,
mask_width_, mask_height_);
}
void SimpleConvolution::PrintOutput() const {
PrintArray<uint32_t>("> Output[0]", reinterpret_cast<uint32_t*>(GetOutputPtr()), width_, 1);
}
bool SimpleConvolution::ReferenceImplementation(uint32_t* output, const uint32_t* input,
const float* mask, const uint32_t width,
const uint32_t height, const uint32_t mask_width,
const uint32_t mask_height) {
const uint32_t vstep = (mask_width - 1) / 2;
const uint32_t hstep = (mask_height - 1) / 2;
// for each pixel in the input
for (uint32_t x = 0; x < width; x++) {
for (uint32_t y = 0; y < height; y++) {
// find the left, right, top and bottom indices such that
// the indices do not go beyond image boundaires
const uint32_t left = (x < vstep) ? 0 : (x - vstep);
const uint32_t right = ((x + vstep) >= width) ? width - 1 : (x + vstep);
const uint32_t top = (y < hstep) ? 0 : (y - hstep);
const uint32_t bottom = ((y + hstep) >= height) ? height - 1 : (y + hstep);
// initializing wighted sum value
float sum_fx = 0;
for (uint32_t i = left; i <= right; ++i) {
for (uint32_t j = top; j <= bottom; ++j) {
// performing wighted sum within the mask boundaries
uint32_t mask_idx = (j - (y - hstep)) * mask_width + (i - (x - vstep));
uint32_t index = j * width + i;
// to round to the nearest integer
sum_fx += ((float)input[index] * mask[mask_idx]);
}
}
sum_fx += 0.5f;
output[y * width + x] = uint32_t(sum_fx);
}
}
return true;
}
+96
Voir le fichier
@@ -0,0 +1,96 @@
/******************************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list
of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#ifndef TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_
#define TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_
#include <map>
#include <vector>
#include "ctrl/test_kernel.h"
// Class implements SimpleConvolution kernel parameters
class SimpleConvolution : public TestKernel {
public:
// Constructor
SimpleConvolution();
// Initialize method
void Init();
// Return compute grid size
uint32_t GetGridSize() const { return width_ * height_; }
// Print output
void PrintOutput() const;
// Return name
std::string Name() const { return std::string("SimpleConvolution"); }
private:
// Local kernel arguments declaration
struct kernel_args_t {
void* arg1;
void* arg2;
void* arg3;
uint32_t arg4;
uint32_t arg41;
uint32_t arg5;
uint32_t arg51;
};
// Reference CPU implementation of Simple Convolution
// @param output Output matrix after performing convolution
// @param input Input matrix on which convolution is to be performed
// @param mask mask matrix using which convolution was to be performed
// @param input_dimensions dimensions of the input matrix
// @param mask_dimensions dimensions of the mask matrix
// @return bool true on success and false on failure
bool ReferenceImplementation(uint32_t* output, const uint32_t* input, const float* mask,
const uint32_t width, const uint32_t height,
const uint32_t maskWidth, const uint32_t maskHeight);
// Width of the Input array
uint32_t width_;
// Height of the Input array
uint32_t height_;
// Mask dimensions
uint32_t mask_width_;
// Mask dimensions
uint32_t mask_height_;
// Randomize input data
unsigned randomize_seed_;
// Input data
static const uint32_t input_data_[];
};
#endif // TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_
+154
Voir le fichier
@@ -0,0 +1,154 @@
module &m:1:0:$full:$large:$default;
extension "amd:gcn";
extension "IMAGE";
decl prog function &abort()();
prog kernel &__OpenCL_SimpleConvolution(kernarg_u64 %__global_offset_0,
kernarg_u64 %output,
kernarg_u64 %input,
kernarg_u64 %mask,
kernarg_u32 %inputDimensions[2],
kernarg_u32 %maskDimensions[2]) {
pragma "AMD RTI", "ARGSTART:__OpenCL_SimpleConvolution";
pragma "AMD RTI", "version:3:1:104";
pragma "AMD RTI", "device:generic";
pragma "AMD RTI", "uniqueid:1024";
pragma "AMD RTI", "memory:private:0";
pragma "AMD RTI", "memory:region:0";
pragma "AMD RTI", "memory:local:0";
pragma "AMD RTI", "value:__global_offset_0:u64:1:1:0";
pragma "AMD RTI", "pointer:output:u32:1:1:96:uav:7:4:RW:0:0:0";
pragma "AMD RTI", "pointer:input:u32:1:1:112:uav:7:4:RW:0:0:0";
pragma "AMD RTI", "pointer:mask:float:1:1:128:uav:7:4:RW:0:0:0";
pragma "AMD RTI", "value:inputDimensions:u32:2:1:144";
pragma "AMD RTI", "constarg:4:inputDimensions";
pragma "AMD RTI", "value:maskDimensions:u32:2:1:160";
pragma "AMD RTI", "constarg:5:maskDimensions";
pragma "AMD RTI", "function:1:0";
pragma "AMD RTI", "memory:64bitABI";
pragma "AMD RTI", "privateid:8";
pragma "AMD RTI", "enqueue_kernel:0";
pragma "AMD RTI", "kernel_index:0";
pragma "AMD RTI", "reflection:0:size_t";
pragma "AMD RTI", "reflection:1:uint*";
pragma "AMD RTI", "reflection:2:uint*";
pragma "AMD RTI", "reflection:3:float*";
pragma "AMD RTI", "reflection:4:uint2";
pragma "AMD RTI", "reflection:5:uint2";
pragma "AMD RTI", "ARGEND:__OpenCL_SimpleConvolution";
@__OpenCL_SimpleConvolution_Entry:
// BB#0: // %entry
workitemabsid_u32 $s6, 0;
cvt_u64_u32 $d0, $s6;
ld_kernarg_align(8)_width(all)_u64 $d4, [%__global_offset_0];
add_u64 $d0, $d0, $d4;
cvt_u32_u64 $s5, $d0;
ld_v2_kernarg_align(4)_width(all)_u32 ($s0, $s4), [%inputDimensions];
ld_v2_kernarg_align(4)_width(all)_u32 ($s1, $s9), [%maskDimensions];
rem_u32 $s7, $s5, $s0;
add_u32 $s2, $s1, 4294967295;
shr_u32 $s8, $s2, 1;
add_u32 $s2, $s7, $s8;
add_u32 $s3, $s0, 4294967295;
cmp_ge_b1_u32 $c0, $s2, $s0;
cmov_b32 $s2, $c0, $s3, $s2;
sub_u32 $s3, $s7, $s8;
cmp_lt_b1_u32 $c0, $s7, $s8;
cmov_b32 $s3, $c0, 0, $s3;
ld_kernarg_align(8)_width(all)_u64 $d1, [%output];
cmp_le_b1_u32 $c0, $s3, $s2;
cbr_b1 $c0, @BB0_2;
// BB#1:
mov_b32 $s6, 0;
br @BB0_6;
// @BB0_2: // %for.cond32.preheader.lr.ph
@BB0_2:
div_u32 $s5, $s5, $s0;
add_u32 $s9, $s9, 4294967295;
shr_u32 $s9, $s9, 1;
add_u32 $s10, $s5, $s9;
add_u32 $s11, $s4, 4294967295;
cmp_ge_b1_u32 $c0, $s10, $s4;
cmov_b32 $s4, $c0, $s11, $s10;
sub_u32 $s10, $s5, $s9;
cmp_lt_b1_u32 $c0, $s5, $s9;
cmov_b32 $s5, $c0, 0, $s10;
ld_kernarg_align(8)_width(all)_u64 $d2, [%mask];
ld_kernarg_align(8)_width(all)_u64 $d3, [%input];
cvt_u64_u32 $d5, $s6;
add_u64 $d4, $d4, $d5;
cvt_u32_u64 $s6, $d4;
div_u32 $s6, $s6, $s0;
max_u32 $s10, $s9, $s6;
sub_u32 $s12, $s10, $s6;
max_u32 $s11, $s7, $s8;
mov_b32 $s6, 0;
mad_u32 $s12, $s1, $s12, $s11;
sub_u32 $s7, $s12, $s7;
sub_u32 $s9, $s10, $s9;
mad_u32 $s9, $s0, $s9, $s11;
sub_u32 $s8, $s9, $s8;
// @BB0_3: // %for.cond32.preheader
@BB0_3:
cmp_gt_b1_u32 $c0, $s5, $s4;
mov_b32 $s9, $s7;
mov_b32 $s10, $s8;
mov_b32 $s11, $s5;
cbr_b1 $c0, @BB0_5;
// @BB0_4: // %for.body35
@BB0_4:
cvt_u64_u32 $d4, $s9;
shl_u64 $d4, $d4, 2;
add_u64 $d4, $d2, $d4;
ld_global_align(4)_f32 $s12, [$d4];
cvt_u64_u32 $d4, $s10;
shl_u64 $d4, $d4, 2;
add_u64 $d4, $d3, $d4;
ld_global_align(4)_u32 $s13, [$d4];
cvt_f32_u32 $s13, $s13;
mul_ftz_f32 $s12, $s13, $s12;
add_u32 $s9, $s9, $s1;
add_u32 $s10, $s10, $s0;
add_u32 $s11, $s11, 1;
add_ftz_f32 $s6, $s6, $s12;
cmp_le_b1_u32 $c0, $s11, $s4;
cbr_b1 $c0, @BB0_4;
// @BB0_5: // %for.inc48
@BB0_5:
add_u32 $s7, $s7, 1;
add_u32 $s8, $s8, 1;
add_u32 $s3, $s3, 1;
cmp_le_b1_u32 $c0, $s3, $s2;
cbr_b1 $c0, @BB0_3;
// @BB0_6: // %for.end50
@BB0_6:
and_b64 $d0, $d0, 4294967295;
shl_u64 $d0, $d0, 2;
add_u64 $d0, $d1, $d0;
add_ftz_f32 $s0, $s6, 0F3f000000;
cvt_ftz_u32_f32 $s0, $s0;
st_global_align(4)_u32 $s0, [$d0];
ret;
};
+87
Voir le fichier
@@ -0,0 +1,87 @@
/**********************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:
• Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
• Redistributions in binary form must reproduce the above copyright notice, this list of
conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#ifndef TEST_UTIL_HELPER_FUNCS_H_
#define TEST_UTIL_HELPER_FUNCS_H_
#include <time.h>
#include <cmath>
#include <iostream>
#include <sstream>
#include <string>
static inline void Error(std::string error_msg) {
std::cerr << "Error: " << error_msg << std::endl;
}
template <typename T>
void PrintArray(const std::string header, const T* data, const int width, const int height) {
std::clog << header << " :\n";
for (int i = 0; i < height; i++) {
std::clog << "> ";
for (int j = 0; j < width; j++) {
std::clog << data[i * width + j] << " ";
}
std::clog << "\n";
}
}
template <typename T>
bool FillRandom(T* array_ptr, const int width, const int height, const T range_min,
const T range_max, unsigned int seed = 123) {
if (!array_ptr) {
Error("Cannot fill array. NULL pointer.");
return false;
}
if (!seed) seed = (unsigned int)time(NULL);
srand(seed);
double range = double(range_max - range_min) + 1.0;
/* random initialisation of input */
for (int i = 0; i < height; i++)
for (int j = 0; j < width; j++) {
int index = i * width + j;
array_ptr[index] = range_min + T(range * rand() / (RAND_MAX + 1.0));
}
return true;
}
template <typename T> T RoundToPowerOf2(T val) {
int bytes = sizeof(T);
val--;
for (int i = 0; i < bytes; i++) val |= val >> (1 << i);
val++;
return val;
}
template <typename T> bool IsPowerOf2(T val) {
long long long_val = val;
return (((long_val & (-long_val)) - long_val == 0) && (long_val != 0));
}
#endif // TEST_UTIL_HELPER_FUNCS_H_
+372
Voir le fichier
@@ -0,0 +1,372 @@
/**********************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:
<95> Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
<95> Redistributions in binary form must reproduce the above copyright notice, this list of
conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#include "util/hsa_rsrc_factory.h"
#include <hsa.h>
#include <hsa_ext_finalize.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <cassert>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
// Callback function to find and bind kernarg region of an agent
static hsa_status_t FindMemRegionsCallback(hsa_region_t region, void* data) {
hsa_region_global_flag_t flags;
hsa_region_segment_t segment_id;
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id);
if (segment_id != HSA_REGION_SEGMENT_GLOBAL) {
return HSA_STATUS_SUCCESS;
}
AgentInfo* agent_info = (AgentInfo*)data;
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) {
agent_info->coarse_region = region;
}
if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) {
agent_info->kernarg_region = region;
}
return HSA_STATUS_SUCCESS;
}
// Callback function to get the number of agents
static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data) {
// Copy handle of agent and increment number of agents reported
HsaRsrcFactory* rsrcFactory = reinterpret_cast<HsaRsrcFactory*>(data);
// Determine if device is a Gpu agent
hsa_status_t status;
hsa_device_type_t type;
status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
CHECK_STATUS("Error Calling hsa_agent_get_info", status);
if (type == HSA_DEVICE_TYPE_DSP) {
return HSA_STATUS_SUCCESS;
}
if (type == HSA_DEVICE_TYPE_CPU) {
AgentInfo* agent_info = reinterpret_cast<AgentInfo*>(malloc(sizeof(AgentInfo)));
agent_info->dev_id = agent;
agent_info->dev_type = HSA_DEVICE_TYPE_CPU;
rsrcFactory->AddAgentInfo(agent_info, false);
return HSA_STATUS_SUCCESS;
}
// Device is a Gpu agent, build an instance of AgentInfo
AgentInfo* agent_info = reinterpret_cast<AgentInfo*>(malloc(sizeof(AgentInfo)));
agent_info->dev_id = agent;
agent_info->dev_type = HSA_DEVICE_TYPE_GPU;
hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name);
agent_info->max_wave_size = 0;
hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size);
agent_info->max_queue_size = 0;
hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);
agent_info->profile = hsa_profile_t(108);
hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile);
// Initialize memory regions to zero
agent_info->kernarg_region.handle = 0;
agent_info->coarse_region.handle = 0;
// Find and Bind Memory regions of the Gpu agent
hsa_agent_iterate_regions(agent, FindMemRegionsCallback, agent_info);
// Save the instance of AgentInfo
rsrcFactory->AddAgentInfo(agent_info, true);
return HSA_STATUS_SUCCESS;
}
// Constructor of the class
HsaRsrcFactory::HsaRsrcFactory() {
// Initialize the Hsa Runtime
hsa_status_t status = hsa_init();
CHECK_STATUS("Error in hsa_init", status);
// Discover the set of Gpu devices available on the platform
status = hsa_iterate_agents(GetHsaAgentsCallback, this);
CHECK_STATUS("Error Calling hsa_iterate_agents", status);
}
// Destructor of the class
HsaRsrcFactory::~HsaRsrcFactory() {
hsa_status_t status = hsa_shut_down();
CHECK_STATUS("Error in hsa_shut_down", status);
}
// Get the count of Hsa Gpu Agents available on the platform
//
// @return uint32_t Number of Gpu agents on platform
//
uint32_t HsaRsrcFactory::GetCountOfGpuAgents() { return uint32_t(gpu_list_.size()); }
// Get the count of Hsa Cpu Agents available on the platform
//
// @return uint32_t Number of Cpu agents on platform
//
uint32_t HsaRsrcFactory::GetCountOfCpuAgents() { return uint32_t(cpu_list_.size()); }
// Get the AgentInfo handle of a Gpu device
//
// @param idx Gpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, AgentInfo** agent_info) {
// Determine if request is valid
uint32_t size = uint32_t(gpu_list_.size());
if (idx >= size) {
return false;
}
// Copy AgentInfo from specified index
*agent_info = gpu_list_[idx];
return true;
}
// Get the AgentInfo handle of a Cpu device
//
// @param idx Cpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, AgentInfo** agent_info) {
// Determine if request is valid
uint32_t size = uint32_t(cpu_list_.size());
if (idx >= size) {
return false;
}
// Copy AgentInfo from specified index
*agent_info = cpu_list_[idx];
return true;
}
// Create a Queue object and return its handle. The queue object is expected
// to support user requested number of Aql dispatch packets.
//
// @param agent_info Gpu Agent on which to create a queue object
//
// @param num_Pkts Number of packets to be held by queue
//
// @param queue Output parameter updated with handle of queue object
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::CreateQueue(AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue) {
hsa_status_t status;
status = hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
UINT32_MAX, UINT32_MAX, queue);
return (status == HSA_STATUS_SUCCESS);
}
// Create a Signal object and return its handle.
//
// @param value Initial value of signal object
//
// @param signal Output parameter updated with handle of signal object
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) {
hsa_status_t status;
status = hsa_signal_create(value, 0, NULL, signal);
return (status == HSA_STATUS_SUCCESS);
}
// Allocate memory for use by a kernel of specified size in specified
// agent's memory region. Currently supports Global segment whose Kernarg
// flag set.
//
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) {
hsa_status_t status;
uint8_t* buffer = NULL;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
if (agent_info->coarse_region.handle != 0) {
// Allocate in local memory if it is available
status = hsa_memory_allocate(agent_info->coarse_region, size, (void**)&buffer);
if (status == HSA_STATUS_SUCCESS) {
status = hsa_memory_assign_agent(buffer, agent_info->dev_id, HSA_ACCESS_PERMISSION_RW);
}
} else {
// Allocate in system memory if local memory is not available
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
}
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
}
// Allocate memory tp pass kernel parameters.
//
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) {
hsa_status_t status;
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
uint8_t* buffer = NULL;
status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer);
return (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
}
// Transfer data method
bool HsaRsrcFactory::TransferData(void* dest_buff, void* src_buff, uint32_t length,
bool host_to_dev) {
hsa_status_t status;
status = hsa_memory_copy(dest_buff, src_buff, length);
return (status == HSA_STATUS_SUCCESS);
}
// Loads an Assembled Brig file and Finalizes it into Device Isa
//
// @param agent_info Gpu device for which to finalize
//
// @param brig_path File path of the Assembled Brig file
//
// @param kernel_name Name of the kernel to finalize
//
// @param code_desc Handle of finalized Code Descriptor that could
// be used to submit for execution
//
// @return bool true if successful, false otherwise
//
bool HsaRsrcFactory::LoadAndFinalize(AgentInfo* agent_info, const char* brig_path,
char* kernel_name, hsa_executable_symbol_t* code_desc) {
// Finalize the Hsail object into code object
hsa_status_t status;
hsa_code_object_t code_object;
// Build the code object filename
std::string filename(brig_path);
std::clog << "Code object filename: " << filename << std::endl;
// Open the file containing code object
std::ifstream codeStream(filename.c_str(), std::ios::binary | std::ios::ate);
if (!codeStream) {
std::cerr << "Error: failed to load " << filename << std::endl;
assert(false);
return false;
}
// Allocate memory to read in code object from file
size_t size = std::string::size_type(codeStream.tellg());
char* codeBuff = (char*)AllocateSysMemory(agent_info, size);
if (!codeBuff) {
std::cerr << "Error: failed to allocate memory for code object." << std::endl;
assert(false);
return false;
}
// Read the code object into allocated memory
codeStream.seekg(0, std::ios::beg);
std::copy(std::istreambuf_iterator<char>(codeStream), std::istreambuf_iterator<char>(), codeBuff);
// De-Serialize the code object that has been read into memory
status = hsa_code_object_deserialize(codeBuff, size, NULL, &code_object);
if (status != HSA_STATUS_SUCCESS) {
std::cerr << "Failed to deserialize code object" << std::endl;
return false;
}
// Create executable.
hsa_executable_t hsaExecutable;
status =
hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, "", &hsaExecutable);
CHECK_STATUS("Error in creating executable object", status);
// Load code object.
status = hsa_executable_load_code_object(hsaExecutable, agent_info->dev_id, code_object, "");
CHECK_STATUS("Error in loading executable object", status);
// Freeze executable.
status = hsa_executable_freeze(hsaExecutable, "");
CHECK_STATUS("Error in freezing executable object", status);
// Get symbol handle.
hsa_executable_symbol_t kernelSymbol;
status = hsa_executable_get_symbol(hsaExecutable, NULL, kernel_name, agent_info->dev_id, 0,
&kernelSymbol);
CHECK_STATUS("Error in looking up kernel symbol", status);
// Update output parameter
*code_desc = kernelSymbol;
return true;
}
// Add an instance of AgentInfo representing a Hsa Gpu agent
void HsaRsrcFactory::AddAgentInfo(AgentInfo* agent_info, bool gpu) {
// Add input to Gpu list
if (gpu) {
gpu_list_.push_back(agent_info);
return;
}
// Add input to Cpu list
cpu_list_.push_back(agent_info);
}
// Print the various fields of Hsa Gpu Agents
bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) {
std::clog << header << " :" << std::endl;
AgentInfo* agent_info;
int size = uint32_t(gpu_list_.size());
for (int idx = 0; idx < size; idx++) {
agent_info = gpu_list_[idx];
std::clog << "> agent[" << idx << "] :" << std::endl;
std::clog << ">> Name : " << agent_info->name << std::endl;
std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl;
std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl;
std::clog << ">> Kernarg Region Id : " << agent_info->coarse_region.handle << std::endl;
}
return true;
}
HsaRsrcFactory* HsaRsrcFactory::instance_ = NULL;
HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_;
+234
Voir le fichier
@@ -0,0 +1,234 @@
/**********************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:
<95> Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
<95> Redistributions in binary form must reproduce the above copyright notice, this list of
conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#ifndef TEST_UTIL_HSA_RSRC_FACTORY_H_
#define TEST_UTIL_HSA_RSRC_FACTORY_H_
#include <hsa.h>
#include <hsa_ext_finalize.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <mutex>
#include <string>
#include <vector>
#define HSA_ARGUMENT_ALIGN_BYTES 16
#define HSA_QUEUE_ALIGN_BYTES 64
#define HSA_PACKET_ALIGN_BYTES 64
#define CHECK_STATUS(msg, status) \
if (status != HSA_STATUS_SUCCESS) { \
const char* emsg = 0; \
hsa_status_string(status, &emsg); \
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
exit(1); \
}
static const unsigned MEM_PAGE_BYTES = 0x1000;
static const unsigned MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
// Encapsulates information about a Hsa Agent such as its
// handle, name, max queue size, max wavefront size, etc.
typedef struct {
// Handle of Agent
hsa_agent_t dev_id;
// Agent type - Cpu = 0, Gpu = 1 or Dsp = 2
uint32_t dev_type;
// Name of Agent whose length is less than 64
char name[64];
// Max size of Wavefront size
uint32_t max_wave_size;
// Max size of Queue buffer
uint32_t max_queue_size;
// Hsail profile supported by agent
hsa_profile_t profile;
// Memory region supporting kernel parameters
hsa_region_t coarse_region;
// Memory region supporting kernel arguments
hsa_region_t kernarg_region;
} AgentInfo;
class HsaRsrcFactory {
public:
typedef std::recursive_mutex mutex_t;
static HsaRsrcFactory* Create() {
std::lock_guard<mutex_t> lck(mutex_);
if (HsaRsrcFactory::instance_ == NULL) {
HsaRsrcFactory::instance_ = new HsaRsrcFactory();
}
return instance_;
}
static void Destroy() {
std::lock_guard<mutex_t> lck(mutex_);
if (instance_) delete instance_;
instance_ = NULL;
}
static HsaRsrcFactory& Instance() {
hsa_status_t status = (instance_ != NULL) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
CHECK_STATUS("HsaRsrcFactory::Instance()", status);
return *instance_;
}
// Get the count of Hsa Gpu Agents available on the platform
//
// @return uint32_t Number of Gpu agents on platform
//
uint32_t GetCountOfGpuAgents();
// Get the count of Hsa Cpu Agents available on the platform
//
// @return uint32_t Number of Cpu agents on platform
//
uint32_t GetCountOfCpuAgents();
// Get the AgentInfo handle of a Gpu device
//
// @param idx Gpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool GetGpuAgentInfo(uint32_t idx, AgentInfo** agent_info);
// Get the AgentInfo handle of a Cpu device
//
// @param idx Cpu Agent at specified index
//
// @param agent_info Output parameter updated with AgentInfo
//
// @return bool true if successful, false otherwise
//
bool GetCpuAgentInfo(uint32_t idx, AgentInfo** agent_info);
// Create a Queue object and return its handle. The queue object is expected
// to support user requested number of Aql dispatch packets.
//
// @param agent_info Gpu Agent on which to create a queue object
//
// @param num_Pkts Number of packets to be held by queue
//
// @param queue Output parameter updated with handle of queue object
//
// @return bool true if successful, false otherwise
//
bool CreateQueue(AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue);
// Create a Signal object and return its handle.
//
// @param value Initial value of signal object
//
// @param signal Output parameter updated with handle of signal object
//
// @return bool true if successful, false otherwise
//
bool CreateSignal(uint32_t value, hsa_signal_t* signal);
// Allocate memory for use by a kernel of specified size in specified
// agent's memory region. Currently supports Global segment whose Kernarg
// flag set.
//
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size);
// Allocate memory tp pass kernel parameters.
//
// @param agent_info Agent from whose memory region to allocate
//
// @param size Size of memory in terms of bytes
//
// @return uint8_t* Pointer to buffer, null if allocation fails.
//
uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size);
// Transfer data method
bool TransferData(void* dest_buff, void* src_buff, uint32_t length, bool host_to_dev);
// Loads an Assembled Brig file and Finalizes it into Device Isa
//
// @param agent_info Gpu device for which to finalize
//
// @param brig_path File path of the Assembled Brig file
//
// @param kernel_name Name of the kernel to finalize
//
// @param code_desc Handle of finalized Code Descriptor that could
// be used to submit for execution
//
// @return bool true if successful, false otherwise
//
bool LoadAndFinalize(AgentInfo* agent_info, const char* brig_path, char* kernel_name,
hsa_executable_symbol_t* code_desc);
// Add an instance of AgentInfo representing a Hsa Gpu agent
void AddAgentInfo(AgentInfo* agent_info, bool gpu);
// Print the various fields of Hsa Gpu Agents
bool PrintGpuAgents(const std::string& header);
private:
// Constructor of the class. Will initialize the Hsa Runtime and
// query the system topology to get the list of Cpu and Gpu devices
HsaRsrcFactory();
// Destructor of the class
~HsaRsrcFactory();
static HsaRsrcFactory* instance_;
static mutex_t mutex_;
// Used to maintain a list of Hsa Queue handles
std::vector<hsa_queue_t*> queue_list_;
// Used to maintain a list of Hsa Signal handles
std::vector<hsa_signal_t*> signal_list_;
// Used to maintain a list of Hsa Gpu Agent Info
std::vector<AgentInfo*> gpu_list_;
// Used to maintain a list of Hsa Cpu Agent Info
std::vector<AgentInfo*> cpu_list_;
};
#endif // TEST_UTIL_HSA_RSRC_FACTORY_H_
+181
Voir le fichier
@@ -0,0 +1,181 @@
/**********************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:
<95> Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
<95> Redistributions in binary form must reproduce the above copyright notice, this list of
conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#include "util/perf_timer.h"
PerfTimer::PerfTimer() { freq_in_100mhz_ = MeasureTSCFreqHz(); }
PerfTimer::~PerfTimer() {
while (!timers_.empty()) {
Timer* temp = timers_.back();
timers_.pop_back();
delete temp;
}
}
// New cretaed timer instantance index will be returned
int PerfTimer::CreateTimer() {
Timer* newTimer = new Timer;
newTimer->start = 0;
newTimer->clocks = 0;
#ifdef _WIN32
QueryPerformanceFrequency((LARGE_INTEGER*)&newTimer->freq);
#else
newTimer->freq = (long long)1.0E3;
#endif
/* Push back the address of new Timer instance created */
timers_.push_back(newTimer);
return (int)(timers_.size() - 1);
}
int PerfTimer::StartTimer(int index) {
if (index >= (int)timers_.size()) {
Error("Cannot reset timer. Invalid handle.");
return FAILURE;
}
#ifdef _WIN32
// General Windows timing method
#ifndef _AMD
long long tmpStart;
QueryPerformanceCounter((LARGE_INTEGER*)&(tmpStart));
timers_[index]->start = (double)tmpStart;
#else
// AMD Windows timing method
#endif
#else
// General Linux timing method
#ifndef _AMD
struct timeval s;
gettimeofday(&s, 0);
timers_[index]->start = s.tv_sec * 1.0E3 + ((double)(s.tv_usec / 1.0E3));
#else
// AMD timing method
unsigned int unused;
timers_[index]->start = __rdtscp(&unused);
#endif
#endif
return SUCCESS;
}
int PerfTimer::StopTimer(int index) {
double n = 0;
if (index >= (int)timers_.size()) {
Error("Cannot reset timer. Invalid handle.");
return FAILURE;
}
#ifdef _WIN32
#ifndef _AMD
long long n1;
QueryPerformanceCounter((LARGE_INTEGER*)&(n1));
n = (double)n1;
#else
// AMD Window Timing
#endif
#else
// General Linux timing method
#ifndef _AMD
struct timeval s;
gettimeofday(&s, 0);
n = s.tv_sec * 1.0E3 + (double)(s.tv_usec / 1.0E3);
#else
// AMD Linux timing
unsigned int unused;
n = __rdtscp(&unused);
#endif
#endif
n -= timers_[index]->start;
timers_[index]->start = 0;
#ifndef _AMD
timers_[index]->clocks += n;
#else
// timers_[index]->clocks += 10 * n / freq_in_100mhz_; // unit is ns
timers_[index]->clocks += 1.0E-6 * 10 * n / freq_in_100mhz_; // convert to ms
#endif
return SUCCESS;
}
void PerfTimer::Error(std::string str) { std::cout << str << std::endl; }
double PerfTimer::ReadTimer(int index) {
if (index >= (int)timers_.size()) {
Error("Cannot read timer. Invalid handle.");
return FAILURE;
}
double reading = double(timers_[index]->clocks);
reading = double(reading / timers_[index]->freq);
return reading;
}
uint64_t PerfTimer::CoarseTimestampUs() {
#ifdef _WIN32
uint64_t freqHz, ticks;
QueryPerformanceFrequency((LARGE_INTEGER*)&freqHz);
QueryPerformanceCounter((LARGE_INTEGER*)&ticks);
// Scale numerator and divisor until (ticks * 1000000) fits in uint64_t.
while (ticks > (1ULL << 44)) {
ticks /= 16;
freqHz /= 16;
}
return (ticks * 1000000) / freqHz;
#else
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
return uint64_t(ts.tv_sec) * 1000000 + ts.tv_nsec / 1000;
#endif
}
uint64_t PerfTimer::MeasureTSCFreqHz() {
// Make a coarse interval measurement of TSC ticks for 1 gigacycles.
unsigned int unused;
uint64_t tscTicksEnd;
uint64_t coarseBeginUs = CoarseTimestampUs();
uint64_t tscTicksBegin = __rdtscp(&unused);
do {
tscTicksEnd = __rdtscp(&unused);
} while (tscTicksEnd - tscTicksBegin < 1000000000);
uint64_t coarseEndUs = CoarseTimestampUs();
// Compute the TSC frequency and round to nearest 100MHz.
uint64_t coarseIntervalNs = (coarseEndUs - coarseBeginUs) * 1000;
uint64_t tscIntervalTicks = tscTicksEnd - tscTicksBegin;
return (tscIntervalTicks * 10 + (coarseIntervalNs / 2)) / coarseIntervalNs;
}
+85
Voir le fichier
@@ -0,0 +1,85 @@
/**********************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:
<95> Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
<95> Redistributions in binary form must reproduce the above copyright notice, this list of
conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
********************************************************************/
#ifndef TEST_UTIL_PERF_TIMER_H_
#define TEST_UTIL_PERF_TIMER_H_
// Will use AMD timer or general Linux timer based on compilation flag
// Need to consider platform is Windows or Linux
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#if defined(_MSC_VER)
#include <intrin.h>
#include <time.h>
#include <windows.h>
#else
#if defined(__GNUC__)
#include <sys/time.h>
#include <x86intrin.h>
#endif // __GNUC__
#endif // _MSC_VER
#include <iostream>
#include <string>
#include <vector>
class PerfTimer {
public:
enum { SUCCESS = 0, FAILURE = 1 };
PerfTimer();
~PerfTimer();
// General Linux timing method
int CreateTimer();
int StartTimer(int index);
int StopTimer(int index);
// retrieve time
double ReadTimer(int index);
// write into a file
double WriteTimer(int index);
private:
struct Timer {
std::string name; /* name of time object */
long long freq; /* frequency */
double clocks; /* number of ticks at end */
double start; /* start point ticks */
};
std::vector<Timer*> timers_; /* vector to Timer objects */
double freq_in_100mhz_;
// AMD timing method
uint64_t CoarseTimestampUs();
uint64_t MeasureTSCFreqHz();
void Error(std::string str);
};
#endif // TEST_UTIL_PERF_TIMER_H_
+52
Voir le fichier
@@ -0,0 +1,52 @@
/******************************************************************************
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list
of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#ifndef TEST_CTRL_TEST_ASSERT_H_
#define TEST_CTRL_TEST_ASSERT_H_
#define TEST_ASSERT(cond) \
{ \
if (!(cond)) { \
std::cerr << "Assert failed(" << #cond << ") at " << __FILE__ << ", line " << __LINE__ \
<< std::endl; \
exit(-1); \
} \
}
#define TEST_STATUS(cond) \
{ \
if (!(cond)) { \
std::cerr << "Test error at " << __FILE__ << ", line " << __LINE__ \
<< std::endl; \
const char* message; \
rocprofiler_error_string(&message); \
std::cerr << "ERROR: " << message << std::endl; \
exit(-1); \
} \
}
#endif // TEST_CTRL_TEST_ASSERT_H_
+221
Voir le fichier
@@ -0,0 +1,221 @@
#ifndef TEST_UTIL_XML_H_
#define TEST_UTIL_XML_H_
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <fstream>
#include <iostream>
#include <map>
#include <vector>
namespace xml {
class Xml {
public:
typedef std::vector<char> token_t;
struct level_t {
std::string tag;
std::vector<level_t*> nodes;
std::map<std::string, std::string> opts;
};
typedef std::vector<level_t*> nodes_vec_t;
enum {
DECL_STATE,
BODY_STATE
};
Xml(const char* file_name) :
file_name_(file_name),
file_line_(0),
data_size_(0),
index_(0),
state_(BODY_STATE),
level_(NULL),
comment_(false)
{
AddLevel("top");
fd_ = open(file_name, O_RDONLY);
if (fd_ == -1) {
std::cout << "XML file not found: " << file_name << std::endl;
return;
}
token_t remainder;
while (1) {
token_t token = (remainder.size()) ? remainder : NextToken();
remainder.clear();
// End of file
if (token.size() == 0) break;
// token_t token1 = token;
// token1.push_back('\0');
// std::cout << "> " << &token1[0] << std::endl;
switch(state_) {
case BODY_STATE:
if (token[0] == '<') {
bool node_begin = true;
unsigned ind = 1;
if (token[1] == '/') {
node_begin = false;
++ind;
}
unsigned i = ind;
while (i < token.size()) { if (token[i] == '>') break; ++i; }
for (unsigned j = i + 1; j < token.size(); ++j) remainder.push_back(token[j]);
if (i == token.size()) {
if (node_begin) state_ = DECL_STATE;
else BadFormat(token);
token.push_back('\0');
} else token[i] = '\0';
const char* tag = strdup(&token[ind]);
if (node_begin) {
AddLevel(tag);
} else {
if (strncmp(CurrentLevel().c_str(), tag, strlen(tag))) {
token.back() = '>';
BadFormat(token);
}
UpLevel();
}
} else BadFormat(token);
break;
case DECL_STATE:
if (token[0] == '>') {
state_ = BODY_STATE;
for (unsigned j = 1; j < token.size(); ++j) remainder.push_back(token[j]);
continue;
} else {
token.push_back('\0');
unsigned j = 0;
for (j = 0; j < token.size(); ++j) if (token[j] == '=') break;
if (j == token.size()) BadFormat(token);
token[j] = '\0';
const char* key = &token[0];
const char* value = &token[j + 1];
AddOption(key, value);
}
break;
default:
std::cout << "Wrong state: " << state_ << std::endl;
exit(1);
}
}
}
std::vector<level_t*> GetNodes(std::string global_tag) {
return map_[global_tag];
}
void Print() const {
for(auto& elem : map_) {
for (auto node : elem.second) {
if (node->opts.size()) {
std::cout << elem.first << ":" << std::endl;
for (auto& opt : node->opts) {
std::cout << " " << opt.first << " = " << opt.second << std::endl;
}
}
}
}
}
private:
bool LineEndCheck() {
bool found = false;
if (buffer_[index_] == '\n') {
buffer_[index_] = ' ';
++file_line_;
found = true;
comment_ = false;
} else if (comment_ || (buffer_[index_] == '#')) {
found = true;
comment_ = true;
}
return found;
}
token_t NextToken() {
token_t token;
while (1) {
if (data_size_ == 0) {
data_size_ = read(fd_, buffer_, buf_size_);
if (data_size_ <= 0) break;
}
if (token.empty()) while ((index_ < data_size_) && ((buffer_[index_] == ' ') || LineEndCheck())) {
++index_;
}
while ((index_ < data_size_) && (buffer_[index_] != ' ') && !LineEndCheck()) {
token.push_back(buffer_[index_++]);
}
if (index_ == data_size_) {
index_ = 0;
data_size_ = 0;
} else break;
}
return token;
}
void BadFormat(token_t token) {
token.push_back('\0');
std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '" << &token[0] << "'" << std::endl;
exit(1);
}
void AddLevel(const std::string& tag) {
level_t* level = new level_t;
level->tag = tag;
if (level_) {
level_->nodes.push_back(level);
stack_.push_back(level_);
}
level_ = level;
std::string global_tag;
for (level_t* level : stack_) { global_tag += level->tag + "."; }
global_tag += tag;
map_[global_tag].push_back(level_);
}
void UpLevel() {
level_ = stack_.back();
stack_.pop_back();
}
std::string CurrentLevel() const {
return level_->tag;
}
void AddOption(const std::string& key, const std::string& value) {
level_->opts[key] = value;
}
const char* file_name_;
unsigned file_line_;
int fd_;
static const unsigned buf_size_ = 256;
char buffer_[buf_size_];
unsigned data_size_;
unsigned index_;
unsigned state_;
level_t* level_;
std::vector<level_t*> stack_;
std::map<std::string, nodes_vec_t> map_;
bool comment_;
};
} // namespace xml
#endif // TEST_UTIL_XML_H_