From 85278f08a0f9ca7264d8aef4cc04ca94061642b9 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 9 Nov 2017 17:26:19 -0600 Subject: [PATCH] ROC profiler prototype sources importing --- CMakeLists.txt | 100 +++++ DEBIAN/postinst | 19 + DEBIAN/prerm | 18 + LICENSE.md | 39 ++ RPM/rpm_post | 1 + RPM/rpm_postun | 1 + Readme.txt | 50 +++ _clang-format | 60 +++ cmake_modules/env.cmake | 112 +++++ cmake_modules/utils.cmake | 116 +++++ doc/ROC_profiler_spec_v_1_2_0.docx | Bin 0 -> 91500 bytes inc/rocprofiler.h | 272 ++++++++++++ src/CMakeLists.txt | 14 + src/core/context.h | 399 ++++++++++++++++++ src/core/hsa_proxy_queue.h | 49 +++ src/core/hsa_queue.h | 60 +++ src/core/intercept_queue.cpp | 15 + src/core/intercept_queue.h | 161 +++++++ src/core/metrics.h | 169 ++++++++ src/core/profile.h | 206 +++++++++ src/core/proxy_queue.cpp | 48 +++ src/core/proxy_queue.h | 70 +++ src/core/queue.h | 20 + src/core/rocprofiler.cpp | 251 +++++++++++ src/core/simple_proxy_queue.cpp | 11 + src/core/simple_proxy_queue.h | 187 ++++++++ src/core/types.h | 15 + src/util/exception.h | 39 ++ src/util/hsa_rsrc_factory.cpp | 392 +++++++++++++++++ src/util/hsa_rsrc_factory.h | 262 ++++++++++++ src/util/logger.h | 163 +++++++ src/xml/expr.h | 279 ++++++++++++ src/xml/xml.h | 221 ++++++++++ test/CMakeLists.txt | 41 ++ test/ctrl/run_kernel.h | 85 ++++ test/ctrl/standalone_test.cpp | 126 ++++++ test/ctrl/test.cpp | 14 + test/ctrl/test_aql.h | 80 ++++ test/ctrl/test_hsa.cpp | 252 +++++++++++ test/ctrl/test_hsa.h | 125 ++++++ test/ctrl/test_kernel.h | 107 +++++ test/ctrl/test_pgen.h | 45 ++ test/ctrl/test_pgen_rocp.h | 78 ++++ test/ctrl/test_pmgr.cpp | 144 +++++++ test/ctrl/test_pmgr.h | 70 +++ test/ctrl/thr_tool.cpp | 297 +++++++++++++ test/ctrl/tool.cpp | 313 ++++++++++++++ test/input.xml | 7 + test/metrics.xml | 15 + test/run.sh | 27 ++ .../gfx8_SimpleConvolution.hsaco | Bin 0 -> 9392 bytes .../gfx9_SimpleConvolution.hsaco | Bin 0 -> 9392 bytes test/simple_convolution/simple_convolution.cl | 81 ++++ .../simple_convolution/simple_convolution.cpp | 390 +++++++++++++++++ test/simple_convolution/simple_convolution.h | 96 +++++ .../simple_convolution.hsail | 154 +++++++ test/util/helper_funcs.h | 87 ++++ test/util/hsa_rsrc_factory.cpp | 372 ++++++++++++++++ test/util/hsa_rsrc_factory.h | 234 ++++++++++ test/util/perf_timer.cpp | 181 ++++++++ test/util/perf_timer.h | 85 ++++ test/util/test_assert.h | 52 +++ test/util/xml.h | 221 ++++++++++ 63 files changed, 7598 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 DEBIAN/postinst create mode 100644 DEBIAN/prerm create mode 100644 LICENSE.md create mode 100644 RPM/rpm_post create mode 100644 RPM/rpm_postun create mode 100644 Readme.txt create mode 100644 _clang-format create mode 100644 cmake_modules/env.cmake create mode 100644 cmake_modules/utils.cmake create mode 100644 doc/ROC_profiler_spec_v_1_2_0.docx create mode 100644 inc/rocprofiler.h create mode 100644 src/CMakeLists.txt create mode 100644 src/core/context.h create mode 100644 src/core/hsa_proxy_queue.h create mode 100644 src/core/hsa_queue.h create mode 100644 src/core/intercept_queue.cpp create mode 100644 src/core/intercept_queue.h create mode 100644 src/core/metrics.h create mode 100644 src/core/profile.h create mode 100644 src/core/proxy_queue.cpp create mode 100644 src/core/proxy_queue.h create mode 100644 src/core/queue.h create mode 100644 src/core/rocprofiler.cpp create mode 100644 src/core/simple_proxy_queue.cpp create mode 100644 src/core/simple_proxy_queue.h create mode 100644 src/core/types.h create mode 100644 src/util/exception.h create mode 100644 src/util/hsa_rsrc_factory.cpp create mode 100644 src/util/hsa_rsrc_factory.h create mode 100644 src/util/logger.h create mode 100644 src/xml/expr.h create mode 100644 src/xml/xml.h create mode 100644 test/CMakeLists.txt create mode 100644 test/ctrl/run_kernel.h create mode 100644 test/ctrl/standalone_test.cpp create mode 100644 test/ctrl/test.cpp create mode 100644 test/ctrl/test_aql.h create mode 100644 test/ctrl/test_hsa.cpp create mode 100644 test/ctrl/test_hsa.h create mode 100644 test/ctrl/test_kernel.h create mode 100644 test/ctrl/test_pgen.h create mode 100644 test/ctrl/test_pgen_rocp.h create mode 100644 test/ctrl/test_pmgr.cpp create mode 100644 test/ctrl/test_pmgr.h create mode 100644 test/ctrl/thr_tool.cpp create mode 100644 test/ctrl/tool.cpp create mode 100644 test/input.xml create mode 100644 test/metrics.xml create mode 100755 test/run.sh create mode 100644 test/simple_convolution/gfx8_SimpleConvolution.hsaco create mode 100644 test/simple_convolution/gfx9_SimpleConvolution.hsaco create mode 100644 test/simple_convolution/simple_convolution.cl create mode 100644 test/simple_convolution/simple_convolution.cpp create mode 100644 test/simple_convolution/simple_convolution.h create mode 100644 test/simple_convolution/simple_convolution.hsail create mode 100644 test/util/helper_funcs.h create mode 100644 test/util/hsa_rsrc_factory.cpp create mode 100644 test/util/hsa_rsrc_factory.h create mode 100644 test/util/perf_timer.cpp create mode 100644 test/util/perf_timer.h create mode 100644 test/util/test_assert.h create mode 100644 test/util/xml.h diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000..5649152cb4 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,100 @@ +################################################################################ +## +## Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved. +## +## MIT LICENSE: +## Permission is hereby granted, free of charge, to any person obtaining a copy of +## this software and associated documentation files (the "Software"), to deal in +## the Software without restriction, including without limitation the rights to +## use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +## of the Software, and to permit persons to whom the Software is furnished to do +## so, subject to the following conditions: +## +## The above copyright notice and this permission notice shall be included in all +## copies or substantial portions of the Software. +## +## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +## SOFTWARE. +## +################################################################################ + +cmake_minimum_required ( VERSION 3.5.0 ) + +## Verbose output. +set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE ) + +## Set module name and project name. +set ( ROCPROFILER_NAME "rocprofiler" ) +set ( ROCPROFILER_TARGET "${ROCPROFILER_NAME}64" ) +set ( ROCPROFILER_LIBRARY "lib${ROCPROFILER_TARGET}" ) +project ( ${ROCPROFILER_TARGET} ) + +## Adding default path cmake modules +list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules" ) +## Include common cmake modules +include ( utils ) +## Set build environment +include ( env ) + +## Setup the package version. +get_version ( "1.0.0" ) +message ( "-- LIB-VERSION: ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}" ) + +set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} ) +set ( BUILD_VERSION_MINOR ${VERSION_MINOR} ) +set ( BUILD_VERSION_PATCH ${VERSION_PATCH} ) +set ( LIB_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" ) +if ( DEFINED VERSION_BUILD AND NOT ${VERSION_BUILD} STREQUAL "" ) + message ( "VERSION BUILD DEFINED ${VERSION_BUILD}" ) + set ( BUILD_VERSION_PATCH "${BUILD_VERSION_PATCH}-${VERSION_BUILD}" ) +endif () +set ( BUILD_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" ) + +## Set target and root/lib/test directory +set ( TARGET_NAME "${ROCPROFILER_TARGET}" ) +set ( ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" ) +set ( LIB_DIR "${ROOT_DIR}/src" ) +set ( TEST_DIR "${ROOT_DIR}/test" ) + +## Build library +include ( ${LIB_DIR}/CMakeLists.txt ) + +## Set the VERSION and SOVERSION values +set_property ( TARGET ${TARGET_NAME} PROPERTY VERSION "${LIB_VERSION_STRING}" ) +set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${BUILD_VERSION_MAJOR}" ) + +## If the library is a release, strip the target library +if ( "${CMAKE_BUILD_TYPE}" STREQUAL release ) + add_custom_command ( TARGET ${ROCPROFILER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so ) +endif () + +## Build tests +add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test ) + +## Add the install directives for the runtime library. +install ( TARGETS ${ROCPROFILER_TARGET} LIBRARY DESTINATION ${ROCPROFILER_NAME}/lib COMPONENT ${ROCPROFILER_LIBRARY} ) + +## Add the packaging directives for the runtime library. +set ( CPACK_PACKAGE_NAME ${ROCPROFILER_NAME} ) +set ( CPACK_PACKAGE_VENDOR "AMD" ) +set ( CPACK_PACKAGE_VERSION_MAJOR ${BUILD_VERSION_MAJOR} ) +set ( CPACK_PACKAGE_VERSION_MINOR ${BUILD_VERSION_MINOR} ) +set ( CPACK_PACKAGE_VERSION_PATCH ${BUILD_VERSION_PATCH} ) +set ( CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc." ) +set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "ROCPROFILER library for AMD HSA runtime API extension support" ) +set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md" ) + +## Debian package specific variables +set ( CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/RadeonOpenCompute/HSA-RocProfiler" ) +set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst;${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm" ) + +## RPM package specific variables +set ( CPACK_RPM_PRE_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" ) +set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun" ) + +include ( CPack ) diff --git a/DEBIAN/postinst b/DEBIAN/postinst new file mode 100644 index 0000000000..3d0228844a --- /dev/null +++ b/DEBIAN/postinst @@ -0,0 +1,19 @@ +#/bin/bash + +set -e + +do_ldconfig() { + echo /opt/rocm/librocprofiler/lib > /etc/ld.so.conf.d/libhsa-rocprofiler64.conf && ldconfig +} + +case "$1" in + configure) + do_ldconfig + ;; + abort-upgrade|abort-remove|abort-deconfigure) + echo "$1" + ;; + *) + exit 0 + ;; +esac diff --git a/DEBIAN/prerm b/DEBIAN/prerm new file mode 100644 index 0000000000..b3f509a9fa --- /dev/null +++ b/DEBIAN/prerm @@ -0,0 +1,18 @@ +#!/bin/bash + +set -e + +rm_ldconfig() { + rm -f /etc/ld.so.conf.d/libhsa-rocprofiler64.conf && ldconfig +} + +case "$1" in + remove) + rm_ldconfig + ;; + purge) + ;; + *) + exit 0 + ;; +esac diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000000..c68a4aa40d --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,39 @@ +HSA ROC Profiler LICENSE + +The University of Illinois/NCSA +Open Source License (NCSA) + +Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. + +Developed by: + + AMD Research and AMD HSA Software Development + + Advanced Micro Devices, Inc. + + www.amd.com + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal with the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + + - Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimers in + the documentation and/or other materials provided with the distribution. + - Neither the names of Advanced Micro Devices, Inc, + nor the names of its contributors may be used to endorse or promote + products derived from this Software without specific prior written + permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS WITH THE SOFTWARE. diff --git a/RPM/rpm_post b/RPM/rpm_post new file mode 100644 index 0000000000..57c5c8113e --- /dev/null +++ b/RPM/rpm_post @@ -0,0 +1 @@ +echo /opt/rocm/librocprofiler/lib > /etc/ld.so.conf.d/libhsa-rocprofiler64.conf && ldconfig diff --git a/RPM/rpm_postun b/RPM/rpm_postun new file mode 100644 index 0000000000..6b3c8f2887 --- /dev/null +++ b/RPM/rpm_postun @@ -0,0 +1 @@ +rm -f /etc/ld.so.conf.d/libhsa-rocprofiler64.conf && ldconfig diff --git a/Readme.txt b/Readme.txt new file mode 100644 index 0000000000..1dd2a6ec23 --- /dev/null +++ b/Readme.txt @@ -0,0 +1,50 @@ +ROC Profiler library. +Profiling with metrics and traces based perfcounters (PMC) and SQ threadtraces (SQTT) +and AqlProfile HSA extension. + +Library supports GFX8/GFX9 APIs. +The library source tree: + - doc - Documantation + - inc/rocprofiler.h - Library public API + - src - Library sources + - core - Library API sources + - util - Library utils sources + - xml - XML parser + - test - Library test suite + - ctrl - Test controll + - util - Test utils + - simple_convolution - Simple convolution test kernel + +Build environment: + +$ export CMAKE_PREFIX_PATH=";" +$ export CMAKE_BUILD_TYPE= # release by default +$ export CMAKE_DEBUG_TRACE=1 # 1 to enable debug tracing + +To build with the current installed ROCM: + +$ cd .../rocprofiler +$ mkdir build +$ cd build + +$ export CMAKE_PREFIX_PATH=/home/evgeny/git/compute/out/ubuntu-16.04/16.04 +$ cmake .. +$ make + +$ cmake -DCMAKE_PREFIX_PATH=/opt/rocm .. +$ make + +To run the test: + +$ cd .../rocprofiler/build +$ export LD_LIBRARY_PATH=$PWD +$ export HSA_TOOLS_LIB=librocprofiler64.so +$ run.sh + +To enabled error messages logging to '/tmp/rocprofiler_log.txt': + +$ export ROCPROFILER_LOG=1 + +To enable verbose tracing: + +$ export ROCPROFILER_TRACE=1 diff --git a/_clang-format b/_clang-format new file mode 100644 index 0000000000..0c81671e05 --- /dev/null +++ b/_clang-format @@ -0,0 +1,60 @@ +--- +Language: Cpp +# BasedOnStyle: Google +AccessModifierOffset: -1 +ConstructorInitializerIndentWidth: 4 +AlignEscapedNewlinesLeft: false +AlignTrailingComments: true +AlignConsecutiveAssignments: false +AlignOperands: false +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortIfStatementsOnASingleLine: true +AllowShortLoopsOnASingleLine: true +AllowShortFunctionsOnASingleLine: All +AlwaysBreakAfterDefinitionReturnType: false +AlwaysBreakTemplateDeclarations: false +AlwaysBreakBeforeMultilineStrings: true +BreakBeforeBinaryOperators: false +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BinPackParameters: true +ColumnLimit: 100 +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ExperimentalAutoDetectBinPacking: false +IndentCaseLabels: true +IndentWrappedFunctionNames: false +IndentFunctionDeclarationAfterType: false +MaxEmptyLinesToKeep: 2 +KeepEmptyLinesAtTheStartOfBlocks: false +NamespaceIndentation: None +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: false +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakString: 1000 +PenaltyBreakFirstLessLess: 120 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +DerivePointerAlignment: false +PointerAlignment: Left +SpacesBeforeTrailingComments: 2 +Cpp11BracedListStyle: true +Standard: Auto +IndentWidth: 2 +TabWidth: 8 +UseTab: Never +BreakBeforeBraces: Attach +SpacesInParentheses: false +SpacesInAngles: false +SpaceInEmptyParentheses: false +SpacesInCStyleCastParentheses: false +SpacesInContainerLiterals: true +SpaceBeforeAssignmentOperators: true +ContinuationIndentWidth: 4 +CommentPragmas: '^ IWYU pragma:' +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +SpaceBeforeParens: ControlStatements +DisableFormat: false +SortIncludes: false +... diff --git a/cmake_modules/env.cmake b/cmake_modules/env.cmake new file mode 100644 index 0000000000..58e9dc5fbc --- /dev/null +++ b/cmake_modules/env.cmake @@ -0,0 +1,112 @@ +## Build is not supported on Windows plaform +if ( WIN32 ) + message ( FATAL_ERROR "Windows build is not supported." ) +endif () + +## Compiler Preprocessor definitions. +add_definitions ( -D__linux__ ) +add_definitions ( -DUNIX_OS ) +add_definitions ( -DLINUX ) +add_definitions ( -D__AMD64__ ) +add_definitions ( -D__x86_64__ ) +add_definitions ( -DAMD_INTERNAL_BUILD ) +add_definitions ( -DLITTLEENDIAN_CPU=1 ) +add_definitions ( -DHSA_LARGE_MODEL= ) +add_definitions ( -DHSA_DEPRECATED= ) + +## Linux Compiler options +set ( CMAKE_CXX_FLAGS "-std=c++11") +set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall" ) +set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror" ) +set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=return-type" ) +set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions" ) +set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden" ) +set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno" ) +set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics" ) +set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants" ) +set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions" ) +set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants" ) +set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC" ) + +set ( CMAKE_SHARED_LINKER_FLAGS "-Wl,-Bdynamic -Wl,-z,noexecstack" ) + +set ( CMAKE_SKIP_BUILD_RPATH TRUE ) + +## CLANG options +if ( "$ENV{CXX}" STREQUAL "/usr/bin/clang++" ) +set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ferror-limit=1000000" ) +endif() + +## Enable debug trace +if ( DEFINED ENV{CMAKE_DEBUG_TRACE} ) + add_definitions ( -DDEBUG_TRACE=1 ) +endif() + +## Enable HSA APIs intersepting +if ( NOT "$ENV{CMAKE_HSA_INTERCEPT}" STREQUAL "no" ) + add_definitions ( -DROCP_HSA_INTERCEPT=1 ) +endif() + +## Enable HSA proxy queue +if ( NOT "$ENV{CMAKE_HSA_PROXY}" STREQUAL "no" ) + add_definitions ( -DROCP_HSA_PROXY=1 ) +endif() + +## Check env vars +if ( NOT DEFINED CMAKE_BUILD_TYPE OR "${CMAKE_BUILD_TYPE}" STREQUAL "" ) + if ( DEFINED ENV{CMAKE_BUILD_TYPE} ) + set ( CMAKE_BUILD_TYPE $ENV{CMAKE_BUILD_TYPE} ) + endif() +endif() +if ( NOT DEFINED CMAKE_PREFIX_PATH AND DEFINED ENV{CMAKE_PREFIX_PATH} ) + set ( CMAKE_PREFIX_PATH $ENV{CMAKE_PREFIX_PATH} ) +endif() + +## Extend Compiler flags based on build type +string ( TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE ) +if ( "${CMAKE_BUILD_TYPE}" STREQUAL debug ) + set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb" ) + set ( CMAKE_BUILD_TYPE "debug" ) +else () + set ( CMAKE_BUILD_TYPE "release" ) +endif () + +## Extend Compiler flags based on Processor architecture +if ( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64" ) + set ( NBIT 64 ) + set ( NBITSTR "64" ) + set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2" ) +elseif ( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86" ) + set ( NBIT 32 ) + set ( NBITSTR "" ) + set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" ) +endif () + +## Find hsa-runtime headers/lib +find_file ( HSA_RUNTIME_INC "hsa/hsa.h" ) +get_filename_component ( HSA_RUNTIME_INC_PATH ${HSA_RUNTIME_INC} DIRECTORY ) +if ( "${HSA_RUNTIME_INC_PATH}" STREQUAL "" ) + find_file ( HSA_RUNTIME_INC "hsa.h" ) + get_filename_component ( HSA_RUNTIME_INC_PATH ${HSA_RUNTIME_INC} DIRECTORY ) +endif() + +set ( HSA_RUNTIME_NAME "hsa-runtime64" ) +find_library ( HSA_RUNTIME_LIB "lib${HSA_RUNTIME_NAME}.so" ) +get_filename_component ( HSA_RUNTIME_LIB_PATH ${HSA_RUNTIME_LIB} DIRECTORY ) + +set ( HSA_KMT_NAME "hsakmt" ) +find_library ( HSA_KMT_LIB "lib${HSA_KMT_NAME}.so" ) +get_filename_component ( HSA_KMT_LIB_PATH ${HSA_KMT_LIB} DIRECTORY ) + +set ( API_PATH ${HSA_RUNTIME_INC_PATH} ) + +## Basic Tool Chain Information +message ( "----------------NBIT: ${NBIT}" ) +message ( "-----------BuildType: ${CMAKE_BUILD_TYPE}" ) +message ( "------------Compiler: ${CMAKE_CXX_COMPILER}" ) +message ( "----Compiler-Version: ${CMAKE_CXX_COMPILER_VERSION}" ) +message ( "-----HSA-Runtime-Inc: ${HSA_RUNTIME_INC_PATH}" ) +message ( "-----HSA-Runtime-Lib: ${HSA_RUNTIME_LIB_PATH}" ) +message ( "------------API-path: ${API_PATH}" ) +message ( "-----CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}" ) +message ( "---CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}" ) diff --git a/cmake_modules/utils.cmake b/cmake_modules/utils.cmake new file mode 100644 index 0000000000..3e4d77357d --- /dev/null +++ b/cmake_modules/utils.cmake @@ -0,0 +1,116 @@ +################################################################################ +## +## The University of Illinois/NCSA +## Open Source License (NCSA) +## +## Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved. +## +## Developed by: +## +## AMD Research and AMD HSA Software Development +## +## Advanced Micro Devices, Inc. +## +## www.amd.com +## +## Permission is hereby granted, free of charge, to any person obtaining a copy +## of this software and associated documentation files (the "Software"), to +## deal with the Software without restriction, including without limitation +## the rights to use, copy, modify, merge, publish, distribute, sublicense, +## and#or sell copies of the Software, and to permit persons to whom the +## Software is furnished to do so, subject to the following conditions: +## +## - Redistributions of source code must retain the above copyright notice, +## this list of conditions and the following disclaimers. +## - Redistributions in binary form must reproduce the above copyright +## notice, this list of conditions and the following disclaimers in +## the documentation and#or other materials provided with the distribution. +## - Neither the names of Advanced Micro Devices, Inc, +## nor the names of its contributors may be used to endorse or promote +## products derived from this Software without specific prior written +## permission. +## +## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +## DEALINGS WITH THE SOFTWARE. +## +################################################################################ + +## Parses the VERSION_STRING variable and places +## the first, second and third number values in +## the major, minor and patch variables. +function( parse_version VERSION_STRING ) + + string ( FIND ${VERSION_STRING} "-" STRING_INDEX ) + + if ( ${STRING_INDEX} GREATER -1 ) + math ( EXPR STRING_INDEX "${STRING_INDEX} + 1" ) + string ( SUBSTRING ${VERSION_STRING} ${STRING_INDEX} -1 VERSION_BUILD ) + endif () + + string ( REGEX MATCHALL "[0123456789]+" VERSIONS ${VERSION_STRING} ) + list ( LENGTH VERSIONS VERSION_COUNT ) + + if ( ${VERSION_COUNT} GREATER 0) + list ( GET VERSIONS 0 MAJOR ) + set ( VERSION_MAJOR ${MAJOR} PARENT_SCOPE ) + set ( TEMP_VERSION_STRING "${MAJOR}" ) + endif () + + if ( ${VERSION_COUNT} GREATER 1 ) + list ( GET VERSIONS 1 MINOR ) + set ( VERSION_MINOR ${MINOR} PARENT_SCOPE ) + set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${MINOR}" ) + endif () + + if ( ${VERSION_COUNT} GREATER 2 ) + list ( GET VERSIONS 2 PATCH ) + set ( VERSION_PATCH ${PATCH} PARENT_SCOPE ) + set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${PATCH}" ) + endif () + + if ( DEFINED VERSION_BUILD ) + set ( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE ) + endif () + + set ( VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE ) + +endfunction () + +## Gets the current version of the repository +## using versioning tags and git describe. +## Passes back a packaging version string +## and a library version string. +function ( get_version DEFAULT_VERSION_STRING ) + + parse_version ( ${DEFAULT_VERSION_STRING} ) + + find_program ( GIT NAMES git ) + + if ( GIT ) + + execute_process ( COMMAND "git describe --dirty --long --match [0-9]* 2>/dev/null" + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE GIT_TAG_STRING + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE RESULT ) + + if ( ${RESULT} EQUAL 0 ) + + parse_version ( ${GIT_TAG_STRING} ) + + endif () + + endif () + + set( VERSION_STRING "${VERSION_STRING}" PARENT_SCOPE ) + set( VERSION_MAJOR "${VERSION_MAJOR}" PARENT_SCOPE ) + set( VERSION_MINOR "${VERSION_MINOR}" PARENT_SCOPE ) + set( VERSION_PATCH "${VERSION_PATCH}" PARENT_SCOPE ) + set( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE ) + +endfunction() diff --git a/doc/ROC_profiler_spec_v_1_2_0.docx b/doc/ROC_profiler_spec_v_1_2_0.docx new file mode 100644 index 0000000000000000000000000000000000000000..7b24060f16fe8a254fdcceae22db231b26b194c1 GIT binary patch literal 91500 zcmeEtW3wnhkma>)+qP}nwr$(CZQHi_u5H`CZ{N)BMy&q9Y*j=@S7v^x>~rE&Mpi3G z1B0LdfB`@N000mIpgyjbi~#}wq(cG#AOk=EX$#xixtQ9y=&N`-m^$mydDz+z6oCLy z6aWDIi~s-QziJNb^@>=T?Rxh&VsA8Gp<73hM@V|BH$FK{K{@=PzEMN<*7(lRX(TFikixht zFw&N|_WC`sXx zk*s`YmU(?jA-VBf(h7G$&59#2kva^>y+dm&unZN&jrfq>usoXYtD@uC*;qou-*$2E z>{?6e@_CI`l%9ZeQ^RVi-YC?~>yID4P4fKO&Zo0~jFOSAq~F!T6hJ z7k@*>PKzZjZQzKyBu4x(y#2$>Ur2CVfm|a^d>?%h>cOE@sD*OKRoN}K)M%(;?sYYk{7(3bHrTtQ*`gK(o|uidLdUo z^oF&+dOQG9LwiaYJysnKVX{wO&3>kcB`K$Zqcu^Z=CQyM-yKi-@?bdj!FeQ%PrU;%d{Yowb?RwL~X?0P{((eSgewkri~8i z&cyS)UCeH1)%F`8`35$%szZkiMt~JEXRTDouW5YxJIw<{3Y``$K_RXN8&;m4jUItw z1h`GAJDUMr6xj_>jlRd*W~dT|!aG4l^r37@1gT}mwG(_IkUnUl&UR(I>yHKI4Rgq5 ztDGO@y3l_wYNLzh_9e}=A)(|o?x95B!nmN6ft2QTal@A*9+rL|TIB&sni)Rtru1Bd zJWLzbSy@H_i|Tb>-T7_=Po$1e4C*SS&|iDlA?#GLpG;iF}f^>M+i8J z?;#UyX{|ow?0`iu;;8BoOPpEH{LYFV=AB@Yn33u;zqRD_Bm43?0qlG~{RdA`5iDWJ zIAui4krqGpq|K4o_42EVFwYU(Pk9rJJ7X|K@F?v4Z&-bj`&xp9b+22NZXQ zT|E2cv}%vuJz}DUWEoqeT?OEqGb7~uyv_gp>HlTzWYO_80VXdoK$LUfe0at;|6zEh zW6l(~1J;J7vr&HA8Mp~U@3J=Ewp&{kG&}qh3oFIM@_+JC2Nwj$FSnZ0fSx-%yuSIP z1J7}9wDw|48^ZsiH@5mcIPnE1{noN@osot)XRy)2K|l86$o7~!-4SsY4OK{!t{8e4 z?+N!e;Y>{Pu@U4RnVGM_09S|nJbSkZ6Q34V?|L2@^Uiu?z3uc(mM82lEAFwcEf9Yi zD4vop@sx?S;_O@c!~(bl2x{YmqXg<|K4*PoZRXtLAU@J;lmq60FK9zHs7J59@?7d& zT#u>cX??1i8(4Ic!n~ijW@D0iHrltAX_oJ7Fz>=D{WS0ROKTjcHHfO1V5Ks|syRR5 zk&BLT7$2{d6C~!e#WzRsMU51-=lb4JNmqVC2Dq-RN$8gPCADP7aaTMako=Ni?Y^uQ zJ4l5{${j)vbuF{9ARvRI*an&5_>i-%#2CjXqvdVC;$>8E0*-=gaf04nA=*|P3* zi=e{K0G)V*B2r&H8q~cHJBm(RgIifM_Y%i9wq)1kNl&w{wFwr-#_7rk^_; z9cB0cM7Q%jBxg$R4MLl91I%=Dx3hsm?FoWqAo5#_@aEhg15qz|cu%fyAne8xoGEiS ziBk;$r8i4Qsf*+>YG7nxN`|TQRnmx5QH%K1Oxh-CCYq^fJW?iXuX7ZU1Pg5hV;=7i zVt+buL`>xWG2<1-lme4OZIWU@vrtP9vtm>u(@Aaa{Lqve4Gmfz`|&J4y5Sy1{<=*FJGCE3T^-`{>>v|1fa(ozC~= zNM!vjkl-oE)>R&7{A{)IaczE@$ILKEBQ5)O&w3m-h2i&EyFv3=jvmSc$6kM8#SWi3 zd?ktE;-ww!fg8gvNnF1SGRBe3c26L8Zj9&kRGX=q6!~yXoz`2ZEIKqUh^?nHUSeE8 zT?myw9O=;1m%EQ{JCO2?H`n76JJL^T^6L$}-<%0c?`nADpUYzIKGJ&6^o4C}mOyvN z)HZRm5n2~L(=@K(7ri?`K@UtpNPR0ocn_4~$TBt2TTeGNW`z-NCZdn{3Yqy2vqq?yN7hcWO!$rl z>@YPFvBQ*4?;_a2HDq}y=tcwZuor89F#tY>>x3XDz6Ed&_4h`A9g5bB%*!l+OS@2k zGbCC=HxjL_HuBBm5SU*YI@_urExip*zn0#*#$;U-#mfDpS0+dZh!`|W71v~kL@iO9=C6c-ItJpkTCqb9p)HV z+y)>QAQzMYo_o;h=<_*YPH1Qi6%bO8Q_2BOHWb(0fA)a|&^PEYEI7Tuk0G+Aa^uu8ePwkG z8;t?(<3d&KH?sHmXUO7~`y+^73=PQZ8EIm?2Qn3B`*y+o#IZY$Uff6EhJWwyJ&~`G zul?Cxr{gfQHjSLSc?ZPt6(yl5aUrJad7;up>L|$QnvFRdFb!Xz`-x zEs=Bj(a#!H;$hA-fo~*bHwd0(bDg;91>h+|)TaQtNhmR6uD9eY%vI|qDnK^F1-~W~ zU~s>Cl^w2n+?R2=Y?rV{)PBIUQKxSw3UW^a=j?KrMFo>41qj?N8CAs-?Gm=lZyV7sQxj4%&m?YZT} zIs9^R@H0uM%Z|TP)^Uc;J9Vq9gQMUjjlgk{>M=~C8j_#aO^asD9_%=>cgrT4zgrw> z{IT7&#xttK)6^H@B?XhHTV7a@JtF%5@n34MR|5OqMp4lGaBy|^>H z(kinL%%6(1MyCgsA8{$NXHG ztLp7PiL>8)$3~$iDtDP_;>zNa(B#STtU0_yOi{CgJCCVu^CiIM54M;|EQ+fHvtlMo z05DF3cj=g<_*`R=1&_rJqYE0 zc>do855xII@$pTiq1$4&wR!{3`W)PBHBWz?oG(!HLUk&KrUZ|g4SoVy)n@L#N?fYZ z)Gha7SLHCLAQ;EZG8l4g^GDI5&&!wa-qZ4^w8o*T13na8FL(+G^e+SSEe3?GAnnAY zH5hZW4E_>T;>73Hu&=fBi~-d57v7Xq@o! z72E;{0Pwh!5o-<$;OpNLOu;G?<95YwclT)3&rl-~J3M&y*n<~d%pKm*A&uqL<9vDJ zOi3+eoveF1Sp2QFQ~##;W_+1QlV#|@1>DVCjW%YZ6#S^2x1pA)n|j@WJgOLE0nkY7GWIlX^yT z6Yf&6HmC4A31!45tZ->cj!UWG5LU-0pyru=JDH5;BWTX%*2JM@c(u60thDMd5rypY zs5Kob%d2sGc2_Lo6H)spe3vJ!(0xiuvru*Zy83)GXVJ6b`p`t27V35uEo%4RBNn17 z5ML1ys``tVX%%* z5nX}*@l_g)%NOPI-+81%lbmMC#HDHa63B5UdZDm5wn#^$4iV!MBF^uR@j#C)Aw|m8 z`3nXi-WP0Eu$aPp@4HrR04@Z{2JuxV*dfZbsoH;Wmga9Fc5YqwiK;v#oWHnwy$OT? zjI@SQn+JG|3mP$JY)WnR)AA`_weip`N4 zzV-lBnXgi17+QN_8n76^CO1niLzHjRv=`_*kLFce* zEMZ-VbaoLY+u~}q-k^3=l3MF)Kd+PiQ^y#W{hC~RroK1+HZ2TOj^?;%iQ~^AQV6C+ zeq~;@AcgD0%-9&2j9Gw8=~=fi^5-6ws#gIJ{JGiMGUarxPrLyHW&PD>1DwNtJ!cjx zv;iM-mRdiw>>S?a&nM8PAt(Em3y4GjmX7B<($GQGZWvtBP3%ysdiNWs%b%+GXkrx$ zCuv20eX9)@7i*INaoj>VLe<*FIPffrd9nc{`= zqBk6yyKjZXIG{U)bJ^~-tNu4;%s7Jznqk^oqa37mQJwXwNVBLHK2ut)x7L^z?ZD;i zOu@g47xvy`oHtWj2N+?<*L35rtp(P&DDd}gvZAS7%6RAXr-;>LcQ*1;69TfJHy9>3nf)^Z{HzbE4@o{lmhDr3=|N_k#?nRl z_WZRjsnaz32WjU^s0G+{#s<6gEWPo^1Vmx(6OK#EDrhTM<%lsRP!6afR@~FofUvQ8 z47;r<01HkaVnI;iS*M>1@b_Zl>`BU(Q{Fy+6v51ikpaK`=EKjp3yF;I zp%huQ+Nmijb^$GIoo-~@ZA_9{OGZUyDVjP9E5EZ#;?djI2=)Fauk+D+PwGepu$q#> zUjAR6j(Orely>8f1oWFPSKn}68Szl|ol6foCJn;OP($J2g*t1^TM*J>#7{Mn(?@`$ z4_JwqwGc#-GMvblkc4@#D_RJQCHpjEXK!G9{pvt&AJC;j7G3-eV(DSEZMWFBm?fn} zF&_O_JGY2*6~rlml*VADEcb=nv1Aw8Z!;Md@i(>wxA^CON-k3yBOM|9tSB=p+S@9_ zl&be9@xl4{*)j#I+xx4gj!I$!H#iTa6=Rc_Cr`}hVJs`G#n5$}DJ`_!&t!deb=m{h zG$8C@Z$JFXx41!aJ=}z2>a|sJ0$Rt={+kB&Go%Z;fHuzT>5a8Lqx1O|cu_B*P zdiQ?vM9iks35b?&;O-_c9LutO%;reV2+GOxb1v{+w@f@!x*ao{iXF;d&9b5gidC{E zL*RYx0jLFP!aTG~-vH-*RCf1N3&mMN>Lm7wliFD@h$Ek(x%eePW?+=BM;#4n7B&Ho zP@Cg1Z5g3G8+!1?BBF9Wr<7ykvNB12?qWN1&*>|BZ(=E@?S*TxuTR%)M3)xvLU4Wt zdeP2%H)P%E-2H6}tsn;lEe;CB-;2N-F7O-TOpf1#tg;&|M`nG~NIj&DMhEVjFjHI5 zN3pFqN{{!0)-y}+hle-ij-`CbjZLB)QCJ?R@YG`x$P%DZ_@kLy_A^bDUF2 z>g{GfQ0MunXOIKDAzuC!5WUR13d0$La^^*vb(oi)x()1)1x##EiPHF$DI&(p3X5%6 zYeZYDbOoyjwN9Kzz5Q+g+UBGo@h$crq=MueE}-*cLZ!`94*#kz*b}I*KZ|)4-Zxe# zR!Ia+R&uh6$#HfUuw~rmlOexzv0({CO6{v;&0V@{iIg`^cAc+cDo;HmXlwK1@0(P@ zeNXK_1Sri`mwQKCbftle$8pBQqRv0>ddG$ZTW_8I)?{9J9f!zcAXxf@d!|qbI9Jd~M(u1Ha5`B_!fCy4kz+0A5?vYV3PIi5F@x@V7JK^tLN(L&KHot~LA}Fu3*C`h3y>X_1v?J$C9Mj+Yug+t@u-_svcCJvU~(_ zef$)j0F$Y(BcjAsK+L*Z%DU5{XR!PTl#fji!SRJ$rs|^V20t&kT#g<#QaCy{)?n6Q zJPS|c!L5oOqV6mX?g!y9mg5|8tr>&ksrMm^>xgOy7+h3G(b=J9l@P0!{oPdTDokd!3nXpnan z?E8Z}fjo`Jd)go{nUr`kWsqx;%D62PGKV5y2J$#+K%PRL#%~&u^2p<;6|J;br7~!a zm{^0bZS3|z4$bfS+C4%-)tYzW&T@&^bi?WJiIyNLOG>e({fvV} zB4n*=SPC9D)8!wVqGC4H5dn%~ulqy1fb2t$ z{vt1;6aDUy{dP`%Kcb59ww)?x0f)kTpI5MxmrVV=^Z%vyB3*U?i#7(>{R;&Sh{0%} zzZ;Oq%2E#Ufh_c^$Uf@uxE^tM zLLL)0+}2O50O|i*Fxjm#;!f1J1R0x#$604pR1{UlNy zGAU<Llk+v}Lm4+{f#h4nRA`f)g|JZjz$tV=sDWON@bP2%qTsnme44nzX{*;oaJ2h%?vfyOaPR4-uVZCl3ANxnT*R!6&z2lA#isMhq$yzgWf>AHkrvuvks<($6m;7~DAQtQ&V=?* zIkmn5t;_qH&cV|)1t3}g99%pD;*fp=QCT1-ET0=dfe%9J0Sy`iVmabfD$`YKi|{>_ zTKO5+0hF1M?A^_8O@``Q++lx#(Vo5O9@eKRXWK-ib(y=jKtfkxehgL%?{l&-cHV!M z0&wMXy;QUet`cSr-{P*c?oENiA%J*wnnirgO!CrGVSLdn)lH!yzI8izol1+d_7};y z>W1+;j)PlHqi}0!I_i1YMA&$5vP*-)V6#s&%HuY27Yf@3T_3+nK^docJK08Bi4fz` zP|?0Up(2Obj=9B`f@38`RCKvVzSoHrsDbFAJs7MWYg<)-rkz=Uvr6V|mhNq*UBK`f zYJac-gRSnH@h!l44ZXR3(6!!q@r3jgBub#i!fy1i1#gpW8z;^_df`2-CqCM277X0a z7|5Gwy=%vzg4`;xz8IkUUH`DZ@JV52gOMcpnnu%5Lpb(J(rI&-yeJC8BA>z?6eIT- zACnU*&b7J(xNa=Xf`}nRTkpu2k%pe7@6en|j@J_7)H#&b>YLGOP=i|uit6l#a46n% zlhv3LN2Ll9b3Fj*>1w-c-6n z?%*IZnprCD6y0t9>)owkZOEnI?kZoo0VDjHpsF2+B1U<#y8bd#S=6q7#TxaBF8CIk zgejV2ZK>b$&H+l|UMBr96z0fNO;~na#|cD6@!KU^xB=p&5pfq{bYcp=6)p(U5*+qC z4-qCEu<4nYah5e`wbKO`>4#CT7fuQ+GAXbj23~>jg{@4Q`t0K_B&frA$B97kkWP{A z$JH-f&GcFf*SzGDS*3FfF8m1Nork0=ytoT5bkCdw@xou%ayalB2wXEbYivdMefG}8 zh-*tVG<=&~&x&+uq@bz~#bSM&tu6ZBTD<5*yv&Ziz-hw^@7UMbTo6DSUd{EM0MtYs zIUp6*L-Y-hRl(G8exU1a*6B-i_QK=C&r%Q}6rgFnfn>0h=J94sij3!C^YBB|`!BjW;uc3z2x6FLm7Ga9U}vw#Ox{9fA76q|XaNB7YhN_~+{S<(nd3c*C z$UghD_^O=spg$Gyd?=}?mSs$cDkqG>EV>8tKo1XTz@?FdIqZIw5YM&8S8 z@T+nPEB_MbAh?AnVCq|Z`j{FMY<|5^>Rx#hRam*rn3$asMK~nInA2DqX7^%ALji`S>5(~Iau`=L)G z3omr3(0HXX2oiLfkbXDDvwDx$pwRw=LmfH4FQ2$r+zh=UU4Od>AC6R1zun$qbpnNbcEl}UCO{nQ}NAigcD~@?i)Jv+H*txC)}wwy;-v|@V^zPPWSfErTh!+ zM{!FH`+s-7l2|?cdRVFC_^)-?vE6Mc^=D6_{(j&uUF$>r6v=m;gkQB$?3j9|y}Jmr zTdvURo`BU}$ew&yQMJ^49-$YG=`GHV4cC5UC7PI*$UI*ZX!Lg7L=G?RG!U^hbqH`F zE|t1t+A8Vn|StBY`et%nYP(%@onDl_2RW>&kg@fPQi7xpU!e5 zR)bz!sZ4r##AJ_$uF<63d-;m5QKiX9t6f_UbHwhe7-G>^;c;Eg;yrN^8b{%F_hE3Z z@63C25T7q7t?PLW!4~#yv7@3l{t;B;&@pJ%#paXCQ7626%Te;!$yyESwmT@>PI`3f z+fZx05V3;6)l4cq8?aMB9oXG+P3S%mA1-&K_DRqiJC&C2oSkt9TpJ`_pceM>_)S2! z#f;+5pOmTEur)RPkZDdaOlYdROK75^_(|+;<0`%EIMmVZ7?7)E!%&GHp0sY;#Lqn& zmwyp^dyvYOnN3gg{1wuV?Nv(fXS%6+72uy;ubWr<@T$9f&vherF27eR%f3*THCvCc z^3XR6)}m;u_a6Ib&b)K&M#Y4QQfu76+*_)fM(48n`(icao?0EYXc(%*>TwBA>*_?R z;b&UmGIKWgTxbTD?+{4yWzyyoi_4=b9y1!^?in}da>>S?$+PoES?t!;gV1_=P&)5E z2{MYaO%(mE1nj284*G37`kYGF;q9_J2GjC`aQa^K>sxLWLzZtoRveSqScUugC7FTg zD)(2_41ueMugA(fVBYi7qv$q?nGOM}n?@+vVVByZxn16KnSWvQ?9YpJsO>zLi}5!L z^84H{jRkcv)=yUNRa>{2CEN6Z(KN+o&0S{U*ZupSVbC_)@_f|Rr>iUS+*~;B-HgOC z;6z^xQ~Q(fDQ8uT9;nCb&g5kAqj2wXdlXNyWc2S%=T@*fnxDXF_fE?)?W#4$K=_-r zUDpRwAN|@``!RM>xRChk8VrUN>o{zfX!_8cIa71r-@H!GuKr$oezx$Rwe+>_G}q$J zCMVs;8SqWI=a?K6RF0#tGqzcPh}s8Vw@th~*}C!9h<>cl*v|#F+w8ULC1q+8tX3)6 zH=Gl{5~xApk4DFpsdg{Y(@?)D*$l^S0}-Z~G!kT<*zk5A%}&~n_Rh^t`d&Vku(Azi zE!79srMmP@_qTRY*CFnoEyM8nh7Y3TO1A5>KLOr!{ik}Ef}MTc|8ijmEPWvv%k#zM z)=~vFIN_p2p7@=;f?;^D@wK!CCXC3qDq;e-jTq z7?Cc)vlD%)Y%+#GUHfsk_Oz|;#pdC}XFXk?nH}w~2PpW-%G$zgKU9#Wn)A)mq31A4 z*;#v`AH2xPiCo!Y zm$2As!#39y*X6P|glDa#rm{VTx7IdB!|Sib1)r1pr%bt4ej69F&-HZtu8sk0eMiA9 ze>iL3!m*tEXkAM)ih3!uCMh!Fv*uA9QtnP`^JaNXR6;#kla0l9HAqBeQ~M_OhCP}6 z_G&Q^{ZAQ87Y^f+tv8xB#86FNq>YFmk^TwZi@uqiXD#>P{kZnqDPPy~RXb<+OMx>y z@wE*{O@?8}WX}^j6&(Wlr^jl#HuI+8d%M4)Gx9yBseZM)Ep;|}3r@L?63jPRo(mne zF@P58&^mLM{17w^RJSC;>_86GsJGIg*?uOpOyTDaTs9tPAty zF=eg!rg_!;a|G6W3&h=SdUbo|`l01=q2|s;?A*!Bo9CnUdD3|S&^|ZRx>}a%8K)Cf z8E3B_)(+|mT|RPsxbCcdYChLiXUUt+oyTbQ9|3+yS?!tI%7odSrTW3x3-)bP?h9YD z$MeNlMroDiOdFE7b)1wHsomFEKtky!bO=AxyQpZPdXzfgSt7$CG`yisw)&#&!$Hj->d!Zjk)C=0P6fX10QW%B=UPu$ zAY8aS1QB!!1B~|-`{SD6o?q2`z9^`}!5A!4nsBxY^*0D=MjmpL%`S3LMMjp;5s@)7 zkNKDg#+^LrXP$P%1Y-+K)m1cbMt)`jF~R3ZA$tf2$|!n%>KrkMo^i=X70}i9EVS1V z;k9E!MI7tej0}c&K-L364lO}h=zcha^=0W8k6zRZt-`0>#JYSbdwuJxw*aX_r;5u= zah$cgXrO}{f#Zbjz(oesfGxO;Rl!{XxNBMHCdA zaLMvK#vM9zp*TRca*WcOK0GIy#v zMY+rxLW3q|dxiN}v+D7d!;T*n?qTPf3s_>jq-35f_g+_QV1!tO;V*MfDYG-@Ivkwu zcH-&7Mw>R;x3R2m#?|nliZyXp%lw=PO6Tot(fK>;p{}Io9PscjI|L5c#oj&TcF`;c zX)BiKd(-Rn631XM7hjo{y{+AV@7i}A(J0fhX>{GOh#HT)*d}(bWRMdUd=#eF!%MT% z>!}QHxz%m8S=KvnC!wfY^0SF|)$s1H99wU$ULW}0|Cy5-HV4d!E)JLRMB7z0dqLW= zcVoglIUP&09z&kynZE1|hHlXhQ)NU?n})fW&ua}smNHVTjwvy3Q>LWuQ}J@ZcNPol zvv!9X{_+yF7__TA)hp4m-lUba&-0FVO07o6#oh-3Z)1BEy-oFpI{&kD-uT9JsXNb= zr&s(Gsv}8rLe=Tmhyhg0`~COdecJfqiMAVk${qQ;Nbf99u0Nhg7}&aWcSjg-R?6gL6VNz~PWIJcgSu?U*5$U8)FZIOfO zpdg5&jGdW7ZVojKAw19JHSw*aRx+Y#a%HrSLO8+Sjgk&^DVCd8y!DEKU3;K&izU>q zCwPljZRI~N8^dn6mbH&3HtiIc@@&?XR~uf6naV-AAi2W6o^n&H+bD7^Qh>hY<$f65 zx!I;34HK7oDm2sVR#|%~zn@Kv!dgM9OFNJoj}C{X(dOt2o@@u6w}`}nv6NS6uv4xl z@x!;gDKqjv`yHKvC_~OE*N6n!3r+JyCgiq`QT7y?=wxar73U0PD;rHN_1--I%uI%) zd;63jR4{uvHLz2{pRKu?r;?eW&Ad)#9)~5V;V>098QI}IZKk$^R&x{E95w()J(8O& zV&_ELIy_>rLd4`~Nv*d@OD6CKT8?gH^-8o0KwH$MqzowfjMqeL?^+VbrqQ$J1SIy4 zq*@t#L6qQ8+E)R8!dzLD8M{_2HXb8@h?KAIyo zwby=3gYX9#E5r3iBs2;@Gyr;eb9;m)?C>Eu1CN*C_>l^&#`@-X3Qdd&yc6?@5#Pr8 zGY1;C?|%&M-4HXR#6};Vo+4~s(05Jwr1?5%m3%|g+-bi0^FCsW?T_3Ie%fJv8?OH= z)i_*?OTR77?KFn`u~TmV1&1V|ig$%?B!D~zjfN%P6Gy>dUB5^{;@^p)Z9R_fKrVWR zjlPgd-l`~XTUz^r+FrdY4e|kmC;*LFVt5t?_}D@G7;G<K>6)<%vZt zdz}NcgRUS-gI-EMB@~4gm!Rn0c4O*0C#ZSR#hX7Dyt74;x|I%xq2&+dnyi{xcdu4D z2Q5G}>=Mj`9NPxlELtn}={pcDTzm-xuSgERSZ@-BRPQo^;M37lfa?f-*Kahk<};lp zy}W6u)3t57-Ilq`f|R>J%TC_?TqS&LjZ67~$TuAZUmP?DH5rM9%L#W-) zdgYru@aUuUIA)0PxrKcpl)kk&z~ooMb_yJ9hI6=;Z9MgB-7?5g zF|SH#LCNv7=6K0L&HiLg<@iF_CHwAFVrM_IL0>IMR`qFAqGlz-=3JAr&0vwn7Gmov ztL&@voC-hITx*M>aPBHH;A&f}6J~p-#)Oo!0>jk0(gt?IoRte~)ji}au|`ij#MD+@ zr3`<2R5j}Cw{oTted$oUww0l3?n~L-ZMb$40H6TD{j2A8AlGDR8`TlS6Pv2=nY15x z0Id>n7XZFPLUggrp^qOtAq8J7XKmN(n6gc+Y>#dXM%g@-^5X8RH1nC5rawh4o%U{W zN;Jk6;kI{X0-VWXi)ldZl+deQgC+?Uo zk!N-AiLABh54D3B%O=mT8td-p#K#6{47E za3ltYMys|V5|f9Fvd)zLsS+VSLXgNZ)#0vN`YKgJLFLBmgmDO z8j9VR;fH`3EKacj70Viv<^?1qc*zf>jpvxA9mEH0;^V=Iyv^=AGFK`ik4? z%7=vZfxY)GNB{Xb7p1opDW3EKCL`)cLiUm+3gQ-gu zD{ZI77xfN{om;g{jb`po`Jx&6l}Y0^BGhaiU~Q}bOR@I+nIn8mA8A>lqwf$QHCl$d z*WZ?)O&-2-5hzTeMWn^+5V3iNQ}hul?BY2`DAh8CP-|R|%&V27!kJ{8#nV~N312bW zr?6DRYA|g4bo8lmz|0vK0i`1)b}k4Souz4E!PB7%2X3ncqFq^jxD4B~B zu8F}USD7X`IYuGFW_;AvpAZpy_T(TdT}trr3PexQKn>mfu&8WEa+7##5BXY-dtJ*u z>qvrFiPgTH_c&BQl%(aw>Uu${8P+zC2M~fJu2!>;->|TbbUkx!3acWg(Qi3QZLM!& zA7s?iE8tCFHAx6if&F3Cm^EziuKFVF9XSi)(Wluw6;{s!Ja?e$j)15oKWWugT3N;= zsU?8Lr($_FmeCa}1e^B}moM0CGk854ZGs*?yBl3g!P7)tUmMq0a9fZIJ_%4yt1<$) z>Pm#5%Chv?+KFy(MzZwe8l5#nE9irf0Tpi_V9VcqJqOk(#=xx^bWl31R-0dnrEqy1 zxwOKZp6YPjF-{C?ht-w;rO2~1OzwXrk_o>Kj&6Nx!c1~RZ4grg2|-3&q;;deeWQhpq&QVH=|2ou88{Y zk1L#Oq6vQJ(3%!?$aL=vxc7$J7g8nKZs~zpMgD%9o&{V_k#{>=I`~;1$Q8S0ic#9_R`nPS#d;5r;fJ=3_thq;9a_Cc{ zqerIOWbaKb*42VIJ-jo8EpKFQle;`1>gJt|acl1sWZHqfLko$^o?JjFAud6l25zfb zgPGK-?N);9R@+^m^a&8FZfsa0J$AWmv~w@W@Y3_Mn}%sw2*^&g%SpnHNk@p2qxw)O zGh~^Sx0pNeLvDVK-T=h&uJf&F?xIvs793fD^!K+t^#%zA0sc2y=B2?ug5`@vriM3|BsG9OeM*vJ-w z?x(+8nBa}W_UQIx6J=cbDjOc2J9gpP#29aqLB#zQmu$*#rOdqTh!;OPb)HNO(MKMG zaUPh?TMr+HOKn^8*WWl)={vi-=3!7d%uy`gqmaU^@&B&*3~2Ggho8@DOaBreEv9^` zQ`_?rc=*$w&<2J0t%(h5gZcYCB7zw&n==lp@9!#bugiXzg=FHDWcy)sg!2V?Z$?-( zUc3xDT-}A;_*T>jPmakw;!^B!ul)U?*rP5&41_Ct9A)Q+1Y(~;@GC0%4cqbg>kx>J z_RPyTb>@_wjN4b~Z!eBGS}(OVWd49OVc*)+Cs zi&=ikXeoX4c|Uk=-IeA=dH*&=EFC=L0f*NE6TZq=Sa93oT&I18ry|~$h|{eZ+xVu3 zn*4QJtRK`tsouk(9mxkB5$*@}Ln0*j!V&v~_>>bWvR8m-yz(o~!;kln6Xg=lefMCF zd;0^|;Vg#d1+o|OK+bQu=mTdvl42tA1H#>C8cnIOBB7Wo|0G!m-s%>(WL+m+sKh&Z zH;%m~o@maA-ip1WS>Ge?Fc4d^1`9}f=ePjAxKgzTnu?rST7KbM;06G@JqOac)4}m* zt^p@1Y6*(NpkOawh=bEqF^^&lk3N`9z-S^)Fs}@8T5EQ4Q^D{o{AqSlfDWCcZBxfsi~h ztD7RuDOe<^0dx-Qng}LumK06NEfeJY0Bd~NLMSu{ZPWpiE{+4r%+tdz7XljoV@`t( zX9F}wSZ79tc`rMx(O|{QS*~76!yZ5(PjiM)stu%xZV{7zYY@3!+9t49wkwr0d$F!V zUck^D4L%$>O)vM@X|u8%nL^|zGjJckzTh_n8j-7Ck4UKqF&K<^hLr0-Ba4;`5lvLS z($T`P2MhhZfO;!Ss|5NKVzSd|w7zPa!@=?ms?d4a&3zJ63AxL;?>VM=Egc$_C-0fp zOvACGsll=Y0Lp(0oMU$mGOCraMIoDQ`inK%U8+^0(TZTQ{*it4q&WHt8EhD#`P3Y3$eA!i5 zGO7Y)RiqLpOsprm6gVuL$iD2+6gyg~y{jv>O7i{C>cqQUA6G=hv_%G)*%nTzk}Nj{ zL5K-~wN^Mq)%?@O1wNb05_Z)aNaxir0{98S4r*XmK!qUXIJGAXz;>k0tI=seW{R`; znUX-Ii|#HstB^&OCqLPl38o1Xwh?=hcUdn3``7+-$lbh>K1ulN zZ?E4$--LjXHVOX^X46A{OMR9Z6jDesT73IPW3b& zFho9*F(Q?bq8~uyIOXNsavtVlyM;ohaHzkK3WVb)7yIIVu*^+a%PT?TuB=sYH7fS) z*qjzil@sQ5K^zw)N#bsIP#2d5#URJ^Emc7$XoTG_as}TftTzE*5;c6GEdKopn{yo# z=CawOlu5H#No73VND4-8a0oLQ-M)# z{|!t)v%e>yv8wY-AvZFA67r~n=6bPQtEoC+KEn(~oBmr^_07SNsyFb_a|1wr}8j$l}k@NIv! zIIA%YIJC^W+Xo$eu{Lp$Q3Y$Bae}yjVKGG)doNfc>vpyZJN{bstED|h7fnl>%qct~ zsm^58F^D^8-^+Kr%QTzreVycDhuy5-NekIAW8ws@wvkykT34ZnyLcr99|E46uEPdSjj0Qno70Y+vk-b9T_cI=FBkhmXl0Y!JewZoY~qzI5nqI zr~sRdb^#@X-p=ed5YOV;mM%!CD$QRo)R;Q+=At!Tus1Aew4u*svR-05(urEggKgaIWLf|P z8CEx4x+PS7F0b^Nl$z`Leung>7Y8A^bc~t@2@ls*r6gtRmY`?Ijah?wU>z}*Guy@q znYrz=T-BP0h|vdg9~|Q~LNIGB*a7R3FXSG{OPf1&>nnuhhNQN|V#c8G`j+64FkpMx zbUf>ZE7oq-Q0v81ltHLHOR*i9fl|3ga)p7aAqk15H#@)vZLgK8HlP5%xK#0(MEhnz zOxPppc+|$TyJJCs3~r=a11gl@G{lAxppGmN-A*_u+O9TU#LSHPkZn7Y_k&GDvl~(e zu6z24jXL$iw|BgL7dvvQ!vmZAD0bx3%KK~oS$3o#$$3=eU&D^PB{)(fzsQb!v+uq9 zT!)IB>hPKD-xt`C;@^=S`3+0SA1AMN`|p>7;wTdTT?dKYGWf%wxFJKsQtyx0t4xyw z0e=%9hrkq$(&#q=atKBe_zjdpP>lW}D)%I(_H60<_tH=MPE_ul+W!Mlx!b!*7FyHY zu_Jy_)CtChvkfB?biBVDW>-G@-K(Kljt(?)kZLEy4cktaAVa~I>lzb z%A9|C{`ng={TC?At0>hl^v2v)Fwo4Zr;}% zUV+fR-C)WW`07V^l|*j4&_4&SjxNU?UcJMscX*Y#!>fPB;T0})HiCAz#|n#ae2iay zCA=Ctt~3AS@Ct7_yh4f*VP0C9qt$)`rHBss2y#Ahy^!gbD+T0{*bz&OoN>E~wse1K zsZM8Y%7sx7nFxB~X_7s3CBozoCWkh*5TEE(-+_l^PdhY^B{*qa>r52Z!s(jRgwZe; zwy;Ghf=mRP17(a^Qg;@XfuWXV`@j{=fXMA@6ZbP4UPSfMh)<1ma7ez1iL2qRqlVqA zRf>bU{RN3vmSiDh&D+HcJ{hw$o?VSC56&fHX{-D#!8pV@Tb(lqqL4cxsu%l+HoV0g znGwBO7_%%S(ldMkimV~o>?$=o217$Qq#CLz#DGR;iz3I@{^hcmXo=}sB7vAl1M3W% z;%><(2f3h54O*dLwv5#y>2YmD0{NCgR*RW`%DG`!vg{dDOz*%Gifl+Rk6e2Rn6}dj zfxS7z7)i^#Sh0w>9mXpZIVE=BPAzaENOrAxR$~)IVaQQ(Bnb>E17*vq-~}Oj;Yupj zM&s(mDVa!rI+TKS?n5@PN?SC~D=={l(`JJc*H}d-;-ne1)!DMu!LOk^Ni%bg5s2kvJ-k!g zWtH#XidYeYIYTi+E6KEWRv-7QA_pKW$B*^8*FwLAY2$FPMm`(2W!XE zm+NzLO(XQOM)uNYG$5S zJL$N!ljV8lC+oED)f8ajRkd+)C3JRd-HVVW9m2U7iKN7}vpP+d!7iMNY~l1}wy+z5 z(}-)P@OERf`N_PJTjL>@IoLz_w1+JGjO6Ozczs4X%i<4I(FkWEW2i`ZsjyU2QpGCzD69{DP zap(Z=aECYggZjvQe9bA96=*VFAU%SVGkrvbXRnE+&3OO$RB10aZzkxY?&_9ca+Jn< z>WFl5x01a0)MM#hPX}-3awPFd3@_W!=pEt`PMHdp!xr6 zse{oZjH5I;f%NZ$E{2Y5-8#Bwl@(J)!?M%6{7Y9Ki#R=>3}5cNVR2u0t};{rTX`n3}HSrWw0bgY2(@4%^+t9DIIOwT) zzFEnFCU9gNw?lqliiBbOme6xi3mT#xMJ+ItfTmyiQBk0-dCv7IH~;fr zi`(bn*WL*F{3L1^i#ZCBvoClkZ+TA4R($lFJ}`>#gB_>u&en4O!Aa}UMJfk(c9_Dz zvPv^>m{ndnKL7Oi#ZRS(@s6b!OZ$nXcoh8pW=nySFh;>QOMy@@gyT22=h-d}l!D>g zL0IDxBu4+_0eanjv&wtF-x4iuH z#_gAfLG|?VIl80|5$u-P+FZb|&Bc>h_(pSq(io17uFmLL&?G__puqSE3caL+#9;62E=Yjqb=YhW0&`$@%yx-6d=f!+wLl1p!?0#eK zgJBdMrki_{|6EP}wCVl*CV!yler1zC*_^4-$+KHk#IOG7Qa;eD zzXPkHvGK%~v2_uQp3au~*zg#H4{Oec1?Vsp(bHMlcZc$aU_v8(FdM@Rm+Fs2s~&z* z8Ghu0GKR{k4_-6;YWd+`L%Vsd=SLUMeYp4E9|q6ygEcyA=k)Snd^q(VpLe}so$QVv zyw_MmwymM>*QMvz$>{7oSMdIj4c_U9OZ4I}SvExB=4qc$+cN>nkEI|7} z_JFY){YS2=(RO?oubB;RGO5b|kNdv~aQeTV8P;-{r)@@xB3FLM9* z-@_w|bX-dgKU+Fc^q>FIqO(_i*m&)ST{?J^|UvtXWw{Lka`^P*Z z5B$KgpN&~~cJI;bj``LY9sINepG-*$bcVk;$^927*4#;=to&4N_Sb5gcX+>wro2_G z>i^uG`d@ICe2SVsnV~ZsG3x;QskN$7K|R{}RW=@dt6}9JLj^ zHOPN~@_gk6sJ9H+J1;-%fV!KxU&72iad38NeQ`ba!!vXDoP6_Ta|gXGCWXNtw@g43 zMon8LK4as?e2kg+oS}Oal85O>MDnFga)-a3nWRsQ&|4H?1NOI~d%y3*5rKZ)}twjln^nc%W zOy6br?lOFL8NTn3;bRuG+tP>DVHkD}jpJkd^0n?#U+(FuL;B;^_h<>kmK+To=Q9_B zVojD0dxw}b={n1lR}j^bSK+b{<}+%&L)Ty<2JGMrcG`08XPuODZBPwOUD7q;90=NN zl18xr!xD;G++1h-M%g%%p4vI>FNP+&^^&G*Kd2lNmdnVzZ1%u9I#ik~6Lt*Kpl8GB z6+0*<*~>Fv$?Wi~ZusCr#{JqowU+Ci%PB!osTX@dWD{b4kUBG5l-H%VpNSqNfCcBU5~{ zC--~e5L|5wIc~+R?Ock^Bqasyu~kN;7dv1`*k*8YHMih&LCz&2x*UnH*P7j-u>5ct zPbYN12Ml=X(`MVv3tZM>C(NUKLX_ENC){-!+`cE$xd@&?Q|o z74OWwD^k)&Y(2@J@r}-%P!@1cxwy#GbWfc#pvUlogCFFRb7cZl&!B%G#}W~eX6 z^L%F+R%&)my;U!PwgD&loAUk5(cgK$l9Wwcmv^Dy<(Ef0DsMJc*UGOU_neEdJ` zomrEk*pi_CN?G+IZG%|EGGn*R*g-4;u`@OgtU^eLeUX0s2oaH)kr`2yH|tLK+`hp> zm_U-p(b@bve*CzHiP&|I?hOSS;)2lIX`5R7L^BPzIg1dEY|ecXp=)rlq6g=ptH!3S zUG1JFt_jD{bepJvm4SE%g?+F~*?d+=1C)T7?(ur&Tc$SBQ++-(dxX?j$bf(zV%Dg6 zDxeqFp>WWx6MHAv=_{8E|$4kOL?#NnoZ)Da9~C=Uc| z@Ptg5qn8O*k2AQ_y$QP7?r{(AU=X_BZG*mnLqUa(#Dq@8YKF$TY8G`CR$?Qh5eNiI zH}*v3epdw6h0IfEgx2{`&I&@_o^pGh9IxfdBhFphVH)q43GW3~=muruur>oIi8_|V z08bNGcnVu&%?(s*E<^(Bm~RPG*{oSkx=|GBp@CsV&ucO@FIz8+1M_}4Rn%-#%rTn8 zNT%)HfM-~u#ar2Sg|wM=g+Z6TUINo~o_bbfF?j8VhA(rRM*6%3l(KX5Ti+$Ev~sdd z-?^Mqq5ydyY&?Z1XWt7bz~Ag@xXTK_QQJP=F-7SFo481?&2^3I2g?3(t3SeEzqXP!AyT15lrG7d%UMp5X5oQO?v#0~ZA zDB%%aqvllzHg0w;Iw~SN*K*MJuC}eb%bG1+xHhD6J*Z%Jxr2uSEyVLa1lmmi z9efpLLsMY)u9aC?VzjLjnoj18R*FmI9Gx`t+(74hOZ6bVozb;pqe&I`fJ$~79UHXP zxF&``NUUM(N!p|tYBxXndXrN$7EqJ!XWhMnJIZK?wn(}paGIH&iV0ye$6Om_46nwm z7JC_LwDjtPhp>bFzC1j1*TFpQrwF6P$|kLWo_%R(cy0$gYS@kS+W{yP2k=`8BWOLb zHuv+YQA+n| z0u8Q5hxUi5WwV#Cq$#K(#^%)7xNhrH3Iqm)gtBkx-d2jkDB#C=EUY%22n_aJ3TEBP zn!$c|)^D~46DoLf^ErBNB#W zY!0-VrOKAubG3%)LtbGg?`BvhVjN%^7lQE=GTcsw0>Ri6Ak1-jD0mG$5$lXCI}32c zhH<2)T}MPh3XsG^^O1OE&vBOUN8q+Yw^7#0d@`qdsoxy-NckvMULNEUTu-Mne>i!h zm)Jq(ND`aKeeo37a6*^2uH#C<$*M*St3YVTH8K-}D0mfqM}6=vwS`x=QA1~sPK5;{Z>VuDqh%6bgLhZ4S1Byo2@@77%y^Qd;T zdN9p9!27iU^dQgS__c&3{1WUjveU53Rl2XU4U#DLeWzBYV2ohdut13Xw| z7v5{y@@}5Z?xvg+gET`vlr?8dwnOWA#O`XXm)@D8+&mN8KP@!hMt6J9NCW@= za+LSqULN-F2L3a{%@XkKe?84IOSrceqs%kP+p9%piSqXI*&*I$N_YtV6Z;H1k!XX1 zkGi7v(oflE?sm(mG%K!S7F9-W?>A9ciJV(vG}eo}v&nq!R7S}WW`c{KJl2Q_OAP3- zN&_GQ33FdNWMA{F#`is14-DFJdvo&?s6A#Qrodf`uoH&ED^|kOvo9rTczX=+9vh93 zliDtk=iFq*#O(5IWt_I8w2Q~9Pgr$v7rbL+3&6atN05uanEplUB*V@ zP&_zeAn%}^QR>IDFDoV~hxe(pQ<}pnJL<++FovF)cw?a{Ij9kkY z8$Y}}riJRG+_ z7HCg__xt78xN4ekJACrRO*_P(V#sq(|g4Ox(?Td?A9{M!O?O zi=DzQ^HN#f+6A>jkwe{3MJ1*UdAGM41|%PS8Vl?Lhd>^9RtW3S95Xg~n&&^Gqc7PN zpNSPG z%*yfZiMdfnX){!S+N_Jj!rv7a42+Xj-DY^y;z~nq`?EN@iOi-T4&6%05MeaWz$nC^ zqb>=c0&?O_KaX$f)sXtN4u4`U{PB95Q}Z>Q=2PIuIW^xKyKgUL$zsWq&or7#I?boR z4|8g=>PQFMrIgg(SPJ!nJW4j!oSLuwb`|)g^S-ZNU;Egb?)Bzk{8xSb6!>TQl{Bcu z;K;6DelM2$Yy5`p!?c?JWBdOwujXgO&^HbPZ$p|t@#g~$TDAk<;UMH!IA{rH`Q)Hy zFw58ESqK5aUv{SO?ay$~;(PE(L7x=#NkRWzDaa~3UFoEsrJ#duxgDb(7?J@ux^L9! zsq~$5B!#uPsiaF-Kjzd}>-Du!&@#O3c+ty9B4%M3N|XcL4d6qiYNiruwiBS;hw<9OG2lmzu)O57CO zo1>Gt8*q1`PJPDJ;58k4X?JjI0H|r=7$EN(7GCP`WoYRva%x9%xj`2g`Grh2Pl4}o zg_=N_pDgn&qkoXeW_ZuA@Ug&0C;g#+zP;)%h6O~c{0FBke6m8zO9Jp86RXpEEhdI( zsS)kTXwQ-I=Gq;ZZw+4S^!3&1^_a43Vg2MoFvUX4t9)AP@*DbjLpuMJ)bqj(zaBIA z??49VU1UIi1sR?pZCBgL643UG?F2%xFU&yTRWjMRr?ogC#VPH zfbFDB7n!%S>DlhNHYzR5zqOt0YEnQ#WH)+VG7ZcO^W|7_Wk;eL^sMZmt?#f@ykqtX zgVnV{XJfJ@Iw_|>NytZ~-ZP%A-pXPktfxRr@%oe-Jbh=JjQ(_OWJ<1V_(F0HPmc)I zch+GK)mN3XYO^^fNzM<#ScbbjX!~et@;a+w?k==yR~KFDexX3c@~6bmtJzQPRnv6PV zy*X-oL$-BHEKI=4X|&Wk$6!}i2ik(05^FFgY4Dsu9XSPzd#Lbi9~4|gL}QO4C+lSL z6o~s` z1hwXE5!_&#$DrCf1u))fbgwNY)1_M3!@vXJ4bO3?yVr%PdSOqRdZ63&$ zbTXCw*d>2r=ZS zndTi%sgi@EQ`m#4OVcqHBgI`M34{Pw{vd0zc8H#r?FMV7*yKhRAQ-5SMRXH_P6)XcFhFV&s9B|aSYEI$Lo0v6RvXg71wQ(Y|qsJw9Dx_gsJ!VYRk$mKudpL8sLv_B@zGcnJZA z^1x&OhqGqw!AlxRytnPoK}2#yzJb6EA&y>K5z9pU4Wj+1NPaMD+smJIt@Y5ZiAVn- zMy!PN0ZaS}My$B-9ZUEJ*2RdWct3#={>Z2cMi2^+gVgHkFbbE@nbZ1^ive#Cr-m9& zc)}4dtlaIo?pzQcIm6T^VkRcy=3UJTZ{T*TFoTrOoP_pPb1YOo!?M*GV>*}|K^#Mq z7|tk%?l0A#Bl%=SP6ybczA25PksQy>O@T-5bV9Lg5u<%6Uy215MC#cA-N!#f@M3fy(wmR1p@p*4jF3$UWP)+ zwxLt?pew-I%Hoakp7r#l`F~W*)tKw{w0NGg-J;RCA2t2+dJ6n-;(UKu|L!<`Bk=i( zwdwEI1u^uVK9?ki<)1Ad`Yl{y&R6H>x0bh;m)RmKUL(J+wt(j;!UsQ8|8q%4`P;ahZTs8rOFqem z8h80yHvcD;xB>j~dfkTuA`LBHV=E8QQ*31)4z+LsTq}$88wd zaNIBpo2w%26SO^eA$4=8KswR-P*dd@Ed4ZhHPE6~CDZJNX1Lg}Petm%L2m)SM>e~w zuKJ@WI>5N2F2Z@(<-EB~+`94)FsTI>397*HVCPe0Uj0;KE!5=GbuU^<+>wpPbEb+1 zlGy2Id}0Z=HnYOF{M*S3wKFxPq2clwaE;8PbuqbiRNY(Q9~9idEQEbb!gq8!TI;rF z25M2?B6P;pUcM!paas0!WPRBAyb2YIDx)s%SRd9pTy8Ma(A|zzjhjGj6!1ujDRFaH z*>r(Oay!X)hq({Pv!b1MqNagYNUcK0>#nZtjRv8RZLh#*c-8!9y|*Y7wv0}YJthuC zcFL|bo*so_cJz%ptJokb%y{hyn1*Juu~W>-FIwO*SV?uehzuK8X{KfZ1>yMnD~|&K$uq3T!Hh9_RaG zz+Qw+o%kcdu&D-eH?y=3ehl5Jr@)QI>xbBb03o*b;#g_3Q{bx$$A{hqjm3rI=e;+UKW{JPxO#7_lf#WHB6vFluC|Gg-J|19AD33Nz8#-@#EElV>LCr*dqQ74e>OJR-kyk z@mqRLo{Ii&zekG#|5i=^tcph8Rnhq0r=p)L`^N%(-6~iT6TPFgx1U=3Q)_=}?N6=! zq1IN$eGvSl*0#_^Yg>;+|L(cyABAfpZcO~ppPi>b=Q_@n9<7UhT+vMUbaUI+MgN`1 zFXgZ&_mYgeK;O(qjhiWsPFx6@ni~gz$NO0lw-lt+!Mas91skH}<7_nD67F%-(2_#t zT(0BDgK=;f6Z19)+ZMot!q6d4BB7fOTIt$tf}Rl8x`~2$mo2&+w3RDzABFjY1I`Fg zOxi^ub4`E|dF+8CN$TRv0NSn2nheQe@AN2IDkT|iPi@KJIbeDKrChMrI{nnzKcTf5 z{9d}ze+R9-eOGG}zo50B#`Radjbopy?N7b^skcA%_TQzqjffz?_$OD}uY3NJ2Tq{O zAww-helIwXur^A0?jDtjr(%D(a@`H^BDt+cFy(zC&9~QWUZ2NBZwoJad(Y)x^|nNS z;;b6_iEbn2w(j#E3-H$Q_YE?Hes~?Pda~J94~;z)Tb_1oNvS=PpY4F8^!6lcNZ%kd zCpeMWBPTuHY0@Uy5vPp?<XRpyx2I2D`}E1PRR60_o&$(I5eEzn7@8HjX3XiS za~$X+xkBjblLvm}ljpBKd6(~f^6VY2)!jZsZGl%Bw2@;3U56f1Z4+?V!Z3g!0Y2`c zwvVn?#Moc7M%&Msx1l(!(?CGO(~$CX8!Yg}C(qO0L>K$?$@_5IpQjb+LgFtb4h(&b0P)O0@+AVq7AAg`n95Uw#To+n ziwi+hbuzCVVy6dTb=^9__f?%;AQgyFY>$Ys)kdW#kuvG^vu?x(SqeNrc{KjPRcnwX z&R6UKOg|oC_&omjuAgLB)NcBpas2VsltIGD5<>nDhzb6$2BzOWv)=#sTYf(4{ac1w z(CH(DdN=y{a%S{r5o$@n{vJXtnb$uN>Jy=sspu1-{?!r6XW{FNPd^_)!kk*>F;)|X zMDt=#@9mAlHRbtWI3kxvAi0s|z6;8Ae`D9zScKSRJ?7H6c4Pi5O5E|@ zOvBzurUBlH%*^g3c<7Yfo?-77>5Od0JQ83f0P8Fh*3g{{BX<7fV5B@K+HKQGk~T!b zc$!$K^GQmC_XkrWPeRP=yYOIXbu5_1W>0JGM!waUcY%bdPWOB7a!Q^8*?hIbm73P+>jQK;8b66nuTSw!n4tJ?{~LifU zOA0`kAhB#w^S?@2FNw`rgkRhG^x*|E8nC_UGBf6FL;8vGqd`g0H1FOUS|n-9!P_$MQMG7|O4NS};E*$NgdVU~ZIk?!#G zqzb@-Q#R!qTw-T-fCNpt)Oim>d&33vZGT=5z@E3683+iB+yHU%(F_4VXLqRzdZ=D=tm2Po_?uR|&F4fxh02*tkI^qjwB%8F?bP z_E@%3RRi0i$Y+gAB9^+sePWQs=^6`T83$3hfyG@DDoo+ox&vl;&wICZa!|k=&@wko zyr^{d;bn+T%dS9c;9-WLEp`M89SnU((G-kiJd#qbz~Wc(ZH&=ZK6t6azaC@s)d^)4 z_z#FN`sy3!ojM0BBGt-wy zURG6I{<*D8Z%Y6A=j9`OwQK@D{%seZPJjE|I+bbno0d+$9jY?CF8?`)iru=RWH4gjtI4&9hf5T*_@xHu(K7_zszQgxMm`>ip;VSzLTM z)!p}b=l{9d=C`@LJiG3K%*!rxJEp(?$8Wl&zxT8H|M<} zTo@z2<#_Z9$CI01cCh>8_)m`i$qJ3IUoKKF!aGN18fkr)@aC{K<;@N|sN;RYK0x?L+ky8i+{6&Jx zdD|Wg-`15=)wW;UA@wP2EA%N$_U2Rr+PS z_^NL{w~K#jyO;ur0_*;Dwu=cECsFi23x1bCdF^oRAGGt2g^zc`<1MG{{|r6|h*)ii z=oWhokoOiowp-{EK0e`N8N{FP@vjaa_ZxGB3FGJC@5C_a&2{9jy=)`-p zxv_f&-L0U)zETAXd$Y>dYg)**hn)g3wz=(}8NG}7$jC{hw8s+PEr!Hvk#7+gn}nSd z3(Z*^nGn@H`4HIsV>P_rY=oWO-VjH}()GlE~7NFX}LXt&v9F1|=k-lu%!Mshb zoW!4iNA2}q5>BbgWG!$OZ;Y~Es_45wc`tSNwQ>nPl?QOi9L_#Z?0pn1@1wbK9n0^l z^%-%qd|D`` zO;zzN9)sT`k#NLio-GMsKZPUx2k96a+Z-4PqBQp1`vrJiuo^$cD2`@Ob4DDL;h+o$ zWzi4HQkk3Q&M6t820A~<+I4AVgeI}c{mp~M4j%X?@;)&eFC>Yz8f$cw#KTRMu98W1 z0wOZudHU3S)*o&rgAP{{xu^SKw)-q+%dLdb)d$U#+8{y9lu0$J-KPinsgNUY=MPU; z(!DaU+6UIiw94^fLvAz&kBMxyrnRP{>g{8-sZt%zXx?0LV^t_NCb!d(IP8z6MMY7^ zoeJO04a`Dj*i26O>y~i0~sAlK< zRBGX)5V0%AuKbc+xmgu^snZ%LxLkSRQJah_t%<@f8ux|ln4Yxs0!!De?8Q`3g;{oS z6RSV=s%)jfuF@-Jl^gO8rY^Ledbfsfd!et4j)GB8=%EyXP>2cL~k(xHknZ@$Hn321s@}xnR%1=`{J{S(-vsAo0%&Upc__eOg zTa5X*x-lk2>Y-HN@2bPbT*)k$n}ns<{nu)|M<#FY2joiSN{PgfV{Vws}46NYlK3nwRc!W{cD^!mM1v4HSGGYDC3i^)W&>8o>;UE0OjrJmfrQfpeXd^@kiGMQ56=CM^% zE93qu@hZ{f5hJZu<4l=S?pw75h$1SD
Bt;(CyTb!^iFm*;PX@)MqLx}84R*`Tl!?1}2icZYymI(q9 z!vt}OLQFQ5-Xvl0;|cuu7a^;JIgnh(IR8a}f8gI&z-6zv=GWGuM_1EEV_qNSRT0nv zLlS1wIcg`=vgz{(wcK}5sFlto8P=}Q#c#Qow`9_6G86N9%O9e#&ewpWEX13&UhZEBX) ztfVO5y*~(G@!GsLlXL}cM}^Xu!0o4q!ZQ3#l1j(36x<>UZ@|OM;AMtMa!fvzguG3D zZ|EWx=&vl{zi=ofzt=N>(uUlEu8sfv08lp_tJcrKGqqmNkj(2C9ph*UNZEOD{ndJp z>#g@&Z=LWu-v^U}!S;)3Fs@1F(2MWUFCgZqCLsVmVhNT`!lg_Bu)}f`Z5Qc-hk8AL zqIv641#t`&3XBN)xBKTu91Mf~Fr2GYHC3{{iJDN?(QJzSL8$y#a&;52e--s_+fV%)x<2RVW3`}$KnF~L{8Pufa1R!Ewn z66Bbdp~N?@%7KXxG1cXrSB+&s*9@&^{t~n?sr7n5r^&gdi{L#m3Vi}NE=Yy}-rj<< zMI>+I_uPO`3qAlAS`$P#{AQp(TWi@G{yP4R=qYFM7Y5RdfQayK{677DiYz{7cLwN3I& z8>J4UjX+@6m({XHTdGB6 zwNf!NYA*HKq#dCUJEtxm%Ma^jR^0(!7GB?h( zI$3@(Pc~oGR5F!(Hpb06PNMI}btzM2r>{@5w)v18&g5aAtCH4s;~V2q5-P9M>(xUp zF?Lk|Yz+47wXW6Biv!^A>p&9r2*o+3Uo;Ap)-XcRiUQthN-6)Drul>f8 zU~PME-cOCv<7E6?ALXd3pxr!nnFQ6lX`5!OHm8%fx6N`YUZ@X~W42Vemox16%1m@+ zIbIX)=y6_uoVD*~t0YT4SdGV5@l`y1`&4{>qK&IcogzyWE+y0pY=K`IH?e!O(=0Zh z7QGywlph;|sq%0yiG|x-Jz1y~`x;#scJhK=s`6UPqVRP*y)Qf}SK~hQz_a2*t42=5 zp;5SM>-XJ-Hkm6-Tgr@|UWdb2!Mwdmh^xX~%3aLtP;XmzWS=a*E=%T|TTNK?F2yFQfV2w5WEP|LBsHHZ zLVs~b)gEOtyQ(eadk|Dt6zQJr$i2FiY33fLrMkLo-s@HEJ~O|mxAQlHh0)926k5aO zsJSZ4?qi(9bqqBls**Cw&}nrhrRZ9>@oZ-xxhqf1?(lw5P39Z=uwpDa zUDL9ZlBNoD@AdgA|CG#%`J^CH?RlDEg)~=QzTOr2=UKMYq^pM6im5H8R?`fUWS)8{ zYSbvy?u~NxvE9s(bpNJoEw^&wL*xFAeSB_$DM@^>lsZqdc|Bu3*9H1%)=ex2x7{o^ z>E6W4&nfmUG498BveT@X>{u(L%foDGCU;eSI@g<4S1z;iob3*uCPS$`Emr}WE=4)j z8Hkhm^P-o&p;DYt>KHT#2sP<`ni}2J9*Y&05tHSrdG*?=x9X4i+f}S076(@ARjOaF z@{1Ug;_05DB*|WtC!y)>%PMDwle)p7EZq%ECOQ+!79P5?%FFuP@?iNSHB9xB_hsgG zC>C-ZYH>SiD7k7w?zb3V*R5>H5El<|otz|<%w4ZpOwm{9g}T^#t}#WbvzTZ3=V85& z9#p#RbWUteXr1LAtILes7*@2jGO9AGZgx5xk*~@8lI=8RoIa?kQo1P*?q3syJ0898 zcBu^M*t9M*pNp;3{AzyHSP0Ma1mAi}rMd&gP%^K#ZK}4&h>21y#&x;rED0u>QYL=e zt54=kfntu3kCTv&G6!`dr*+bzDb zkHR8%Cns-&zSv*gRvX#pLcKyRQdAwvu}+d!9(ODKyr~V)?%2_cg-&bWH}i~MxSf9X zNECZ|JJ)|%n32v{FWgQ)d$dmTM0EEHS(JIUUI;74+BdUiksBVM^0qd*S~2L?BS_`$ z()Hb4-jbn5Q#OyK>lOU8f?`gGaBpI$XQy5T@nkHygHM0O&xJw3`PRG`hvNBWF&)Fjf?v){l zR+|x&btE^x@UWXLK6TgdX5wJNjYu=E)7wv?%ieuUUfQ)r)Gdpa8U>j2s}Y!>)kthT#SjNC>}dxG{IVPl5}i+V{j(X)^2Ruwr$&)Ofs3+ zn%K5&O>7%)@W!@n+jj1p^L@AOpIcqEc30Q#AFHd^UQazs`gV0KqXbk$PcjQ0K7Ujz zEd&^%Pnjb6NzgS3EtVP{(}3PU%_+c)y`sBQ9Z_Dr$ytK|N}Lb#q^{E! zAU;B!aT)Hm1a2xP1DhHx@0j_}l7D;R=-h?^yUiT;Qsid>W;k0No+^$E6sdSFgA6*l zoIDqd>>)6cf(RQ9Kd$tyJmyDL@u5kM?BIM3Y3R9w{LiK0&cue!AjI3Q5DfYD5RUCr zvgYpJ$%g{fxppqbj}LMdCvKf+juuEPuCT}17?*>ck=2J`0%A<)^7dX(m!HnMcDZ6Z zr*7O^zS6~P3IdRs>X(*r@3Fs(WoI?{{M5WY2Cz;BfrS>*3S4%)qbSZEVD}Bf9umSn z!f{WO&~f~tJcz%0xeVRWLW1=STXoY}4^vVf;hY~Cd9E!>Vx+u0YcMGlug3+vzcbZ& znAQZB2!0#bH9!7-FcTJB1bGzLyS|J=cA)OHoiy6UVgcYmHfn!x>Olli*DuN)njr!g3k zvM38-qGv`_=nkM393V@Rt$Upy#p++ldt=P<$!(J?>1Ag? z2TLk1w?;s6$Ql{E!v(tXvK7)2Mw>BPpiH*0o313eJHgq09X>tNSbs*PBU9+E;h*=- z7b~J1WhBbng!LCHq{gy_$j5Kzuj;6A9=4C;H`Fcp2TjpL0(?Tn8+@+beoFL5a)sYQ zC$a?6I-vi(%L;>1l4-9wwR7TTS=F3&aE9#(kh`&8vQk1cxF|SKhnb_FZJ}hAE|~U# zrha>h!@;1kd)FaT`B!pG`CuOtu6U@$DF1$Eg0ZK<)ORE?#&3g1{sqtA6;bIJ zo;PA2FmtcLeWFDFyT{ET0hEEUffeEZZ72%!}`HIo>$%6F& z$>^2u$^wsG<832~HBiA#f4CQ+kxYl^&9HBph>+=X{$cblYMW$mhieyPFBFT==lg;Y z)T7et77aI)@uF$vwHph&73f3*_G|_xEERFdocQ$2`O17_2nGa~FU2N+niv9&x*e2B zmm1)@ZCv{uVh6M3e8!_bt59ID`pKn^X=Kf}KU9H{eW0PE9Ag|}T;J!VerFvMjj;da zFf&+^EgCoV(iI~5j@-EchK(h`{kMhd*A$mg4Zk`*>-oRW1(ntqc6Qf$82^h`%Ty|e z)xBV*6oPP9N($7bhDzw}Z8mJ$#0V<^=SWg2uUD+#mEzJpw z5GbcBi10#5`;K{PFeo(ml(a<+m@OCOuViQ0s(T+=@Qodq&oxBJGS+SG#;xvYYvm^o z&3y9C)&|+od%>W00Xh}v4a6-&hJVf6ByJ$Q!i{?F24|_kmaa&G(HftQt3LzN3FCl& z}ANzxW zJ{OJV28a~-E@6oVZ|QV)0XM8BuG{r=WL5GflQ@5;xvs{g%`{3c>iuAh?|&QL^T73Z z$GnO3sqVt!;_$^O$<37eL%dhL!zMSPJ@#%~-P33EB$!XLD96Nb6>32L75_x|K>Ed1X_ zsqwPc#X@R^J@sANhFI*Rm7%b4TaWXCoOC#9}BgfB6(?*Bd`f{Em z`9B}(@_eM6g$NaJ_5v9GOQ{jsl$q1YIpf@`Oz!ex)74qiO}lGCnCiJ;+Rje)2dv@mU2BLlok>c7!@=z)%M-r;|_Z6A; zahY7;L~bPH>uEmh2Lm-!e4g+~=+=Csr|l)^q2>MXOJ);3wp58qL({85ww3lCs>rIN z!~3fTRFAGTBB5)R$UB-wK3bAa(rsGbWtQd(5JB#6H;GrhXK;lcvh&YF4|Z!g%PCNt8o_C6Qkq zijj3(CHe!p6s=}VdY}G{44fk&Knqz=67h#^Qk^5u&*~N`VR>QrrLeB9pz!O}g$}hr z2Bt&KnQ~_bLN-V9t_i(g3z?Vy9lK0Ace8f7UcF4S^E!YYLil64``bLKB@WJ9IPg_j z(D`kC;ZXSdOP?QQ^;Z^4#dE(1*b;0rL?8?)xRtJQLp~&UOi-Fyo^r`TnzJ@~3S}#K zdC7)&m{ET&{2GQp!Syj83X3v5oS_0Wx?&i#z{PC@D01&w2_=hzRHmVXk)=qz(YrTx zfRd_p_aB;{&Mm5mi;s#yEUz_Sxxc&|{owxf5}*S;9`Hj&V@BX&236m+|HCEJ&;2dw zjH!qtIvxN;G3o!;pSfGdh^(_uhfk_B>9m?trIhok-fS#s{`@H4$v0qA9QM z@XktfwZRLhy4gjJR2sUP{0`n2-RE>XsXTt_P7S#QoCSef`vz>)1+iXE9It z0U8&lL!zEKt%}8P!`w1e7;AGJAe1h_LzsKq17WijueL*_votr!kW<+v6o^v#BDwyAks2FP#nTQKf2u;p%T)J%^9HUbc8*#=b{8;`FRoG!zoZ?0SQ<9U6@(5a<%h{G~TDG~rErY&&ada!t1>qYr2Jip+yA z`6E|8A!#LLKavb1r3n8VCPz`20yl5X8x$mkhQ?)l8Fs?P6o=xgyO>Vzf?f^ex{M;uMU)LKGsKg`zrH` zivF#;f(}Yo-j%}-UwT-eT8lW&o!lL$t4`M5Tm<3@VJ34c@i0s{>dieZ0y-lV$J!*n?NpA~i~1imeY zI|dWhq{2M{^wyZQ@r%vkHq#R91jn9=Z^an&7ncQ*65s%C?~Qb_fPFJcz>EMMG`uK+ z5=J0o8TNNy_FA3q3uLNWuth%D_a#x{3y+HR=rzEE!5$%d1nE38CrU7ayR=asT zIL{igJu>*;!~61t`iXI+<${Ji$8g8g-ySKSU6W_GS3`zM0Y8VUY}e$Hy55jR3t!%q zip9>`;5YificWGRtT2+UlIWVU8hb1?3o&xs+QJ95;h%{&>t*)(gyJ$2BGFi6>n36; zyP*{vQ0osN(~i18xNSCaAXQ|2fp zam|faXmml_jWt0On@M@jiKEy8u#VfW1e#rbHMWRD=4zw)+m0b$hY*ri* zuQR*7x;AD}exWSr4-Y1c{J`ZpqC|Qoq(FL~OA7~g(3EW<_!#iMaeD3dI+2V81#{1` zEeW`|QzYSgEBv)fFZo;or<9s59tg5mbQ;JAL)@`G?R9h273A*L@nPjIy`NK)2D)s0 z@K+T44=eg}uYBIt%HCmrfxA4}HBx5v@=LgC5QMmL;1jf$PL^!L)#{b`V7cTU?H#6* zx<3Lecd>tkBb|p6d1nS_hT3~Q<-4KScVe*n4<^eUa>~7}N6t95yG(4~jdO2#)v_8V zzPdj?1mC4e37n$fubqj-T=@P*9wKRO9|>$F4cl#Rw_PchfJ8|Caaam(6}(&8re$#! z0Bn-Yc7+oG2GQ112KP5ik`RUugO1F)Th`{)nkZ`RxXaz3l9S=B-Cp;5DplV zpv=3X@$a!IgETBzUZ&1-Y-y2AshYBTk|IQxQj0YuF+&|fVl$9LDTBhoI0Li{Yv1?# z#nrG3Y0-nYOt^0T{vL>-`)YBlepBhAO0$B)^F}H%3JtSA>Jbe(Z zEa+YsXjFN=V&7d|k^+c6##Ii2+gsX}pZo~>Xi9pD%0966e4E{;dekFJ&=)d$p*gB9 z1DbGu1pJO*dZ`G?Pmth1Dk$<7T@~6M9E8p;HE!e1f=T%R%J)Ga&^i7-ex$t3_4}SC zRoXj+NNZ_$)+hJ2AuG?vzU0KE`wJUsW}Ul@hw1YlY_zPTj@ON3Bpw(^Ij2D5F~Y$3 z!+!*X_Wxv$p2fL zo%pT3BWK*e8>IEz;KE%T4Y^&7e-3t^g#g2a=cd8fdL8QH18a80cFA9~_R)^_NS~gIJ)})KFD8Y&&P5 z;2uS$&FtfOKT`VvvO`E3L*+UG46 zf)90yxNm}JL9Da>8o(XMBgfD9@;{NtUb5gV>GH3DG>RmHecZIp6+8PCc&0%l`G&W4 z-w{XT7@wYCul<=J89*DH*2K)aEpfvxnu|d1+^98;q|arcj&YPXG)e2Vfd~%_%odDw zHK^yvk`MIw6U>UQLa%##>WgOaSw8sPJzZ=dK}O?RQ>f~b5CFbVzWk-jUhP@J}Gt-GI}ZwJk3iNw3a7}2Tuj0M!ij| z>L6-TtMsbboR`5FoPYgfHRf1|C&eh%ay@xrUjS|3~YU!3z%zaZbZv!d0;d`wMGDrc`q%-x?ncZr#!>9qy9~ zU$D!+NxKZhrRi1X*=D3eCG6Ul)pgd-Yq(5=kxHjC3Q3ZC-M%>lo{)lKQ#|r@29Kuy zYNV`sZnm$E+N48&-eP(;1C+G;FXixH2PS z#Y_3U`)51~UJAjo-;uE4Q62j%&T-T{i3zcYs5-z$bFas@LQK5fJN*i%;YrbfF&$tf zFhEX)i8drZyH};9oD0cXg-?liUe==CwhDAHJmu9rg7e~ob?##6o*t@1+p)v|r;C6! zEIsN0Ue|S=Y64*R^YPwiTQ02^@w*c%jnDYPn=EV*N2Y>V)zp9}-jOtuS9ZJy9Q3mn zgZJcj@7Lk(j={ItUzAzskdrI}D|K@w?M*=B@7Ti1$S#)btK=t8TaSx)QS!_)F7 zf7-9Q4$qmy?U%y&##7OxSoY)ebf*YS;H0>Ui~!N}T&fp5;apa>GBR-xaiY<%!+1*; zCX^y0UkaMdXRN_5%mRFVm4Dg|tSDh{S>CP}LH0jD_C9Q#9DK}cf}5SM^*O3!qz8rX z@ZHPJHaLF%!gGXa0x&Y`^_+!?-r1>;afFF@Bo^aX50lpz^9M zCRVlsUwC0L9+vw^&sF=se2}{xR^MG=bd%?kta2-=hYKH=SgY_ zp%QgA^h+NwO||i55DoL{FnxINgh75C#zHQe@4sCzI}Z)>M2alS)GErhimXGk?GGl- zi+!=sA?h(iTp5klHz)p30s1ioRK{*Alg=&1PYyrDxUF19QhbqCes64^pYILN`#Iu9 zMl|Br5Y){1K)1>LhjDgIQTlyKn3*-_2|E5Hf`iuNv$#pEhBgEHz9Jom34)I;PW|E#(70k*hun1$ZgvQ z;u{Ob*8^x5WVUmx+>{MT{yh5PJO(9?pqvJw>{Uq8k#;u@JKW;}N^|DMOev*8Z~#eg zAX$r`F*B1$1~p~@UMv`mL)ncer+1K#NK<)_IAv8grqHU!kb7G-*8^}5Wb}l~8l6`Y z>ZH8+#4NfRPUyLm*Vxh9Ii_9CyF|%fb0yhVi!>qqD{lr~n#7@C;~1_KDqr`y^DY+; zC7eT}B1A1V!<#l>&PBRM#C)rE-@>Z2^$ zmrs1XGN2w_f2u&LHglizX&Gh|pXc$gxXw&5A4DGJ9{l5yT*A=(!dBSFkc(nv({A(p zBf1%8eLz4{Ti=;aW_f=_l*lRG@W*Lqf~RwoVHIbjlTrj+`kkO^i7oW8XruA9$nK9b=&9IOSkb^E zRP*63lcT^!V`5R_so32Moc|SN$$<6YG$IG9HV8o zp2rYTzvrQK9$D-&3SAs$YPEqCdp9kI3@_BZ5zqeh0+LWR?Br~^bzSZNQ+z#chtHmI zs@eGm8yG&8HB=dDn?6!v>yRYy1n$@H*ofSiN1QBhP;-WL)gIhhoE^*{Wui)OdZ;B~ z(7C0wiIHj}_twdzNc{c;V{bp!2L2W&3L!sZJfQ#Oi#MZKN8qhyo!crsjwpI)8vTtU zq+U&r?^Sv(e9+ew;?u38WLX-0mKolYE;ebwG}Pcc7N-*2g@dg*@{Y&Yl6uTzu`$um zC(pmrT5!Szb#+w%tPoO9+(NLAKn7xa(F`ph<0F4-$aUX<%`-=eO@C^{7ee8Qh}+l= z>#07z&a`PoNE|X8$u0`A3o<-mLW5oa2L^V4uAO4(wJUj+(l^*QWSNl3bkk6gg@QXp zp2hhF*1-zTMd6_}*S-jgR>^U5C5Zs0y1$H=n&~G*^<)}IcmVvZ+&*1q0%A*=;o5bXi<|pmu!wFDswX)s`NY z;kAy}lCTwB>$a#LXVSsc3&+_p1Vf@X{QL-7Z|-z2vlv%|-hQgRKWKuZXoQ&#RyYRw zYj^&P2zBq#-b|-I?@|Sr$En<;dREDjC}DUXo$O11{#y+VI+%r=UY7Dk1({V)I2Kzo zNvGg;sz9dTAWm!T)j`Z5|A}edj=}CH`W&#~Vh0U?d(8)Ldf4$;8tZSQ46e8*9(+>1 z4uc&NDaS9O&TMP`Ra{}n`XaL-6zl*$XYSswz}d$j8sk-Ltv%&YC!B&%y_b=I95 zPBTLb$?gdl?vm+r9kjh>;Oz`JWru8rmSv2=Opm7jqB2>8c_fvBlb#jS@{zSq5Dr{V z->X2pO+bwGC!K^pV@6N)AhF6E6sw5J&wbrtR~z%|&Q^Swrh$PfN)>q(r5F+sVw2~? zH;Qr6Ff`fQ-$e{LyP^RUn14LF?)}WBvdCS54r1uOfnus!q>ZlsUV;Df=Tm$E4WCz9 z&-0fT*YJ@DjV#CtW4hK?U|;_rZkDk)ZRflHF5;EY!d#t!-;OvF0yi#Fy%@#s)Qyx2 zC>>ulzNbKWN5gsuZ0Mg>Wd~I5FD`LAThcQ2G_2y@sdv3LlQL4F4x!@4lT()7`8Nf0 z#6A;Ce{Fb0NM72L0}N13tQoShyjf=pHGc_i{>V`=wRoLbG+10pz{IQs7^vHLumCp@wgVB|jz1j)UTFZg8QLk0 z5XUQ8tPZ}ddAUle1Uw^^t`{!m4@=A7H(95uQ&tLxoyL0cYYnP#qp|Jo_#4H0IT<9Y zxC|Z#kd$#B71ExdZa$AJTeJ_A^<7@WI=LS~j8h%oin#}WT_a>csUWK;Ax)64W%Z+* zU!AKyt)%v)+$>k!3ntB;*AD6}iu#?GcF-x$EUiX$MLDzwvWYc3Dh zT$RCRqX>dze{{g8QQw;2%`;&4#K)NPEqTl{Hsn)5x9%A)N$6R91P}waswZ$ zu6^o7XQW$Hr}vTTD^;ZVMP@;#<~w)ZG5~O6ix5?%v52I6v8TA5V~GWLZ&%ME&bcn2 zsTtxFUEVGE=A3?A`_x*ghOvNsUOr@d&3k8AEftbI#=W<_O|p zXnS;&NiLW{uiFl2NJ+`>^m@#En%}P3cgL$kCRH2OC*57jPGX;o_L6VRkaO@!Qs4QC@d0kSyNH!M4KGEy&DF3y8? z1=wNE)98H!Ip^j7yc7P~XiR=G4Hqdw=E9HID<(y0CoHF47>GP^YWXF?!3rtmc(v6W z&@JpGMQ`0`)TyqG>Pb%8vTWG`(S~lDC)3^gX=dlZjM=UxH}BtA878r%Q~swfeW;aB zzD%DRZ{^!t@5}?w1|xZ4<{UOLgHt*rO(LQ1P9KtS;}sS^!Ym5y$v|x_a6;h^k~LLi zs_inJMNQQTk^kW9ts+yfEPh?TtM;XPHp&@u4up9gkML+furmQtgXMI5(l1N`L%p;b zl{go$$TZ4V;sW%%DENWMdI)fG9%wq%0NH5p)TVxl5mLI%)yI;Z0iScl&9^E?2NL`S z-MuQJipk0g7-E8lY)7sY_YbO(gJ_Od5e|?f{|je+?{3H~%Gtu5ck}0&AuVQWW1_B_ zWa^W_msBPu)-J~6%VkMwv`J0R7dS+#h<3hof8>L{-dmqb66XCb*pq;{GbBloJoYSZ z3-J&Q6nLXi!V6nCun5wJ_A(ne@OdNs;`M~~^+xg3tUT>S46ha%E~Jtr;Z(G*uz($& z56_?=*`Ozs6+h<+-SDFaZK<0WJvTt0|z)h~&hh+DmMKBSmzXo^!j{GWizy|RChn0YA z8DxfoN4pd@N5t;Gd7(7T6n_pDlNt@sjLVBE66x+8AYg@gVMHU-@9!3wLws2inC>`L zKb<60$9`pD=njBFr5u8HH&nY6DgVrOu109LTU}#x(`FGFA&zAvU}*I>X{K!!(^h99fLpubxb*Bv(8fb?f*Yl)NRej$TfX zAJ=b{yGS1_>Rnv*!V}C9y)N*PbdVYm_=QA}_CL$j?4*h4LZ43iFcT9r$Xy=>i4(^b z8QEy$Pfk@hVSr?2%QSs{fDllsloUZel^0J>Qukrnl*-n4T=T)xOOxWuK~>CyE|lA! zV`!dKerDyI^*H=?`9wVrt~m2mBI*kmiE|{c3c5wu!ksUfueW0#)(n=p#s-Z=>WZSF z+}`<-c9o1%|3S+3LR`R~7Zrq~i`l_|pB&R$Mou4XIt!{}_}8x2h8S2|Y#xSXIy4zZ z%Q9n*BIy@5N*gVrkj^_Uclg|8Y-rv8Q;FovM&Wv zh^OzGj2U->tMjU7N1+gAhiKggH^RIGgpWq_HH?boR1etr-0++@5hFO-h`DFs`LH18 z>3uIAkf^K$XlEpoI^0zxnrqR_8EsphME5_fq~XNLEo+V!HiI|lX>x_Bd2szCDSe=u zUb^HVsbIQjKF3=e#K#$rCu@kuzl`6mQcea-9~oap6#pNH8-%lPr??jXfTgrup_ijiw2V~H33^9(Tu z0W|Zx8jnbx*U@cVdrXhD8!^1gSfg45KCCSRQpIe_0_gIuZ({VgoPFY6J)~)dMj|yy z(L+H!36}#cHv>--kLbqJ${fFB`%%f>y3?NhKupW`Z1Zkl>|c17?HS=tk;gh|g`sc) zad^6dJcbiIrjVwLcHbO66?KGF_9kUW8dM5&a>KagQ%maY3mEyBY4EUyrNRP9|&#kPLa^Adv&Ugx8#Q|{v36S$?gd7Q{U#k<6d8% zSo@NVWt-@=ZXY!Y5Y!A_>9qN~y4AGTOXZ@K9z@C%02=Hb2Gv>~PMe=3ueIBsWLB#f z(%6j)Rzur%sE*W*AGVuc9ZPD-N(9WvCv=^C)sfZotnJ)Tg3zNSJo19k1K_yfy>2P4 zRT8~SURTW# z$%sT3ZC7RY6qo$TIV3RTZ=Py24WsBEHn0b?1Q894j4qfyU6k^AndH>?)4;6(&?hB< zYW^kquxQ{P8sa{tLYVMu^pL+N!N`*lP6p zS#l|6rNpMEs5>Z(c~`;7D4LEj(gE&$Fdyb@xqzA&xhgg)^3c?23FI$B;;H4Z(XLL$ z?qW@N1xh7S`nFL|jN@q!|2DT@Z?BhPiMtNaLJnSjmC<(B$-ual{q?vCzK+fqu;3s{ z<#Hqnc@OIh^?hk85?p9OO|a6tUM~5ey++M?%rM1lhFVtHH6Qk+%yT^f)o^*5nOTA-EUy zw#VLyvIX*kwpsKHfL&~$DhNs3MhVM&&TPgMAkjxRBDFY2^UU<@sR%Fe7{;LE zMMXwK;t8>6)DOX0Wo78?WAt`_9*SAs_vai(8HmU+t+cD#CL{L)cNNj}@)QQpIHCpd z(7X&_wa9+eK_F6SF^E>8im%+kNN_l25g|e|=HsWJsGv&g@+yh-)q0^dTuPoLAtO{8 z%c7m~;x5Fg05?~i^J-f=i%R+UFDZc-7|l4#vb*$}(HoY}5O)4(qjp&vR3)gz?;Pa` zu6cIm2ge_jcG`GyS7S!9E60$s5uB{M)J(9a(x7NYUgdCAB0vA)VZu*+Oz7T}VuNvW ztJ8KxilQ_{v_-ue=5>m!=A&lvDKR1=mezua+W*=E#o}&I;tfA1hdi`{|b>F zdcNlivYg*J^Dq<{3H~~J?9>cpDAQmNZ70_}t8{?KKZ%owe+%YZ)%JXh{}MrNPS2kuHDiF9fI{FMWmQTbEXH}yaS~Xxwz+04 zh_(`m;59~Aoz5%b&{djJ62l0OtLd;F{Dvu{Z|8X`tc>r7GpT0}=GYf11uts-h?@Zw zGkzcFFsCSCqVIevXGo#gF~IFwl|vMrDI3Be&P zaoh1=@10w3#+abGKY?qwO*?Khi28CbsOa3O;M9nwgkGy}J23+mx+-TW_+MQZ8U9?ds*V`Di?Em335DLM*~XoVY0g}Xo7 zKd{*-#b&S7|1?^}yGFnez~aO{-`}8;v4GyEf5OUyAAZM3(93YhSfVL>*}AL~8Xn?$ z-PLB7q|+eBD1q!=?W4(>EiE<+wav3;?QpDoEM%(aj=|FXluryT>;BIbwx~b%GC8{~ z{QdGQ2|*dqqO!OpbJ9#ospO4;m{lAnoVC3bYnKeXujEMe2O>Nk6My$kSlGc8sEU-{ zu%)60@h#{t=#RVs5znBw$HE-++Xz0UUw5tqL(y@R@_5CZ9vX&89d?8H%h4CH7r806 z%eq|Dwr#RQ3J2D|c>|x|s`I{FH&jp^-sG<#tD9G=pE1i8%N8La{pe4s&Ro5VXI+(f z!1xhYvRRjZZxnx}09lXM$PieLNFnj=gF#|_v+&*H{SMH+g+tSf5Bm+OK0HOq<1Pid zZj0~TFvz4CF)ztW0iNKi%`*1(ao06pS3O^O8()H;nfgfD+U7igqx{|9>V^9x|;G$E#Q9}XU4se)obw9PJT$95 znYC}{TApf6wAurdNx+{){&h5zT6|66HK#cQcx-DIj=?-l35nE05!pyyA$t6_HB7HH z07rHzXLigF!x{K0^O?nditw`g3`YMSUtp8<7+L4MfT~4!1Jt1|zh8qhj1F`9A4iov zv-;0bsX%&Qv`;#6;>6lbGyApSd^tKj-tyVJCJsCwl9E*O?=lJNU2Gf$H-0UrV_Yf< z*L~%%WKm3be6USu_}=9y6FnpGi$LA!l{KqE$LGyDrfqK^Az1yR>?}D@Vrs@bx$R@| z(5h*ft{;-$C0oGxiLOpYD>8`-ITkeW%%=MBeOMP6eLOEI5`E;Ci3DGJ0vb@8SdK9~ z0AcoVx%E1@Pgx)mu;r+&F65h%0UD{ z@`gc)wBCd*oA(M=Ps^P*uPyDo#S=>8iV<&3LItMOA$YtwMB-tj6&zkL-Uz zsA@JqLKQl3AACAggor zHP!tBynA+Y;geJI3;ll1+=>;{!+^>vmKV=<$rg5g_+)>gJPm}+6lo5*Fr0#r+&o}K zcTSz>{l?s7bF)RN-<_+rlZK$0fh)$#h1cK*mPs39%gne6-vA2ZM^U+i@tCf_-=QYW zDKdD*oknTlkO&cql>+=}dl5;eOOy;f;3jIj|4g*`P^Xz-95t1VnUR#&8H-xyea3t0 zW#Q~52W5GrmU`&MNxxhnC$Z%C4J+q3ABv4jaSaO6qMIEvUqMTO-}`v(X|Bzks{DMw z=4~%UXOH~PA$%}q^A1vg#4{pTY7O+Hs_rEEEKi+^ijibQ#eaFGgiK4o5NcH7nkE?D zRh3tLwW#zamhym_a+`2K0{G1cC{oRj!LMMt;}w-Dj_RVYZF>>R5N2YrQ0>JbFpE@Z zQ%x(B`}>XGg+cirae?R^;>z~xydoT{u=nqB1hak0It8S-U5^5zY|vO`Rk&F{X0k#MYWcp4$fjQrFDy|Q+-k$W?{`Ix7ooDpL|GrR?F)xWsP!?l!!yDf$&g^l0qV}AccZm zqfbDB$|uhJCe^IUW^|jmSYQ4SgX#>Ej3u$9&0YeT8&a|iLv!b%{#}x=@l*D)X;82A z34ze8Q!{5j(ks1kjUHzhUYn*)LP%t4EcgIAk$xtj?QyWWxMWf$inxs(?X#* z3--lZ*%kC5w;3?u*IN1mc-(<oLo=Gn!C>($XxO(2fA&Dh2k-J` z{6q+CpsRH7Mi%b*W!Ykby!8CEpq|1zww_}hQc%a?c8jcUhL zRqkb+nwt=vdRM7h^iByj!4J^hWZ%1o>c7|Z#jq14qnuYB$QSZ5EaNRtGZa_qJAv}1 zlw@S~ER@#5KAaa}6t69TcZgSIm$j?v@kV#TaDYoBLDB4L;co8Psp+hHeXi?SJ@z55 zZA;Um8tdOU5OFTqehe<-To1oy9i|~&Me|f>+90uuF`r}Kp9H68pdW}3n@Q~Lvuy_7 z%$LkbIqSpC*{9=PMSokwKT{Fonf!plMMD#TbpnTJ*6IIw2&|^sP10*c}l=0PL20Rh8#U{BU zI+*Z(HvZZ60ibJ%i55K=x?qv!zg*nEUb(q-@#L@^k8g_jzk3Mm&`PWTeunazwX8V$ z$AKpre2?c7?3MYIe5(#7fvp}4Oo`W zQ*`&h*;7^&WdEitZ(&K@TZbR}5kWI+RJQyBb!l&Xhre^6drZGr1sZj@kr-yBW zh*HR5kDH{kN1C#PjGHwLVN&34AX_ch$gd-kaGNM^gR)Qc6)VQo+K32#9w)T`#2e&U@VL z%jU6J_sjJYJmiZW!`Mu>^}GB2R18_Pt?L#*VWOO5yW+VIQ)5}m`MYY{WqL`{xvez9 zB8cn_JiZ(1ec5Omr}_wVKdTbklnh1@1W`oA6%1nk1+O4e6kY6FC>U<;8&iz4qTBYj zC38@!lI$x?^;5_`mkpwj*=>Z{@4OLhif6fYJecZ6ePaw_<*^AxI?_VTvxxTg?YD_L zB8z729rdGyXO}7V_>ksFhU=c|MB56zSv);b#5!(lbeQz(4O5!E0zl$a4=Iv%m^sR@ zF-5<+6ah{~mzA)rpzbAWet`stzwm-LKr(Dov&*q;k5xnkiks!1C5*ULJNW)6WyPd@ zVQ;!j_{b;Nq5&^uRN#0YT5i^$zux7wA#`pbE{*e+uIS*8tAqj-)p2z!#@GWj(8B0? zntQ+oCMU@ms?6I@1q*8E?crX^vPA_tJ>-@%M4d=EEHHYcIr1c4OCpdEz2PYWyE zRUq{=*Um2N%O`vNxGnl!MTOWcYN{fty3Tj)dMZ)783x1J4Koe7^n=N4H1tLOQE{ErobgZz*O zhrj@V0)hEw)ubTwt1Gv||L#iu89DyH)y>Y)gjwIw)Y^&J!qm{j)RC3R-Nu^fKMy+M z>m$KfQ9|Frw@DSQ&D`L5ks7%i4ZKdk5k{@K+W!VqxSg!yH}UrNjX9dVbblK!Wj5vx z5*J^9(Vt{sLDVA+)&H_z@naSiJqWb6M^b=eN;YI?5Ly!EFfPv*XiJwYwpV9KLF+D> z@Fa_CgH34F8;Tc5<)fk}KSS3_^jWVld7Jj%4WMTj_d7BB@30s_Sg0)p~? z_5UBu+5V&Xg|@cpJ`a}fcEu|)o$#c(V)!M`GG00&>5PIXH{`>`cn1xDz-uj4t-%rB z^6Q;QHw!+P3MHLVq3ld!xlnnVS6Vu8Q}~;0lAVL9Kn7RPxdVJzG&qz&?(}Bn)zeGs z4{up~x)hIbG65g^Po8dq&zss@>TC)$7|;0v_TAn5B$-kEMN3pFis3=goX|i_hE!=n?%;3VV)sxXViB!lBgEYi7gsqt9%pV4a6lVP>m#%_K1cj1{qBxBD z)1ii^MpYbt`%J|;)!48Z?A7ba$Ajt)78lZ^xFcPnnuB|=op#cOZy^zp{=5Rwca~0> zNKop`dJ|QqNw!=mtnBZn4?=8Pn#JBLxz<_oNffjdpE-g8`L6bQX?QrxK$&GUGG7b;Uj+PD18;Pf$kq}d^y$QOw@ z!-8mu{6Mdfju#WP9){w9P;yT6TQ2pBC((Ix&{_6qVIidMP1tURc}Hra+9 zRq9A7sFbBI_cMP^Acjfnv9KGWBTR)yWvcSJ%dY)pNAJ>}uu_|`oz+d4j%uAk6(sw8 zs?#K?DTWFawi=;r`L{+irQ|pg4+O?nX2?Ft8R*4eW+gOW0F_@6jb&*DrrcaEmCZaP z;^AT?uLTZr@U*t0Y89AGS=87?Bt{BZh4cwOBYcm;_en*S9??gKBuYr79OMeqMJX+T zWiYJ&hp~5n&a4R*MZeg#ZD+!XHL-2moH&`-wr$(CZF^!HFaLkuIrrXo&RgsDw;Esf zu3mdrS9jO0s;*5bdk9VZpcu)VtQ}SA`>9*1PFSX+MSPGKGH`|emzB`k^;1SKl36gA zS>QLH&J)AyX8LEhem^`LOGW__>Q^I5{Eo;gg~0C~E=+MI>~rALhfB@FF(3Ui7?`t6 zn8&OicDg}H4e>Rh^5bv6S3ymgRxsLx3RCUBJs@KKb!CTz5%oF9fmCCZms3p&BwWft zFELKInBk7_G3(&^i&nDw2Tr^ZEq8)It@N_cVQ4 zwQ@BpAZZO33_LxfAmy;s*TffnZV0tFyq8$?$Rf0ws7Kq>STvynEfCi)rY-@+;Z0=w zV53tQbzB(qL3qK@q*Wk5bL364E7OGGhcGl&y;qsBT7x`Ncn;#sy z*v`Rh+0uJcU>Ek44O?25pi3#8gFN>Mf)<7Pycx3Bq47 z0|Z9ZoKn<}a2i*`jpCh~7bS(l+?6^fFpWFt+tuoUU*I;hhpeBjU8V_rEhUKb4AEb7 z+#|i{vPFuaZI=Zyc?vZH7G#90j4bp)IAcOM1w7thu+qxHStd&1Np)ji=yHaqt~QIX zs=CO(f6xH=8NmniVF7ya0Xrp>oVtm5^)!^=>SMC=w2CApEXa6q2#`6zmX9pOdKDOf z4OIQ7ncJsrHtT#<_op`1$SsrJh3oKXiAC$S;5IY2?mAZti#S?(N|fb&t5C9%XtwU{ z=Bq5f?S>le6$~zAS#oYmiBu*@QI`Y*mieP^GjcB28`hP8t9Q-2Z{BlqtAlfLQH6s- zC5HF|PK^SlX;>^_@|3E4A7J3flWD21u9d#bX&oLw@6E~{e%7nO{f>U~aGp&K7tBIR zB1r_W0`Tc@qxHjz%vXic6pQ5k5;3~>aCel@rw-PV&4)uol4o#}JA^*9Ule&W`3$O}xqv9l&9 z(P7IHR`?)N!F@(;{UbJfQ-%`KYJ>PW*!v5?f!GPkhc6fF-Rrk*1KPcx9j)F-akCsw zO2k#3uITDH1!#FovjI~zkh5`q^XlIH^VoeE%`8NlmCQpp6`XiyM%n5zb~I(xoLckD zImj^IPrG~BDcdKe{nEvAXM-Vn(eSkJu7EhWkQt!-Rs>7}LcQmo#N1^q*1$pD51(>8 zZ_Segd<7Yvf4w@-eYz^Oo_x6Yv%7Wh&ke)%i`X+|+I4L@B!0t@$hsxGS{Pr+T`lFR zBroxAqji<0YdcT#ZWUd?VR_6~YL`aft&CKzOj!23O>t!w&v4(GB&1$p>7X5!pSbbp z{2W`vP#JuUX57jId#`NfIn^Ih&b(y6Eb0sK-dc{)$>A+%WXj1{BiR;ev;Msa5u2`~ z<9UJuIVSvR=hi&tkuZ0h(ea3SPPL*~Q@lg)tM_C2;(}?HE+w_Y|A!LQC^xL*qt9AL zoHs8LZt7LSSbq}qgx!+GtGmwOtfVo4*)^OTH)AX*vgkM4^FQ!D-~e*%pD+LbwC;b~ z`~T&7&T-dm_FEo0`M>^LD{`p~$I zUTqdgfjA*LvRQu6;ssyP-~pjR*o>$pHG+vGI?^0q)U=niQO zX6#0nuNXArN|?6~0d(eUlk zXa@RK|5TD6JXlynefUx+R^6n)KO2pF+r9JP72M;%!qq}}^h5)8TWI&1SymVO|8jAz z6m0b%DD3~h1M>2L?{ac@vUGq*M>i>LH>#pb<7okg6$#T}E{E<-wdtpoKumYW7+=e~ zOgmLJBW-sC_eaD%*X#wo%X8f-qjF;mEDZ$l#_+pRwlcg5T|a|KdbEWQj5z?()Iy|r zQDc=D#lLa4Dcv5`rd1L@0Zvsv&f!9=Jb;5kX%UR(CPo+DmUT2ZjnSPwYo|SkMHW~{Ti8} zRbh7Amf(PCYd1WMFJ5yrfWQ;|h+MEIewu6naUUdqT5{9E0FO2UduFrK3M9~kbK@ai zVU{VYu^gX7NJY`fj|ZWWT}Q)+(Ye9A*0|)x&2N<)ewnAv77;g#m=f%K+e4H?e74J? zT4qir(8(IqWP>g#@u45Jj~L#(D5ZbINtzNT{+fh-Z1>6@8E%%`~Kg zdJC*!byy&pB@Yo2#3(UD<*NIvib{@df(g6k<+=f1siXyJCPx{8!c4%@)T|fLh|CO~ zy(dg9VJ(An8IoDO%2)qSdfYnqcCmKEtbDc}m&5=QjL6m$;mX6tb>9Pbt-l$G3Wawv zpfBg6kG@v!@+@SSZt$AmS7WX@>pnL^($++}Mq#Gy!h}RuUL#GBiONIK*1L2Ccs!P{ z;BL(_^rrA@`jHq@zA~w*MzLT)fi!V~7FigWNI(y(!4h3fG2wa!lFft2Jdnozr=SHt zv&k5YJ;tAMflc|~HSrpZGXmVBa2mo8`~sz6rk`OvZK0hbLKJ~g?2e(lS8~dA`1yX zU*9ebrT*9=FijB)u$)l8gSq>%MEr(&#_Lg9-czEDvL%Yt+GRjSw2wN}Z` zM!C6{mG@}_-^Q`+dUsY1LiSGz22_bU1srG(iyv&Wnj>R8S5byZzUx7F5gNJxVx!Uk zFQk_bSoWlgCHOsd@%3{uha2HL!C%CELD!J}pKov7v%k_!$ZnEgvok3_1~_IPbJVEl zUM`O-B>|w8VV~LFuor^FN8d70@&Wce__0C4~5W- z-{Z{MJU7(8vp>^Wvq`?+AWUsthTA0EJJ;#ZP(NP_L>9x~NPPwsqs{$EOk|Q0kJUGS zTh%1pxtXA_2^kqV@0fevb~8f6$J%PNw|3m58GMesD}QW%@7^8;-C8-b*q&d<+g(R( zN{bl7z;~vl&nomebsX771#h#MbtZ$`nD+g#Q~8Y8tw2}7(*wXAl!LD?Gaq=jeUE}I zHMVl48dc>A2fjF;50yAR?8%tOOP;)pW6XZln5aFwe*Or_egZZBUa9zn3g*unz2iG%SaI{ z4rzvz|NfBgQ_^ zhF;#NAYMbS&8b_;I!-3dh2fcAn!&IvA(CG(W-{r}n#a4XrpT#F6H2z(l6zEqdR*?P zynL$us7C68DRO(8I7L$uz)mVVN~GEcGtOlBOFd0`Yhk>t0cVqOJZ^kihrD}bE86&W z78;v9SJF<)DsJaZxP8ECav+hG;`=OpX100+zl%w~&;5%n zrD;C=`+LC9^|$NDRj7wR(S52C%}gg8?ANy*oUec$<+C5y^$1GI1eOFmGwJvIt9&#Z zKeMefr-|A#^MZIaGF+hIvrHe~!ZB{ShY@}rVSEW}EX8{+*Cw4TUv)pL-+c=|Zf<0z z+0pHcGDYxUj<(f$-=j<(C&;d+4b@~vYAK`%koCEB>tam4JYOz9p`D&$n5>FdQcnkU zk(B8&em{ke zPn)s|Xl?1SUZ93=vdNKm5M*T4_OY9|)s_^i<1fFF6$stP}$ozOv@(;ymR zu|V;dtITPI`3e}rrU36nPK zjIhI3Adm1tm!StaFwi722yj2|*&5bCO6qW75`_ig_75*aeN45H3MTb2&cfesWas9N zE}LgZ1zvM{j!3ncR;n@OQ!*ZnMOm?=cDMm8VMlBMItHPu3xNkeLg zEHg&L(elSB;qu~wf(zyQgnXIl!7!nXO9Kkvp8cxS#@bTv2#p{yud>!b5R7_Keav?X z2K$jac9D|3W#OKx8#M_xZ%T9$RBl47n(&!o;@8%#_-V-nDkYa-u!Cm%b&;EYul^~C zY^V(v>s%-SvMNbPpMR!>Lnudb;7A_mJ(;bt|2yf-v*Nb3!wA9uGEnMbSk- zRAb=4J~W`)FiHYe)dl{&bS?vx{o|3T=ajDJqS|umH$r2&`RUKKe6LKki^)%brP-~y z#qLk0X|&(2Adw^ZPw3n}vn7z4NqUR%kIm%kFN5M~wD@?~pnqGAk+^&5ln;zzU0mRk zpYzaUIrYzzDhV!^D@B?cxjFxGdq2BCM!_igH+4iH>jE#mnlCPyI{%={I#t21 zCP8f5oEhyjjmUc?*~W3&7Wncgg{+ZZZS&yI+AbRW^ijDw<%-|sMp)kl6FtN3D`ah^3pAm8Rispiga2qAPB$66+PAI?1UGrfdNXyVqdEIc8 z+-j=}1hmhkoYETJqZc;oWzxr_Mzz`J`>5m`1Q;8|-;7vc4WPif^^6Yg?P(h^Wa4J9 zWj$k=N9LIIhiQUxavGq#KuRG$Se&|{&0T)89(u{B?Y z6TiX=4@dj}SRj-w`}X}mxOPXn*c|5e^`N${!t=JJyPuUVKMJ_;7t80UFJmW@K)@Xo zJks$H>*eKe;vW3q#}Rj@N7sX`_!oYytr!>RSS$e8l*aJJpXuS)%Yo{+ln16xdaLdK zE*k$>aoyT+0L>!lKJkyZOw1-PI*$l#2w~d8h4MMZO~9-=XS!+D_+f72{=wR_N77@> zfHTE_(8x8nVB7%l##XnM{{gn7KQcepXKau~eR&Ov9m``y=lWSDj}%4QTSakUE4>s? zLt$Rm^7{m-U_VRr8kDb;n4-SH(aCHtQ-%8|pcbd8qg%5Hf3rA})~#~is5t!jVx5eG zUJm~CmRX_$S`DXIc|B)v(Tsbkl2=ER(5W-e&~+M9_(8Ml_equzP&PVA&oAS%OL(_e zHO#j!XRop=rAQQ#itX>w{_k|)7maw-@;&}U|9=XHf5qY zs*C+Yw5CNsRc9d}t*cZ!iV7?9Y;Ai*dV86mzMkT3p049t6vyam{EX-Q2$y;}nxN^A z`wc*dF^v2b^%SlPVB5bxekH@A|E+6iOA%4#puv<3)ugJ&A2qZQB&|v&B8@JL8XXIH z4NE#GvtDNQ!k+O--7`Zi$XEB{;3N4k4A!?xdlcB_wttZ)%B#k?4GHuB`OG9^IiD1&)sS?J(@^eU(?d& ztHYcOom0AQI$XkoLAnh0y)|Ed<-S~|rz?Ov0*V5ou%;neYrE|5c5b{D%tusnyr3)C zZXz};FkN4ZhU{;fDya@Y&na6jW&=m7`P?aHra16qv#4Wj$H%$p=Um@M|QUv zQl*$jOf@i;2OB7z4Tcg#rOUNN!zvqhmZc10#tjiO-ye3T5}K-&KifZZ`Miv#@^b;L z=9ZWbPMYAC393F6%82Kc%~)VxJSL{VD54Qwp|!R1PED-YZZ8pNGpMp6CTX5PF- zBQu@{oR}M6E+v4!)W!YOMEEa@ge?w6pv#o4oBHKLb6@ECNTY)|CPXumidiEL1@asV zk)qk1Ry{ZN;aUlk8;dy{4_z&SngWvaLWWvq%us;gFFA|4xyn-`7-MNtm6aIlidIZ& zqCVQQ^cn!upBaGYynPpb7{9*=YM>o=^y76t0p6^#L|SMX!E1aK379 zs7D?NHK8H%OQCIA)dcUXOnlwnhKV*&X=xbD=$=EKcDcIuYAur{XSaioU(yC4^uw3$ zSj1UY`mwyzQwC|?f6yKqkM1TiGBVDuwQ{aD3(^5^EJx&hZN0yA!PyD;a(?XtZw8lO z&LB&xh0R61CXGX#MqSNR?hwK4Op4h@d={6wpWzn!OXgT*=JRRdzui|}8f){E?$8nR z5QScD6IwFX`1i;lql(}gmT=Vs`!gSY^McAz=ch8#!<_Z=+hh(?Jk5bY{&<6Y8ge*Z z@T#fWyVc&u82{s@GJorIvi7}@41LeM|Igg|SAtiNux*?Ey`^kZT=?*NQv{K2qAMww zhciD|YqazA0D-7jXB*?FJuY88qtz)_&K(8zZ)_fS9rt&2zEv8{*qwo%Bn*UyfNGh> zz-Sj8EbsHa3j!afPNjlQDmolQZCLYpw^+ybtpn;CB{kv;3vRba6WS{F!de57Q^6nt zb&YZ;Q3FRftb31~umC9W|5GwuMAyYp42l{4>tkVA?729OTbO&mWjxRA)h0X!Acf;ZayjyX39P8yIM=D zpNzEK0$2M)U_5m0f3eAB`ys6*+P*AHq4K+Ay9Ybib1SV~W2@NY+{dpn+%&FsID9k@4M;CMJ^R$l{}u5aIj@SmFs^fyiX6Nmug1@4fl*Z{yk z5&gU7UoEz^v5~nxgSoZ7sWB71g`KhK*UHxp08!$X_%8quFaTim{R4cR1CWHB4K3XO zKmgEh$bYIX0q_Dwwg$!kqn_^?ps#g6008L!J^=r(0P!Bp?tZpsxWyKL7v>1oqwe_l^EXfP?}7gMdLmg9CvA0sd|JUv1xy z4F(PY0tyL*2>5OW0RaXE1A|8R=K^3L5Ku6101^ZdG9(j<0II${FhmD1-~ElN~}VL@xqXf`$o#nUuytZ5TNfT0zm@s0nV?+ z=(&$PED9%GRbw43G}Bxyl3G|bv%I_y=pLC>^DGFW}W_Xzv`vvWQXlUcjbvgdrv}!oWeLTh^0y;MsPN{Ucje z`%tWhCX^wSkwoO>-Om`g;yz&`C4Hm*+zL%)6k>pk@Ve>hm>e(+rF}$8aizWUZE`+( zi^-v2#E;Pbe!in1@@CmScjr^wj#8HK%8nx_#LYW?{Bx|`eM~|0$+GnYIC0lqX!n0j z=A9k|4{!*#Jb>W#(dr1uJ}c@roDVweCU~O%hQrIn&C9+3nI>3fAo^rpKK}+vzB2jgN3?uB49-#cb09|8`NzCT{`9HJ>GKp?CA@m3z2x?~KmE z>H1rN&HMx-LPx$hm!dx)Np5r>j~@maek@$?%6$XO4%YfQ^#Os(W_qi`jhMjrrbp7MoM)2J>yO)P1(s|gMTF%#zxM*n2c{kl7bG( zE0%m(Yn)f9V&4-NRW1i4G71XHK;*gF4e=~KvdGI(7fPh(L;(6xD9tt622IFAh|T;k zl9Rf~z>Uyxvj-n~wv1Lc@Od1eweu@^KIUm&r5xVwVA(L&MtOR2O0 zCXIpQGws?59uY1sAN2cQ0L=LJgiU8U5i{>18J;Jlp+BjhM_yC?}nzlvChrowITtE>srrL}nnz$HcHMTT%`4iiP_= z3qXZEQ#-LC;!XpdXu!PC`lGNW=qie>AF)+vp-b+eDMyTGeH01gdIl-+GWiTM9ieyL zne)0$sVw{aZdYd##6o+!8HwMABGOxE!>81K=#LAMB}Df#(TKT!u}mI9);+mVeK0=({KsmC*|zQn?lgTX{!zfBjqgouy<0gi6GR%U6Ouz^r| z>hs89ys%Uwh}L3job3 z>H&6jRT*2mw%D10EC71U8=if`g^2PSS5-kNt2UiNaYu&TsxSwM~RMdOBjpMwx~YX(kx zk6rbzC`BF+_e)Rk@%ch4c6+N>WJgqD$jjFqUp%DEPC{jdj%zl@;>$oAqL5{XyO{a4} zjPlJ30d`F-c&N)0Pm-C}wXx)$pMSTsZ1EtByOCCr3Mbl6;{&o*H&ET=oy1v3NeuLR z!ad}k{vg7iV#Bb8o2>`zh^78O(340D>#fXXyJ*_Y!kbKfXUp?mOw%YA1Aia2z5r8S zfM&>LKMZR!lig(aeS)Kqhlf-fj5tFEqlA5G_OSyY{(a=ieGUJzLq!*2>vA;Zg7K0n ztGv#)Xq~jI`fYQD2Fr3G<4X9Gq)x>&!Q!JL_4`3S`wQyLYMn)p>faxCV5?7FTdA~p zI_|}sGOiNL1`nDgJe_5Q&3Q%Zc{c42+xw+%>}gUelC}}INIokeCr>~SI=f0rGTGTS zX$GAr0S-UojB;@Z0&?Lo1 zLxQ@*oGEq4Y;gH;6u8Lb%);Z~qHp*KoX62FTcJ>_Tkp^D5gK?(;nE982>32=C zjr-{6ny)YTdp@UqOlf;#g6_ zG;qhBj|+crCQKpkw^hAao7>tbwafZ=K|^U;P|-DUKdI8P6q5!@lR1#0ej73xc@FN& z*e6I0($|I+l`_#RU1u1^S)5s()lS$F7aFJ8mz6XYBqgg2O4+U9iYC{g2Ir$#w`qq| zeRw!s-xigv!AT%CN?4egFk+IaTi!eL6j(5+Hmcf6*PV1n zugWP9SdS+1uJzYJG%-;}6tVAp>LwYt(!y+$+@)Z4xbtQA8P0Afscerqu6bq{NS7-| zi!`H;YFgLn$@fct^jgg{MlM>fvu16)HC7~-#Wk8SQv$p|OMg(P@!F?AKmidLyV86C zNEtMb_J-y+8Rivjjx!}RB*kV14ak_woJ%F+cfhEe%2XQ0tT7bO3WMxbqn2?s`&5TR z?#MmtWF0DK1BPR&utvJB-1+ba{T{5;?e}_>0^TOe$AZAc_NoS(Dl{qQ@@Afe%q5hS zQvdWk2|6&9yH+%#c^5p!2$eRMSb#nTdsj#(^WqjRVi!i!zwqJ5*wdb#Rix`L#1EoUHW{-P}Z#{EJ z^X4q85K}-U_Tyvs&mq0smUG^a^!G%HYH2AOKCQ52pw(ZPU+9n3|7X2!srhHU=K0U{ zy5ZmTI<%$tc{FBO^S(@!k!dGYjBA^F+w0%9}mv zojqOV6bvaIg35ib#DIA%z}^`&JsLGkAjcjXG#*NexUHi3VK_2Fovinfw{CP=T5&eW zAKzrw7nP8nni|WD*P9wMFwO}p<=|Puq7-nd1}>h0!rUXZLvEMaV`3M9L9%dw$W1fO z6b-di)-j&lZ_?Y(pyl=J_H}K49e0`;^MV-`-s+xIMX+Pb!bmzrzq2!G(1j0fzPZAs zOqGtxrYw|-TB3JQCU(tPP+5)6I#H^~!S z1h`+`4$rN#pJ2=2zAia;)J^cW|IABsQDbZ3sVNcZDt1S_V~O)3Ny>Ud&x#uKLYU}O zn2HWyX@*rh0Y#(LQXAp=?Yf3msT%R@ZeGz8rV>s|__j@H>M-3D1ynys?l2|BA3Fli z1I~Z<0!5kG`_xX?l{Cl3ud-p&5qrFE&y&OS9e{lE|5!Skof&PUDkP2~4rHkk3Cr{$ z^8+oBblKcXmN*m^$=OkfaaI~hm_~t2QD7l5{a{jVI4_$#tsHu;;jkMO}mWmdU)Fj+|mRGh%gwHY{h4*;k0YNFlzI^^(7E zf`9*BP#JN&j#szgIEvXHBbX1X(#ddCXsIhqk%Kc3?(VlrIyt8lT7o&)VoXd$&D2ac zNR(9~K@%gDy;_PiETgvN?X?pcV1}# z`wILhV(2d3nnRkj&;%@Gc-YEhS;+2VqG@lb2yY+RHd_Cxs`w=b@_ODSbXPS)kzezbKVTT&Z52*ntNm86|WV7gs(j7G@w)*@q#jB<+_8M?49t?fs{&=rc|7 zA8B&Hw3$Y8JkqJ|Vo?J3F#L7t&DuhIK2U^20Cv*3+isbBAwBH2o;a#pQaV5p&qA9HyEbj>+3162f<0sX}g0R;lLv60B4h9p~|3en>nvc?! z(c5{<(<+-HmI*_%Blzh-cZ1EPK6g;?s4yLL%IpnK7k%{ZU9)cAZ^= z+IsWWf8nw%zO->#>xeNXMJZz9jt5KlkRmXPQ$Z8A*k$3&Oi-#%yGYOi8ReKIIRl=t z*|bnVdo@+1P9_;4caXzp;_A83Cq;KjY=47ZoO#e#Y3yV<8IIK04&HG#M-zzcJZV5l z^9k=U4GBoLrgH(OOuL<}_)KE8Trv!<08boFpHB2~G?h8aA-bGzUuU?}RlnI{z7;E% zE9(4Kr%cllMnMpzFwLk!_L53LDFNy+LvtF7Y7T^}-vAwBzetmKpS)r87KWbR%2=|t zB3bpe4u3$+jJPk5C~1jX$+9dCDHo%ZzJiN6)5!%p>qcXPRS2J;9|SV#CTU-a9&Vu_ z6E1Hfcb@q#2yN{j>)t0W7_Z1WA*=ZXsqZ&Uj5BHJrb5C~fgxU%!ZMfGoj1`2gm(PH zj64^m4P1IBlgyU{L@Eyz&uF`JzqC)cxc+$2w!C}Y++T{eTctX+|CjYkqc-Fx( zw{nnxiv_^VvNi{|6El-z84#hZOkYMa74hEyRy_9IhS(WfVCgtSi**1SaUQ9+d(q5Dm zWtK&K>{6BPCz&(puzJld;WQ^D9*-G~WFg$OKV{+!MX z7y-zr2`QET8M0ePFTd#sqM;*+jK4UH^q)(ZL6W|e5LQ5!L&6qb1^ao?R(86=HW`+u zy{9|wU-yhie0%h~sS!TaDp6Rb9oI`GhKtNK_2WXu=q@22!qhmBEAU}_z|FTHtmbQX~uBcIM(jjG?oI)1x+dy zekc@Cq!km5?CJX)dGjf4_$OhOU(ByC*e9@B#We{yE2A0a*WhoWVKnD4lw6gd&9R*< zM2&r1QSKPvA2ywwE@&6WBj5{~@i1dK7jH1qZST41;;;ABw3Nr9wbUVcUmK&nKH(?} zD~n4P>$zXmK331>#U|r;w5y%dKwXElE7N%;wM$Fz!FLyAD0p^o=LoFt4wERZ-F+fO zH#0R?m@XU3fLNy?Qw|QEAGAo;l{tYgt3-B-ov9jencRz$me0X!T}Nfx7V?v`hP&5~uhx|53soz)W@ z#xdVELT~28`ljegt6pu?`$FS!;zL`pdGX&%TugQZSR9l@HGv(?qfn&c;UO;t16Gkj zy`xu^w3DqR<+&5h1PJMV0C36Sb>y*47Q5z(w$gbt{O}}>;-vnFlPP2~R^k4VQ*51B zpcAwfks1khSwCk|<)zkq{D_eadnLAT{g!hiR!H;f5ub!%5Gc)5jqZa_M`hp`+SPKh z$K|HL=GpXP4Ux0t6hVjBTUU{Ar27-%(5e+>$|DV<+>0uz(2!KO&?gIo4np$YCh#b| zNGY?^@0$lrX3f|E%WxpHC1hn6R1@w)BvsvH^mI;a(5nPf{OC8#*&s%Fa*6;Z)C#ka zk@Z_S_qU`MNYrnND5#9SmE%xd~GWXDS*R*20*@KEUe0>aB@&WIE z^4h5m0T0S~-ibU!A_Gz)-PJ!y?C;<9r|HZFew{ckEUb7bt;j!$m;qWGvtYub_Kn+P zr;EMdmIC2`+q&&aiQN?JDz#FCvczotR8TsE!GZj(SX!+DjsDFY zQ=SZzhT9dNNo#{PM^ok8S7|;8gsGp>GOcsSr#2Ur5k=6HRgI)$ zf0mWq3k%OpXZ+OKZFZI2?WmWi(mPyXRtxExj6(38#sCm(BCWLY%IiD{z&$#klAtP| z+>JKtS&FTrQ@qt`+Gqbx!%^)hl%)@Nk%deuY||=LSA^}@Q&WpY*_|C9rL=MH8V>tc z&IWE{xSQLou5Lns=>2f|G08=;mefaCc5p(-MYxvd%y=Ic+O>7{cd^<<;r@&=c>Aaz zx^Rw%A-I~$IPC5GHm{N|c$At?H0PBMLPbN?FJ0h})=gf_CE7J?Eicb2Ews0;I47pe zs-&Hop@3u_xYq&0L?@xq^fMB+Q-8LI`8ZF5La`G7#6J3+GDOcDG^<}xwS}As#;;Q{5 zH8QSn9XClT3OC_z0#qa(?)V#7pSNz5kM$D1`Ta=eF5L%jGjp*c;#1dxYy?tIpn4QA#}SJ@J&3BSOWL6%Tb;b;{)v-+f9q4#`~@iE71QP+hK$C3 zx8Hf^VvA06_yTa*p&K*T88IB$kVW0{@))aApKO9g>N}t#EVt{un^S`zBeGbhj|Evs z_+h8jty(KB3cLkslaz^LBPJsYa6_e{{1}l?d?FeZ!-|D>)R1xt&4WxM{5OfD=IBE8 zEY*zPe}RL+m}s{f5{UX$?{|AAGhY)KS<1);Yg}ZnHQoLCQ=(#DgZ6%)?QSV zIW#D@fbN^i>=YvtV}i8_w#L#eStscs70L5bV!A$!@8Ap3R75RFQWY; zsG_a}kHwwjjiOMcOdaU}Y=7#DNDI2P!@syB~1>jF<@tfjAO)e!_wFaGda)AvyV%uvP8idC;k?elsH^1*`FJ z-t63rGf(rZ_Nyv@n9bh*D2K_6_Jaxl3(Ps65gzs6exb9ismCg()9`%B{`OGzXZd}Z z{aTyh4bv5DEc_DOo8w-UX@TEuwtk>M{Z1RCn%}Lq_K&Sfn^&{>4jLsnN2XN{jZXe0 zM!mM+P=Z!{-+C68)e`zOT3JP9)dnxx`jQ6ll_FaLAW@6b4tl^dY=|+2g8-S^0P}p8 zG6}{OT5-(Su}}p|<@DC)(Oy|x1oV=yMEzJAXxnV;8Gz5ojfQt@_;+!Z&{pU3`RLHm z?)7BwZ@#V!0Uxi|ef{7mwiLBE!m`^UZF_43`74&TdI)bY>Hr)=K0i)Drj{#+1U zuaEO+jA+3fs=&s+a$@z=R2~rtk9p0N_$l!aYI#*og;6GP;j8^4z~Z zHL4wMH`dz9hpyXyRjKN?mE+&xksP=1k%WH8LNVWpXc@%1l+FH?kTYI*%tI&I9v)bC zL`@-w5YZtE4J_^c(24PlUvfw5A(NllGx-BYUC~ie1siBFD_X<`0XNhcP$_Cy{Q)N< zJd_4FqyC&Q5x$N@Kk$eIdkKXz)~#&nS_Pik5->d0o<_NuVx@LM;aVZ67Gu4B-WaAx zRc6>@pRGhc3)9O(+Q{pZ2~6U}EgIwNwXtsgW@4W6dfe(FNnGsWugk)!w}uj){NSK4 z3`w}VP0OO-;8EOIHrn)PuA>jUUu>4WX!9Tf>m09f7OfnAX4`0UeM<;@-n0BoFg2$s zW`hB9-|5tyeEHSsWV=3n%&S5~z_s(E!`C^hmY1*Ks)vCWaQ(~c5HSp}kSW6Bl~gHFWgK zi4PU@Q_EPi)-S+>_rmo?My!@?Ob+9?qK&T6)5ecZx|E{^%cQ@JKdO%7&HV$KHl&c( zm?QKLFZE#ZG)?%J^M!`9 z;dZVARu!5}cD2K~&MM~%!Lz1JWLqCXvjr7@tOe#Fbc=G8_%%))$cEh{Nyl|8`>o>M z*Ro0YkUgDJ3!G~5lans37cV#MOr%~kWhdfnr1w41LoFyJ%UpU2;0er24g_kQbyp`&nihBB}H3rPj7dIm5S zDn(}mdK$xr$3l(u&qpvltW=1|wHj^PC+tF(PA=Nk51sObN1RTD?p^k`{;sRWwf(4V9A-g@eZa$1DO)TufNuTdVK(t(1QN`nEm;)M&eJ zd}pAh0@4BiKy653%6-xIe{eY|C6Vv^>+7p5^3PS-2$MP zi3CA?Gq2OZZSl6bhXFnDdg=csZA5Q{IZ@iHRpmM`xmX0O|H%T!VRjXY&c6 ze}T;u@b?FIQts81VNY2`jEG$01cufG8MXZw7Xx>&nid`LS11ISr13uG0p!h?mabYL z6bMSigT-sC=0V9-hIvwoz**=-sCx>N>UwpM)=~)J@nEN?YD|Ab<*WKIPR-c;WM(z`;HnOkQY2tf@wG+A~IQ*r!6QGd*%H&Nl&hL*-CepDLZt837LdrP|-dKt|E(XAOL=33-ks<$=RN zDo~YDWAC^mH3q^3(6pXE^fQbH9)!*ko@{bOW>IjC%uUP|mS%hAp2`^oKLaf(u2!6f zdNo}u4-|wa>}H&2lBn8NkibVKE7Yn-KlJ3}&x}_@C?%m36o3H+m3Q zP7-7q#2F)~HEf9dRVp@Lkb=9DgGLssAyT9>@(iLyx{mPZ=o12APJ*JPRdA z21m1if8A^vvL9}uwPFNP=-7`2$^c*zsJ~jM{tMIQd!QcoW{rGXRrA631x$Tgm&cu) z6v1{}fv46pnI_GHBY1-|7&qKSn0qo9X-}I+q5AW(3jZm-F{|&?mG0p#sP8z;Qcnvc zf@$bY0Nr)8BDg?ZgQj2vIkKil=|`t-WwhV>I*D;>0s8q_kzX4EYZgUoarGi z-Y#lV0LpA58$SeaX>Xe9&2cB-gt$f>8QEdY$FP>i;rc(&DSFNsH;fs9(tza-d|zwS z-;|Yh2lsMr1zH&v$(z(*xs&F3V_b<`A^X?H8z?~$^jF*msbPABFdeG*NXA9J3t=A} zVr0Vt`~D6HOROy0C*ew;iYSC#Hx%%||5mc(%Q-dX6Ztfx9*)I6CyuL4DWg zn9%){W*G=KtljLPzd0%@KWaMU0Dg@F7Te1m>{^kP1o9>PnYlQ zbTCmwy72is<1SoM=W8qTt_<{Q=L&Ppd)7qyowbQT!v^ak;@0)pVS@KMZ*qfS6C}vj z+?fc!)_U3fgoyWoAG<5#FstMLw+EdxLQJTBmXuTVumF9;4G9XMUww>_b-0YkK}0iD zcvujT2xIBr2;g7E4_6)#ICNXoo|)FegUEc{muEx|O72y|4C}kd`Jf0H&j@I5DL@;rk*ap^z#Nk<7WJsfCbl&Sn{LC3{wXzU*8oiB1j)nX+47) z{QDl+1uopCTu)i$+;p5VzR|P|y07mFTnCAX&>ybH4a?y7hY@!_84WAvMj-SLYP;(M zpb=rsA}(2g9ZDvVPT3pl8)jYb`pvqommkw2eKuT=5cXpGme>`xpL2TepUfML{?8D^ zVaK-!p~TBB)e+4}aYu>Hu`E0%yVpIFgyGLHqt>w&NVNWm7NRatw0;ZqJ2EDm5NVa~ z5JaKlhdbB;Z%buf7ib%lUD?LjGR->Bmm>06Z#aomjP}XVy+rSO)+?ad@$<+GB|M zGmwz}8}d+E?kz{N>>c>TJ6iGICu?6^Nb>)~*Z~3FH&gmlgbN$6%fmRs)&cR2h4%7( z2f4Q$Clsgdq0%;(SFDX~XLU6HTuD?f2yQGzdI+Ip%I2P##9O+V=bQ~)Jam5!bc5=;N1fFRAANJldICEy{5;QZr&CJZq%*@Qp zOl@X{Hn+KLx0#vgahsXj%*@Q5=e~DmKbYB#*xx&f$O?s2DpjQ_Q&LsdIXMYtW8up* z0Z@r%V;06$Z?Ct_hD~p;wAdFtn}?|eOj1%REfQlLFmcFZV%%xLSw8dofhsF>y8&ic zPmP8vPJ33)1QP8l8W=>t^k+Itrq89e1s!v+`^euAeGP+X1>@=J2se&29M{)Qy|2S~iO z@6(AlQLZTq-R?JYQw7ZTW!%`?uV23#EK0P#)#I?+T74L@xv_K*TK&0Gmn%Atm?VVz zEfMOVsK3AqJdXcJ8RUCu>WCV|a{q|J*K)rz_9no8o+BfK#K=PwZcJ^|yieoit zJJp|H4x9GIYLn=Qe>*ke0;WX3ba@ST?6DLZ7d?S@NrY)8Dd-!X!StyO$D9A{9x z{T-=`ud{MvDcj!q)tguYaEvk*XgLX$9B-o9{S-E-RY%#SM~gEj292=d83oqtlMfwi!o=_?~nzio=Up1}YBQscKYiDso`d{>)sC8`5M83D`H=qUhou-6 z8;gDX{`T>xuO`vhTAHKVS*R4}pO?SBPYBaoOl|;+P{nypw#oC}>8}qi2)3xk$nG}~ zIIrevMEl%0{Hi>B*@L^KffVQl^Q*%wq+eG`@~X*k;(dmtG5>m#I41TNE_UW)bTlWJ z=-&#cepr!?0|=3rj~Dd)nVwOnGFW?R*>}eIp%CnRTnHTn?_T#t`ZAEg5Rtl{_AWD2icT|-b*%+a!PD1X;DMeXf}k)I zY4^JGR^ektK| zZlbKnYX@h>_h|<)!`vcawiHB6X9o2ledP!2ctG`Uc*s~4Lj^;2qi0uhI~>`80Dj)*`r+q1aw20VX&$jtT+gy|F%Wh3-$^vy>bk@g_h1U`h!4)iuk zFboa0plLEKF$nydAESu-x}U$E3X1Ol=!9h3FrvTT_+!3VuOQGO@qJ6}hJ*&o#=*C7 zD(uGh9FQpy?hf+SHWco+oY8hJ=q@)D))oW7f80h79so7r)I?Q>%kJ+FVTByXvjad? zrd8sk!beG+eh00@FlTXI5vEtHc@Bf{t0Nv(T7;XUlq>VXgv$=r9K}fE$W1h%+DQ=w zqXX4r744tJpMv##DlX#F1{tGAX#jtt%R{*~kHX3D?g>^BAUu%w2l-F~sX`lvvDQJ2 zQeYs?BE-WJNUQ&VFy1iXL=9tWP**2Mb#7XVqC(>SJ^L5EAG9}E$@x^Q?pF#pXBkM? zB??@3O945GMvRRva;_74cpl5WH zi4b){bd6MK&dg3Et;kYxN4>!XD9NN+2Gg-$4yZyVZr*)BVhQhpmV#f<8?f)W!Y0`o zNw{9f3Hb;iU_t>PNspPZYY%UfEF2H@@x7PQo&!hU%LE*(YPFo>UC}0w43WD8q6waZ z@%SOxG(^^ z@MAExLhXK62_bTO9Hk|L)N6$WkTt_B$mwmccL(mwvz<$xu0W5Rcrj2?c32im9j*12 z1S*DNOBg8xDj3xRW9Q0iafuebp2ZwWj!+}byJLrYaB)0`zYx;Y=meQJIa;nNW>fOM zZ@K}tiSE^mz&aty>5jlF*&mTu-5|>0kwiqq8WBBh_E1EtDJl3iTj5+E#SLV8g)erO z63)Fe%c<~LKKG57o-`siew}krI3aox+wkc_C$Kv#o0guScYE$y0dejXg~bSZG3~8q zv{blIf)Ir7FdHBEx}|(Tp_dwC1a_R81w*nVW42UoCF{qUTAtn_as0q>r0R;kIdNWl z&161~*Hv=F=k+$)-~>525G^FKJ%0xm;x$IRGk#d@ExfXQfNK2wIrj-u@EJZk^Q5;@ zA~XNIq_;Fb(xKmvluQZ}KIM-xb_qls!|2NgF@Ht0M34O@EVi9kHJtJLlaKx}#=l`( z3c^m?kvGVR8PWdrY^=-FDD(5DM_P>wY&JkwFKMXLi0%BL(z&2H5z8cfsr40mE zcvE2xZu$y5h!;3T+^^@$P&6k%(6cu(F&oF5zKDXqcAqZSDM1-GJBY{UW=Kc)su&8U z|7K|KOqpkO`*&oW{yl(|Zz3Y$FBfCbmbyXe0Om;RH7BB@AFvNG3nO_jJuNh?8=rEwAf&~xZNcY^2q$Ib_!iTYtI3d-#{aO&8 zUC=jEAov)-2h{|%X2iq}C<0;o=*j;y|=HLRjkUt(}ho14P8T1e( zzN}s81xz6C?|v0gGH4~C4WBhe_(9)f7e-kKBjOR&E@K zlXRxM+aekWlGbN5Xl($%IgSEy||VulF@0_lD?^c-EADL z{#i}mWbijZPb%4;$oNyF?C3|@n`+(qh{tODqdUMg5JW>lCaiJC-u(7V=-u7-?i}#wy-M)N#B{H9y8G8N(4%ehVyma?c@6o_ zr^DNm*XeONI~x%2cDpE_+j%Y#1$$!Y2=c^r_<)meG5_mx+}{h4-ChcK3m9gx|`A4KNA( z-rC70uyOJD<;xXxmFIZ7L)1MOxt3je<-c(1)pa=xxOqLI3z)uMg7g{&EIzsqKx}OC zBRaQ!y&FIMi4O>P)bf7Zqubnq%dqqKT4s{E{Bq)A?rrZsIeh9G1;0PqINLjXE}%-R zb@BChxmf$yY4h<1uO2+(2ShRU1nYi&IJW_Vhi@Hyy9U;UOVc{M>p`UVuMZb}+%~}k zHyoTCFVo&nEj#-Wg$#QH936kFEL4{kdE@Sd@W8BR!%cc0Fi zpAT)H&4Gb$ADHkj-T}Cyo^B79i;t)8%u%II;2Q?LUEVb(!%tVwyRA=~m$kjm!%mlX zXG-~wca`u&@&iY`h60&b`VCv_rU`)-tUY}CPRlvnLYD!(xHa&XMg&>6%v$#%(3ya} z%kOW1xoWK=C9bt-9>3 zcT&p0Y)RiM%QbykA3(ZeW;aD82tVlo7+$JVexY}d3tk&c_GiC zn3=pEfH7u#c|EDy*I(N39R@D@ybPBB|;h~O~&7To`!w4hI zJdi!W#T9^A1jP7Y(sP&WkO)mOe?mjTdzG2XLj1BiuA>{GAGcM*Tv^ahclQ z6*^)RyA(5L4{pj>aBZ@=+Y#A_u1jojWBx$d4X2854!k~c3{%RP2drX}NHon|vw^9a zA7+JO+O+-gySNb}{<7fS~ z^;qBXZQHCh!-{v2$&X`tY%_u<%OP%!<80i}Gzklbs*Fp^P#_k>$bQT6%JDO8|BP-? zIny2MVfjNR+^(alp|EZ6{PrM_xI|vvKV;@9eb6uTAgKH4?%Q2h(np@dTd|3r%VHbj z*FtwqG4uA&j_OLhlPo}KM7-6uc!@-sL)pHm{aVpqx^BwR1N)ga?FrD`c3Sm7wfyCa z@lxiCJz#szLQ_)&uoi&=SQ27nqM{w`TMnMIub0H2nSvcTS*~I}uUNu_h+9P^Ob^86 zNjgfNDd`UX7Co^}=AzHzRn5);RCRj!x4z6{m+)*e)2=dPR0893XShB6<32fxm zrdwyg6?9uFJ1oCwmRuV>u)HQ_BrxMWGR4l^qX&_m_FK@@Q}S5QwQ4ui!$X_C(vCz) z+nO}7SN9EeCk!?LNgtMfg2`xZ?1uMBSxE`U@u!vfgu02ceR1oWT~F2%?_Zw}={cd+ z--Di3TLEDmvH;Pg7~$<0Vcq zJ0y4+h!7UWg(!ooSH=HjSt`qK*)Wf7vZ%BIC)jclXZ)k&jGwguojyp;Dx%^foA)>A z^56lxPxC-O7Y?s2aY`a$!qUwMjP$mKJ#nrZR;lPg@CrX2 zyi#`I0mc6#5f|o&Zz^-Oya&|ghiw{LvWE+0s-(xTPX4z*B0TizSkTd6xnWY)-K#zK zgU3|{66HkDK?+8dF4{!A;BSWQ;Hr2Tcwv3jnQry2o@ehB3$EgcW6jHW<7Al1J!6#_ zQ*Z3g^g*#V2(P<{{H8Ab-hY0SeczdmE^)=&CbdrsYU^(tX8aClEMOe!Bg4$l6b&IO zfxk`7R5N@;~;m!bJ6VA%m>pG>*YgjpG1sat!d8Z3+0JZ%jnui?Rw#x(U zgkq&q4aVvN40bg3-Xcc%RWze#(Ej^+iC1K4$;T173o&;&@EvaJ#2);HUuh>-Hb?I} zbpD^KL;EpxFU@I)Bk38=;pG8Q?~LH#i?(Xr4^I9!$YR&P({84;>6WMXJ6d8fToLLY z(@H^nJ6YKBdm0c#IXnEb&_WRH63#4wE!M2lk>1``mw5iJ(RT#EP9c}?hoxzz8M7u% zY|M|(lJ09o%&0bg)Of}3bmC^;1$P8L0OnX3Q`5yYjg?aB9FMrZ{6;FF*7HoV5{g!= zVI+Ep(}(U>fb0+ce4%64|N~a z0HNb$;XTpBO9r)kcKNYC9o%z=1hdsY_e_sqz!Q28U^isC0iVzk4w;GFD+PwNfc^|& z`q=@$>GPllbm4525Ne-1y8h>Q4wTfUTkgA@|<#AK2z6bacFZK+jd-hsMcm>{5g$zit!ct!+mH zHGSTU33nzWy20)VA+zqrzax9B^A*ArAJs zpg?w{iX+C|Y|k86ogG9~#$pWJ4(k$1>i2p9`MK@{pyncE`yDkSFC216$V^e)t{|^W zH)Rqm-s46`VdZV5BpX%vVk`g4)i<}HIftnD9B%6=&oyeD?RO#F^iHYiw`n~voGu~q zv}S{3vdtTRNa3)a{_tyAmkQZCMmyr*tKwk0OMd8|eY-x61X0eOlA`NlANmzF*Bf@_ zCnHZ>C6@G0Yr-J~MU%9K5+H?;bW2r9YtobOZ%S&hqH#ZXCHsb#9FFK29*li72&47{ zj%|)7+Od-;pTkW$3ZMZthGsHkl!X2uG!f&*v;ej#8{4yIx4vI5Qw0!*eSA?t@;y{S z3RurQE$pG_OOJ8UQr~qLYDQ2Y$`&XOoB5(;sq7YgayjkvUT8;&ll&BeAT>QaR+p8KivaVjHmf$<(*-UlwEc6iP{aV<>QOc*skk*sOjhnr1>Ur|H z_rO%S(Z?9oH)nrzkL3U?YpI^re9~@Ta!6MX=!rvL?`Ss-z!q-~yLyk}b6YKynLEb&FNAG+;T7QakQzl<`#bV9qv{&<1}!Rh)! z9Qn3nqotNPUlSh13K+hYm$?*K$w={KU$+C3=^Dz9OyFoMP9RsjvyhwZOdl z6fW65Dv9-rw8-0ZivT0P)&)(XvA&99P{T>pkpDGJW5>nWoG#&GR#p{j_(ubjCTH zBdl6zE6+dK19Zs3v_|nTh!|V`;Ki_Rr@wO(DarwMP zDO*V>D*C(v!ED|kQqY=bCY1|Wa@0Uj73siLeJ)$X>72ERL#1jU;|k%W+2^uFx1Mt- z&meTDO1%#{tMVjI8)a{E8%vW#zny!)MK|IS$6IO1#w1)d)57#a5KJ=P=tIG3Ec1J1wAX>7kF&BY&eRk%~Rn}!TR_mAHz`#eBOGl8Ko964DW6IbP% z@LyR{13dd95U9=xz*%2p!Ysl_o~z2gZDp%3{lr{1RH}qD5`{X9}a@)l!o&r zwaT@Cnrt)@4r7ZuO#L%;I%CMj{~6^b3|L@-$; zR%EYILuh{ITrzr_g`CI=&$_C5S5dcxfOm%CIOXl`DGzF3Sx)_8$*sZFcLBV}MdSj*H8TaH95T{X$N<=EdY3J`MW{E}=DQasf< zsDZKnvm z44g1@+hBRetq#?2DSCtUi9m(2+5q1UMXZ6MoSNn&ont7eeukgOelZY$@fYde)|`ya_6Y? z9wfZ9vL{m8`+m0Qu`7MPs5(dO-kkmkTR><00~v*>YTF!rD`P$d-4h*DrYmcKZG<5| zzYwi;zx#NoQl|TPtf<`3V|%koi_9F+w>ex2_$EJfby~fZ?gH4snVorFmsOjAx^S-i z>UodLMyegPBl4#vs~Xy&sKynE!U|%~V>wfUHvBr7hX}Zo%`X_d&-zsiV^7U+-aHVi zn+Fd<`mTzdG^I1CO~oTNI^Ugya zD|vZG*yFRB1q_amjhUof`+hgW!D6%vYw~jdml>c3*$Hsu4r_krK_U#T2?wnN@ceNW zxx3?p^26Nn5bRP-I1AQFU(StOPIHqZ&V3^y4js+{;Qdmw@#gb^M#c2sdl~ewuiW+0 z6DpN*Sd3B}IjsIhUYnd?&iX_lOnGHaIBIJcjyDSbV^*5PEr7WJmG%#r-miQ=^DXbv zE$Rx_UV#{-E-Yy2qrZD9b@JS*9y@<)RjLp8&LrbSEgh?93GfOwyH5F*d_v#-w{0MeNH4qb#T4>Jahbdqa1(FN_FV))CCuLpy#vBy`lSzKN~Tu zBTR24D!1bk0SkBKz39c84SB*%HOM;u=kLoz|4&%5>rAzp=z<%+r&aN$@uEcFH{s{K z%Qa&t&1b+K0_UwkBir-nG*lMJ_W)POq`^)rOZJ$f>tF=Ko19rXlNPgubw2RXfic;R zTm4n>^9kPx%Udz+2OT@Z@t=Po2(u^cIi4ukpr-@rEHN>je=-Y)5fn9IlgKew67uin zFJez2TRsf@oM@B;`1V?=rQdN8%gme|hM~g;NiQ|X^q%p(*~TBjRKe%BJ3>uwOgZA! zl1^O0ZD|Sgi*fhn0bs146(Ov05T|ys>HDkXC$&B+`S}}y`m>y^wTYdRjaSz%9jA}C4>!XdYeGApO>H?-s|iBoxPg@0k9v>jjgR2ogXjLool7jN>@g`-#cu3c>-P`?*jrqGLVOT zdHgY_hsW5Rvsk*e9GP8;^L}cw&DA;oviRueww{ALOm!}F z$#TI2oM%ZmA}aVJWNO;G1!TsVe|$yl6x8>R)rjc5tbK9X;>AzPpXL*7KVpo##}Bsl zyENls;L*yR`nqd_Q(C*j@#6Qt>jeT4{@?Nb|3S-FY311X0YtO}uLA)=2Ilx@6`7m6 zkNv*^v(F9IT=zK9{I@DTpwsg_iSW-X$Ma%R8Z7wsHrlz(rVm*pRFeK=k~WcZ5MC`a zIg*QTh}4U$s8E)xThJ%_gY4!w7KkK0e-77Hj=YH`vlrj!I9&sw>wa(-+%X4mpZWLn z3UZ(L*c=YK2^F&NoC%~fJ?$X}eyu$zInAB(kd_aF?A}k(a-T6Y+J78gGb5&(ACzv* zazy_fg;dZC7z&_7j-(UvS+7C{>(G7E;f60__7%i`d>a;kOlcDN6)Sbtnv|Fxt(x z!?h}Zx*t*sW$v<~U`x^%x!%khwplT=ipsYMP)4vt6orbU}A?SfQGPz31$ox#q z1uh%0>fT`%Vm;s}!;gJ`CJ~a{4W0J-uFxdL7U8>emlE3P=ICAKDF0cx0i4vzp!U9i z{zW}(WFAIi=e&MYpfVE9&(Av<=(l>^^1y|o5e@_L;sojoqniVpFnXo<9h6TF^UC^K z`p^r9&t(bSqCPa=$uT8~QvSrMTTO+M&s$1!XU$oVs@~J23*l%#gCbg})ZCEp>qWOr zg8=53(lRiS68u*Pb*_;=;z*WVer~+~)F`wg%1tGgiB22y7ZArtS$^R!Q}i@=5Z6is z%A%AALc2u821DPs#Z~)_&NeuW8EY$%!rKja++Ix)salPh8#@ekpXv}r!%AY%yJX1l zn;gHi1ejRRvQB4&&8Q(3Zam!KZ8>ZF(8>ZAguI|378rtfvl$lLVPZ81MM>}!IF9;6 z1Y4(}jSZ=loy$wKb7!w75YM(w@E(H8%V)>PPQIMAoy&wx;Gs|sLM^1`F1$lam!-es zn5mX5lTjHzYBy%zGfkqaMi2?F?xlD4DGB;UMoM^hr>ujS08em}XfIsLBvkyn49 zKl9S)bycWDm!yw)A1+>UUIXx+?-rr6uF0wFbK5tB%p-%QyYA>*@#S1xa8$$fHOev{b|gvmO6HjPc5 zgX1~TXyiQKk=A=EdHebA>_#46789EB{EP?{*^lFOR5U!x2$$nxWDM^w%4xP!DmO0c zR@fhv%B;IEH&aH{2L*s&r~0<%!QI5r*`&U=#QZQn!;v(#4L;EML)7`nb1pP@WDlaU zBXi5+JxX-+WYaRbs*QdRB1-k`4rEk_>?+^wR}be#jJ8WIybO-Trg|n8u!DdH?R9tDR8&nY@mm- zR$vopQxKVSx$%qw4cc`gb)Afoq3*wV5a$e}(5=jyOQQ4n`9iv4MnbQQL(~g5DBb&w zlnUzg7cW4Pr&Xkopx4l*Fh+Zmel}LP&}TfjGLxN3q5e)dTpB}?($>UkykJGI^>nWp zaM*`7mnq_rL3lE6v3R-7^;0tId@aPSE)m!YM;@Q()WdB2;GJ*@EVan1)0*(a(q29s zet~E7u)G@OU$4528EBRr_iFOVqh@&2~5KC zHR-e8cjlA`?ZTOD=9GTy;S6%wsH&Lg>)EFy!Hx66DrTvxyI@@L!#vM}jVSWvv#hnc z5jF}8#ZFLTd$ATZJ<_p>r-b^G8i{T;nuU~5*J=xZ?)G4dx5nfMIhEzK*77^sex)xX z;&K0>xv_PTydcuoD2!_-(2K-l6F+M+i?AjLnn0;0{V|J=kvu_NZI*&l=#j)KR#1d- zj_N!Mx4Kt|(bO8PiSmb1*8>|elSCI8dIH5-DjX;m%&N>{1^5`5dOWn^B7C%A!v7Zh zpdEaS)jgoBanp*|yQoKnc&ft$|5q8W@19*BQz3y{RUv`+xBT(04=ob)%>fFYR5&$f z0-VZB0vySI7v%)FiE4a|!U6&e(SM6ZkFWJr*$m-P+05pU3Q0_Sk?6OkB2k$CE~!Oe zZgjws|6TomH30(|o{yWhX_$w$sTEjH)4ehj$cr)*u>URxWqtp-vf?R%i)6C%tZ$$J zOY?1}y&pw%8s$o_mFv63&9C`ZrIiAV9cXbf5*il$uD?iU$ExVRnR zItG~TBxtJmz=jk|B-seDC$bBhEs?U@Xe8|->cX>08!ch*+Gq&vBBThBr`DFF7}ieO zw2k0Uvi8eJ2)A58u#n<#XQN+y zOt6*gQ$<9rDuZHvR2QSRs~|}wSY&24EsucFuDA$o#OaT*1u;|N2x}<875%G%*vU%2 zhyp6dA>OCDQ7^_hu%d?etExOg*;5_RPmpdK74#dI)EIcNraCybFR(Cg*+`91p@Lv{*d~%`8FiM#A*S!Vguu{1F}cdBH-|#s zNHL-@#lxsKCj)GR#dO?Ex+EJ|j1n0_o;KUJEE6_hb=&OA6Ks8@ILM64hb-hID_s7n zlKt!84_o~f^Ag+)hDDY5@$}QO7UU^Q2)h5r)4*#z)V*{m+yC++V(kk5=i*) z`@A2=@C()X?VbPkHZSVtSB{ZWx0&4`&l=&M)m2q34ioNZ6g^em}rHV?l0xk)ulK4!@S_x^aYjeka`Ar4dZgrXL_#y&AJRRzvoM9V~V zcA1R?`hys2C`8+(R?93C*?r{KPA4SI#EOR0qY-f{XXok`(6%Tq(0|@JRsJBr?YKlP z1ysgFz)^&$$kZh3Xz-C*bJ|%E3&oc`x}}JQytUUim?Gi*fOLBrEw=?JV^N7Gz~wvp z5VyW1ZdP;DE+zODQh^UU*SP85Mq2(9%sGQ2+oSG~Kbk=;qV z5U;JX=w9mUgJs+t7CVwIDIZ!nw{=*m`bbS%{^w_BSA%6j96JY+fmCm`LI+Mv#l8F) zo1KE517B;J_*>yM3v2!(t$e-eAv0*mi`!{~WJ)`oQMU`!vJ@Cxh5Q?VDixf8Br0(C zA4>cIRMAlK1OWv-y-t}`ns3$^L?LLM)()}Db$`=EynW)mV5ar2;*?go7IfrKxuzP% zeFBTDnmps}F=b@xg&YSFwo_K^)t1~p7c-R4-+?IhzD9-$%ZG9=_=Ws@$K!bo-vcPU zP*_%}c2!bPx9fCqUK(|%>Q^ioQ?G<^h-|NqXgDSr3`fpZ=T?|~8Ct(KYT{Qhv^mW> zM{S}Pu%#9S7DkG28Kz4==H)nG)Ipuh@cs-A`%3+qH*fA1h+lze(N1e_RYPC`N&0Wcgj-TJqxN$m2_@S(WGL2ekUX z`OtE?rjC`T$vWg)Woli2H1kf8TfC^$IFKI9@_yDSl|rbhbrF@-Eb>RPm|ec6B=_n5aDQ-Du(5Q=e7HW^o!A~A zD!s-?io5L&auC3#AFKW8YtQH7&q^TQ@>-WQY9~lEdL^r9aAf3Fncbc+h;XUnq$n3E z63D;GlOk|@lyQCxA3uq(6|y~9RYLaZJ?S@hA5>t+rbU$%DbW&!6Pa3s7HVg+c1=ZI%6=Q37~oz!n9`LkjCT2EiR`v z!!Q1s@#B2D0T+v3(v%0H{^%)E37P72`ClvY zmjqs}qn**6W>V|rQ}1}YmlB!~WbG-j!wTyP_%5YOTLF!z?nIfX<8=!1dEBO|6wX_! z5F3#cWt}H!@L3A?dAx;OME8M>V!2wRd|z}QJ753%krMMB2>Bi02#E%e#Q+H=b5&j0rE-a71|v-yV1N?qM*0? z?NIsH!@jQaj60g7CG;XLzTPVzpt#{ebt5Cam3Vp=nLE>oH0wS5YeiXjm1YZv%h}t% zeH*+Wv-oU(KlgB(KPP)QvLm)skrF2b`8BIaqTR%`y9K`|W>2v$L?g3Be%p0T^SPWJ zxy{(U6CI1PqWc*tb&)YY8hINes!$vrqR=9<;~b)3>#|qZIgQAd$aZhnn~(q zv~I&@&tKRqj_Iq%#>+i{n%Q*YPrLS?Yv%7LK%g!#8{1o{=GzUrtp5xw@gDNzZ-b3dkz{(4jC><*VOU}-G1E-1HE#wI`% zRD=q6`>+L%`7_kd_-(e20HqVGJ@tW;D2D@D+$h!^|PN{l)aYhcF6V-E4bRhvOMtvHIHZV@$i z=o{BJ2~%DP8I_}DtKLQLDct0TK+EWG3~LzE(NBpxd>;Bt#^t@?nlT2eSN_FC67prbq}*(H`5=fn7V@ znDbRC9AoD;RI!RQrJV+4#AbuhsZDt9g5+UQ6p@)S4K3znjPqN$58AtZQyd90+y8Tv z4T(mM<7wqkSq#H%`l%sf)nz04$bCFT3SMLb4`@9X>EXpt#J`djdz3gSvlO&)4=bCF z4n==FetGAO$q{YIC=5qL3gIez=sUiG@C89t*o4KPcr;lIMHsR${1DHUOZLYi+*fJK{H*V94G)ZBP^}M* zP_ZSY(s*z3?u7^MLy<`1vP30vF3_svsz8`2RsQ=ki%$!^;{*SHv+fM$Kh9w&)0f@gvTq(RGBQAUP zl}xc4WITHuprKo_Z<6FVv|++*cYONVbb4|!-dy^19qn9;@@`@IG=ZNLYqE(;K}Zy| zVX9VJ9eccBZxxLAlt7F(hu~E7{qpe)NOw(fcG&rA3%(@htU^qa;mjt6ccl{W+KtuJWfjDv}M_I+J=f~;`gZl550mw08cvjE^ zJsUso>Bl$`k}|i$cT)aq*^stsMWQDyO(orx(bs(Q)Xg|N6(bqso>sk(s1PHWGsk70 z?DY*MA{9C{_W$(Sg>(a_Rc7q)E2;}2@gAqW%qHc9CR-f&v=W1= zYEOqa@Fi2e1~HnifZB`eSrN^Up(a!b*@Rs5x9jKlS|=k5)NW_9a7`T9DVCTv)9<7U z>|)bOMU8|p_7aO>n7TIz4DAxFK;}Gsrq_1OwD@fwi??J8YfTLcp#+9(7nzH8#`cgQ zeLNYuHja61)jKo8JQfBy-ROF1@#Z+#p^UN?mdLhaeyq5_IeW(Tiub$v9Y}6l3#-B= zy4F7=j_?kYz!5P$c(-(k>lF2}W`*QSY?=PJ7T($^6V`{QE)4!W<3Nho_uh0dSy%)WNqc_JxokQUjH06JpI1Y1*iN&$t zBuG+I%S1pGxcl0aYDW2Hd>JgtT2&jDGGFaErECMbraO?k*pOt-$#2_I%?!>Fcfk@M zI={-{gHhVfE|Ejte-gKpehZP_q|GMQ9NQ%R?vw@|ihbOUy8b`vx(=u&nk^ivbOh<0 z0MbE4=^CWhfOJ9$B~&Q_p-S&n1SwJkr79@BCfRe6~t%Z!+q>F0!uSMTt z6(r0~W;d5_h;BUCM@%TN(Cs=y#>v-K?GJ>5c6rMm?c~z3JzI zD?RFe?b@n7PqLGNczgxee2oIQ;-^ULVQH>)W-{nXVkfla%EhpC#kVrE5^69yn;Zen zP$@-w^E!#%+^W)>$*aPN1=eYa)qEGcr=bU$Kdq}YVL99P!ImN#YX zL3U&^^~HJfZ3U(W@m{}zP}#7T6qlsYHo*Cg6(W^%3mmnXN+lyq*kt9r(nfTBkH6BC z%OHC|^5JrR+5DQNV;r_!ckqY21T$KXgxQc+SJy-+_*A&3h}w*T@!kAaq{zr(vy_%0 zHlLhqC~KR=DW-0&=IFEx7^M;Ck00aqYl`mZhf(oYWImU_b6?^G|EuZ%Bt*&@>mP7k z1|3pTj~zpeoGd0*F<4sQ=pd$0m5SXg?c0D!&Din~ju=&Uh4`DPsB%6NIi(<8t`>#s zBeLsr$Zsh^yMS~@nJifWV0-UL+fYp`}_Q7&r@*y3?Ue0sqzVSMP3R|x-j&XcyQ~p zBpm*UNspkYE(>Y=zso3n}De&7s&N?>Y+g)|czs%!NrC7-*tkmrYH7|3~ z``fJp5uXe%_x-W$Z(4%~pC{@(*4~dd$~mYkJxiT(-(olrvAvX4PV$jQL@B&KOD49o z+w~ETZmHil27hLVGnIJybJD=Z`z|#KS7i;O9d9wxd70sFPCJmynbcfc6v5TCX_SYw z#WO4-T1A_p+k69eNnSNe9eh7emRO@G6XtXBJF*36S^HRf?)ew&3GbL_y01Hkgp=nNE+qsF z=n4z&gwY?fKdBvY5{VZ|7gPf;rqS)OuFX8F%@~YkSnBE)n2@+TR51`P=iuw=*SBWe zJ+N{x`8m9Ze!(ouXyQaD7gDS~mz$Dft?2zFg>%e5=81$Z2h1}$iDV#4XKLM}GIx31 zR)lFqajVkC$chS5!}r1`Q5&4Vs?=|Xr$pA6Ticpg*>L#$i?0_MJo3nSb`Z`my5~4; zAEd{5q&GF--So(E03bK zW)=(cj;HX8C(@B(pR<=Q*rI2=qr#N)=;MoptKzZuLny%0RF&JbPkQ+icv8MdbH?vf z&^~E$LKd315EILb@bdkrq`WNrH2M-m_}y@8R|5MJ$~N4>+}^A2xwPbjFN)begeDb7 zHQt}F&(MzK$ouiV{fB3dqJ`ldO4{MND*C{_5~m5{(#WD-wt7l|oi>ur(Ke?9q8{wG z-tqYNQ|+{4XeGmoFPR)Ky`8Vrs;!;SA>6Wkg=vtBxJj#h*Z*V_Zc@*hT2CdsvEA(bAvwU=!4EpbA{Ct8i^b8;oi`>=T8eRPJCxc65kkOU>A~hGZu9 z?XXZB$+xc)=KUiEdD>Y)(31C=SkG*)=D5~>3_*G_Qud;{|BgWH-0r4}$ASTk(gDPk z{Y{gD2$louQE{9^#F}dz3(c{=V$;J2(<+(B^w3NbW+#F*Yt#>!uGkBY4n-kLv%sl! z4+>*F3)7TH=nrPiosckQX=I+-&f;#FqV)sAsG(>!FDXGH?#646k6sK|bV2HGB`+0X z8gP48>^DE;xflf>ERp5ldV%MA zMK0GgCmozW)IM$O_6#MrKm$h=5JLIn$siCap(KuL)ZMaI?$9N?L~32$_S}eO6Q!a{ zfJlG8tP{b5w@L-4p^rJ`MfFq?1 z^Lnx2p`h$tx5dVHi^4DC$>B2APDOaqA=(OLOX`(ntj=QB;Z{@FDOv=O*C|@GhUC@c zJlYw`u)=)X_*PdcH&SiQe1fM50m_&>iy1;B&)HAQ;tMfYJowLZRWy{4%3Bt2utn$& zaSmN~sB)~o_h;nG2WlVGkM(&JAFp<9g10w~Ab9@y z!&felyS8z^Jel|IeGs%%)aqx`v>t>EmelNETYbvX!4`U2ilxN%%8e}yJT2E_FUsHO zAWh-NnW2PIaucLb^Wt+`C3NXB=;Dc57X}0fVP`-tehUq7ePqruMTD`S^+6O*RD=r` zv)5c`?HD_QyRk3UmrPTem~&)rdYF0{;V5u1od_Qoqgqe(+8@R{+FW_pL^0{61tRV7azP`XwUQ z*KuS7LNXAs+xw`|%C)ZyW?#*t?z`x5X?~za^)b6uXir542zF6m``pQ~3lPL?z}wGu z?G-&eS&Ii}-)X|HgmLj`PTvd>-A~fFL~FqN-I-CPl%k3)?*=Aa|Y{hMtuSQ}%xG@wS1pZe7q(2+0~?z@uVnEMJ-DLgUEM?A@V zccXXarjC>ZZQke$#9(>$#@VR#tE9nPHJg(UD=|evR;|KX7TP`XVEu!bqZiT2cvw$8o zxm$Y_4a=Q9%)1ioOC+QIih~X?nB0&`m!NY=N)%t;+?)7@bm{%KRPhUi9C?$|J-iU= zndZW0hWV-chrNSD{@IRRB43goe}5iXoA$W&mQPF(wmZvvovqKJy_rEYdInczc>-A? zoZSY}FQbOTGo5E*o$+h8)*ycEQ`97$H@I*~Jex2|fX**E_30^%=oE+W%*00w!Ds5;&!cJ| zQgZvg$=+C4`H)x0lMgtKeu!tkUL)VJyQ$=X=Pq(mj#_l%(^gRG1JfWexWH9{h|&J1;F2g${S|oSC?BUFZ>#{6$a!)CyTeW?cPIN5Vi?sDMja7n{;1w~lLQH8{#5JWMQ{wNm4f`BGEg3&8gWCtw!0Bg zXv4aA0DhIFCy{Z*adlZ~s5>y&k)Lpm7Eejv&FoXC4z=#M^Q&6vb?u?~gpg#IdNoW$ zRj2v6`)a+o2JUvA61y?Ijr>O8@s4d*>QmbB0W}Sl@<&Bdh?*!YS$EPLEHHh<%6fXo zHw&Ywf{)AkL70ji?2(g%*P^3!mpJNbN4@ECVC9^J{=iD6ntFmGNrRuG8(oGq_(v`c z>PEXJ(id_}T3+J#r=ohoY+XZ-|2D~PoCUWWbi>$JE*XXhD=W2^ zv!WpsUsqU%*_?=wSQ zvC&)wUNMqv-~;+pf%grucbu*ZeK6CjS>cyAVV$eA5>@?(Vj_{y$L_uNv_E*YZD{i; za0p3KiYpo!#?KY$-y$jVp;WmMQB_*Dbk!&6!^;lXsJ;r=T&$>>>vChK-|_a3f5$Tz znEb#P6c>aCj6+#Llv~5O?}6`9@K5yw^utib$q{Mkh=gl<-LrHxJ{{m_sGKKYil?ht z2XIgzb3=Fz{{u)q{*tDCDpKgcjKD8y20+ak7eF9}b3kU`%+C?>FEo(ncXI%YIIaD! zOxHhg5>>qhfh?8rfo$j5qZYWUpG;>t&<)`o8QyFMQ2Br{z-morj!ZyrJ7b8Ar5VD~ z<&VaOPGs{nJg^tA9arpBpg4PEw)`ggMRDzKj>H9BHWmjwNB~Dir*)r0|1!9|HTKz zU7QD60L1z~J|rFcjsMk{XB_a?whW!##1Q~G4lm%q^F9Oti{k%mj8j7_UHdy7X(*rhxIek-|a{}V)Z}gdk{I3ew-LtetcO?3z zGJj~+4}Wu@Z!z*mI4L{U57-k{=67-7Tu+M2K`@Y1|5!`7XL%wulBn!tbi!}Z;~7x xkDhz{gNJGS#-n8-(HYRQUVj+Cn!g#&<-jylaDXrZ0?`651W?*C0e+I8{{ZtM5g7mg literal 0 HcmV?d00001 diff --git a/inc/rocprofiler.h b/inc/rocprofiler.h new file mode 100644 index 0000000000..207dc48950 --- /dev/null +++ b/inc/rocprofiler.h @@ -0,0 +1,272 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. +// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. +// +//////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////// +// +// ROC Profiler API +// +// The goal of the implementation is to provide a HW specific low-level +// performance analysis interface for profiling of GPU compute applications. +// The profiling includes HW performance counters (PMC) with complex +// performance metrics and thread traces (SQTT). The profiling is supported +// by the SQTT, PMC and Callback APIs. +// +// The library can be used by a tool library loaded by HSA runtime or by +// higher level HW independent performance analysis API like PAPI. +// +// The library is written on C and will be based on AQLprofile AMD specific +// HSA extension. The library implementation requires HSA API intercepting and +// a profiling queue supporting a submit callback interface. +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef INC_ROCPROFILER_H_ +#define INC_ROCPROFILER_H_ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +//////////////////////////////////////////////////////////////////////////////// +// Profiling info +// +// Profiling info objects have profiling feature info, type, parameters and data +// Also profiling data samplaes can be iterated using a callback + +// Profiling feature type +typedef enum { + ROCPROFILER_TYPE_METRIC = 0, + ROCPROFILER_TYPE_TRACE = 1 +} rocprofiler_type_t; + +// Profiling feture parameter +typedef hsa_ven_amd_aqlprofile_parameter_t rocprofiler_parameter_t; + +// Profiling data kind +typedef enum { + ROCPROFILER_UNINIT = 0, + ROCPROFILER_INT32 = 1, + ROCPROFILER_INT64 = 2, + ROCPROFILER_FLOAT = 3, + ROCPROFILER_DOUBLE = 4, + ROCPROFILER_BYTES = 5 +} rocprofiler_metric_kind_t; + +// Profiling data type +typedef struct { + rocprofiler_metric_kind_t kind; // result kind + union { + uint32_t result_int32; // 32bit integer result + uint64_t result_int64; // 64bit integer result + float result_float; // float single-precision result + double result_double; // float double-precision result + struct { + void* ptr; + uint32_t size; + uint32_t instance_count; + bool copy; + } result_bytes; // data by ptr and byte size + }; +} rocprofiler_data_t; + +// Profiling feature info +typedef struct { + rocprofiler_type_t type; // feature type + const char* name; // [in] feature name + const rocprofiler_parameter_t* parameters; // feature parameters array + uint32_t parameter_count; // feature parameters count + rocprofiler_data_t data; // [out] profiling data +} rocprofiler_info_t; + +//////////////////////////////////////////////////////////////////////////////// +// Profiling context +// +// Profiling context object accumuate all profiling information + +// Profiling context object +typedef void rocprofiler_t; + +// Profiling group object +typedef struct { + unsigned index; // group index + rocprofiler_info_t** info; // profiling info array + uint32_t info_count; // profiling info count + rocprofiler_t* context; // context object +} rocprofiler_group_t; + +// Profiling mode +typedef enum { + ROCPROFILER_MODE_STANDALONE = 1, + ROCPROFILER_MODE_CREATEQUEUE = 2, +} rocprofiler_mode_t; + +// Profiling preperties +typedef struct { + hsa_queue_t* queue; // queue for STANDALONE mode + // the queue is created and returned in CREATEQUEUE mode + uint32_t queue_depth; // created queue depth +} rocprofiler_properties_t; + +// Create new profiling context +hsa_status_t rocprofiler_open( + unsigned gpu_index, // GPU index + rocprofiler_info_t* info, // [in] profiling info array + uint32_t info_count, // profiling info count + rocprofiler_t** context, // [out] context object + uint32_t mode, // profiling mode mask + rocprofiler_properties_t* properties); // profiling properties + +// Delete profiling info +hsa_status_t rocprofiler_close( + rocprofiler_t* context); // [in] profiling context + +//////////////////////////////////////////////////////////////////////////////// +// Runtime API observer +// +// Runtime API observer is called on enter and exit for the API + +// Profiling callback data +typedef struct { + uint64_t kernel_object; + uint64_t queue_index; + uint32_t gpu_index; +} rocprofiler_callback_data_t; + +// Profiling callback type +typedef hsa_status_t (*rocprofiler_callback_t)( + const rocprofiler_callback_data_t* callback_data, // [in] callback data union, data depends on + // the callback API id + void* user_data, // [in/out] user data passed to the callback + rocprofiler_group_t** group); // [out] profiling group + +// Provided standard profiling callback +static inline hsa_status_t rocprofiler_set_dispatch_callback( + const rocprofiler_callback_data_t* callback_data, + void* user_data, + rocprofiler_group_t** group) { + *group = reinterpret_cast(user_data); + return HSA_STATUS_SUCCESS; +} + +// Set/remove kernel dispatch observer +hsa_status_t rocprofiler_set_dispatch_observer( + rocprofiler_callback_t callback, // observer callback + void* data); // [in/out] passed callback data + +hsa_status_t rocprofiler_remove_dispatch_observer(); + +//////////////////////////////////////////////////////////////////////////////// +// Start/stop profiling +// +// Start/stop the context profiling invocation, have to be as many as +// contect.invocations' to collect all profiling data + +// Start profiling +hsa_status_t rocprofiler_start( + rocprofiler_t* context, // [in/out] profiling context + uint32_t group_index = 0); // group index + +// Stop profiling +hsa_status_t rocprofiler_stop( + rocprofiler_t* context, // [in/out] profiling context + uint32_t group_index = 0); // group index + +// Read profiling data +hsa_status_t rocprofiler_get_data( + rocprofiler_t* context, // [in/out] profiling context + uint32_t group_index = 0); // group index + +// Get profiling groups +hsa_status_t rocprofiler_get_groups( + rocprofiler_t* context, // [in] profiling context + rocprofiler_group_t** groups, // [out] profiling groups + uint32_t* group_count); // [out] profiling groups count + +// Start profiling +hsa_status_t rocprofiler_start_group( + rocprofiler_group_t* group); // [in/out] profiling group + +// Stop profiling +hsa_status_t rocprofiler_stop_group( + rocprofiler_group_t* group); // [in/out] profiling group + +// Get profiling data +hsa_status_t rocprofiler_get_group_data( + rocprofiler_group_t* group); // [in/out] profiling group + +// Get metrics data +hsa_status_t rocprofiler_get_metrics_data( + const rocprofiler_t* context); // [in/out] profiling context + +// Definition of output data iterator callback +typedef hsa_ven_amd_aqlprofile_data_callback_t rocprofiler_trace_data_callback_t; + +// Method for iterating the events output data +hsa_status_t rocprofiler_iterate_trace_data( + rocprofiler_t* context, // [in] profiling context + rocprofiler_trace_data_callback_t callback, // [in] callback to iterate the output data + void* data); // [in/out] callback data + +//////////////////////////////////////////////////////////////////////////////// +// Returning the error string method + +hsa_status_t rocprofiler_error_string ( + const char** str); // [out] the API error string pointer returning + +//////////////////////////////////////////////////////////////////////////////// +// HSA-runtime tool on-load method +bool OnLoad( + HsaApiTable* table, + uint64_t runtime_version, + uint64_t failed_tool_count, + const char* const * failed_tool_names); + +#ifdef __cplusplus +} // extern "C" block +#endif // __cplusplus + +#endif // INC_ROCPROFILER_H_ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000000..4e1bcc43f7 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,14 @@ +# +# Build dynamic Library object +# +set ( TARGET_LIB "${TARGET_NAME}" ) +set ( LIB_SRC + ${LIB_DIR}/core/rocprofiler.cpp + ${LIB_DIR}/core/proxy_queue.cpp + ${LIB_DIR}/core/simple_proxy_queue.cpp + ${LIB_DIR}/core/intercept_queue.cpp + ${LIB_DIR}/util/hsa_rsrc_factory.cpp +) +add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} ) +target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ) +target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++) diff --git a/src/core/context.h b/src/core/context.h new file mode 100644 index 0000000000..15a85166d0 --- /dev/null +++ b/src/core/context.h @@ -0,0 +1,399 @@ +#ifndef SRC_CORE_CONTEXT_H_ +#define SRC_CORE_CONTEXT_H_ + +#include "inc/rocprofiler.h" + +#include +#include +#include + +#include "core/metrics.h" +#include "core/profile.h" +#include "core/queue.h" +#include "core/types.h" +#include "util/exception.h" +#include "util/hsa_rsrc_factory.h" + +namespace rocprofiler { +struct rocprofiler_contex_t; +class Context; + +inline unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); } + +// Block descriptor +struct block_des_t { + uint32_t id; + uint32_t index; +}; + +// block_des_t less-then functor +struct lt_block_des { + bool operator()(const block_des_t& a1, const block_des_t& a2) const { + return (a1.id < a2.id) || ((a1.id == a2.id) && (a1.index < a2.index)); + } +}; + +// Block status +struct block_status_t { + uint32_t max_counters; + uint32_t counter_index; + uint32_t group_index; +}; + +// Metrics arguments +template +class MetricArgs : public xml::args_cache_t { + public: + MetricArgs(const Map& map) : map_(map) {} + bool Lookup(const std::string& name, uint64_t& result) const { + rocprofiler_info_t* info = NULL; + auto it = map_.find(name); + if (it == map_.end()) EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is not found"); + info = it->second; + if (info) { + result = info->data.result_int64; + if (info->data.kind == ROCPROFILER_UNINIT) EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is uninitialized"); + if (info->data.kind != ROCPROFILER_INT64) EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is of incompatible type, not INT64"); + } else EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' info is NULL"); + return (info != NULL); + } + private: + const Map& map_; +}; + +// Profiling group +class Group { + public: + Group(const util::AgentInfo* agent_info, Context *context, const uint32_t& index) : + pmc_profile_(agent_info), + sqtt_profile_(agent_info), + context_(context), + index_(index) + {} + + void Insert(const profile_info_t& info) { + const rocprofiler_type_t type = info.rinfo->type; + info_vector_.push_back(info.rinfo); + switch (type) { + case ROCPROFILER_TYPE_METRIC: + pmc_profile_.Insert(info); + break; + case ROCPROFILER_TYPE_TRACE: + sqtt_profile_.Insert(info); + break; + default: + EXC_RAISING(HSA_STATUS_ERROR, "bad rocprofiler type (" << type << ")"); + } + } + + hsa_status_t Finalize() { + hsa_status_t status = pmc_profile_.Finalize(start_vector_, stop_vector_); + if (status == HSA_STATUS_SUCCESS) { + status = sqtt_profile_.Finalize(start_vector_, stop_vector_); + } + return status; + } + + void GetProfiles(profile_vector_t& vec) { + pmc_profile_.GetProfiles(vec); + sqtt_profile_.GetProfiles(vec); + } + + void GetTraceProfiles(profile_vector_t& vec) { + sqtt_profile_.GetProfiles(vec); + } + + info_vector_t& GetInfoVector() { return info_vector_; } + const pkt_vector_t& GetStartVector() const { return start_vector_; } + const pkt_vector_t& GetStopVector() const { return stop_vector_; } + Context* GetContext() { return context_; } + uint32_t GetIndex() const { return index_; } + + private: + PmcProfile pmc_profile_; + SqttProfile sqtt_profile_; + info_vector_t info_vector_; + pkt_vector_t start_vector_; + pkt_vector_t stop_vector_; + Context* const context_; + const uint32_t index_; +}; + +// Profiling context +class Context { + public: + typedef std::map info_map_t; + + Context(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_info_t* info, const uint32_t info_count) : + agent_(agent_info->dev_id), + agent_info_(agent_info), + queue_(queue), + hsa_rsrc_(&util::HsaRsrcFactory::Instance()), + api_(hsa_rsrc_->AqlProfileApi()), + metrics_(agent_info) + { + Initialize(info, info_count); + Finalize(); + } + + ~Context() { + for (const auto& v : info_map_) { + const std::string& name = v.first; + const rocprofiler_info_t* info = v.second; + if ((info->type == ROCPROFILER_TYPE_METRIC) && (metrics_map_.find(name) == metrics_map_.end())) { + delete info; + } + } + } + + void Initialize(rocprofiler_info_t* info_array, const uint32_t info_count) { + info_map_t input_map; + for (unsigned i = 0; i < info_count; ++i) { + rocprofiler_info_t* info = &info_array[i]; + input_map[info->name] = info; + info->data.kind = ROCPROFILER_UNINIT; + } + + if (info_count) set_.push_back(Group(agent_info_, this, 0)); + + for (unsigned i = 0; i < info_count; ++i) { + rocprofiler_info_t* info = &info_array[i]; + info_map_[info->name] = info; + const rocprofiler_type_t type = info->type; + const char* name = info->name; + + if (type == ROCPROFILER_TYPE_METRIC) { + const Metric* metric = metrics_.Get(name); + if (metric == NULL) EXC_RAISING(HSA_STATUS_ERROR, "metric '" << name << "' is not found"); + auto ret = metrics_map_.insert({name, metric}); + if (!ret.second) EXC_RAISING(HSA_STATUS_ERROR, "metric '" << name << "' is registered more then once"); + + counters_vec_t counters_vec = metric->GetCounters(); + if (counters_vec.empty()) EXC_RAISING(HSA_STATUS_ERROR, "metric name '" << name << "' is not found"); + + for (const counter_t* counter : counters_vec) { + if (metric->GetExpr()) { + auto it = input_map.find(counter->name); + if (it != input_map.end()) { + continue; + } else { + info = NewCounterInfo(counter); + info_map_[info->name] = info; + } + } + + const event_t* event = &(counter->event); + const block_des_t block_des = {event->block_name, event->block_index}; + auto ret = groups_map_.insert({block_des, {}}); + block_status_t& block_status = ret.first->second; + if (block_status.max_counters == 0) { + profile_t query = {}; + query.agent = agent_; + query.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC; + query.events = event; + + uint32_t block_counters; + hsa_status_t status = api_->hsa_ven_amd_aqlprofile_get_info(&query, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS, &block_counters); + if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "get block_counters info"); + block_status.max_counters = block_counters; + } + if (block_status.counter_index >= block_status.max_counters) { + block_status.counter_index = 0; + block_status.group_index += 1; + } + if (block_status.group_index >= set_.size()) { + set_.push_back(Group(agent_info_, this, block_status.group_index)); + } + const uint32_t group_index = block_status.group_index; + set_[group_index].Insert(profile_info_t{event, NULL, 0, info}); + } + } else if (type == ROCPROFILER_TYPE_TRACE) { + set_[0].Insert(profile_info_t{NULL, info->parameters, info->parameter_count, info}); + } else { + EXC_RAISING(HSA_STATUS_ERROR, "bad rocprofiler type (" << type << ")"); + } + } + } + + void Finalize() { + for (unsigned index = 0; index < set_.size(); ++index) { + const hsa_status_t status = set_[index].Finalize(); + if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "context finalize failed"); + } + } + + uint32_t GetGroupCount() const { return set_.size(); } + + rocprofiler_group_t GetGroupInfo(const uint32_t& index) { + rocprofiler::info_vector_t& info_vector = set_[index].GetInfoVector(); + rocprofiler_group_t group = {}; + group.info_count = info_vector.size(); + group.info = &info_vector[0]; + group.context = reinterpret_cast(this); + group.index = index; + return group; + } + + const pkt_vector_t& StartPackets(const uint32_t& group_index) const { return set_[group_index].GetStartVector(); } + const pkt_vector_t& StopPackets(const uint32_t& group_index) const { return set_[group_index].GetStopVector(); } + + void Start(const uint32_t& group_index, Queue* const queue = NULL) { + const pkt_vector_t& start_packets = StartPackets(group_index); + Queue* const submit_queue = (queue != NULL) ? queue : queue_; + submit_queue->Submit(&start_packets[0], start_packets.size()); + } + void Stop(const uint32_t& group_index, Queue* const queue = NULL) { + const pkt_vector_t& stop_packets = StopPackets(group_index); + Queue* const submit_queue = (queue != NULL) ? queue : queue_; + submit_queue->Submit(&stop_packets[0], stop_packets.size()); + } + void Submit(const uint32_t& group_index, const packet_t* packet, Queue* const queue = NULL) { + Queue* const submit_queue = (queue != NULL) ? queue : queue_; + Start(group_index, submit_queue); + submit_queue->Submit(packet); + Stop(group_index, submit_queue); + } + + struct callback_data_t { + info_vector_t* info_vector; + size_t index; + char* ptr; + }; + + void GetData(const uint32_t& group_index) { + const profile_vector_t profile_vector = GetProfiles(group_index); + for (auto& tuple : profile_vector) { + // Wait for stop packet to complete + hsa_signal_wait_scacquire( + tuple.completion_signal, + HSA_SIGNAL_CONDITION_LT, + 1, + (uint64_t)-1, + HSA_WAIT_STATE_BLOCKED); + callback_data_t callback_data{tuple.info_vector, tuple.info_vector->size(), NULL}; + const hsa_status_t status = api_->hsa_ven_amd_aqlprofile_iterate_data(tuple.profile, DataCallback, &callback_data); + if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "context iterate data failed"); + } + } + + void GetMetricsData() const { + const MetricArgs args(info_map_); + for (const auto v : metrics_map_) { + const std::string& name = v.first; + const Metric* metric = v.second; + const xml::Expr* expr = metric->GetExpr(); + if (expr) { + auto it = info_map_.find(name); + if (it == info_map_.end()) EXC_RAISING(HSA_STATUS_ERROR, "metric '" << name << "', rocprofiler info is not found"); + rocprofiler_info_t* info = it->second; + info->data.result_int64 = expr->Eval(args); + info->data.kind = ROCPROFILER_INT64; + } + } + } + + void IterateTraceData(rocprofiler_trace_data_callback_t callback, void *data) { + profile_vector_t profile_vector; + set_[0].GetTraceProfiles(profile_vector); + for (auto& tuple : profile_vector) { + const hsa_status_t status = api_->hsa_ven_amd_aqlprofile_iterate_data(tuple.profile, callback, data); + if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "context iterate data failed"); + } + } + + private: + // Getting profling packets + profile_vector_t GetProfiles(const uint32_t& index) { + profile_vector_t vec; + if (index >= set_.size()) { + EXC_RAISING(HSA_STATUS_ERROR, "index exceeding the maximum " << set_.size()); + } + set_[index].GetProfiles(vec); + return vec; + } + + static hsa_status_t DataCallback(hsa_ven_amd_aqlprofile_info_type_t ainfo_type, + hsa_ven_amd_aqlprofile_info_data_t* ainfo_data, + void* data) { + hsa_status_t status = HSA_STATUS_SUCCESS; + callback_data_t* callback_data = reinterpret_cast(data); + info_vector_t& info_vector = *(callback_data->info_vector); + uint32_t index = callback_data->index; + const uint32_t sample_id = ainfo_data->sample_id; + if (info_vector.size() == index) { + index = 0; + } else { + if (sample_id == 0) index += 1; + } + callback_data->index = index; + + if (index < info_vector.size()) { + rocprofiler_info_t* rinfo = info_vector[index]; + if (ainfo_type == HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) { + if (ainfo_data->sample_id == 0) rinfo->data.result_int64 = 0; + rinfo->data.result_int64 += ainfo_data->pmc_data.result; + rinfo->data.kind = ROCPROFILER_INT64; + } else if (ainfo_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) { + if (rinfo->data.result_bytes.copy) { + char* result_bytes_ptr = reinterpret_cast(rinfo->data.result_bytes.ptr); + const char* end = result_bytes_ptr + rinfo->data.result_bytes.size; + const char* src = reinterpret_cast(ainfo_data->sqtt_data.ptr); + const uint32_t size = ainfo_data->sqtt_data.size; + char* ptr = (sample_id == 0) ? result_bytes_ptr : callback_data->ptr; + uint64_t* header = reinterpret_cast(ptr); + char* dest = ptr + sizeof(*header); + + if ((dest + size) < end) { + hsa_status_t status = hsa_memory_copy(dest, src, size); + if (status == HSA_STATUS_SUCCESS) { + *header = size; + rinfo->data.kind = ROCPROFILER_BYTES; + rinfo->data.result_bytes.instance_count = sample_id + 1; + callback_data->ptr = dest + align_size(size, sizeof(uint64_t)); + } + } else status = HSA_STATUS_ERROR; + } else { + if (sample_id == 0) { + rinfo->data.kind = ROCPROFILER_BYTES; + rinfo->data.result_bytes.ptr = ainfo_data->sqtt_data.ptr; + rinfo->data.result_bytes.instance_count = UINT32_MAX; + } + rinfo->data.result_bytes.instance_count += 1; + } + } else status = HSA_STATUS_ERROR; + } else status = HSA_STATUS_ERROR; + + return status; + } + + rocprofiler_info_t* NewCounterInfo(const counter_t* counter) { + rocprofiler_info_t* info = new rocprofiler_info_t{}; + info->type = ROCPROFILER_TYPE_METRIC; + info->name = counter->name.c_str(); + return info; + } + + // GPU handel + const hsa_agent_t agent_; + const util::AgentInfo* agent_info_; + // Profiling queue + Queue* queue_; + // HSA resources factory + util::HsaRsrcFactory* hsa_rsrc_; + // aqlprofile API table + const pfn_t* api_; + // Profile group set + std::vector set_; + // Metrics dictionary + MetricsDict metrics_; + // Groups map + std::map groups_map_; + // Info map + info_map_t info_map_; + // Metrics map + std::map metrics_map_; +}; + +} // namespace rocprofiler + +#endif // SRC_CORE_CONTEXT_H_ diff --git a/src/core/hsa_proxy_queue.h b/src/core/hsa_proxy_queue.h new file mode 100644 index 0000000000..b1a8167e66 --- /dev/null +++ b/src/core/hsa_proxy_queue.h @@ -0,0 +1,49 @@ +#ifndef _SRC_CORE_HSA_PROXY_QUEUE_H +#define _SRC_CORE_HSA_PROXY_QUEUE_H + +#include +#include +#include +#include + +#include "core/proxy_queue.h" +#include "util/exception.h" + +namespace rocprofiler { +extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn; +extern decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn; +extern decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn; + +class HsaProxyQueue : public ProxyQueue { + public: + hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) { + return hsa_amd_queue_intercept_register_fn(queue_, on_submit_cb, data); + } + + void Submit(const packet_t* packet) { EXC_RAISING(HSA_STATUS_ERROR, "HsaProxyQueue::Submit() is not supported"); } + + private: + hsa_status_t Init( + hsa_agent_t agent, + uint32_t size, + hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), + void *data, + uint32_t private_segment_size, + uint32_t group_segment_size, + hsa_queue_t **queue) + { + printf("HsaProxyQueue::Init()\n"); + const auto status = hsa_amd_queue_intercept_create_fn(agent, size, type, callback, data, private_segment_size, group_segment_size, &queue_); + *queue = queue_; + return status; + } + + hsa_status_t Cleanup() const { return hsa_queue_destroy_fn(queue_); } + + hsa_queue_t* queue_; +}; + +} // namespace rocprofiler + +#endif // _SRC_CORE_HSA_PROXY_QUEUE_H diff --git a/src/core/hsa_queue.h b/src/core/hsa_queue.h new file mode 100644 index 0000000000..992dae8ac1 --- /dev/null +++ b/src/core/hsa_queue.h @@ -0,0 +1,60 @@ +#ifndef _SRC_CORE_HSA_QUEUE_H +#define _SRC_CORE_HSA_QUEUE_H + +#include + +#include "core/queue.h" +#include "core/types.h" + +namespace rocprofiler { + +class HsaQueue : public Queue { + public: + typedef void (HsaQueue::*submit_fptr_t)(const packet_t* packet); + enum { + LEGACY_SLOT_SIZE_W = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_word_t), + LEGACY_SLOT_SIZE_P = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t) + }; + struct slot_pm4_t { + packet_word_t words[LEGACY_SLOT_SIZE_W]; + }; + + HsaQueue(const util::AgentInfo* agent_info, hsa_queue_t* queue) : + queue_(queue) + {} + + void Submit(const packet_t* packet) { + // Compute the write index of queue and copy Aql packet into it + const uint64_t que_idx = hsa_queue_load_write_index_relaxed(queue_); + // Increment the write index + hsa_queue_store_write_index_relaxed(queue_, que_idx + 1); + + const uint32_t mask = queue_->size - 1; + + // Copy packet to the queue + const packet_word_t* src = reinterpret_cast(packet); + packet_t* slot = reinterpret_cast(queue_->base_address) + (que_idx & mask); + packet_word_t* dst = reinterpret_cast(slot); + const uint32_t nwords = sizeof(packet_t) / sizeof(packet_word_t); + for (unsigned i = 1; i < nwords; ++i) { + dst[i] = src[i]; + } + + // To maintain global order to ensure the prior copy of the packet contents is made visible + // before the header is updated. + // With in-order CP it will wait until the first packet in the blob will be valid + std::atomic* header_atomic_ptr = + reinterpret_cast*>(&dst[0]); + header_atomic_ptr->store(src[0], std::memory_order_release); + + // Doorbell signaling + hsa_signal_store_relaxed(queue_->doorbell_signal, que_idx); + } + + private: + hsa_queue_t* queue_; +}; + +} // namespace rocprofiler + +#endif // _SRC_CORE_HSA_QUEUE_H diff --git a/src/core/intercept_queue.cpp b/src/core/intercept_queue.cpp new file mode 100644 index 0000000000..29c3a00aa2 --- /dev/null +++ b/src/core/intercept_queue.cpp @@ -0,0 +1,15 @@ +#include "core/intercept_queue.h" + +namespace rocprofiler { +void InterceptQueue::HsaIntercept(HsaApiTable* table) { + table->core_->hsa_queue_create_fn = rocprofiler::InterceptQueue::QueueCreate; + table->core_->hsa_queue_destroy_fn = rocprofiler::InterceptQueue::QueueDestroy; +} + +InterceptQueue::mutex_t InterceptQueue::mutex_; +rocprofiler_callback_t InterceptQueue::on_dispatch_cb_ = NULL; +void* InterceptQueue::on_dispatch_cb_data_ = NULL; +const char* InterceptQueue::tool_lib_ = NULL; +void* InterceptQueue::tool_handle_ = NULL; +InterceptQueue::obj_map_t* InterceptQueue::obj_map_ = NULL; +} // namespace rocprofiler diff --git a/src/core/intercept_queue.h b/src/core/intercept_queue.h new file mode 100644 index 0000000000..bcc2160bd8 --- /dev/null +++ b/src/core/intercept_queue.h @@ -0,0 +1,161 @@ +#ifndef _SRC_CORE_INTERCEPT_QUEUE_H +#define _SRC_CORE_INTERCEPT_QUEUE_H + +#include +#include +#include +#include +#include + +#include "core/context.h" +#include "core/proxy_queue.h" +#include "core/types.h" +#include "util/hsa_rsrc_factory.h" + +namespace rocprofiler { +extern decltype(hsa_queue_create)* hsa_queue_create_fn; +extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn; + +class InterceptQueue { + public: + typedef std::recursive_mutex mutex_t; + typedef std::map obj_map_t; + + static void HsaIntercept(HsaApiTable* table); + + static void SetTool(const char* tool) { tool_lib_ = tool; } + + static void UnloadTool() { if (tool_handle_) dlclose(tool_handle_); } + + static hsa_status_t QueueCreate( + hsa_agent_t agent, + uint32_t size, + hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), + void *data, + uint32_t private_segment_size, + uint32_t group_segment_size, + hsa_queue_t **queue) + { + std::lock_guard lck(mutex_); + + hsa_status_t status = HSA_STATUS_ERROR; + + if (tool_lib_) { + tool_handle_ = dlopen(tool_lib_, RTLD_NOW); + if (tool_handle_ == NULL) { + fprintf(stderr, "ROCProfiler: can't load tool library \"%s\"\n", tool_lib_); + fprintf(stderr, "%s\n", dlerror()); + exit(1); + } + tool_lib_ = NULL; + } + + if (!obj_map_) obj_map_ = new obj_map_t; + + ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, &status); + if (status != HSA_STATUS_SUCCESS) { + InterceptQueue* obj = new InterceptQueue(agent, proxy); + (*obj_map_)[(uint64_t)(*queue)] = obj; + status = proxy->SetInterceptCB(OnSubmitCB, obj); + } + + return status; + } + + static hsa_status_t QueueDestroy(hsa_queue_t *queue) { + std::lock_guard lck(mutex_); + hsa_status_t status = HSA_STATUS_ERROR; + + obj_map_t::iterator it = obj_map_->find((uint64_t)queue); + if (it != obj_map_->end()) { + const InterceptQueue* obj = it->second; + delete obj; + obj_map_->erase(it); + status = HSA_STATUS_SUCCESS; + } + + return status; + } + + static void OnSubmitCB(const void* in_packets, uint64_t count, uint64_t user_que_idx, void* data, hsa_amd_queue_intercept_packet_writer writer) { + const packet_t* packets_arr = reinterpret_cast(in_packets); + InterceptQueue* obj = reinterpret_cast(data); + Queue* proxy = obj->proxy_; + + for (uint64_t j = 0; j < count; ++j) { + bool to_submit = true; + const packet_t* packet = &packets_arr[j]; + + if ((GetHeaderType(packet) == HSA_PACKET_TYPE_KERNEL_DISPATCH) && (on_dispatch_cb_ != NULL)) { + rocprofiler_group_t* group = NULL; + const hsa_kernel_dispatch_packet_t* dispatch_packet = reinterpret_cast(packet); + rocprofiler_callback_data_t data = {dispatch_packet->kernel_object, user_que_idx, obj->agent_info_->dev_index}; + hsa_status_t status = on_dispatch_cb_(&data, on_dispatch_cb_data_, &group); + if ((status == HSA_STATUS_SUCCESS) && (group != NULL)) { + Context* context = reinterpret_cast(group->context); + const pkt_vector_t& start_vector = context->StartPackets(group->index); + const pkt_vector_t& stop_vector = context->StopPackets(group->index); + + pkt_vector_t packets = start_vector; + packets.insert(packets.end(), *packet); + packets.insert(packets.end(), stop_vector.begin(), stop_vector.end()); + if (writer != NULL) { + writer(&packets[0], packets.size()); + } else { + proxy->Submit(&packets[0], packets.size()); + } + to_submit = false; + } + } + + if (to_submit) { + if (writer != NULL) { + writer(packet, 1); + } else { + proxy->Submit(packet, 1); + } + } + + packet += 1; + } + } + + static void SetDispatchCB(rocprofiler_callback_t on_dispatch_cb, void* data) { + std::lock_guard lck(mutex_); + on_dispatch_cb_ = on_dispatch_cb; + on_dispatch_cb_data_ = data; + } + + static void UnsetDispatchCB() { + std::lock_guard lck(mutex_); + on_dispatch_cb_ = NULL; + on_dispatch_cb_data_ = NULL; + } + + private: + InterceptQueue(const hsa_agent_t& agent, ProxyQueue* proxy) : proxy_(proxy) { + agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent); + } + ~InterceptQueue() { ProxyQueue::Destroy(proxy_); } + + static packet_word_t GetHeaderType(const packet_t* packet) { + const packet_word_t* header = reinterpret_cast(packet); + return (*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask; + } + + static mutex_t mutex_; + static const packet_word_t header_type_mask = (1ul << HSA_PACKET_HEADER_WIDTH_TYPE) - 1; + static rocprofiler_callback_t on_dispatch_cb_; + static void* on_dispatch_cb_data_; + static const char* tool_lib_; + static void* tool_handle_; + static obj_map_t* obj_map_; + + ProxyQueue* const proxy_; + const util::AgentInfo* agent_info_; +}; + +} // namespace rocprofiler + +#endif // _SRC_CORE_INTERCEPT_QUEUE_H diff --git a/src/core/metrics.h b/src/core/metrics.h new file mode 100644 index 0000000000..95e3536b67 --- /dev/null +++ b/src/core/metrics.h @@ -0,0 +1,169 @@ +#ifndef SRC_CORE_METRICS_H_ +#define SRC_CORE_METRICS_H_ + +#include +#include +#include + +#include +#include +#include +#include + +#include "core/types.h" +#include "util/exception.h" +#include "util/hsa_rsrc_factory.h" +#include "xml/expr.h" +#include "xml/xml.h" + +namespace rocprofiler { +struct counter_t { + std::string name; + event_t event; +}; +typedef std::vector counters_vec_t; + +class Metric { + public: + Metric(const std::string& name) : name_(name) {} + std::string GetName() const { return name_; } + virtual void GetCounters(counters_vec_t &vec) const = 0; + counters_vec_t GetCounters() const { + counters_vec_t counters; + GetCounters(counters); + return counters; + } + virtual const xml::Expr* GetExpr() const = 0; + private: + std::string name_; +}; + +class BaseMetric : public Metric { + public: + BaseMetric(const std::string& name, const counter_t& counter) : Metric(name), counter_(counter) {} + void GetCounters(counters_vec_t &vec) const { vec.push_back(&counter_); } + const xml::Expr* GetExpr() const { return NULL; } + private: + const counter_t counter_; +}; + +class ExprMetric : public Metric { + public: + ExprMetric(const std::string& name, const counters_vec_t& counters, const xml::Expr* expr) : Metric(name), counters_(counters), expr_(expr) {} + void GetCounters(counters_vec_t& vec) const { vec.insert(vec.end(), counters_.begin(), counters_.end()); } + const xml::Expr* GetExpr() const { return expr_; } + private: + const counters_vec_t counters_; + const xml::Expr* expr_; +}; + + +class MetricsDict { + public: + typedef std::map cache_t; + typedef cache_t::const_iterator const_iterator_t; + + class ExprCache : public xml::expr_cache_t { + public: + ExprCache(const cache_t* cache) : cache_(cache) {} + bool Lookup(const std::string& name, std::string& result) const { + bool ret = false; + auto it = cache_->find(name); + if (it != cache_->end()) { + ret = true; + const rocprofiler::ExprMetric* expr_metric = dynamic_cast(it->second); + if (expr_metric) result = expr_metric->GetExpr()->GetStr(); + } + return ret; + } + private: + const cache_t* const cache_; + }; + + MetricsDict(const util::AgentInfo* agent_info) : xml_(NULL) { + const char* xml_name = getenv("ROCP_METRICS"); + if (xml_name != NULL) { + xml_ = new xml::Xml(xml_name); + std::cout << "ROCProfiler: importing metrics from '" << xml_name << "':" << std::endl; + ImportMetrics(agent_info, agent_info->gfxip); + ImportMetrics(agent_info, "global"); + } + } + + const Metric* Get(const std::string& name) const { + const Metric* metric = NULL; + auto it = cache_.find(name); + if (it != cache_.end()) metric = it->second; + return metric; + } + + private: + void ImportMetrics(const util::AgentInfo* agent_info, const char* scope) { + auto scope_list = xml_->GetNodes("top." + std::string(scope) + ".metric"); + if (!scope_list.empty()) { + std::cout << " " << scope_list.size() << " " << scope << " metrics found" << std::endl; + + for (auto node : scope_list) { + const std::string name = node->opts["name"]; + if (cache_.find(name) != cache_.end()) EXC_RAISING(HSA_STATUS_ERROR, "ImportMetrics: metrics redefined '" << name << "'"); + + const std::string expr_str = node->opts["expr"]; + if (expr_str.empty()) { + const std::string block_name = node->opts["block"]; + const uint32_t event_id = atoi(node->opts["event"].c_str()); + + hsa_ven_amd_aqlprofile_profile_t profile; + profile.agent = agent_info->dev_id; + hsa_ven_amd_aqlprofile_id_query_t query = {block_name.c_str(), 0, 0}; + hsa_status_t status = util::HsaRsrcFactory::Instance().AqlProfileApi()-> + hsa_ven_amd_aqlprofile_get_info(&profile, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID, &query); + if (status == HSA_STATUS_SUCCESS) { + const hsa_ven_amd_aqlprofile_block_name_t block_id = (hsa_ven_amd_aqlprofile_block_name_t)query.id; + if (query.instance_count > 1) { + for (unsigned block_index = 0; block_index < query.instance_count; ++block_index) { + std::ostringstream os; + os << name << '[' << block_index << ']'; + const std::string full_name = os.str(); + const counter_t counter = {full_name, {block_id, block_index, event_id}}; + cache_[full_name] = new BaseMetric(full_name, counter); + } + } else { + const counter_t counter = {name, {block_id, 0, event_id}}; + cache_[name] = new BaseMetric(name, counter); + } + } else AQL_EXC_RAISING(HSA_STATUS_ERROR, "ImportMetrics: bad block name '" << block_name << "'"); + } else { + xml::Expr* expr_obj = new xml::Expr(expr_str, new ExprCache(&cache_)); + std::cout << " " << name << " = " << expr_obj->String() << std::endl; + counters_vec_t counters_vec; + for (const std::string var : expr_obj->GetVars()) { + auto it = cache_.find(var); + if (it == cache_.end()) EXC_RAISING(HSA_STATUS_ERROR, "Bad metric '" << name << "', var '" << var << "' is not found"); + it->second->GetCounters(counters_vec); + } + cache_[name] = new ExprMetric(name, counters_vec, expr_obj); + } + } + +#if 0 + for (auto& v : cache_) { + const Metric* metric = v.second; + counters_vec_t counters_vec; + printf("> Metric '%s'\n", metric->GetName().c_str()); + metric->GetCounters(counters_vec); + for (auto c : counters_vec) { + printf(" counter %s, b(%u), i (%u), e (%u)\n", c->name.c_str(), c->event.block_name, c->event.block_index, c->event.counter_id); + } + } +#endif + } + } + + // Metrics map + xml::Xml* xml_; + cache_t cache_; +}; + +} // namespace rocprofiler + +#endif // SRC_CORE_METRICS_H_ diff --git a/src/core/profile.h b/src/core/profile.h new file mode 100644 index 0000000000..95a1a09f0a --- /dev/null +++ b/src/core/profile.h @@ -0,0 +1,206 @@ +#ifndef SRC_CORE_PROFILE_H_ +#define SRC_CORE_PROFILE_H_ + +#include "inc/rocprofiler.h" + +#include +#include + +#include "core/types.h" +#include "util/exception.h" +#include "util/hsa_rsrc_factory.h" + +namespace rocprofiler { +struct profile_info_t { + const event_t* event; + const parameter_t* parameters; + uint32_t parameter_count; + rocprofiler_info_t* rinfo; +}; +typedef std::vector info_vector_t; +typedef std::vector pkt_vector_t; +struct profile_tuple_t { + const profile_t* profile; + info_vector_t* info_vector; + hsa_signal_t completion_signal; +}; +typedef std::vector profile_vector_t; + +template class ConfigBase {}; + +template<> class ConfigBase { + public: + ConfigBase(profile_t *profile) : profile_(profile) {} + + protected: + void* Array() { return const_cast(profile_->events); } + unsigned Count() const { return profile_->event_count; } + void Set(event_t* events, const unsigned& count) { + profile_->events = events; + profile_->event_count = count; + } + profile_t* profile_; +}; + +template<> class ConfigBase { + public: + ConfigBase(profile_t *profile) : profile_(profile) {} + + protected: + void* Array() { return const_cast(profile_->parameters); } + unsigned Count() const { return profile_->parameter_count; } + void Set(parameter_t* parameters, const unsigned& count) { + profile_->parameters = parameters; + profile_->parameter_count = count; + } + profile_t* profile_; +}; + +template +class Config : protected ConfigBase { + typedef ConfigBase Parent; + public: + Config(profile_t *profile) : Parent(profile) {} + void Insert(const Item& item) { + auto count = Parent::Count(); + count += 1; + Item* array = reinterpret_cast(realloc(const_cast(Parent::Array()), count * sizeof(Item))); + array[count - 1] = item; + Parent::Set(array, count); + } +}; + +class Profile { + public: + static const uint32_t LEGACY_SLOT_SIZE_PKT = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t); + + Profile(const util::AgentInfo* agent_info) : agent_info_(agent_info) { + profile_ = {}; + profile_.agent = agent_info->dev_id; + is_legacy_ = (strncmp(agent_info->name, "gfx8", 4) == 0); + } + virtual ~Profile() { + hsa_memory_free(profile_.command_buffer.ptr); + hsa_memory_free(profile_.output_buffer.ptr); + free(const_cast(profile_.events)); + free(const_cast(profile_.parameters)); + } + + virtual void Insert(const profile_info_t& info) { + info_vector_.push_back(info.rinfo); + } + + hsa_status_t Finalize(pkt_vector_t& start_vector, pkt_vector_t& stop_vector) { + hsa_status_t status = HSA_STATUS_SUCCESS; + + if (!info_vector_.empty()) { + util::HsaRsrcFactory* rsrc = &util::HsaRsrcFactory::Instance(); + const pfn_t* api = rsrc->AqlProfileApi(); + packet_t start{}; + packet_t stop{}; + + // Check the profile buffer sizes + status = api->hsa_ven_amd_aqlprofile_start(&profile_, NULL); + if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_start(NULL)"); + Allocate(rsrc); + // Generate start/stop profiling packets + status = api->hsa_ven_amd_aqlprofile_start(&profile_, &start); + if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_start"); + status = api->hsa_ven_amd_aqlprofile_stop(&profile_, &stop); + if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "aqlprofile_stop"); + // Set completion signals + hsa_signal_t dummy_signal{}; + dummy_signal.handle = 0; + start.completion_signal = dummy_signal; + hsa_signal_t post_signal; + status = hsa_signal_create(1, 0, NULL, &post_signal); + if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_signal_create"); + stop.completion_signal = post_signal; + completion_signal_ = post_signal; + + if (is_legacy_) { + const uint32_t start_index = start_vector.size(); + const uint32_t stop_index = stop_vector.size(); + + start_vector.insert(start_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{}); + stop_vector.insert(stop_vector.end(), LEGACY_SLOT_SIZE_PKT, packet_t{}); + status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4(&start, reinterpret_cast(&start_vector[start_index])); + if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); + status = api->hsa_ven_amd_aqlprofile_legacy_get_pm4(&stop, reinterpret_cast(&stop_vector[stop_index])); + if (status != HSA_STATUS_SUCCESS) AQL_EXC_RAISING(status, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); + } else { + start_vector.push_back(start); + stop_vector.push_back(stop); + } + } + + return status; + } + + void GetProfiles(profile_vector_t& vec) { + if (!info_vector_.empty()) { + vec.push_back(profile_tuple_t{&profile_, &info_vector_, completion_signal_}); + } + } + + protected: + virtual hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) = 0; + + const util::AgentInfo* const agent_info_; + bool is_legacy_; + profile_t profile_; + info_vector_t info_vector_; + hsa_signal_t completion_signal_; +}; + +class PmcProfile : public Profile { + public: + PmcProfile(const util::AgentInfo* agent_info) : Profile(agent_info) { + profile_.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC; + } + + void Insert(const profile_info_t& info) { + Profile::Insert(info); + Config(&profile_).Insert(*(info.event)); + } + + hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) { + profile_.command_buffer.ptr = rsrc->AllocateSysMemory(agent_info_, profile_.command_buffer.size); + profile_.output_buffer.ptr = rsrc->AllocateSysMemory(agent_info_, profile_.output_buffer.size); + return (profile_.command_buffer.ptr && profile_.output_buffer.ptr) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; + } +}; + +class SqttProfile : public Profile { + public: + static const uint32_t output_buffer_size = 0x2000000; // 32M + + SqttProfile(const util::AgentInfo* agent_info) : Profile(agent_info) { + profile_.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT; + } + + void Insert(const profile_info_t& info) { + Profile::Insert(info); + for (unsigned j = 0; j < info.parameter_count; ++j) { + Config(&profile_).Insert(info.parameters[j]); + } + + info.rinfo->data.result_bytes.size = output_buffer_size; + if (info.rinfo->data.result_bytes.copy) { + const uint32_t output_buffer_size64 = output_buffer_size / sizeof(uint64_t); + info.rinfo->data.result_bytes.ptr = calloc(output_buffer_size64, sizeof(uint64_t)); + memset(info.rinfo->data.result_bytes.ptr, 0, output_buffer_size); + } + } + + hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) { + profile_.output_buffer.size = output_buffer_size; + profile_.command_buffer.ptr = rsrc->AllocateSysMemory(agent_info_, profile_.command_buffer.size); + profile_.output_buffer.ptr = rsrc->AllocateLocalMemory(agent_info_, profile_.output_buffer.size); + return (profile_.command_buffer.ptr && profile_.output_buffer.ptr) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; + } +}; + +} // namespace rocprofiler + +#endif // SRC_CORE_PROFILE_H_ diff --git a/src/core/proxy_queue.cpp b/src/core/proxy_queue.cpp new file mode 100644 index 0000000000..166d84be9e --- /dev/null +++ b/src/core/proxy_queue.cpp @@ -0,0 +1,48 @@ +#include "core/proxy_queue.h" + +#ifdef ROCP_HSA_PROXY +#include "core/hsa_proxy_queue.h" +#endif +#include "core/simple_proxy_queue.h" + +namespace rocprofiler { +void ProxyQueue::HsaIntercept(HsaApiTable* table) { + if (rocp_type_) SimpleProxyQueue::HsaIntercept(table); +} + +ProxyQueue* ProxyQueue::Create( + hsa_agent_t agent, + uint32_t size, + hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), + void *data, + uint32_t private_segment_size, + uint32_t group_segment_size, + hsa_queue_t **queue, + hsa_status_t* status) +{ + hsa_status_t suc = HSA_STATUS_ERROR; +#ifdef ROCP_HSA_PROXY + ProxyQueue* instance = (rocp_type_) ? (ProxyQueue*) new SimpleProxyQueue() : (ProxyQueue*) new HsaProxyQueue(); +#else + ProxyQueue* instance = new SimpleProxyQueue(); +#endif + if (instance != NULL) { + const auto suc = instance->Init(agent, size, type, callback, data, private_segment_size, group_segment_size, queue); + if (suc != HSA_STATUS_SUCCESS) { + delete instance; + instance = NULL; + } + } + *status = suc; + return instance; +} + +hsa_status_t ProxyQueue::Destroy(const ProxyQueue* obj) { + auto suc = obj->Cleanup(); + delete obj; + return suc; +} + +bool ProxyQueue::rocp_type_ = false; +} // namespace rocprofiler diff --git a/src/core/proxy_queue.h b/src/core/proxy_queue.h new file mode 100644 index 0000000000..eb97402eb5 --- /dev/null +++ b/src/core/proxy_queue.h @@ -0,0 +1,70 @@ +#ifndef _SRC_CORE_PROXY_QUEUE_H +#define _SRC_CORE_PROXY_QUEUE_H + +#include +#include +#include +#include +#include + +#include "core/queue.h" +#include "core/types.h" + +struct HsaApiTable; + +namespace rocprofiler { +typedef void (*hsa_amd_queue_intercept_packet_writer)(const void* packets, uint64_t count); +typedef void (*on_submit_cb_t)(const void* packet, uint64_t count, uint64_t que_idx, void* data, hsa_amd_queue_intercept_packet_writer writer); + +class ProxyQueue : public Queue { + public: + static void InitFactory() { +#ifdef ROCP_HSA_PROXY + const char* type = getenv("ROCP_PROXY_QUEUE"); + if (type != NULL) { + if (strncmp(type, "rocp", 4) == 0) rocp_type_ = true; + } +#else + rocp_type_ = true; +#endif + } + + static void HsaIntercept(HsaApiTable* table); + + static ProxyQueue* Create( + hsa_agent_t agent, + uint32_t size, + hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), + void *data, + uint32_t private_segment_size, + uint32_t group_segment_size, + hsa_queue_t **queue, + hsa_status_t* status); + + static hsa_status_t Destroy(const ProxyQueue* obj); + + virtual hsa_status_t Init( + hsa_agent_t agent, + uint32_t size, + hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), + void *data, + uint32_t private_segment_size, + uint32_t group_segment_size, + hsa_queue_t **queue + ) = 0; + virtual hsa_status_t Cleanup() const = 0; + virtual hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) = 0; + virtual void Submit(const packet_t* packet) = 0; + + protected: + virtual ~ProxyQueue() {}; + + private: + static bool rocp_type_; +}; + +} // namespace rocprofiler + +#endif // _SRC_CORE_PROXY_QUEUE_H diff --git a/src/core/queue.h b/src/core/queue.h new file mode 100644 index 0000000000..9d7393fdaf --- /dev/null +++ b/src/core/queue.h @@ -0,0 +1,20 @@ +#ifndef _SRC_CORE_QUEUE_H +#define _SRC_CORE_QUEUE_H + +#include "core/types.h" + +namespace rocprofiler { + +class Queue { + public: + Queue() {} + virtual ~Queue() {} + virtual void Submit(const packet_t* packet) = 0; + virtual void Submit(const packet_t* packet, const size_t& count) { + for (const packet_t* p = packet; p < packet + count; ++p) Submit(p); + } +}; + +} // namespace rocprofiler + +#endif // _SRC_CORE_QUEUE_H diff --git a/src/core/rocprofiler.cpp b/src/core/rocprofiler.cpp new file mode 100644 index 0000000000..1206e59064 --- /dev/null +++ b/src/core/rocprofiler.cpp @@ -0,0 +1,251 @@ +#include "inc/rocprofiler.h" + +#include +#include +#include +#include + +#include "core/context.h" +#include "core/hsa_queue.h" +#include "core/intercept_queue.h" +#include "core/proxy_queue.h" +#include "core/simple_proxy_queue.h" +#include "util/exception.h" +#include "util/hsa_rsrc_factory.h" +#include "util/logger.h" + +#define PUBLIC_API __attribute__((visibility("default"))) +#define CONSTRUCTOR_API __attribute__((constructor)) +#define DESTRUCTOR_API __attribute__((destructor)) + +#define API_METHOD_PREFIX \ + hsa_status_t status = HSA_STATUS_SUCCESS; \ + try { + +#define API_METHOD_SUFFIX \ + } catch (std::exception& e) { \ + ERR_LOGGING(__FUNCTION__ << "(), " << e.what()); \ + status = rocprofiler::GetExcStatus(e); \ + } \ + return status; + +namespace rocprofiler { +decltype(hsa_queue_create)* hsa_queue_create_fn; +decltype(hsa_queue_destroy)* hsa_queue_destroy_fn; +decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn; +decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn; +decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn; +#ifdef ROCP_HSA_PROXY +decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn; +decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn; +#endif + +::HsaApiTable* kHsaApiTable; + +void SaveHsaApi(::HsaApiTable* table) { + kHsaApiTable = table; + hsa_queue_create_fn = table->core_->hsa_queue_create_fn; + hsa_queue_destroy_fn = table->core_->hsa_queue_destroy_fn; + hsa_signal_store_relaxed_fn = table->core_->hsa_signal_store_relaxed_fn; + hsa_queue_load_write_index_relaxed_fn = table->core_->hsa_queue_load_write_index_relaxed_fn; + hsa_queue_store_write_index_relaxed_fn = table->core_->hsa_queue_store_write_index_relaxed_fn; +#ifdef ROCP_HSA_PROXY + hsa_amd_queue_intercept_create_fn = table->amd_ext_->hsa_amd_queue_intercept_create_fn; + hsa_amd_queue_intercept_register_fn = table->amd_ext_->hsa_amd_queue_intercept_register_fn; +#endif +} + +void RestoreHsaApi() { + ::HsaApiTable* table = kHsaApiTable; + table->core_->hsa_queue_create_fn = hsa_queue_create_fn; + table->core_->hsa_queue_destroy_fn = hsa_queue_destroy_fn; + table->core_->hsa_signal_store_relaxed_fn = hsa_signal_store_relaxed_fn; + table->core_->hsa_queue_load_write_index_relaxed_fn = hsa_queue_load_write_index_relaxed_fn; + table->core_->hsa_queue_store_write_index_relaxed_fn = hsa_queue_store_write_index_relaxed_fn; +#ifdef ROCP_HSA_PROXY + table->amd_ext_->hsa_amd_queue_intercept_create_fn = hsa_amd_queue_intercept_create_fn; + table->amd_ext_->hsa_amd_queue_intercept_register_fn = hsa_amd_queue_intercept_register_fn; +#endif +} + +CONSTRUCTOR_API void constructor() { + util::Logger::Create(); + util::HsaRsrcFactory::Create(); +} + +DESTRUCTOR_API void destructor() { + util::HsaRsrcFactory::Destroy(); + util::Logger::Destroy(); +} + +hsa_status_t GetExcStatus(const std::exception& e) { + const util::exception* rocprofiler_exc_ptr = dynamic_cast(&e); + return (rocprofiler_exc_ptr) ? static_cast(rocprofiler_exc_ptr->status()) : HSA_STATUS_ERROR; +} + +util::Logger::mutex_t util::Logger::mutex_; +util::Logger* util::Logger::instance_ = NULL; + +} + +extern "C" { + +// Returns the last error message +PUBLIC_API hsa_status_t rocprofiler_error_string(const char** str) { + API_METHOD_PREFIX + *str = rocprofiler::util::Logger::LastMessage().c_str(); + API_METHOD_SUFFIX +} + +// Create new profiling context +PUBLIC_API hsa_status_t rocprofiler_open( + unsigned agent_id, + rocprofiler_info_t* info, + uint32_t info_count, + rocprofiler_t** handle, + uint32_t mode, + rocprofiler_properties_t* properties) +{ + API_METHOD_PREFIX + rocprofiler::util::HsaRsrcFactory* hsa_rsrc = &rocprofiler::util::HsaRsrcFactory::Instance(); + const rocprofiler::util::AgentInfo* agent_info; + if (!hsa_rsrc->GetGpuAgentInfo(agent_id, &agent_info)) { + EXC_RAISING(HSA_STATUS_ERROR, "agent[" << agent_id << "] is not found"); + } + + rocprofiler::Queue* queue = NULL; + if (mode != 0) { + if (mode & ROCPROFILER_MODE_STANDALONE) { + if (mode & ROCPROFILER_MODE_CREATEQUEUE) { + if (hsa_rsrc->CreateQueue(agent_info, properties->queue_depth, &(properties->queue)) == false) { + EXC_RAISING(HSA_STATUS_ERROR, "CreateQueue() failed"); + } + } + queue = new rocprofiler::HsaQueue(agent_info, properties->queue); + } else { + EXC_RAISING(HSA_STATUS_ERROR, "invalid mode (" << mode << ")"); + } + } + + *handle = (void*) new rocprofiler::Context(agent_info, queue, info, info_count); + API_METHOD_SUFFIX +} + +// Delete profiling info +PUBLIC_API hsa_status_t rocprofiler_close(rocprofiler_t* handle) +{ + API_METHOD_PREFIX + rocprofiler::Context* context = reinterpret_cast(handle); + if (context) delete context; + API_METHOD_SUFFIX +} + +// Get profiling groups +PUBLIC_API hsa_status_t rocprofiler_get_groups(rocprofiler_t* handle, rocprofiler_group_t** group_array, uint32_t* group_count) { + API_METHOD_PREFIX + rocprofiler::Context* context = reinterpret_cast(handle); + const uint32_t count = context->GetGroupCount(); + rocprofiler_group_t* groups = (rocprofiler_group_t*) calloc(count, sizeof(rocprofiler_group_t)); + for (unsigned i = 0; i < count; ++i) groups[i] = context->GetGroupInfo(i); + *group_array = groups; + *group_count = count; + API_METHOD_SUFFIX +} + +// Start profiling +PUBLIC_API hsa_status_t rocprofiler_start(rocprofiler_t* handle, uint32_t group_index) { + API_METHOD_PREFIX + rocprofiler::Context* context = reinterpret_cast(handle); + context->Start(group_index); + API_METHOD_SUFFIX +} + +// Stop profiling +PUBLIC_API hsa_status_t rocprofiler_stop(rocprofiler_t* handle, uint32_t group_index) { + API_METHOD_PREFIX + rocprofiler::Context* context = reinterpret_cast(handle); + context->Stop(group_index); + API_METHOD_SUFFIX +} + +// Get profiling data +PUBLIC_API hsa_status_t rocprofiler_get_data(rocprofiler_t* handle, uint32_t group_index) { + API_METHOD_PREFIX + rocprofiler::Context* context = reinterpret_cast(handle); + context->GetData(group_index); + API_METHOD_SUFFIX +} + +// Start profiling +PUBLIC_API hsa_status_t rocprofiler_group_start(rocprofiler_group_t* group) { + API_METHOD_PREFIX + rocprofiler_start(group->context, group->index); + API_METHOD_SUFFIX +} + +// Stop profiling +PUBLIC_API hsa_status_t rocprofiler_group_stop(rocprofiler_group_t* group) { + API_METHOD_PREFIX + rocprofiler_stop(group->context, group->index); + API_METHOD_SUFFIX +} + +// Get profiling data +PUBLIC_API hsa_status_t rocprofiler_get_group_data(rocprofiler_group_t* group) { + API_METHOD_PREFIX + rocprofiler::Context* context = reinterpret_cast(group->context); + context->GetData(group->index); + API_METHOD_SUFFIX +} + +// Get metrics data +PUBLIC_API hsa_status_t rocprofiler_get_metrics_data(const rocprofiler_t* handle) { + API_METHOD_PREFIX + const rocprofiler::Context* context = reinterpret_cast(handle); + context->GetMetricsData(); + API_METHOD_SUFFIX +} + +// Set kernel dispatch observer +PUBLIC_API hsa_status_t rocprofiler_set_dispatch_observer(rocprofiler_callback_t callback, void* data) { + API_METHOD_PREFIX + rocprofiler::InterceptQueue::SetDispatchCB(callback, data); + API_METHOD_SUFFIX +} + +// Set kernel dispatch observer +PUBLIC_API hsa_status_t rocprofiler_remove_dispatch_observer() { + API_METHOD_PREFIX + rocprofiler::InterceptQueue::UnsetDispatchCB(); + API_METHOD_SUFFIX +} + +// Method for iterating the events output data +PUBLIC_API hsa_status_t rocprofiler_iterate_trace_data(rocprofiler_t* handle, hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) { + API_METHOD_PREFIX + rocprofiler::Context* context = reinterpret_cast(handle); + context->IterateTraceData(callback, data); + API_METHOD_SUFFIX +} + +PUBLIC_API bool OnLoad( + HsaApiTable* table, + uint64_t runtime_version, + uint64_t failed_tool_count, + const char* const * failed_tool_names) { + rocprofiler::SaveHsaApi(table); + rocprofiler::ProxyQueue::InitFactory(); + rocprofiler::InterceptQueue::SetTool(getenv("ROCP_TOOL_LIB")); + // HSA intercepting + if (getenv("ROCP_HSA_INTERCEPT") != NULL) { + rocprofiler::InterceptQueue::HsaIntercept(table); + rocprofiler::ProxyQueue::HsaIntercept(table); + } + return true; +} + +PUBLIC_API void OnUnload() { + rocprofiler::RestoreHsaApi(); +} + +} // extern "C" diff --git a/src/core/simple_proxy_queue.cpp b/src/core/simple_proxy_queue.cpp new file mode 100644 index 0000000000..a4dadf4e5e --- /dev/null +++ b/src/core/simple_proxy_queue.cpp @@ -0,0 +1,11 @@ +#include "core/simple_proxy_queue.h" + +namespace rocprofiler { +void SimpleProxyQueue::HsaIntercept(HsaApiTable* table) { + table->core_->hsa_signal_store_relaxed_fn = rocprofiler::SimpleProxyQueue::SignalStore; + table->core_->hsa_queue_load_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::LoadIndex; + table->core_->hsa_queue_store_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::StoreIndex; +} + +std::map SimpleProxyQueue::queue_map_; +} // namespace rocprofiler diff --git a/src/core/simple_proxy_queue.h b/src/core/simple_proxy_queue.h new file mode 100644 index 0000000000..9a9f6221c9 --- /dev/null +++ b/src/core/simple_proxy_queue.h @@ -0,0 +1,187 @@ +#ifndef _SRC_CORE_SIMPLE_PROXY_QUEUE_H +#define _SRC_CORE_SIMPLE_PROXY_QUEUE_H + +#include +#include +#include +#include + +#include "core/proxy_queue.h" +#include "core/types.h" +#include "util/hsa_rsrc_factory.h" + +namespace rocprofiler { +extern decltype(hsa_queue_create)* hsa_queue_create_fn; +extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn; +extern decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn; +extern decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn; +extern decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn; +typedef decltype(hsa_signal_t::handle) signal_handle_t; + + +class SimpleProxyQueue : public ProxyQueue { + public: + static void HsaIntercept(HsaApiTable* table); + + static void SignalStore( + hsa_signal_t signal, + hsa_signal_value_t que_idx) + { + auto it = queue_map_.find(signal.handle); + if (it != queue_map_.end()) { + SimpleProxyQueue* instance = it->second; + const uint64_t begin = instance->submit_index_; + const uint64_t end = que_idx + 1; + instance->submit_index_ = end; + for (uint64_t j = begin; j < end; ++j) { + // Submited packet + const uint32_t idx = j & instance->queue_mask_; + packet_t* packet = reinterpret_cast(instance->queue_->base_address) + idx; + if (instance->on_submit_cb_ != NULL) instance->on_submit_cb_(packet, 1, j, instance->on_submit_cb_data_, NULL); + else instance->Submit(packet); + } + } else { + hsa_signal_store_relaxed_fn(signal, que_idx); + } + } + + static uint64_t LoadIndex( + const hsa_queue_t *queue) + { + uint64_t index = 0; + auto it = queue_map_.find(queue->doorbell_signal.handle); + if (it != queue_map_.end()) { + SimpleProxyQueue* instance = it->second; + instance->mutex_.lock(); + index = instance->queue_index_; + } else { + index = hsa_queue_load_write_index_relaxed_fn(queue); + } + return index; + } + + static void StoreIndex( + const hsa_queue_t *queue, + uint64_t value) + { + auto it = queue_map_.find(queue->doorbell_signal.handle); + if (it != queue_map_.end()) { + SimpleProxyQueue* instance = it->second; + instance->queue_index_ = value; + instance->mutex_.unlock(); + } else { + hsa_queue_store_write_index_relaxed_fn(queue, value); + } + } + + hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) { + on_submit_cb_ = on_submit_cb; + on_submit_cb_data_ = data; + return HSA_STATUS_SUCCESS; + } + + void Submit(const packet_t* packet) { + // Compute the write index of queue and copy Aql packet into it + const uint64_t que_idx = hsa_queue_load_write_index_relaxed_fn(queue_); + // Increment the write index and ring the doorbell to submit the packet. + hsa_queue_store_write_index_relaxed_fn(queue_, que_idx + 1); + + const uint32_t mask = queue_->size - 1; + const uint32_t idx = que_idx & mask; + + // Copy packet to the queue + const packet_word_t* src = reinterpret_cast(packet); + packet_word_t* dst = reinterpret_cast(base_address_ + idx); + for (unsigned i = 1; i < sizeof(packet_t) / sizeof(packet_word_t); ++i) { + dst[i] = src[i]; + } + + // To maintain global order to ensure the prior copy of the packet contents is made visible + // before the header is updated. + // With in-order CP it will wait until the first packet in the blob will be valid + std::atomic* header_atomic_ptr = + reinterpret_cast*>(&dst[0]); + header_atomic_ptr->store(src[0], std::memory_order_release); + + // Doorbell signaling + hsa_signal_store_relaxed_fn(doorbell_signal_, que_idx); + } + + SimpleProxyQueue() : + agent_info_(NULL), + queue_(NULL), + base_address_(NULL), + doorbell_signal_({}), + queue_index_(0), + queue_mask_(0), + submit_index_(0), + on_submit_cb_(0), + on_submit_cb_data_(0) + {} + + ~SimpleProxyQueue() {} + + private: + hsa_status_t Init( + hsa_agent_t agent, + uint32_t size, + hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), + void *data, + uint32_t private_segment_size, + uint32_t group_segment_size, + hsa_queue_t **queue) + { + auto status = Init(agent, size); + *queue = queue_; + return status; + } + + hsa_status_t Init(hsa_agent_t agent, uint32_t size) { + hsa_status_t status = HSA_STATUS_ERROR; + agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent); + if (agent_info_ != NULL) { + if (agent_info_->dev_type == HSA_DEVICE_TYPE_GPU) { + status = hsa_queue_create_fn(agent, size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, &queue_); + if (status == HSA_STATUS_SUCCESS) { + base_address_ = reinterpret_cast(queue_->base_address); + doorbell_signal_ = queue_->doorbell_signal; + data_array_ = calloc(size + 1, sizeof(packet_t)); + uintptr_t addr = (uintptr_t)data_array_; + queue_->base_address = (void*) ((addr + align_mask_) & ~align_mask_); + status = hsa_signal_create(1, 0, NULL, &(queue_->doorbell_signal)); + queue_mask_ = size - 1; + queue_map_[queue_->doorbell_signal.handle] = this; + } + } + } + return status; + } + + hsa_status_t Cleanup() const { + hsa_status_t status = HSA_STATUS_SUCCESS; + queue_->base_address = base_address_; + queue_->doorbell_signal = doorbell_signal_; + status = hsa_queue_destroy_fn(queue_); + free(data_array_); + return status; + } + + static std::map queue_map_; + const util::AgentInfo* agent_info_; + hsa_queue_t* queue_; + static const uintptr_t align_mask_ = sizeof(packet_t) - 1; + packet_t* base_address_; + hsa_signal_t doorbell_signal_; + uint64_t queue_index_; + uint64_t queue_mask_; + uint64_t submit_index_; + std::mutex mutex_; + on_submit_cb_t on_submit_cb_; + void* on_submit_cb_data_; + void* data_array_; +}; + +} // namespace rocprofiler + +#endif // _SRC_CORE_SIMPLE_PROXY_QUEUE_H diff --git a/src/core/types.h b/src/core/types.h new file mode 100644 index 0000000000..a179e5584f --- /dev/null +++ b/src/core/types.h @@ -0,0 +1,15 @@ +#ifndef SRC_CORE_TYPES_H_ +#define SRC_CORE_TYPES_H_ + +#include + +namespace rocprofiler { +typedef hsa_ven_amd_aqlprofile_1_00_pfn_t pfn_t; +typedef hsa_ven_amd_aqlprofile_event_t event_t; +typedef hsa_ven_amd_aqlprofile_parameter_t parameter_t; +typedef hsa_ven_amd_aqlprofile_profile_t profile_t; +typedef hsa_ext_amd_aql_pm4_packet_t packet_t; +typedef uint32_t packet_word_t; +} // namespace rocprofiler + +#endif // SRC_CORE_TYPES_H_ diff --git a/src/util/exception.h b/src/util/exception.h new file mode 100644 index 0000000000..82b71d8557 --- /dev/null +++ b/src/util/exception.h @@ -0,0 +1,39 @@ +#ifndef SRC_UTIL_EXCEPTION_H_ +#define SRC_UTIL_EXCEPTION_H_ + +#include +#include +#include + +#include + +#define EXC_RAISING(error, stream) { \ + std::ostringstream oss; oss << __FUNCTION__ << "(), " << stream; \ + throw rocprofiler::util::exception(error, oss.str()); \ +} + +#define AQL_EXC_RAISING(error, stream) { \ + const char* error_string = NULL; \ + const rocprofiler::pfn_t* api = util::HsaRsrcFactory::Instance().AqlProfileApi(); \ + api->hsa_ven_amd_aqlprofile_error_string(&error_string); \ + EXC_RAISING(error, stream << ", " << error_string); \ +} + +namespace rocprofiler { +namespace util { + +class exception : public std::exception { + public: + explicit exception(const uint32_t &status, const std::string& msg) : status_(status), str_(msg) {} + const char* what() const throw() { return str_.c_str(); } + uint32_t status() const throw() { return status_; } + + protected: + const uint32_t status_; + const std::string str_; +}; + +} // namespace util +} // namespace rocprofiler + +#endif // SRC_UTIL_EXCEPTION_H_ diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp new file mode 100644 index 0000000000..18ce098ce2 --- /dev/null +++ b/src/util/hsa_rsrc_factory.cpp @@ -0,0 +1,392 @@ +/********************************************************************** +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted +provided that the following conditions are met: + +<95> Redistributions of source code must retain the above copyright notice, this list of +conditions and the following disclaimer. +<95> Redistributions in binary form must reproduce the above copyright notice, this list of +conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +********************************************************************/ + +#include "util/hsa_rsrc_factory.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace rocprofiler { +namespace util { + +// Callback function to get available in the system agents +hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) { + hsa_status_t status = HSA_STATUS_ERROR; + HsaRsrcFactory* hsa_rsrc = reinterpret_cast(data); + const AgentInfo* agent_info = hsa_rsrc->AddAgentInfo(agent); + if (agent_info != NULL) status = HSA_STATUS_SUCCESS; + return status; +} + +// Callback function to find and bind kernarg region of an agent +hsa_status_t HsaRsrcFactory::FindMemRegionsCallback(hsa_region_t region, void* data) { + hsa_region_global_flag_t flags; + hsa_region_segment_t segment_id; + + hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id); + if (segment_id != HSA_REGION_SEGMENT_GLOBAL) { + return HSA_STATUS_SUCCESS; + } + + AgentInfo* agent_info = (AgentInfo*)data; + hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags); + if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) { + agent_info->coarse_region = region; + } + + if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) { + agent_info->kernarg_region = region; + } + + return HSA_STATUS_SUCCESS; +} + +// Constructor of the class +HsaRsrcFactory::HsaRsrcFactory() { + // Initialize the Hsa Runtime + hsa_status_t status = hsa_init(); + CHECK_STATUS("Error in hsa_init", status); + + // Discover the set of Gpu devices available on the platform + status = hsa_iterate_agents(GetHsaAgentsCallback, this); + CHECK_STATUS("Error Calling hsa_iterate_agents", status); + + // Get AqlProfile API table + status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &aqlprofile_api_); + CHECK_STATUS("aqlprofile API table query failed", status); +} + +// Destructor of the class +HsaRsrcFactory::~HsaRsrcFactory() { + hsa_status_t status = hsa_shut_down(); + CHECK_STATUS("Error in hsa_shut_down", status); +} + +// Add system agent info +const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { + // Determine if device is a Gpu agent + hsa_status_t status; + AgentInfo* agent_info = NULL; + + hsa_device_type_t type; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type); + CHECK_STATUS("Error Calling hsa_agent_get_info", status); + + if (type == HSA_DEVICE_TYPE_CPU) { + agent_info = new AgentInfo{}; + agent_info->dev_id = agent; + agent_info->dev_type = HSA_DEVICE_TYPE_CPU; + agent_info->dev_index = cpu_list_.size(); + cpu_list_.push_back(agent_info); + } + + if (type == HSA_DEVICE_TYPE_GPU) { + agent_info = new AgentInfo{}; + agent_info->dev_id = agent; + agent_info->dev_type = HSA_DEVICE_TYPE_GPU; + hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name); + strncpy(agent_info->gfxip, agent_info->name, 4); + agent_info->gfxip[4] = '\0'; + agent_info->max_wave_size = 0; + hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size); + agent_info->max_queue_size = 0; + hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size); + agent_info->profile = hsa_profile_t(108); + hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile); + + // Initialize memory regions to zero + agent_info->kernarg_region.handle = 0; + agent_info->coarse_region.handle = 0; + + // Find and Bind Memory regions of the Gpu agent + hsa_agent_iterate_regions(agent, FindMemRegionsCallback, agent_info); + + // Set GPU index + agent_info->dev_index = gpu_list_.size(); + gpu_list_.push_back(agent_info); + } + + if (agent_info) agent_map_[agent.handle] = agent_info; + + return agent_info; +} + +// Return systen agent info +const AgentInfo* HsaRsrcFactory::GetAgentInfo(const hsa_agent_t agent) { + const AgentInfo* agent_info = NULL; + auto it = agent_map_.find(agent.handle); + if (it != agent_map_.end()) { + agent_info = it->second; + } + return agent_info; +} + +// Get the count of Hsa Gpu Agents available on the platform +// +// @return uint32_t Number of Gpu agents on platform +// +uint32_t HsaRsrcFactory::GetCountOfGpuAgents() { return uint32_t(gpu_list_.size()); } + +// Get the count of Hsa Cpu Agents available on the platform +// +// @return uint32_t Number of Cpu agents on platform +// +uint32_t HsaRsrcFactory::GetCountOfCpuAgents() { return uint32_t(cpu_list_.size()); } + +// Get the AgentInfo handle of a Gpu device +// +// @param idx Gpu Agent at specified index +// +// @param agent_info Output parameter updated with AgentInfo +// +// @return bool true if successful, false otherwise +// +bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) { + // Determine if request is valid + uint32_t size = uint32_t(gpu_list_.size()); + if (idx >= size) { + return false; + } + + // Copy AgentInfo from specified index + *agent_info = gpu_list_[idx]; + + return true; +} + +// Get the AgentInfo handle of a Cpu device +// +// @param idx Cpu Agent at specified index +// +// @param agent_info Output parameter updated with AgentInfo +// +// @return bool true if successful, false otherwise +// +bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) { + // Determine if request is valid + uint32_t size = uint32_t(cpu_list_.size()); + if (idx >= size) { + return false; + } + + // Copy AgentInfo from specified index + *agent_info = cpu_list_[idx]; + return true; +} + +// Create a Queue object and return its handle. The queue object is expected +// to support user requested number of Aql dispatch packets. +// +// @param agent_info Gpu Agent on which to create a queue object +// +// @param num_Pkts Number of packets to be held by queue +// +// @param queue Output parameter updated with handle of queue object +// +// @return bool true if successful, false otherwise +// +bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue) { + hsa_status_t status; + status = hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL, + UINT32_MAX, UINT32_MAX, queue); + return (status == HSA_STATUS_SUCCESS); +} + +// Create a Signal object and return its handle. +// +// @param value Initial value of signal object +// +// @param signal Output parameter updated with handle of signal object +// +// @return bool true if successful, false otherwise +// +bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) { + hsa_status_t status; + status = hsa_signal_create(value, 0, NULL, signal); + return (status == HSA_STATUS_SUCCESS); +} + +// Allocate memory for use by a kernel of specified size in specified +// agent's memory region. Currently supports Global segment whose Kernarg +// flag set. +// +// @param agent_info Agent from whose memory region to allocate +// +// @param size Size of memory in terms of bytes +// +// @return uint8_t* Pointer to buffer, null if allocation fails. +// +uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) { + hsa_status_t status; + uint8_t* buffer = NULL; + size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; + + if (agent_info->coarse_region.handle != 0) { + // Allocate in local memory if it is available + status = hsa_memory_allocate(agent_info->coarse_region, size, (void**)&buffer); + if (status == HSA_STATUS_SUCCESS) { + status = hsa_memory_assign_agent(buffer, agent_info->dev_id, HSA_ACCESS_PERMISSION_RW); + } + } else { + // Allocate in system memory if local memory is not available + status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer); + } + + return (status == HSA_STATUS_SUCCESS) ? buffer : NULL; +} + +// Allocate memory tp pass kernel parameters. +// +// @param agent_info Agent from whose memory region to allocate +// +// @param size Size of memory in terms of bytes +// +// @return uint8_t* Pointer to buffer, null if allocation fails. +// +uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) { + hsa_status_t status; + size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; + + uint8_t* buffer = NULL; + status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer); + return (status == HSA_STATUS_SUCCESS) ? buffer : NULL; +} + +// Transfer data method +bool HsaRsrcFactory::TransferData(void* dest_buff, void* src_buff, uint32_t length, + bool host_to_dev) { + hsa_status_t status; + status = hsa_memory_copy(dest_buff, src_buff, length); + return (status == HSA_STATUS_SUCCESS); +} + +// Loads an Assembled Brig file and Finalizes it into Device Isa +// +// @param agent_info Gpu device for which to finalize +// +// @param brig_path File path of the Assembled Brig file +// +// @param kernel_name Name of the kernel to finalize +// +// @param code_desc Handle of finalized Code Descriptor that could +// be used to submit for execution +// +// @return bool true if successful, false otherwise +// +bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, + char* kernel_name, hsa_executable_symbol_t* code_desc) { + // Finalize the Hsail object into code object + hsa_status_t status; + hsa_code_object_t code_object; + + // Build the code object filename + std::string filename(brig_path); + std::clog << "Code object filename: " << filename << std::endl; + + // Open the file containing code object + std::ifstream codeStream(filename.c_str(), std::ios::binary | std::ios::ate); + if (!codeStream) { + std::cerr << "Error: failed to load " << filename << std::endl; + assert(false); + return false; + } + + // Allocate memory to read in code object from file + size_t size = std::string::size_type(codeStream.tellg()); + char* codeBuff = (char*)AllocateSysMemory(agent_info, size); + if (!codeBuff) { + std::cerr << "Error: failed to allocate memory for code object." << std::endl; + assert(false); + return false; + } + + // Read the code object into allocated memory + codeStream.seekg(0, std::ios::beg); + std::copy(std::istreambuf_iterator(codeStream), std::istreambuf_iterator(), codeBuff); + + // De-Serialize the code object that has been read into memory + status = hsa_code_object_deserialize(codeBuff, size, NULL, &code_object); + if (status != HSA_STATUS_SUCCESS) { + std::cerr << "Failed to deserialize code object" << std::endl; + return false; + } + + // Create executable. + hsa_executable_t hsaExecutable; + status = + hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, "", &hsaExecutable); + CHECK_STATUS("Error in creating executable object", status); + + // Load code object. + status = hsa_executable_load_code_object(hsaExecutable, agent_info->dev_id, code_object, ""); + CHECK_STATUS("Error in loading executable object", status); + + // Freeze executable. + status = hsa_executable_freeze(hsaExecutable, ""); + CHECK_STATUS("Error in freezing executable object", status); + + // Get symbol handle. + hsa_executable_symbol_t kernelSymbol; + status = hsa_executable_get_symbol(hsaExecutable, NULL, kernel_name, agent_info->dev_id, 0, + &kernelSymbol); + CHECK_STATUS("Error in looking up kernel symbol", status); + + // Update output parameter + *code_desc = kernelSymbol; + return true; +} + +// Print the various fields of Hsa Gpu Agents +bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) { + std::clog << header << " :" << std::endl; + + const AgentInfo* agent_info; + int size = uint32_t(gpu_list_.size()); + for (int idx = 0; idx < size; idx++) { + agent_info = gpu_list_[idx]; + + std::clog << "> agent[" << idx << "] :" << std::endl; + std::clog << ">> Name : " << agent_info->name << std::endl; + std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl; + std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl; + std::clog << ">> Kernarg Region Id : " << agent_info->coarse_region.handle << std::endl; + } + return true; +} + +HsaRsrcFactory* HsaRsrcFactory::instance_ = NULL; +HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_; + +} // namespace util +} // namespace rocprofiler diff --git a/src/util/hsa_rsrc_factory.h b/src/util/hsa_rsrc_factory.h new file mode 100644 index 0000000000..8b4e2227a0 --- /dev/null +++ b/src/util/hsa_rsrc_factory.h @@ -0,0 +1,262 @@ +/********************************************************************** +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted +provided that the following conditions are met: + +<95> Redistributions of source code must retain the above copyright notice, this list of +conditions and the following disclaimer. +<95> Redistributions in binary form must reproduce the above copyright notice, this list of +conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +********************************************************************/ + +#ifndef SRC_UTIL_HSA_RSRC_FACTORY_H_ +#define SRC_UTIL_HSA_RSRC_FACTORY_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define HSA_ARGUMENT_ALIGN_BYTES 16 +#define HSA_QUEUE_ALIGN_BYTES 64 +#define HSA_PACKET_ALIGN_BYTES 64 + +#define CHECK_STATUS(msg, status) \ + if (status != HSA_STATUS_SUCCESS) { \ + const char* emsg = 0; \ + hsa_status_string(status, &emsg); \ + printf("%s: %s\n", msg, emsg ? emsg : ""); \ + exit(1); \ + } + +namespace rocprofiler { +namespace util { +static const unsigned MEM_PAGE_BYTES = 0x1000; +static const unsigned MEM_PAGE_MASK = MEM_PAGE_BYTES - 1; +typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t; + +// Encapsulates information about a Hsa Agent such as its +// handle, name, max queue size, max wavefront size, etc. +struct AgentInfo { + // Handle of Agent + hsa_agent_t dev_id; + + // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2 + uint32_t dev_type; + + // Agent system index + uint32_t dev_index; + + // GFXIP name + char gfxip[64]; + + // Name of Agent whose length is less than 64 + char name[64]; + + // Max size of Wavefront size + uint32_t max_wave_size; + + // Max size of Queue buffer + uint32_t max_queue_size; + + // Hsail profile supported by agent + hsa_profile_t profile; + + // Memory region supporting kernel parameters + hsa_region_t coarse_region; + + // Memory region supporting kernel arguments + hsa_region_t kernarg_region; +}; + +class HsaRsrcFactory { + public: + typedef std::recursive_mutex mutex_t; + + static HsaRsrcFactory* Create() { return NULL; } + + static HsaRsrcFactory* CreateInstance() { + std::lock_guard lck(mutex_); + if (instance_ == NULL) { + instance_ = new HsaRsrcFactory(); + } + return instance_; + } + + static HsaRsrcFactory& Instance() { + CreateInstance(); + hsa_status_t status = (instance_ != NULL) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; + CHECK_STATUS("HsaRsrcFactory::Instance() is not found", status); + return *instance_; + } + + static void Destroy() { + std::lock_guard lck(mutex_); + if (instance_) delete instance_; + instance_ = NULL; + } + + // Return system agent info + const AgentInfo* GetAgentInfo(const hsa_agent_t agent); + + // Get the count of Hsa Gpu Agents available on the platform + // + // @return uint32_t Number of Gpu agents on platform + // + uint32_t GetCountOfGpuAgents(); + + // Get the count of Hsa Cpu Agents available on the platform + // + // @return uint32_t Number of Cpu agents on platform + // + uint32_t GetCountOfCpuAgents(); + + // Get the AgentInfo handle of a Gpu device + // + // @param idx Gpu Agent at specified index + // + // @param agent_info Output parameter updated with AgentInfo + // + // @return bool true if successful, false otherwise + // + bool GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info); + + // Get the AgentInfo handle of a Cpu device + // + // @param idx Cpu Agent at specified index + // + // @param agent_info Output parameter updated with AgentInfo + // + // @return bool true if successful, false otherwise + // + bool GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info); + + // Create a Queue object and return its handle. The queue object is expected + // to support user requested number of Aql dispatch packets. + // + // @param agent_info Gpu Agent on which to create a queue object + // + // @param num_Pkts Number of packets to be held by queue + // + // @param queue Output parameter updated with handle of queue object + // + // @return bool true if successful, false otherwise + // + bool CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue); + + // Create a Signal object and return its handle. + // + // @param value Initial value of signal object + // + // @param signal Output parameter updated with handle of signal object + // + // @return bool true if successful, false otherwise + // + bool CreateSignal(uint32_t value, hsa_signal_t* signal); + + // Allocate memory for use by a kernel of specified size in specified + // agent's memory region. Currently supports Global segment whose Kernarg + // flag set. + // + // @param agent_info Agent from whose memory region to allocate + // + // @param size Size of memory in terms of bytes + // + // @return uint8_t* Pointer to buffer, null if allocation fails. + // + uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size); + + // Allocate memory tp pass kernel parameters. + // + // @param agent_info Agent from whose memory region to allocate + // + // @param size Size of memory in terms of bytes + // + // @return uint8_t* Pointer to buffer, null if allocation fails. + // + uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size); + + // Transfer data method + bool TransferData(void* dest_buff, void* src_buff, uint32_t length, bool host_to_dev); + + // Loads an Assembled Brig file and Finalizes it into Device Isa + // + // @param agent_info Gpu device for which to finalize + // + // @param brig_path File path of the Assembled Brig file + // + // @param kernel_name Name of the kernel to finalize + // + // @param code_desc Handle of finalized Code Descriptor that could + // be used to submit for execution + // + // @return bool true if successful, false otherwise + // + bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, char* kernel_name, + hsa_executable_symbol_t* code_desc); + + // Print the various fields of Hsa Gpu Agents + bool PrintGpuAgents(const std::string& header); + + // Return AqlProfile API table + const hsa_ven_amd_aqlprofile_1_00_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; } + + private: + // System agents iterating callback + static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data); + + // Callback function to find and bind kernarg region of an agent + static hsa_status_t FindMemRegionsCallback(hsa_region_t region, void* data); + + // Constructor of the class. Will initialize the Hsa Runtime and + // query the system topology to get the list of Cpu and Gpu devices + HsaRsrcFactory(); + + // Destructor of the class + ~HsaRsrcFactory(); + + // Add an instance of AgentInfo representing a Hsa Gpu agent + const AgentInfo* AddAgentInfo(const hsa_agent_t agent); + + static HsaRsrcFactory* instance_; + static mutex_t mutex_; + + // Used to maintain a list of Hsa Gpu Agent Info + std::vector gpu_list_; + + // Used to maintain a list of Hsa Cpu Agent Info + std::vector cpu_list_; + + // System agents map + std::map agent_map_; + + // AqlProfile API table + hsa_ven_amd_aqlprofile_1_00_pfn_t aqlprofile_api_; +}; + +} // namespace util +} // namespace rocprofiler + +#endif // SRC_UTIL_HSA_RSRC_FACTORY_H_ diff --git a/src/util/logger.h b/src/util/logger.h new file mode 100644 index 0000000000..1688dd735f --- /dev/null +++ b/src/util/logger.h @@ -0,0 +1,163 @@ +#ifndef SRC_UTIL_LOGGER_H_ +#define SRC_UTIL_LOGGER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace rocprofiler { +namespace util { + +class Logger { + public: + typedef std::recursive_mutex mutex_t; + + template Logger& operator<<(const T& m) { + std::ostringstream oss; + oss << m; + if (!streaming_) + Log(oss.str()); + else + Put(oss.str()); + streaming_ = true; + return *this; + } + + typedef void (*manip_t)(); + Logger& operator<<(manip_t f) { + f(); + return *this; + } + + static void begm() { Instance().ResetStreaming(true); } + static void endl() { Instance().ResetStreaming(false); } + + static const std::string& LastMessage() { + Logger& logger = Instance(); + std::lock_guard lck(mutex_); + return logger.message_[GetTid()]; + } + + static Logger* Create() { + std::lock_guard lck(mutex_); + if (instance_ == NULL) instance_ = new Logger(); + return instance_; + } + + static void Destroy() { + std::lock_guard lck(mutex_); + if (instance_ != NULL) delete instance_; + instance_ = NULL; + } + + static Logger& Instance() { + Create(); + return *instance_; + } + + private: + static uint32_t GetPid() { return syscall(__NR_getpid); } + static uint32_t GetTid() { return syscall(__NR_gettid); } + + Logger() : file_(NULL), dirty_(false), streaming_(false), messaging_(false) { + const char* path = getenv("ROCPROFILER_LOG"); + if (path != NULL) { + file_ = fopen("/tmp/rocprofiler_log.txt", "a"); + } + ResetStreaming(false); + } + + ~Logger() { + if (file_ != NULL) { + if (dirty_) Put("\n"); + fclose(file_); + } + } + + void ResetStreaming(const bool messaging) { + std::lock_guard lck(mutex_); + if (messaging) { + message_[GetTid()] = ""; + } + messaging_ = messaging; + streaming_ = messaging; + } + + void Put(const std::string& m) { + std::lock_guard lck(mutex_); + if (messaging_) { + message_[GetTid()] += m; + } + if (file_ != NULL) { + dirty_ = true; + flock(fileno(file_), LOCK_EX); + fprintf(file_, "%s", m.c_str()); + fflush(file_); + flock(fileno(file_), LOCK_UN); + } + } + + void Log(const std::string& m) { + const time_t rawtime = time(NULL); + tm tm_info; + localtime_r(&rawtime, &tm_info); + char tm_str[26]; + strftime(tm_str, 26, "%Y-%m-%d %H:%M:%S", &tm_info); + std::ostringstream oss; + oss << "<" << tm_str << std::dec << " pid" << GetPid() << " tid" << GetTid() << "> " << m; + Put(oss.str()); + } + + FILE* file_; + bool dirty_; + bool streaming_; + bool messaging_; + + static mutex_t mutex_; + static Logger* instance_; + std::map message_; +}; + +} // namespace util +} // namespace rocprofiler + +#define ERR_LOGGING(stream) { \ + rocprofiler::util::Logger::Instance() \ + << "error: " \ + << rocprofiler::util::Logger::begm \ + << stream \ + << rocprofiler::util::Logger::endl; \ +} + +#define INFO_LOGGING(stream) { \ + rocprofiler::util::Logger::Instance() \ + << "info: " \ + << rocprofiler::util::Logger::begm \ + << stream \ + << rocprofiler::util::Logger::endl; \ +} + +#ifdef DEBUG +# define DBG_LOGGING(stream) { \ + rocprofiler::util::Logger::Instance() << rocprofiler::util::Logger::begm \ + << "debug: \"" << stream << "\"" << \ + << " in " << __FUNCTION__ \ + << " at " << __FILE__ << " line " << __LINE__ \ + << rocprofiler::util::Logger::endl; \ +} +#endif + +#endif // SRC_UTIL_LOGGER_H_ diff --git a/src/xml/expr.h b/src/xml/expr.h new file mode 100644 index 0000000000..1f450d186e --- /dev/null +++ b/src/xml/expr.h @@ -0,0 +1,279 @@ +#ifndef _SRC_XML_EXPR_H +#define _SRC_XML_EXPR_H + +#include +#include +#include +#include +#include +#include + +namespace xml { +class exception_t : public std::exception { + public: + explicit exception_t(const std::string& msg) : str_(msg) {} + const char* what() const throw() { return str_.c_str(); } + protected: + const std::string str_; +}; + +typedef uint64_t args_t; +typedef std::map args_map_t; +class Expr; + +template +class any_cache_t { + public: + virtual ~any_cache_t() {} + virtual bool Lookup(const std::string& name, T& result) const = 0; +}; + +typedef any_cache_t expr_cache_t; +typedef any_cache_t args_cache_t; + +class bin_expr_t { + public: + static const bin_expr_t* CreateExpr(const bin_expr_t* arg1, const bin_expr_t* arg2, const char op); + static const bin_expr_t* CreateArg(Expr *obj, const std::string str); + + bin_expr_t() : arg1_(NULL), arg2_(NULL) {} + bin_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : arg1_(arg1), arg2_(arg2) {} + + virtual args_t Eval(const args_cache_t& args) const = 0; + virtual std::string Symbol() const = 0; + + std::string String() const { + std::string str; + if (arg1_) { str = "(" + arg1_->String() + " " + Symbol() + " " + arg2_->String() + ")"; } + else str = Symbol(); + return str; + } + + protected: + const bin_expr_t* arg1_; + const bin_expr_t* arg2_; +}; + +class Expr { + public: + explicit Expr(const std::string& expr, const expr_cache_t* cache) : + expr_(expr), + pos_(0), + sub_count_(0), + cache_(cache) + { + sub_vec_ = new std::vector; + var_vec_ = new std::vector; + tree_ = ParseExpr(); + } + + explicit Expr(const std::string& expr, const Expr* obj) : + expr_(expr), + pos_(0), + sub_count_(0), + cache_(obj->cache_), + sub_vec_(obj->sub_vec_), + var_vec_(obj->var_vec_) + { + sub_vec_->push_back(this); + tree_ = ParseExpr(); + if (!SubCheck()) throw exception_t("expr '" + expr_ + "', bad parenthesis count"); + } + + ~Expr() { + delete cache_; + for (auto it : *sub_vec_) delete it; + delete sub_vec_; + delete var_vec_; + } + + std::string GetStr() const { return expr_; } + const expr_cache_t* GetCache() const { return cache_; } + const bin_expr_t* GetTree() const { return tree_; } + + args_t Eval(const args_cache_t& args) const { return tree_->Eval(args); } + + std::string Lookup(const std::string& str) const { + std::string result; + if (cache_ && !(cache_->Lookup(str, result))) throw exception_t("expr '" + expr_ + "', lookup '" + str + "' failed"); + return result; + } + + void AddVar(const std::string& str) { + bool found = false; + for (std::string s : *var_vec_) if (s == str) found = true; + if (!found) var_vec_->push_back(str); + } + + const std::vector& GetVars() const { return *var_vec_; } + + std::string String() const { return tree_->String(); } + + private: + const bin_expr_t* ParseExpr() { + const bin_expr_t* expr = ParseArg(); + while (!IsEnd()) { + const char op = Symb(); + const bin_expr_t* second_arg = NULL; + if (IsSymb(')')) { + Next(); + SubClose(); + break; + } if (IsSymb('*') || IsSymb('/')) { + Next(); + second_arg = ParseArg(); + expr = bin_expr_t::CreateExpr(expr, second_arg, op); + } else if (IsSymb('+') || IsSymb('-')) { + Next(); + second_arg = ParseExpr(); + expr = bin_expr_t::CreateExpr(expr, second_arg, op); + break; + } else { + throw exception_t("expr '" + expr_ + "', bad operator '" + op + "'"); + } + } + return expr; + } + + const bin_expr_t* ParseArg() { + const bin_expr_t* arg = NULL; + if (IsSymb('(')) { + Next(); + SubOpen(); + arg = ParseExpr(); + } else { + const unsigned pos = FindOp(); + const std::string str = CutTill(pos); + arg = bin_expr_t::CreateArg(this, str); + if (arg == NULL) throw exception_t("expr '" + expr_ + "', bad argument '" + str + "'"); + } + return arg; + } + + char Symb() const { return Symb(pos_); } + char Symb(const unsigned ind) const { return expr_[ind]; } + bool IsEnd() const { return (pos_ >= expr_.length()); } + bool IsSymb(const char c) const { return IsSymb(pos_, c); } + bool IsSymb(const unsigned ind, const char c) const { return (expr_[ind] == c); } + void Next() { ++pos_; } + void SubOpen() { ++sub_count_; } + void SubClose() { --sub_count_; } + bool SubCheck() const { return (sub_count_ == 0); } + unsigned FindOp() const { + unsigned i = pos_; + while (i < expr_.length()) { + switch (Symb(i)) { + case '*': + case '/': + case '+': + case '-': + case '(': + case ')': + goto end; + } + ++i; + } + end: + return i; + } + std::string CutTill(const unsigned pos) { + const std::string str = (pos > pos_) ? expr_.substr(pos_, pos - pos_) : ""; + pos_ = pos; + return str; + } + + const std::string expr_; + unsigned pos_; + unsigned sub_count_; + const bin_expr_t* tree_; + const expr_cache_t* const cache_; + std::vector* sub_vec_; + std::vector* var_vec_; +}; + +class add_expr_t : public bin_expr_t { + public: + add_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {} + args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) + arg2_->Eval(args)); } + std::string Symbol() const { return "+"; } +}; +class sub_expr_t : public bin_expr_t { + public: + sub_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {} + args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) - arg2_->Eval(args)); } + std::string Symbol() const { return "-"; } +}; +class mul_expr_t : public bin_expr_t { + public: + mul_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {} + args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) * arg2_->Eval(args)); } + std::string Symbol() const { return "*"; } +}; +class div_expr_t : public bin_expr_t { + public: + div_expr_t(const bin_expr_t* arg1, const bin_expr_t* arg2) : bin_expr_t(arg1, arg2) {} + args_t Eval(const args_cache_t& args) const { return (arg1_->Eval(args) / arg2_->Eval(args)); } + std::string Symbol() const { return "/"; } +}; +class const_expr_t : public bin_expr_t { + public: + const_expr_t(const args_t value) : value_(value) {} + args_t Eval(const args_cache_t&) const { return value_; } + std::string Symbol() const { std::ostringstream os; os << value_; return os.str(); } + private: + const args_t value_; +}; +class var_expr_t : public bin_expr_t { + public: + var_expr_t(const std::string name) : name_(name) {} + args_t Eval(const args_cache_t& args) const { + args_t result = 0; + if (!args.Lookup(name_, result)) throw exception_t("expr arg lookup '" + name_ + "' failed"); + return result; + } + std::string Symbol() const { return name_; } + private: + const std::string name_; +}; + +inline const bin_expr_t* bin_expr_t::CreateExpr(const bin_expr_t* arg1, const bin_expr_t* arg2, const char op) { + const bin_expr_t* expr = NULL; + switch (op) { + case '+': + expr = new add_expr_t(arg1, arg2); + break; + case '-': + expr = new sub_expr_t(arg1, arg2); + break; + case '*': + expr = new mul_expr_t(arg1, arg2); + break; + case '/': + expr = new div_expr_t(arg1, arg2); + break; + } + return expr; +} + +inline const bin_expr_t* bin_expr_t::CreateArg(Expr* obj, const std::string str) { + const bin_expr_t* arg = NULL; + const unsigned i = strspn(str.c_str(), "1234567890"); + if (i == str.length()) { + const unsigned value = atoi(str.c_str()); + arg = new const_expr_t(value); + } else { + const std::string sub_expr = obj->Lookup(str); + if (sub_expr.empty()) { + arg = new var_expr_t(str); + obj->AddVar(str); + } else { + const Expr* expr = new Expr(sub_expr, obj); + arg = expr->GetTree(); + } + } + return arg; +} + +} // namespace xml + +#endif // _SRC_XML_EXPR_H diff --git a/src/xml/xml.h b/src/xml/xml.h new file mode 100644 index 0000000000..f1a8410ab9 --- /dev/null +++ b/src/xml/xml.h @@ -0,0 +1,221 @@ +#ifndef SRC_XML_XML_H_ +#define SRC_XML_XML_H_ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace xml { + +class Xml { + public: + typedef std::vector token_t; + struct level_t { + std::string tag; + std::vector nodes; + std::map opts; + }; + typedef std::vector nodes_vec_t; + + enum { + DECL_STATE, + BODY_STATE + }; + + Xml(const char* file_name) : + file_name_(file_name), + file_line_(0), + data_size_(0), + index_(0), + state_(BODY_STATE), + level_(NULL), + comment_(false) + { + AddLevel("top"); + + fd_ = open(file_name, O_RDONLY); + if (fd_ == -1) { + std::cout << "XML file not found: " << file_name << std::endl; + return; + } + + token_t remainder; + while (1) { + token_t token = (remainder.size()) ? remainder : NextToken(); + remainder.clear(); + // End of file + if (token.size() == 0) break; + +// token_t token1 = token; +// token1.push_back('\0'); +// std::cout << "> " << &token1[0] << std::endl; + + switch(state_) { + case BODY_STATE: + if (token[0] == '<') { + bool node_begin = true; + unsigned ind = 1; + if (token[1] == '/') { + node_begin = false; + ++ind; + } + + unsigned i = ind; + while (i < token.size()) { if (token[i] == '>') break; ++i; } + for (unsigned j = i + 1; j < token.size(); ++j) remainder.push_back(token[j]); + + if (i == token.size()) { + if (node_begin) state_ = DECL_STATE; + else BadFormat(token); + token.push_back('\0'); + } else token[i] = '\0'; + + const char* tag = strdup(&token[ind]); + if (node_begin) { + AddLevel(tag); + } else { + if (strncmp(CurrentLevel().c_str(), tag, strlen(tag))) { + token.back() = '>'; + BadFormat(token); + } + UpLevel(); + } + } else BadFormat(token); + break; + case DECL_STATE: + if (token[0] == '>') { + state_ = BODY_STATE; + for (unsigned j = 1; j < token.size(); ++j) remainder.push_back(token[j]); + continue; + } else { + token.push_back('\0'); + unsigned j = 0; + for (j = 0; j < token.size(); ++j) if (token[j] == '=') break; + if (j == token.size()) BadFormat(token); + token[j] = '\0'; + const char* key = &token[0]; + const char* value = &token[j + 1]; + AddOption(key, value); + } + break; + default: + std::cout << "Wrong state: " << state_ << std::endl; + exit(1); + } + } + } + + std::vector GetNodes(std::string global_tag) { + return map_[global_tag]; + } + + void Print() const { + for(auto& elem : map_) { + for (auto node : elem.second) { + if (node->opts.size()) { + std::cout << elem.first << ":" << std::endl; + for (auto& opt : node->opts) { + std::cout << " " << opt.first << " = " << opt.second << std::endl; + } + } + } + } + } + + private: + bool LineEndCheck() { + bool found = false; + if (buffer_[index_] == '\n') { + buffer_[index_] = ' '; + ++file_line_; + found = true; + comment_ = false; + } else if (comment_ || (buffer_[index_] == '#')) { + found = true; + comment_ = true; + } + return found; + } + + token_t NextToken() { + token_t token; + + while (1) { + if (data_size_ == 0) { + data_size_ = read(fd_, buffer_, buf_size_); + if (data_size_ <= 0) break; + } + if (token.empty()) while ((index_ < data_size_) && ((buffer_[index_] == ' ') || LineEndCheck())) { + ++index_; + } + while ((index_ < data_size_) && (buffer_[index_] != ' ') && !LineEndCheck()) { + token.push_back(buffer_[index_++]); + } + if (index_ == data_size_) { + index_ = 0; + data_size_ = 0; + } else break; + } + + return token; + } + + void BadFormat(token_t token) { + token.push_back('\0'); + std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '" << &token[0] << "'" << std::endl; + exit(1); + } + + void AddLevel(const std::string& tag) { + level_t* level = new level_t; + level->tag = tag; + if (level_) { + level_->nodes.push_back(level); + stack_.push_back(level_); + } + level_ = level; + + std::string global_tag; + for (level_t* level : stack_) { global_tag += level->tag + "."; } + global_tag += tag; + map_[global_tag].push_back(level_); + } + + void UpLevel() { + level_ = stack_.back(); + stack_.pop_back(); + } + + std::string CurrentLevel() const { + return level_->tag; + } + + void AddOption(const std::string& key, const std::string& value) { + level_->opts[key] = value; + } + + const char* file_name_; + unsigned file_line_; + int fd_; + static const unsigned buf_size_ = 256; + char buffer_[buf_size_]; + unsigned data_size_; + unsigned index_; + unsigned state_; + level_t* level_; + std::vector stack_; + std::map map_; + bool comment_; +}; + +} // namespace xml + +#endif // SRC_XML_XML_H_ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 0000000000..8dc2c57efa --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,41 @@ +cmake_minimum_required ( VERSION 3.5.0 ) +set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE ) + +set ( EXE_NAME "ctrl" ) + +if ( NOT DEFINED TEST_DIR ) + set ( TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR} ) + project ( ${EXE_NAME} ) + ## Set build environment + include ( env ) + set ( ROCPROFILER_TARGET "rocprofiler64" ) +endif () + +## Util sources +file( GLOB UTIL_SRC "${TEST_DIR}/util/*.cpp" ) + +## Test control sources +set ( CTRL_SRC + ${TEST_DIR}/ctrl/test.cpp + ${TEST_DIR}/ctrl/test_hsa.cpp +) + +## Test kernels sources +set ( TEST_NAME simple_convolution ) +set ( KERN_SRC ${TEST_DIR}/${TEST_NAME}/${TEST_NAME}.cpp ) +execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/${TEST_NAME}/*.hsaco ${PROJECT_BINARY_DIR}" ) + +## Building test executable +add_executable ( ${EXE_NAME} ${KERN_SRC} ${CTRL_SRC} ${UTIL_SRC} ) +target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ) +target_link_libraries( ${EXE_NAME} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} c stdc++ dl pthread rt atomic ) +execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" ) +execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/*.xml ${PROJECT_BINARY_DIR}" ) + +## Build test library +set ( TEST_LIB "tool" ) +set ( TEST_LIB_SRC ${TEST_DIR}/ctrl/tool.cpp ) +add_library ( ${TEST_LIB} SHARED ${TEST_LIB_SRC} ) +target_include_directories ( ${TEST_LIB} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ) +target_link_libraries( ${TEST_LIB} ${ROCPROFILER_TARGET} ${HSA_RUNTIME_LIB} c stdc++ dl pthread rt atomic ) + diff --git a/test/ctrl/run_kernel.h b/test/ctrl/run_kernel.h new file mode 100644 index 0000000000..4eed641056 --- /dev/null +++ b/test/ctrl/run_kernel.h @@ -0,0 +1,85 @@ +/****************************************************************************** + +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +*******************************************************************************/ + +#ifndef TEST_CTRL_RUN_KERNEL_H_ +#define TEST_CTRL_RUN_KERNEL_H_ + +#include "ctrl/test_hsa.h" +#include "util/test_assert.h" + +template bool RunKernel(int argc, char* argv[]) { + bool ret_val = false; + + // Create test kernel object + Kernel test_kernel; + TestAql* test_aql = new TestHsa(&test_kernel); + test_aql = new Test(test_aql); + TEST_ASSERT(test_aql != NULL); + if (test_aql == NULL) return 1; + + // Initialization of Hsa Runtime + ret_val = test_aql->Initialize(argc, argv); + if (ret_val == false) { + std::cerr << "Error in the test initialization" << std::endl; + // TEST_ASSERT(ret_val); + return false; + } + + // Setup Hsa resources needed for execution + ret_val = test_aql->Setup(); + if (ret_val == false) { + std::cerr << "Error in creating hsa resources" << std::endl; + TEST_ASSERT(ret_val); + return false; + } + + // Run test kernel + ret_val = test_aql->Run(); + if (ret_val == false) { + std::cerr << "Error in running the test kernel" << std::endl; + TEST_ASSERT(ret_val); + return false; + } + + // Verify the results of the execution + ret_val = test_aql->VerifyResults(); + if (ret_val) { + std::clog << "Test : Passed" << std::endl; + } else { + std::clog << "Test : Failed" << std::endl; + } + + // Print time taken by sample + test_aql->PrintTime(); + + test_aql->Cleanup(); + delete test_aql; + + return ret_val; +} + +#endif // TEST_CTRL_RUN_KERNEL_H_ diff --git a/test/ctrl/standalone_test.cpp b/test/ctrl/standalone_test.cpp new file mode 100644 index 0000000000..5f3d9147e6 --- /dev/null +++ b/test/ctrl/standalone_test.cpp @@ -0,0 +1,126 @@ +#include +#include +#include + +#include "ctrl/run_kernel.h" +#include "ctrl/test_aql.h" +#include "ctrl/test_hsa.h" +#include "inc/rocprofiler.h" +#include "simple_convolution/simple_convolution.h" +#include "util/test_assert.h" + +int main(int argc, char** argv) { + bool ret_val = false; + // HSA status + hsa_status_t status = HSA_STATUS_ERROR; + // Profiling context + rocprofiler_t* context = NULL; + // Profiling properties + rocprofiler_properties_t properties; + // Number of context invocation + uint32_t invocation = 0; + +#if 0 + // Profiling info objects + const unsigned info_count = 1; + rocprofiler_info_t info[info_count]; + // PMC events + memset(info, 0, sizeof(info)); + info[0].type = ROCPROFILER_TYPE_METRIC; + info[0].name = "SQ_WAVES"; +#else + // Profiling info objects + const unsigned info_count = 3; + rocprofiler_info_t info[info_count]; + // PMC events + memset(info, 0, sizeof(info)); + info[0].type = ROCPROFILER_TYPE_METRIC; + info[0].name = "SQ_WAVES"; + info[1].type = ROCPROFILER_TYPE_METRIC; + info[1].name = "SQ_ITEMS"; + // Tracing parameters + const unsigned parameter_count = 2; + rocprofiler_parameter_t parameters[parameter_count]; + info[2].name = "THREAD_TRACE"; + info[2].type = ROCPROFILER_TYPE_TRACE; + info[2].parameters = parameters; + info[2].parameter_count = parameter_count; + parameters[0].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK; + parameters[0].value = 0; + parameters[1].parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK; + parameters[1].value = 0; +#endif + + // Creating profiling context + properties = {}; + properties.queue_depth = 128; + status = rocprofiler_open(TestHsa::HsaAgentId(), info, info_count, &context, ROCPROFILER_MODE_STANDALONE|ROCPROFILER_MODE_OWNQUEUE, &properties); + TEST_STATUS(status == HSA_STATUS_SUCCESS); + + TestHsa::SetQueue(properties.queue); + + // Adding dispatch observer + status = rocprofiler_dispatch_observer(rocprofiler_dispatch_callback, context); + TEST_STATUS(status == HSA_STATUS_SUCCESS); + + // Querying the number of context invocation + status = rocprofiler_invocation(context, &invocation); + TEST_STATUS(status == HSA_STATUS_SUCCESS); + + // Dispatching profiled kernel n-times to collect all counter groups data + unsigned n = 0; + while(1) { + std::cout << "> " << n << "/" << invocation << std::endl; +#if 0 + status = rocprofiler_start(context); + TEST_STATUS(status == HSA_STATUS_SUCCESS); + ret_val = RunKernel(argc, argv); + status = rocprofiler_stop(context); + TEST_STATUS(status == HSA_STATUS_SUCCESS); +#else + ret_val = RunKernel(argc, argv); +#endif + status = rocprofiler_sample(context); + TEST_STATUS(status == HSA_STATUS_SUCCESS); + + for (rocprofiler_info_t* p = info; p < info + info_count; ++p) { + std::cout << (p - info) << ": " << p->name; + switch (p->data.kind) { + case ROCPROFILER_INT64: + std::cout << std::dec << " result64 (" << p->data.result64 << ")" << std::endl; + break; + case ROCPROFILER_BYTES: { + const char* ptr = reinterpret_cast(p->data.result_bytes.ptr); + uint64_t size = 0; + for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) { + size = *reinterpret_cast(ptr); + const char* data = ptr + sizeof(size); + std::cout << std::endl; + std::cout << std::hex << " data (" << (void*)data << ")" << std::endl; + std::cout << std::dec << " size (" << size << ")" << std::endl; + ptr = data + size; + } + break; + } + default: + std::cout << "result kind (" << p->data.kind << ")" << std::endl; + TEST_ASSERT(false); + } + } + + ++n; + if (n < invocation) { + status = rocprofiler_next(context); + TEST_STATUS(status == HSA_STATUS_SUCCESS); + continue; + } + break; + } + + // Finishing cleanup + // Deleting profiling context will delete all allocated resources + status = rocprofiler_close(context); + TEST_STATUS(status == HSA_STATUS_SUCCESS); + + return (ret_val) ? 0 : 1; +} diff --git a/test/ctrl/test.cpp b/test/ctrl/test.cpp new file mode 100644 index 0000000000..c9e8e71f23 --- /dev/null +++ b/test/ctrl/test.cpp @@ -0,0 +1,14 @@ +#include +#include +#include + +#include "ctrl/run_kernel.h" +#include "ctrl/test_aql.h" +#include "simple_convolution/simple_convolution.h" + +int main(int argc, char** argv) { + TestHsa::HsaInstantiate(); + for (int i = 0; i < 3; ++i) RunKernel(argc, argv); + TestHsa::HsaShutdown(); + return 0; +} diff --git a/test/ctrl/test_aql.h b/test/ctrl/test_aql.h new file mode 100644 index 0000000000..38909e6acd --- /dev/null +++ b/test/ctrl/test_aql.h @@ -0,0 +1,80 @@ +/****************************************************************************** + +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +*******************************************************************************/ + +#ifndef TEST_CTRL_TEST_AQL_H_ +#define TEST_CTRL_TEST_AQL_H_ + +#include +#include + +#include "util/hsa_rsrc_factory.h" + +// Test AQL interface +class TestAql { + public: + explicit TestAql(TestAql* t = 0) : test_(t) {} + virtual ~TestAql() { if (test_) delete test_; } + + TestAql* Test() { return test_; } + virtual AgentInfo* GetAgentInfo() { return (test_) ? test_->GetAgentInfo() : 0; } + virtual hsa_queue_t* GetQueue() { return (test_) ? test_->GetQueue() : 0; } + virtual HsaRsrcFactory* GetRsrcFactory() { return (test_) ? test_->GetRsrcFactory() : 0; } + + // Initialize application environment including setting + // up of various configuration parameters based on + // command line arguments + // @return bool true on success and false on failure + virtual bool Initialize(int argc, char** argv) { + return (test_) ? test_->Initialize(argc, argv) : true; + } + + // Setup application parameters for exectuion + // @return bool true on success and false on failure + virtual bool Setup() { return (test_) ? test_->Setup() : true; } + + // Run the kernel + // @return bool true on success and false on failure + virtual bool Run() { return (test_) ? test_->Run() : true; } + + // Verify results + // @return bool true on success and false on failure + virtual bool VerifyResults() { return (test_) ? test_->VerifyResults() : true; } + + // Print to console the time taken to execute kernel + virtual void PrintTime() { + if (test_) test_->PrintTime(); + } + + // Release resources e.g. memory allocations + // @return bool true on success and false on failure + virtual bool Cleanup() { return (test_) ? test_->Cleanup() : true; } + + private: + TestAql* const test_; +}; + +#endif // TEST_CTRL_TEST_AQL_H_ diff --git a/test/ctrl/test_hsa.cpp b/test/ctrl/test_hsa.cpp new file mode 100644 index 0000000000..39d606f405 --- /dev/null +++ b/test/ctrl/test_hsa.cpp @@ -0,0 +1,252 @@ +/****************************************************************************** + +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +*******************************************************************************/ + +#include "ctrl/test_hsa.h" + +#include + +#include "util/test_assert.h" +#include "util/helper_funcs.h" +#include "util/hsa_rsrc_factory.h" + +HsaRsrcFactory* TestHsa::hsa_rsrc_ = NULL; +AgentInfo* TestHsa::agent_info_ = NULL; +hsa_queue_t* TestHsa::hsa_queue_ = NULL; +uint32_t TestHsa::agent_id_ = 0; + +HsaRsrcFactory* TestHsa::HsaInstantiate(const uint32_t agent_ind) { + // Instantiate an instance of Hsa Resources Factory + if (hsa_rsrc_ == NULL) { + agent_id_ = agent_ind; + + hsa_rsrc_ = HsaRsrcFactory::Create(); + + // Print properties of the agents + hsa_rsrc_->PrintGpuAgents("> GPU agents"); + + // Create an instance of Gpu agent + if (!hsa_rsrc_->GetGpuAgentInfo(agent_ind, &agent_info_)) { + agent_info_ = NULL; + std::cerr << "> error: agent[" << agent_ind << "] is not found" << std::endl; + return NULL; + } + std::clog << "> Using agent[" << agent_ind << "] : " << agent_info_->name << std::endl; + + // Create an instance of Aql Queue + if (hsa_queue_ == NULL) { + uint32_t num_pkts = 128; + if(hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) { + hsa_queue_ = NULL; + } + } + } + return hsa_rsrc_; +} + +void TestHsa::HsaShutdown() { if (hsa_rsrc_) hsa_rsrc_->Destroy(); } + +bool TestHsa::Initialize(int arg_cnt, char** arg_list) { + std::clog << "TestHsa::Initialize :" << std::endl; + + // Instantiate a Timer object + setup_timer_idx_ = hsa_timer_.CreateTimer(); + dispatch_timer_idx_ = hsa_timer_.CreateTimer(); + + hsa_rsrc_ = HsaInstantiate(agent_id_); + if (hsa_rsrc_ == NULL) { + TEST_ASSERT(false); + return false; + } + + // Obtain handle of signal + hsa_rsrc_->CreateSignal(1, &hsa_signal_); + + // Obtain the code object file name + std::string agentName(agent_info_->name); + if (agentName.compare(0, 4, "gfx8") == 0) { + brig_path_obj_.append("gfx8"); + } else if (agentName.compare(0, 4, "gfx9") == 0) { + brig_path_obj_.append("gfx9"); + } else { + TEST_ASSERT(false); + return false; + } + brig_path_obj_.append("_" + name_ + ".hsaco"); + + return true; +} + +bool TestHsa::Setup() { + std::clog << "TestHsa::setup :" << std::endl; + + // Start the timer object + hsa_timer_.StartTimer(setup_timer_idx_); + + mem_map_t& mem_map = test_->GetMemMap(); + for (mem_it_t it = mem_map.begin(); it != mem_map.end(); ++it) { + mem_descr_t& des = it->second; + void* ptr = (des.local) ? hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size) + : hsa_rsrc_->AllocateSysMemory(agent_info_, des.size); + des.ptr = ptr; + TEST_ASSERT(ptr != NULL); + if (ptr == NULL) return false; + } + test_->Init(); + + // Load and Finalize Kernel Code Descriptor + char* brig_path = (char*)brig_path_obj_.c_str(); + const bool ret_val = + hsa_rsrc_->LoadAndFinalize(agent_info_, brig_path, strdup(name_.c_str()), &kernel_code_desc_); + if (ret_val == false) { + std::cerr << "Error in loading and finalizing Kernel" << std::endl; + return ret_val; + } + + // Stop the timer object + hsa_timer_.StopTimer(setup_timer_idx_); + setup_time_taken_ = hsa_timer_.ReadTimer(setup_timer_idx_); + total_time_taken_ = setup_time_taken_; + + return true; +} + +bool TestHsa::Run() { + std::clog << "TestHsa::run :" << std::endl; + + const uint32_t work_group_size = 64; + const uint32_t work_grid_size = test_->GetGridSize(); + uint32_t group_segment_size = 0; + uint32_t private_segment_size = 0; + const size_t kernarg_segment_size = test_->GetKernargSize(); + uint64_t code_handle = 0; + + // Retrieve the amount of group memory needed + hsa_executable_symbol_get_info( + kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &group_segment_size); + + // Retrieve the amount of private memory needed + hsa_executable_symbol_get_info(kernel_code_desc_, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, + &private_segment_size); + + // Check the kernel args size + size_t size_info = 0; + hsa_executable_symbol_get_info( + kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &size_info); + TEST_ASSERT(kernarg_segment_size == size_info); + if (kernarg_segment_size != size_info) return false; + + // Retrieve handle of the code block + hsa_executable_symbol_get_info(kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, + &code_handle); + + // Initialize the dispatch packet. + hsa_kernel_dispatch_packet_t aql; + memset(&aql, 0, sizeof(aql)); + // Set the packet's type, barrier bit, acquire and release fences + aql.header = HSA_PACKET_TYPE_KERNEL_DISPATCH; + aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE; + aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE; + // Populate Aql packet with default values + aql.setup = 1; + aql.grid_size_x = work_grid_size; + aql.grid_size_y = 1; + aql.grid_size_z = 1; + aql.workgroup_size_x = work_group_size; + aql.workgroup_size_y = 1; + aql.workgroup_size_z = 1; + // Bind the kernel code descriptor and arguments + aql.kernel_object = code_handle; + aql.kernarg_address = test_->GetKernargPtr(); + aql.group_segment_size = group_segment_size; + aql.private_segment_size = private_segment_size; + // Initialize Aql packet with handle of signal + aql.completion_signal = hsa_signal_; + + // Compute the write index of queue and copy Aql packet into it + const uint64_t que_idx = hsa_queue_load_write_index_relaxed(hsa_queue_); + const uint32_t mask = hsa_queue_->size - 1; + + std::clog << "> Executing kernel: \"" << name_ << "\"" << std::endl; + + // Start the timer object + hsa_timer_.StartTimer(dispatch_timer_idx_); + + // Disable packet so that submission to HW is complete + const auto header = aql.header; + aql.header = HSA_PACKET_TYPE_INVALID << HSA_PACKET_HEADER_TYPE; + + // Copy Aql packet into queue buffer + ((hsa_kernel_dispatch_packet_t*)(hsa_queue_->base_address))[que_idx & mask] = aql; + + // After AQL packet is fully copied into queue buffer + // update packet header from invalid state to valid state + std::atomic_thread_fence(std::memory_order_release); + ((hsa_kernel_dispatch_packet_t*)(hsa_queue_->base_address))[que_idx & mask].header = header; + + // Increment the write index and ring the doorbell to dispatch the kernel. + hsa_queue_store_write_index_relaxed(hsa_queue_, (que_idx + 1)); + hsa_signal_store_relaxed(hsa_queue_->doorbell_signal, que_idx); + + std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl; + + // Wait on the dispatch signal until the kernel is finished. + // Update wait condition to HSA_WAIT_STATE_ACTIVE for Polling + hsa_signal_wait_acquire(hsa_signal_, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1, + HSA_WAIT_STATE_BLOCKED); + + // Stop the timer object + hsa_timer_.StopTimer(dispatch_timer_idx_); + dispatch_time_taken_ = hsa_timer_.ReadTimer(dispatch_timer_idx_); + total_time_taken_ += dispatch_time_taken_; + + // Copy kernel buffers from local memory into system memory + hsa_rsrc_->TransferData(test_->GetOutputPtr(), test_->GetLocalPtr(), test_->GetOutputSize(), + false); + test_->PrintOutput(); + + return true; +} + +bool TestHsa::VerifyResults() { + // Compare the results and see if they match + const void* const refout_ptr = test_->GetRefoutPtr(); + const int32_t cmp_val = + (refout_ptr != NULL) ? memcmp(test_->GetOutputPtr(), refout_ptr, test_->GetOutputSize()) : 0; + return (cmp_val == 0); +} + +void TestHsa::PrintTime() { + std::clog << "Time taken for Setup by " << this->name_ << " : " << this->setup_time_taken_ + << std::endl; + std::clog << "Time taken for Dispatch by " << this->name_ << " : " << this->dispatch_time_taken_ + << std::endl; + std::clog << "Time taken in Total by " << this->name_ << " : " << this->total_time_taken_ + << std::endl; +} + +bool TestHsa::Cleanup() { return true; } diff --git a/test/ctrl/test_hsa.h b/test/ctrl/test_hsa.h new file mode 100644 index 0000000000..ceb33b4ca9 --- /dev/null +++ b/test/ctrl/test_hsa.h @@ -0,0 +1,125 @@ +/****************************************************************************** + +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +*******************************************************************************/ + +#ifndef TEST_CTRL_TEST_HSA_H_ +#define TEST_CTRL_TEST_HSA_H_ + +#include "ctrl/test_aql.h" +#include "ctrl/test_kernel.h" +#include "util/hsa_rsrc_factory.h" +#include "util/perf_timer.h" + +// Class implements HSA test +class TestHsa : public TestAql { + public: + // Instantiate HSA resources + static HsaRsrcFactory* HsaInstantiate(const uint32_t agent_ind = agent_id_); + static void HsaShutdown(); + static void SetQueue(hsa_queue_t* queue) { hsa_queue_ = queue; } + static uint32_t HsaAgentId() { return agent_id_; } + + // Constructor + explicit TestHsa(TestKernel* test) : test_(test), name_(test->Name()) { + total_time_taken_ = 0; + setup_time_taken_ = 0; + dispatch_time_taken_ = 0; + } + + // Get methods for Agent Info, HAS queue, HSA Resourcse Manager + AgentInfo* GetAgentInfo() { return agent_info_; } + hsa_queue_t* GetQueue() { return hsa_queue_; } + HsaRsrcFactory* GetRsrcFactory() { return hsa_rsrc_; } + + // Initialize application environment including setting + // up of various configuration parameters based on + // command line arguments + // @return bool true on success and false on failure + bool Initialize(int argc, char** argv); + + // Setup application parameters for exectuion + // @return bool true on success and false on failure + bool Setup(); + + // Run the BinarySearch kernel + // @return bool true on success and false on failure + bool Run(); + + // Verify against reference implementation + // @return bool true on success and false on failure + bool VerifyResults(); + + // Print to console the time taken to execute kernel + void PrintTime(); + + // Release resources e.g. memory allocations + // @return bool true on success and false on failure + bool Cleanup(); + + private: + typedef TestKernel::mem_descr_t mem_descr_t; + typedef TestKernel::mem_map_t mem_map_t; + typedef TestKernel::mem_it_t mem_it_t; + + // Test object + TestKernel* test_; + + // Path of Brig file + std::string brig_path_obj_; + + // Used to track time taken to run the sample + double total_time_taken_; + double setup_time_taken_; + double dispatch_time_taken_; + + // Handle of signal + hsa_signal_t hsa_signal_; + + // Handle of Kernel Code Descriptor + hsa_executable_symbol_t kernel_code_desc_; + + // Instance of timer object + uint32_t setup_timer_idx_; + uint32_t dispatch_timer_idx_; + PerfTimer hsa_timer_; + + // Instance of Hsa Resources Factory + static HsaRsrcFactory* hsa_rsrc_; + + // GPU id + static uint32_t agent_id_; + + // Handle to an Hsa Gpu Agent + static AgentInfo* agent_info_; + + // Handle to an Hsa Queue + static hsa_queue_t* hsa_queue_; + + // Test kernel name + std::string name_; +}; + +#endif // TEST_CTRL_TEST_HSA_H_ diff --git a/test/ctrl/test_kernel.h b/test/ctrl/test_kernel.h new file mode 100644 index 0000000000..5427ba3144 --- /dev/null +++ b/test/ctrl/test_kernel.h @@ -0,0 +1,107 @@ +/****************************************************************************** + +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +*******************************************************************************/ + +#ifndef TEST_CTRL_TEST_KERNEL_H_ +#define TEST_CTRL_TEST_KERNEL_H_ + +#include +#include + +// Class implements kernel test +class TestKernel { + public: + // Memory descriptors IDs + enum { INPUT_DES_ID, OUTPUT_DES_ID, LOCAL_DES_ID, MASK_DES_ID, KERNARG_DES_ID, REFOUT_DES_ID }; + + // Memory descriptors vector declaration + struct mem_descr_t { + void* ptr; + uint32_t size; + bool local; + }; + + // Memory map declaration + typedef std::map mem_map_t; + typedef mem_map_t::iterator mem_it_t; + typedef mem_map_t::const_iterator mem_const_it_t; + + virtual ~TestKernel() {} + + // Initialize method + virtual void Init() = 0; + + // Return kernel memory map + mem_map_t& GetMemMap() { return mem_map_; } + + // Return NULL descriptor + static mem_descr_t NullDescriptor() { return {NULL, 0, 0}; } + + // Methods to get the kernel attributes + void* GetKernargPtr() const { return GetDescr(KERNARG_DES_ID).ptr; } + uint32_t GetKernargSize() const { return GetDescr(KERNARG_DES_ID).size; } + void* GetOutputPtr() const { return GetDescr(OUTPUT_DES_ID).ptr; } + uint32_t GetOutputSize() const { return GetDescr(OUTPUT_DES_ID).size; } + void* GetLocalPtr() const { return GetDescr(LOCAL_DES_ID).ptr; } + void* GetRefoutPtr() const { return GetDescr(REFOUT_DES_ID).ptr; } + virtual uint32_t GetGridSize() const = 0; + + // Print output + virtual void PrintOutput() const = 0; + + // Return name + virtual std::string Name() const = 0; + + protected: + // Set system memory descriptor + bool SetSysDescr(const uint32_t& id, const uint32_t& size) { + return SetMemDescr(id, size, false); + } + + // Set local memory descriptor + bool SetLocalDescr(const uint32_t& id, const uint32_t& size) { + return SetMemDescr(id, size, true); + } + + // Get memory descriptor + mem_descr_t GetDescr(const uint32_t& id) const { + mem_const_it_t it = mem_map_.find(id); + return (it != mem_map_.end()) ? it->second : NullDescriptor(); + } + + private: + // Set memory descriptor + bool SetMemDescr(const uint32_t& id, const uint32_t& size, const bool& local) { + const mem_descr_t des = {NULL, size, local}; + auto ret = mem_map_.insert(mem_map_t::value_type(id, des)); + return ret.second; + } + + // Kernel memory map object + mem_map_t mem_map_; +}; + +#endif // TEST_CTRL_TEST_KERNEL_H_ diff --git a/test/ctrl/test_pgen.h b/test/ctrl/test_pgen.h new file mode 100644 index 0000000000..36e36bd2ef --- /dev/null +++ b/test/ctrl/test_pgen.h @@ -0,0 +1,45 @@ +/****************************************************************************** + +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +*******************************************************************************/ + +#ifndef TEST_CTRL_TEST_PGEN_H_ +#define TEST_CTRL_TEST_PGEN_H_ + +#include "ctrl/test_pmgr.h" + +// SimpleConvolution: Class implements OpenCL SimpleConvolution sample +class TestPGen : public TestPMgr { + protected: + typedef hsa_ext_amd_aql_pm4_packet_t packet_t; + + packet_t* PrePacket() { return reinterpret_cast(&pre_packet_); } + packet_t* PostPacket() { return reinterpret_cast(&post_packet_); } + + public: + explicit TestPGen(TestAql* t) : TestPMgr(t) {} +}; + +#endif // TEST_CTRL_TEST_PGEN_H_ diff --git a/test/ctrl/test_pgen_rocp.h b/test/ctrl/test_pgen_rocp.h new file mode 100644 index 0000000000..06ed2edfd7 --- /dev/null +++ b/test/ctrl/test_pgen_rocp.h @@ -0,0 +1,78 @@ +/****************************************************************************** + +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +*******************************************************************************/ + +#ifndef TEST_CTRL_TEST_PGEN_ROCP_H_ +#define TEST_CTRL_TEST_PGEN_ROCP_H_ + +#include +#include + +#include "ctrl/test_pgen.h" +#include "util/test_assert.h" + +hsa_status_t TestPGenRocpCallback(hsa_ven_amd_aqlprofile_info_type_t info_type, + hsa_ven_amd_aqlprofile_info_data_t* info_data, + void* callback_data) { + hsa_status_t status = HSA_STATUS_SUCCESS; + typedef std::vector passed_data_t; + reinterpret_cast(callback_data)->push_back(*info_data); + return status; +} + +// Class implements PMC profiling +class TestPGenRocp : public TestPGen { + public: + explicit TestPGenRocp(TestAql* t) : TestPGen(t) { std::clog << "Test: PGen ROCP" << std::endl; } + + bool Initialize(int /*arg_cnt*/, char** /*arg_list*/) { + status = rocprofiler_on_dispatch(&profile_, PrePacket(), PostPacket()); + TEST_STATUS(status != HSA_STATUS_SUCCESS); + return (status == HSA_STATUS_SUCCESS); + } + + private: + bool BuildPackets() { return true; } + + bool DumpData() { + std::clog << "TestPGenRocp::DumpData :" << std::endl; + + typedef std::vector callback_data_t; + + callback_data_t data; + api_.hsa_ven_amd_aqlprofile_iterate_data(&profile_, TestPGenRocpCallback, &data); + for (callback_data_t::iterator it = data.begin(); it != data.end(); ++it) { + std::cout << std::dec << "event(block(" << it->pmc_data.event.block_name << "_" + << it->pmc_data.event.block_index << "), id(" << it->pmc_data.event.counter_id + << ")), sample(" << it->sample_id << "), result(" << it->pmc_data.result << ")" + << std::endl; + } + + return true; + } +}; + +#endif // TEST_CTRL_TEST_PGEN_ROCP_H_ diff --git a/test/ctrl/test_pmgr.cpp b/test/ctrl/test_pmgr.cpp new file mode 100644 index 0000000000..87fba5676f --- /dev/null +++ b/test/ctrl/test_pmgr.cpp @@ -0,0 +1,144 @@ +/****************************************************************************** + +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +*******************************************************************************/ + +#include "ctrl/test_pmgr.h" + +#include + +#include "ctrl/test_assert.h" + +bool TestPMgr::AddPacketGfx9(const packet_t* packet) { + packet_t aql_packet = *packet; + + // Compute the write index of queue and copy Aql packet into it + uint64_t que_idx = hsa_queue_load_write_index_relaxed(GetQueue()); + const uint32_t mask = GetQueue()->size - 1; + packet_t* slot = (reinterpret_cast(GetQueue()->base_address)) + (que_idx & mask); + + // Disable packet so that submission to HW is complete + const auto header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE; + aql_packet.header &= (~((1ul << HSA_PACKET_HEADER_WIDTH_TYPE) - 1)) << HSA_PACKET_HEADER_TYPE; + aql_packet.header |= HSA_PACKET_TYPE_INVALID << HSA_PACKET_HEADER_TYPE; + + // Copy Aql packet into queue buffer + *slot = aql_packet; + // After AQL packet is fully copied into queue buffer + // update packet header from invalid state to valid state + auto header_atomic_ptr = + reinterpret_cast*>(&slot->header); + header_atomic_ptr->store(header, std::memory_order_release); + + // Increment the write index and ring the doorbell to dispatch the kernel. + hsa_queue_store_write_index_relaxed(GetQueue(), (que_idx + 1)); + hsa_signal_store_relaxed(GetQueue()->doorbell_signal, que_idx); + + return true; +} + +bool TestPMgr::AddPacketGfx8(const packet_t* packet) { + // Create legacy devices PM4 data + const hsa_ext_amd_aql_pm4_packet_t* aql_packet = (const hsa_ext_amd_aql_pm4_packet_t*)packet; + slot_pm4_t data; + api_.hsa_ven_amd_aqlprofile_legacy_get_pm4(aql_packet, reinterpret_cast(data.words)); + + // Compute the write index of queue and copy Aql packet into it + uint64_t que_idx = hsa_queue_load_write_index_relaxed(GetQueue()); + const uint32_t mask = GetQueue()->size - 1; + + // Copy Aql/Pm4 blob into queue buffer + packet_t* ptr = (reinterpret_cast(GetQueue()->base_address)) + (que_idx & mask); + slot_pm4_t* slot = reinterpret_cast(ptr); + for (unsigned i = 1; i < SLOT_PM4_SIZE_DW; ++i) { + slot->words[i] = data.words[i]; + } + // To maintain global order to ensure the prior copy of the packet contents is made visible + // before the header is updated. + // With in-order CP it will wait until the first packet in the blob will be valid + std::atomic* header_atomic_ptr = + reinterpret_cast*>(&slot->words[0]); + header_atomic_ptr->store(data.words[0], std::memory_order_release); + + // Increment the write index and ring the doorbell to dispatch the kernel. + que_idx += SLOT_PM4_SIZE_AQLP - 1; + hsa_queue_store_write_index_relaxed(GetQueue(), (que_idx + 1)); + hsa_signal_store_relaxed(GetQueue()->doorbell_signal, que_idx); + + return true; +} + +bool TestPMgr::AddPacket(const packet_t* packet) { + const char* agent_name = GetAgentInfo()->name; + return (strncmp(agent_name, "gfx8", 4) == 0) ? AddPacketGfx8(packet) : AddPacketGfx9(packet); +} + +bool TestPMgr::Run() { + // Build Aql Pkts + const bool active = BuildPackets(); + if (active) { + // Submit Pre-Dispatch Aql packet + AddPacket(&pre_packet_); + } + + Test()->Run(); + + if (active) { + // Set post packet completion signal + post_packet_.completion_signal = post_signal_; + + // Submit Post-Dispatch Aql packet + AddPacket(&post_packet_); + + // Wait for Post-Dispatch packet to complete + hsa_signal_wait_acquire(post_signal_, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1, + HSA_WAIT_STATE_BLOCKED); + + // Dumping profiling data + DumpData(); + } + + return true; +} + +bool TestPMgr::Initialize(int argc, char** argv) { + TestAql::Initialize(argc, argv); + + hsa_status_t status = HSA_STATUS_ERROR; + status = hsa_signal_create(1, 0, NULL, &post_signal_); + TEST_ASSERT(status == HSA_STATUS_SUCCESS); + status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &api_); + TEST_ASSERT(status == HSA_STATUS_SUCCESS); + + return true; +} + +TestPMgr::TestPMgr(TestAql* t) : TestAql(t), api_({0}) { + memset(&pre_packet_, 0, sizeof(pre_packet_)); + memset(&post_packet_, 0, sizeof(post_packet_)); + dummy_signal_.handle = 0; + post_signal_ = dummy_signal_; + memset(&api_, 0, sizeof(api_)); +} diff --git a/test/ctrl/test_pmgr.h b/test/ctrl/test_pmgr.h new file mode 100644 index 0000000000..3998dc1f03 --- /dev/null +++ b/test/ctrl/test_pmgr.h @@ -0,0 +1,70 @@ +/****************************************************************************** + +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +*******************************************************************************/ + +#ifndef TEST_CTRL_TEST_PMGR_H_ +#define TEST_CTRL_TEST_PMGR_H_ + +#include +#include +#include + +#include "ctrl/test_aql.h" + +// Class implements profiling manager +class TestPMgr : public TestAql { + public: + typedef hsa_ext_amd_aql_pm4_packet_t packet_t; + explicit TestPMgr(TestAql* t); + bool Run(); + + protected: + packet_t pre_packet_; + packet_t post_packet_; + hsa_signal_t dummy_signal_; + hsa_signal_t post_signal_; + + hsa_ven_amd_aqlprofile_1_00_pfn_t api_; + + virtual bool BuildPackets() { return false; } + virtual bool DumpData() { return false; } + virtual bool Initialize(int argc, char** argv); + + private: + enum { + SLOT_PM4_SIZE_DW = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(uint32_t), + SLOT_PM4_SIZE_AQLP = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t) + }; + struct slot_pm4_t { + uint32_t words[SLOT_PM4_SIZE_DW]; + }; + + bool AddPacket(const packet_t* packet); + bool AddPacketGfx8(const packet_t* packet); + bool AddPacketGfx9(const packet_t* packet); +}; + +#endif // TEST_CTRL_TEST_PMGR_H_ diff --git a/test/ctrl/thr_tool.cpp b/test/ctrl/thr_tool.cpp new file mode 100644 index 0000000000..fa2f2abfa5 --- /dev/null +++ b/test/ctrl/thr_tool.cpp @@ -0,0 +1,297 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "inc/rocprofiler.h" +#include "util/xml.h" + +#define PUBLIC_API __attribute__((visibility("default"))) +#define CONSTRUCTOR_API __attribute__((constructor)) +#define DESTRUCTOR_API __attribute__((destructor)) + +// Tool thread +pthread_t thread; +pthread_attr_t thr_attr; +bool thr_stop = false; + +struct dispatch_data_t { + rocprofiler_info_t* info; + unsigned info_count; + unsigned group_index; +}; + +struct context_entry_t { + rocprofiler_group_t* group; + rocprofiler_info_t* info; + unsigned info_count; + rocprofiler_callback_data_t data; +}; + +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +unsigned context_array_size = 1; +context_entry_t* context_array = NULL; +unsigned context_array_index = 0; + +const char* file_name; +FILE* file_handle = NULL; + +void check_status(hsa_status_t status) { + if (status != HSA_STATUS_SUCCESS) { + const char* error_string = NULL; + rocprofiler_error_string(&error_string); + fprintf(stderr, "ERROR: %s\n", error_string); + exit(1); + } +} + +unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); } + +void print_info(FILE* file, const rocprofiler_info_t* info, const unsigned info_count, const char* str) { + if (str) fprintf(file, "%s:\n", str); + for (unsigned i= 0; i < info_count; ++i) { + const rocprofiler_info_t* p = &info[i]; + fprintf(file, " %s ", p->name); + switch (p->data.kind) { + case ROCPROFILER_INT64: + fprintf(file, "(%lu)\n", p->data.result64); + break; + case ROCPROFILER_BYTES: { + fprintf(file, "(\n"); + const char* ptr = reinterpret_cast(p->data.result_bytes.ptr); + uint64_t size = 0; + for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) { + size = *reinterpret_cast(ptr); + const char* data = ptr + sizeof(size); + fprintf(file, " data (%p), size (%lu)\n", data, size); + size = align_size(size, sizeof(uint64_t)); + ptr = data + size; + } + fprintf(file, " )\n"); + break; + } + default: + std::cout << "Bad result kind (" << p->data.kind << ")" << std::endl; + } + } +} + +void print_group(FILE* file, const rocprofiler_group_t* group, const char* str) { + if (str) fprintf(file, "%s:\n", str); + for (unsigned i= 0; i < group->info_count; ++i) { + print_info(file, group->info[i], 1, NULL); + } +} + +void store_context(context_entry_t context_entry) { + if(pthread_mutex_lock(&mutex) != 0) { + perror("pthread_mutex_lock"); + exit(1); + } + if ((context_array == NULL) || (context_array_index >= context_array_size)) { + context_array_size *= 2; + context_array = reinterpret_cast(realloc(context_array, context_array_size * sizeof(context_entry_t))); + } + context_array_index += 1; + context_array[context_array_index - 1] = context_entry; + if(pthread_mutex_unlock(&mutex) != 0) { + perror("pthread_mutex_unlock"); + exit(1); + } +} + +void dump_context(FILE *file, unsigned index) { + hsa_status_t status = HSA_STATUS_ERROR; + + if (pthread_mutex_lock(&mutex) != 0) { + perror("pthread_mutex_lock"); + exit(1); + } + context_entry_t* entry = &context_array[index]; + rocprofiler_group_t* group = entry->group; + const rocprofiler_info_t* info = entry->info; + const unsigned info_count = entry->info_count; + fprintf(file, "Dispatch[%u], kernel_object(0x%lx):\n", index, entry->data.kernel_object); + if (pthread_mutex_unlock(&mutex) != 0) { + perror("pthread_mutex_unlock"); + exit(1); + } + + status = rocprofiler_get_group_data(group); + check_status(status); + //print_group(file, group, "Group[0] data"); + + status = rocprofiler_get_metrics_data(group->context); + check_status(status); + print_info(file, info, info_count, NULL); + + // Finishing cleanup + // Deleting profiling context will delete all allocated resources + rocprofiler_close(group->context); +} + +// Provided standard profiling callback +hsa_status_t dispatch_callback( + const rocprofiler_callback_data_t* callback_data, + void* user_data, + rocprofiler_group_t** group) { + hsa_status_t status = HSA_STATUS_ERROR; + // Passed tool data + dispatch_data_t* tool_data = reinterpret_cast(user_data); + // Profiling context + rocprofiler_t* context = NULL; + + // Open profiling context + status = rocprofiler_open(0, tool_data->info, tool_data->info_count, &context, 0, NULL); + check_status(status); + + rocprofiler_group_t* groups = NULL; + uint32_t group_count = 0; + status = rocprofiler_get_groups(context, &groups, &group_count); + check_status(status); + assert(group_count == 1); + + *group = &groups[0]; + store_context({*group, tool_data->info, tool_data->info_count, *callback_data}); + + return status; +} + +void* dumping_data(void*) { + unsigned index = 0; + do { + while (index < context_array_index) { + dump_context(file_handle, index); + ++index; + } + } while (!thr_stop); + return NULL; +} + +CONSTRUCTOR_API void constructor() { + std::map parameters_dict; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2; + +#ifdef TOOL_THREAD + int err = pthread_attr_init(&thr_attr); + if (err) { errno = err; perror("pthread_attr_init"); exit(1); } + err = pthread_create(&thread, &thr_attr, dumping_data, NULL); + if (err) { errno = err; perror("pthread_create"); exit(1); } +#endif + + // Set output file + file_name = getenv("ROCP_OUTPUT"); + if (file_name != NULL) { + file_handle = fopen(file_name, "w"); + if (file_handle == NULL) { + perror("fopen"); + exit(1); + } + } else file_handle = stdout; + + // Getting input + const char* xml_name = getenv("ROCP_INPUT"); + if (xml_name == NULL) { + fprintf(stderr, "ROCProfiler: input is not specified, ROCP_INPUT env"); + exit(1); + } + printf("ROCProfiler: input from \"%s\"\n", xml_name); + xml::Xml* xml = new xml::Xml(xml_name); + + // Getting metrics + auto metrics_list = xml->GetNodes("top.metric"); + std::vector metrics_vec; + for (auto* entry : metrics_list) { + const std::string entry_str = entry->opts["name"]; + size_t pos1 = 0; + while(pos1 < entry_str.length()) { + const size_t pos2 = entry_str.find(",", pos1); + const std::string metric_name = entry_str.substr(pos1, pos2 - pos1); + metrics_vec.push_back(metric_name); + if (pos2 == std::string::npos) break; + pos1 = pos2 + 1; + } + } + + // Getting traces + auto traces_list = xml->GetNodes("top.trace"); + + const unsigned info_count = metrics_vec.size() + traces_list.size(); + rocprofiler_info_t* info= new rocprofiler_info_t[info_count]; + memset(info, 0, info_count * sizeof(rocprofiler_info_t)); + + printf(" %d metrics\n", (int) metrics_vec.size()); + for (unsigned i = 0; i < metrics_vec.size(); ++i) { + const std::string& name = metrics_vec[i]; + printf("%s%s", (i == 0) ? " " : ", ", name.c_str()); + info[i] = {}; + info[i].type = ROCPROFILER_TYPE_METRIC; + info[i].name = strdup(name.c_str()); + } + if (metrics_vec.size()) printf("\n"); + + printf(" %d traces\n", (int) traces_list.size()); + unsigned index = metrics_vec.size(); + for (auto* entry : traces_list) { + auto params_list = xml->GetNodes("top.trace.parameters"); + if (params_list.size() != 1) { + fprintf(stderr, "ROCProfiler: Single input 'parameters' section is supported\n"); + exit(1); + } + const std::string& name = entry->opts["name"]; + printf(" %s (\n", name.c_str()); + info[index] = {}; + info[index].type = ROCPROFILER_TYPE_TRACE; + info[index].name = strdup(name.c_str()); + + for (auto* params : params_list) { + const unsigned parameter_count = params->opts.size(); + rocprofiler_parameter_t *parameters = new rocprofiler_parameter_t[parameter_count]; + unsigned p_index = 0; + for (auto& v : params->opts) { + const std::string parameter_name = v.first; + if (parameters_dict.find(parameter_name) == parameters_dict.end()) { + fprintf(stderr, "ROCProfiler: unknown trace parameter %s\n", parameter_name.c_str()); + exit(1); + } + const uint32_t value = strtol(v.second.c_str(), NULL, 0); + printf(" %s = 0x%x\n", parameter_name.c_str(), value); + parameters[p_index] = {}; + parameters[p_index].parameter_name = parameters_dict[parameter_name]; + parameters[p_index].value = value; + ++p_index; + } + + info[index].parameters = parameters; + info[index].parameter_count = parameter_count; + } + printf(" )\n"); + ++index; + } + + if (info_count) { + // Adding dispatch observer + dispatch_data_t* dispatch_data = new dispatch_data_t{}; + dispatch_data->info = info; + dispatch_data->info_count = info_count; + dispatch_data->group_index = 0; + rocprofiler_dispatch_observer(dispatch_callback, dispatch_data); + } +} + +DESTRUCTOR_API void destructor() { + printf("\nROCPRofiler: %u contexts collected", context_array_index); + thr_stop = true; +#ifdef TOOL_THREAD + pthread_join(thread, NULL); +#else + dumping_data(NULL); +#endif +} diff --git a/test/ctrl/tool.cpp b/test/ctrl/tool.cpp new file mode 100644 index 0000000000..570a738916 --- /dev/null +++ b/test/ctrl/tool.cpp @@ -0,0 +1,313 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "inc/rocprofiler.h" +#include "util/xml.h" + +#define PUBLIC_API __attribute__((visibility("default"))) +#define CONSTRUCTOR_API __attribute__((constructor)) +#define DESTRUCTOR_API __attribute__((destructor)) + +struct dispatch_data_t { + rocprofiler_info_t* info; + unsigned info_count; + unsigned group_index; +}; + +struct context_entry_t { + rocprofiler_group_t* group; + rocprofiler_info_t* info; + unsigned info_count; + rocprofiler_callback_data_t data; +}; + +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +unsigned context_array_size = 1; +context_entry_t* context_array = NULL; +unsigned context_array_index = 0; +unsigned dump_index = 0; + +const char* file_name = NULL; +FILE* file_handle = NULL; + +void check_status(hsa_status_t status) { + if (status != HSA_STATUS_SUCCESS) { + const char* error_string = NULL; + rocprofiler_error_string(&error_string); + fprintf(stderr, "ERROR: %s\n", error_string); + exit(1); + } +} + +hsa_status_t trace_data_cb( + hsa_ven_amd_aqlprofile_info_type_t info_type, + hsa_ven_amd_aqlprofile_info_data_t* info_data, + void* data) +{ + hsa_status_t status = HSA_STATUS_SUCCESS; + if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) { + printf(" data ptr (%p), size(%u)\n", info_data->sqtt_data.ptr, info_data->sqtt_data.size); + } else status = HSA_STATUS_ERROR; + return status; +} + +unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); } + +void print_info(FILE* file, const rocprofiler_info_t* info, const unsigned info_count, rocprofiler_t* context, const char* str) { + if (str) fprintf(file, "%s:\n", str); + for (unsigned i= 0; i < info_count; ++i) { + const rocprofiler_info_t* p = &info[i]; + fprintf(file, " %s ", p->name); + switch (p->data.kind) { + case ROCPROFILER_INT64: + fprintf(file, "(%lu)\n", p->data.result_int64); + break; + case ROCPROFILER_BYTES: { + fprintf(file, "(\n"); + if (p->data.result_bytes.copy) { + fprintf(file, " system memory copy\n"); + const char* ptr = reinterpret_cast(p->data.result_bytes.ptr); + uint64_t size = 0; + for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) { + size = *reinterpret_cast(ptr); + const char* data = ptr + sizeof(size); + fprintf(file, " data (%p), size (%lu)\n", data, size); + size = align_size(size, sizeof(uint64_t)); + ptr = data + size; + } + } else { + fprintf(file, " local memory buffer\n"); + rocprofiler_iterate_trace_data(context, trace_data_cb, NULL); + } + fprintf(file, " )\n"); + break; + } + default: + std::cout << "Bad result kind (" << p->data.kind << ")" << std::endl; + } + } +} + +void print_group(FILE* file, const rocprofiler_group_t* group, const char* str) { + if (str) fprintf(file, "%s:\n", str); + for (unsigned i= 0; i < group->info_count; ++i) { + print_info(file, group->info[i], 1, group->context, NULL); + } +} + +void store_entry(const context_entry_t& context_entry) { + if(pthread_mutex_lock(&mutex) != 0) { + perror("pthread_mutex_lock"); + exit(1); + } + + if ((context_array == NULL) || (context_array_index >= context_array_size)) { + context_array_size *= 2; + context_array = reinterpret_cast(realloc(context_array, context_array_size * sizeof(context_entry_t))); + } + context_array[context_array_index] = context_entry; + context_array_index += 1; + + if (pthread_mutex_unlock(&mutex) != 0) { + perror("pthread_mutex_unlock"); + exit(1); + } +} + +void dump_context(FILE *file, context_entry_t* entry, unsigned index) { + hsa_status_t status = HSA_STATUS_ERROR; + + rocprofiler_group_t* group = entry->group; + const rocprofiler_info_t* info = entry->info; + const unsigned info_count = entry->info_count; + fprintf(file, "Dispatch[%u], kernel_object(0x%lx):\n", index, entry->data.kernel_object); + + status = rocprofiler_get_group_data(group); + check_status(status); + //print_group(file, group, "Group[0] data"); + + status = rocprofiler_get_metrics_data(group->context); + check_status(status); + print_info(file, info, info_count, group->context, NULL); + + // Finishing cleanup + // Deleting profiling context will delete all allocated resources + rocprofiler_close(group->context); + + dump_index = index; +} + +void dumping_data() { + if (pthread_mutex_lock(&mutex) != 0) { + perror("pthread_mutex_lock"); + exit(1); + } + + for (unsigned index = 0; index < context_array_index; ++index) { + dump_context(file_handle, &context_array[index], index); + } + + if (pthread_mutex_unlock(&mutex) != 0) { + perror("pthread_mutex_unlock"); + exit(1); + } +} + +// profiling callback +hsa_status_t dispatch_callback( + const rocprofiler_callback_data_t* callback_data, + void* user_data, + rocprofiler_group_t** group) { + hsa_status_t status = HSA_STATUS_ERROR; + // Passed tool data + dispatch_data_t* tool_data = reinterpret_cast(user_data); + // Profiling context + rocprofiler_t* context = NULL; + // context properties + rocprofiler_properties_t properties{}; + + // Open profiling context + status = rocprofiler_open(0, tool_data->info, tool_data->info_count, &context, 0, &properties); + check_status(status); + + rocprofiler_group_t* groups = NULL; + uint32_t group_count = 0; + status = rocprofiler_get_groups(context, &groups, &group_count); + check_status(status); + assert(group_count == 1); + + *group = &groups[0]; + context_entry_t entry; + entry.group = *group; + entry.info = tool_data->info; + entry.info_count = tool_data->info_count; + entry.data = *callback_data; + store_entry(entry); + + return status; +} + +CONSTRUCTOR_API void constructor() { + std::map parameters_dict; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK; + parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2; + + // Set output file + file_name = getenv("ROCP_OUTPUT"); + if (file_name != NULL) { + file_handle = fopen(file_name, "w"); + if (file_handle == NULL) { + perror("fopen"); + exit(1); + } + } else file_handle = stdout; + + // Getting input + const char* xml_name = getenv("ROCP_INPUT"); + if (xml_name == NULL) { + fprintf(stderr, "ROCProfiler: input is not specified, ROCP_INPUT env"); + exit(1); + } + printf("ROCProfiler: input from \"%s\"\n", xml_name); + xml::Xml* xml = new xml::Xml(xml_name); + + // Getting metrics + auto metrics_list = xml->GetNodes("top.metric"); + std::vector metrics_vec; + for (auto* entry : metrics_list) { + const std::string entry_str = entry->opts["name"]; + size_t pos1 = 0; + while(pos1 < entry_str.length()) { + const size_t pos2 = entry_str.find(",", pos1); + const std::string metric_name = entry_str.substr(pos1, pos2 - pos1); + metrics_vec.push_back(metric_name); + if (pos2 == std::string::npos) break; + pos1 = pos2 + 1; + } + } + + // Getting traces + auto traces_list = xml->GetNodes("top.trace"); + + const unsigned info_count = metrics_vec.size() + traces_list.size(); + rocprofiler_info_t* info= new rocprofiler_info_t[info_count]; + memset(info, 0, info_count * sizeof(rocprofiler_info_t)); + + printf(" %d metrics\n", (int) metrics_vec.size()); + for (unsigned i = 0; i < metrics_vec.size(); ++i) { + const std::string& name = metrics_vec[i]; + printf("%s%s", (i == 0) ? " " : ", ", name.c_str()); + info[i] = {}; + info[i].type = ROCPROFILER_TYPE_METRIC; + info[i].name = strdup(name.c_str()); + } + if (metrics_vec.size()) printf("\n"); + + printf(" %d traces\n", (int) traces_list.size()); + unsigned index = metrics_vec.size(); + for (auto* entry : traces_list) { + auto params_list = xml->GetNodes("top.trace.parameters"); + if (params_list.size() != 1) { + fprintf(stderr, "ROCProfiler: Single input 'parameters' section is supported\n"); + exit(1); + } + const std::string& name = entry->opts["name"]; + const bool to_copy_data = (entry->opts["copy"] == "true"); + printf(" %s (\n", name.c_str()); + info[index] = {}; + info[index].type = ROCPROFILER_TYPE_TRACE; + info[index].name = strdup(name.c_str()); + info[index].data.result_bytes.copy = to_copy_data; + + for (auto* params : params_list) { + const unsigned parameter_count = params->opts.size(); + rocprofiler_parameter_t *parameters = new rocprofiler_parameter_t[parameter_count]; + unsigned p_index = 0; + for (auto& v : params->opts) { + const std::string parameter_name = v.first; + if (parameters_dict.find(parameter_name) == parameters_dict.end()) { + fprintf(stderr, "ROCProfiler: unknown trace parameter %s\n", parameter_name.c_str()); + exit(1); + } + const uint32_t value = strtol(v.second.c_str(), NULL, 0); + printf(" %s = 0x%x\n", parameter_name.c_str(), value); + parameters[p_index] = {}; + parameters[p_index].parameter_name = parameters_dict[parameter_name]; + parameters[p_index].value = value; + ++p_index; + } + + info[index].parameters = parameters; + info[index].parameter_count = parameter_count; + } + printf(" )\n"); + ++index; + } + + if (info_count) { + // Adding dispatch observer + dispatch_data_t* dispatch_data = new dispatch_data_t{}; + dispatch_data->info = info; + dispatch_data->info_count = info_count; + dispatch_data->group_index = 0; + rocprofiler_set_dispatch_observer(dispatch_callback, dispatch_data); + } +} + +DESTRUCTOR_API void destructor() { + printf("\nROCPRofiler: %u contexts collected", context_array_index); + if (file_name == NULL) { + printf("\n"); + } else { + printf(", dumping to %s\n", file_name); + } + dumping_data(); +} diff --git a/test/input.xml b/test/input.xml new file mode 100644 index 0000000000..9a51eb2b82 --- /dev/null +++ b/test/input.xml @@ -0,0 +1,7 @@ + + + + diff --git a/test/metrics.xml b/test/metrics.xml new file mode 100644 index 0000000000..87c5d12f0f --- /dev/null +++ b/test/metrics.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/test/run.sh b/test/run.sh new file mode 100755 index 0000000000..f1db179397 --- /dev/null +++ b/test/run.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +tbin=./test/ctrl + +#export HSA_LIB=/home/evgeny/pkg/compute-psdb-16453/lib +export HSA_LIB=/home/evgeny/git/compute/out/ubuntu-16.04/16.04/lib +export OCL_LIB=/home/evgeny/pkg/opencl_modified/opencl_x86_64/lib +#export OCL_LIB=/home/evgeny/Perforce/eshcherb_opencl/drivers/opencl/dist/linux/debug/lib/x86_64 + +export LD_LIBRARY_PATH=$PWD:$HSA_LIB:$OCL_LIB +export ROCPROFILER_LOG=1 + +export HSA_TOOLS_LIB=librocprofiler64.so +export ROCP_TOOL_LIB=test/libtool.so +export ROCP_HSA_INTERCEPT=1 +export ROCP_METRICS=metrics.xml +export ROCP_INPUT=input.xml +unset ROCP_PROXY_QUEUE + +echo "Run simple profiling test" +if [ -n "$1" ] ; then + eval "$*" +else + eval $tbin +fi + +exit 0 diff --git a/test/simple_convolution/gfx8_SimpleConvolution.hsaco b/test/simple_convolution/gfx8_SimpleConvolution.hsaco new file mode 100644 index 0000000000000000000000000000000000000000..831484c2267528bc7ff55923b2294aab223b1cef GIT binary patch literal 9392 zcmeGiO>YxN^zC?e?W`RqgfvZQDT_)=RS_#Ju8B}9B_wS?1QH0JMHMmeCK&ux$Bvpx zS)A~ZAR#~qA&$LPs?QJLZU<2(bU+B(Fw}vLW0lhczf&OO3RvLA03;5% zbRv`9^WghB!I40h!g+J=9<$y2+&E$vb9QE|$}vo{ z%{-RO+Eo%N4W_e&jNP5jjpsAv5}HCW)9g#;M$5_3s%U?~&UN>h-RroX^Yf-9oRV%= zadeGWByb&aKA8ThYLP@cTox*2(xYo;v6gi@nJL>x(z%q3yEl`+n9Lk3kBrzw&j6P? zIZ^N&oZ4r3%&y_#QjsJ({(1CGp!F zZ+DqzlH>MBF`p|PD`y9L2Tq`}B}?#3zIY{Fva`rKEm(W^Z)(&1QJ2fmTg;aWuDcr6 zKrua@EZLhgy;$V)BVoj1F?tFm;z3J=lEi57_%ujn&-uu-qw7`}%BRn6fs=kV_j`%Mb zsMn#QRg`I9%u$%?033RPDPRFbUueY`uQ7eWL>vg{3pB@Tpf4bT3d&!F+w*EfcZ|m{ z1yj+xD&jz|?f?rptGbRGfl7a%L3cE`xg5az3>|K84DiEZ#b8j07;)SQEa_o}JA#|b zS`cwCVrfA=fh(~TsAo`UxINEMFQnq`Km#iRU@@|%Fx)TPSZ4g5*lY6rxcz5sm7Wsy~_}{b*G1D`Y3A z0}DQ!ylhtH9GIQW!Yt06Ig}T1FHo7khB>2cjnd1URmS-Il?LXh7_WkHY8WqJ?mL9@ zH$ri|XIRqe&_2qj0mK2rLY%-EEGd{5g>-SQVV>jgUDdTQ#SH5)i&y|SchXlv2{rU^ z5@X@>B)gUAZUVfN zW2&jistZ4wyxequ<_nA=THoSOeHgE^Jxs>5dX)7)#*^5dfy++?eBH-f7a!@}NLA2qFKir3j1bjUzlc0I=$^wiMBP=kRQ z`WvqNFE#WXF8x^zeLv9|X3gXRUh^*YHoMn{ST0|(A(ooRjZI`Bb}2b_NsLNGh?VRw zOMH~friURmoX^rL4~U^?DR~j`rLj^`^aO^32iq^~<*mireY}nHHo@Bt*!areLeIHl zT}Kahd*Qxtht`8~&yR;>MRr^)|6f2X0{K4!F%r5|V^U4loy4*lNRNDpy9EGQj}1tp znw0vo?j)9Sk}v&BjK^{Qp9aa7buDqta`PBa#e?nGqqyYvM!v_9*p!Z>1_`f$cd$>o zc)ugST#tv`p)@1sPxmSWd+2O;HRk}vL$0w$=!r@}5-U@uKvhhirG9ml%48uf|)K7K+T zU4}5Ob_GZlKRx}YUD;I(TJvOET3WO-R&gwy&mA-bTrl?SHCl~tw4+urXJy7J9L+FV zjN_@SRUx6$KssB zCJMfT)B8=I*)cS9qMXVI1BTIUjikz%Qbk~RxHwQq4Ov3W_2Ki$@VeCPDW`L#&-Qo% z!PICq5h@G%nyYr`?XKELCZBo>%hox;r|Xx?zbnQK#YVLziusL|+PfI@;q>sZm2Bl0TA4x49kJD4|)j-eI?X-jO_Ag?eLXT_uh<$)>VZ(FK=LDS~13?CY z3b#?NxZ6R=UsxTh@&5d??H6D#AMA|70NF z07r48X<*Ejm}~S$#Z7`nvA&9kLTvfrTGU zUNM|G8)j#7FpG0%Hs!^A=i$s>$DGl&M(SbqDr0>9QXR8pj90=q6^xfK_Z`9cYmo%L zXIN5d&_2q@A;clgM4ZGKEJ>IbiF9$UQJ&-IJ=wD{#WeFNi zB)iV^k5Z&2;?5QQkxd1CIQ0)FCCqJSN)^BMxHMqAt-N>MpePG8L8_npBrZ#~77nm?TM!aSQVfLY|HaD8JANf3Os-eNG zi9VjZ((qvBTevf?*EZQy9}+xUif&V7yJ6+_udBD`5zpB5D>GBFY_c%Q>vv~tedYEt z@pR-xR%Tu@#-uvgra1xm#7e+gFhfkWr7**I3NMHNFf~tY06_+V3agA4f0ZtQLLULWGQe940N@I-EGA`9`$sjK zg}$gefrXsl3;zP+ah(6BLGVRg3tY9_JO)(pVF&goF7dq)_c#KJ`y5|^gxA2w*e6|l zzvF_j9uIm$5wDm(y;mXFiyty6@= width) ? width - 1 : (x + vstep); + uint top = (y < hstep) ? 0 : (y - hstep); + uint bottom = ((y + hstep) >= height)? height - 1: (y + hstep); + + // initializing wighted sum value + float sumFX = 0; + + for(uint i = left; i <= right; ++i) { + for(uint j = top ; j <= bottom; ++j) { + // performing wighted sum within the mask boundaries + uint maskIndex = (j - (y - hstep)) * maskWidth + (i - (x - vstep)); + uint index = j * width + i; + sumFX += ((float)input[index] * mask[maskIndex]); + } + } + + // To round to the nearest integer + sumFX += 0.5f; + output[tid] = (uint)sumFX; +} diff --git a/test/simple_convolution/simple_convolution.cpp b/test/simple_convolution/simple_convolution.cpp new file mode 100644 index 0000000000..e5676d6d44 --- /dev/null +++ b/test/simple_convolution/simple_convolution.cpp @@ -0,0 +1,390 @@ +/****************************************************************************** + +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +*******************************************************************************/ + +#include "simple_convolution/simple_convolution.h" + +#include +#include + +#include "util/helper_funcs.h" +#include "util/test_assert.h" + +const uint32_t SimpleConvolution::input_data_[]{ + 15, 201, 51, 89, 92, 34, 96, 66, 11, 225, 161, 96, 81, 211, 108, 124, 202, 244, 182, + 90, 215, 92, 98, 20, 44, 225, 55, 247, 202, 0, 45, 218, 202, 97, 51, 39, 131, 147, + 105, 143, 116, 11, 239, 198, 222, 92, 67, 169, 81, 250, 3, 40, 86, 101, 60, 131, 70, + 116, 123, 17, 117, 168, 236, 64, 10, 31, 103, 142, 179, 209, 29, 40, 220, 13, 239, 187, + 105, 50, 100, 186, 44, 104, 227, 131, 205, 32, 6, 20, 149, 130, 38, 10, 43, 18, 75, + 53, 50, 178, 195, 230, 132, 225, 14, 96, 238, 253, 27, 88, 48, 128, 18, 92, 232, 246, + 224, 182, 23, 231, 203, 172, 105, 241, 183, 148, 4, 2, 202, 55, 181, 142, 29, 57, 111, + 43, 153, 93, 41, 181, 181, 89, 54, 200, 182, 31, 190, 150, 213, 213, 126, 160, 130, 232, + 146, 57, 125, 151, 59, 71, 206, 240, 213, 236, 42, 68, 24, 195, 162, 65, 121, 87, 155, + 175, 31, 81, 207, 222, 232, 164, 180, 102, 69, 55, 79, 216, 112, 204, 112, 171, 19, 63, + 156, 233, 43, 198, 46, 67, 138, 208, 132, 4, 39, 32, 180, 71, 113, 131, 38, 90, 40, + 219, 193, 109, 18, 16, 70, 131, 220, 182, 46, 240, 245, 203, 217, 32, 146, 7, 100, 28, + 216, 233, 32, 255, 9, 213, 71, 123, 88, 110, 213, 128, 74, 150, 238, 93, 166, 52, 224, + 131, 234, 15, 115, 224, 218, 76, 1, 108, 84, 101, 137, 44, 79, 170, 44, 88, 127, 116, + 211, 216, 226, 168, 88, 45, 63, 70, 138, 230, 123, 107, 105, 101, 122, 220, 70, 84, 41, + 71, 193, 125, 173, 75, 169, 252, 245, 213, 84, 117, 73, 40, 77, 44, 209, 166, 90, 16, + 237, 229, 246, 104, 80, 95, 206, 202, 60, 20, 31, 101, 92, 225, 226, 9, 44, 140, 5, + 34, 97, 89, 151, 171, 129, 229, 216, 82, 139, 51, 99, 120, 24, 89, 225, 104, 185, 175, + 50, 246, 196, 82, 91, 32, 51, 62, 42, 96, 202, 47, 130, 44, 137, 26, 215, 10, 255, + 176, 93, 138, 227, 193, 3, 251, 27, 229, 100, 212, 149, 151, 202, 89, 233, 38, 122, 29, + 100, 164, 125, 46, 212, 0, 90, 93, 26, 50, 103, 25, 226, 197, 164, 198, 135, 168, 194, + 162, 141, 38, 119, 34, 190, 66, 124, 167, 104, 247, 197, 204, 156, 67, 251, 112, 67, 85, + 205, 93, 135, 53, 119, 106, 251, 28, 49, 130, 196, 243, 36, 82, 26, 155, 117, 216, 221, + 241, 128, 70, 233, 70, 18, 133, 137, 14, 245, 204, 99, 195, 42, 235, 248, 161, 86, 243, + 190, 135, 118, 130, 123, 154, 213, 150, 54, 74, 111, 20, 60, 240, 90, 37, 54, 109, 171, + 191, 123, 161, 140, 222, 100, 182, 202, 93, 88, 32, 80, 23, 168, 198, 153, 36, 97, 111, + 187, 151, 185, 43, 172, 245, 27, 6, 27, 82, 115, 199, 18, 239, 104, 158, 206, 205, 85, + 152, 42, 174, 185, 123, 197, 98, 65, 95, 135, 163, 206, 66, 59, 136, 109, 231, 125, 137, + 237, 153, 219, 97, 96, 237, 81, 201, 140, 31, 150, 226, 183, 192, 144, 113, 59, 86, 212, + 125, 182, 91, 33, 132, 158, 92, 12, 12, 68, 138, 149, 50, 36, 113, 147, 133, 95, 229, + 78, 235, 4, 228, 206, 188, 165, 95, 45, 225, 181, 1, 94, 107, 93, 128, 240, 251, 220, + 252, 7, 32, 135, 156, 83, 171, 14, 230, 48, 109, 203, 126, 89, 208, 99, 39, 140, 9, + 134, 185, 234, 60, 187, 73, 167, 24, 201, 152, 20, 166, 148, 27, 199, 28, 184, 26, 199, + 198, 0, 248, 52, 204, 119, 141, 157, 218, 181, 41, 227, 59, 227, 206, 119, 159, 23, 31, + 184, 224, 183, 204, 134, 76, 231, 77, 105, 160, 103, 48, 103, 104, 41, 155, 53, 160, 41, + 210, 123, 222, 252, 95, 26, 223, 45, 146, 126, 68, 177, 54, 37, 105, 3, 171, 182, 235, + 249, 31, 139, 97, 80, 243, 202, 121, 143, 0, 26, 184, 210, 149, 151, 207, 244, 177, 174, + 34, 67, 45, 102, 245, 100, 140, 95, 104, 55, 21, 83, 49, 53, 223, 147, 134, 210, 93, + 0, 97, 93, 26, 26, 48, 175, 178, 255, 164, 99, 174, 198, 167, 220, 45, 156, 64, 185, + 252, 168, 241, 18, 252, 35, 71, 219, 182, 205, 173, 19, 206, 15, 113, 232, 42, 161, 152, + 220, 160, 60, 64, 79, 3, 231, 43, 49, 132, 108, 235, 128, 21, 220, 146, 17, 255, 218, + 236, 182, 168, 154, 201, 118, 170, 58, 94, 212, 220, 246, 177, 125, 51, 241, 204, 55, 216, + 248, 104, 92, 100, 83, 221, 121, 48, 111, 138, 47, 73, 119, 230, 241, 17, 175, 103, 187, + 234, 198, 144, 199, 188, 65, 68, 240, 51, 17, 39, 11, 9, 143, 104, 109, 227, 70, 231, + 19, 181, 113, 66, 255, 233, 41, 241, 250, 217, 89, 182, 196, 31, 71, 139, 220, 137, 208, + 204, 188, 225, 243, 200, 234, 131, 48, 88, 102, 119, 63, 121, 44, 177, 188, 44, 154, 229, + 29, 149, 190, 118, 76, 130, 150, 147, 14, 114, 28, 222, 62, 217, 191, 50, 161, 170, 181, + 210, 2, 28, 73, 66, 149, 117, 243, 81, 162, 141, 55, 191, 35, 245, 54, 111, 120, 204, + 2, 134, 62, 31, 100, 125, 248, 36, 175, 153, 206, 101, 107, 209, 129, 181, 19, 22, 43, + 7, 104, 205, 149, 159, 140, 184, 149, 195, 39, 14, 143, 42, 148, 205, 73, 249, 74, 66, + 30, 250, 219, 237, 96, 71, 190, 225, 253, 210, 248, 40, 218, 96, 245, 111, 0, 130, 39, + 150, 69, 79, 165, 212, 122, 57, 162, 195, 51, 237, 6, 82, 231, 225, 63, 71, 41, 253, + 41, 38, 208, 33, 78, 170, 130, 68, 26, 131, 198, 66, 26, 12, 145, 191, 224, 11, 249, + 130, 207, 44, 112, 213, 126, 88, 183, 190, 160, 225, 187, 201, 8, 140, 235, 87, 55, 109, + 155, 81, 241, 98, 147, 11, 110, 37, 202, 79, 49, 195, 210, 0, 240, 66, 214, 110, 154, + 142, 44, 58, 111, 232, 4, 119, 117, 239, 207, 172, 93, 106, 254, 78, 205, 145, 89, 59, + 183, 35, 138, 232, 230, 92, 233, 214, 159, 191, 69, 58, 78, 114, 116, 189, 91, 121, 53, + 208, 104, 4, 125, 198, 111, 123, 20, 60, 13, 109, 120, 196, 145, 3, 172, 119, 95, 150, + 78, 255, 85, 147, 57, 163, 6, 174, 97, 97, 39, 151, 50, 144, 155, 175, 86, 11, 43, + 107, 71, 56, 216, 191, 253, 105, 194, 170, 225, 34, 64, 47, 34, 150, 195, 91, 58, 201, + 10, 155, 43, 49, 50, 93, 194, 206, 13, 25, 217, 56, 132, 33, 112, 92, 225, 109, 198, + 164, 23, 167, 199, 88, 215, 234, 238, 155, 69, 40, 100, 80, 196, 144, 129, 246, 237, 68, + 197, 250, 93, 159, 51, 225, 193, 163, 62, 163, 17, 4, 71, 41, 172, 15, 130, 132, 249, + 112, 31, 63, 152, 132, 143, 92, 20, 17, 83, 1, 86, 25, 252, 179, 185, 47, 149, 122, + 211, 211, 29, 229, 216, 101, 15, 133, 117, 145, 9, 111, 1, 40, 175, 154, 173, 62, 247, + 193, 80, 75, 194, 166, 100, 191, 90, 29, 239, 239, 152, 194, 195, 182, 168, 156, 27, 183, + 33, 145, 73, 43, 0, 75, 83, 175, 229, 0, 238, 221, 194, 63, 40, 133, 230, 140, 68, + 64, 170, 51, 48, 66, 246, 243, 248, 159, 144, 20, 87, 177, 165, 160, 220, 166, 235, 48, + 86, 209, 49, 68, 174, 243, 132, 214, 120, 106, 99, 189, 170, 13, 241, 219, 80, 232, 207, + 72, 135, 95, 92, 223, 16, 2, 127, 237, 169, 107, 29, 255, 61, 79, 68, 236, 67, 200, + 194, 188, 50, 38, 121, 221, 52, 107, 184, 132, 84, 136, 204, 219, 231, 41, 186, 248, 44, + 58, 229, 213, 166, 3, 212, 227, 82, 25, 207, 150, 225, 146, 82, 20, 185, 204, 242, 237, + 55, 170, 113, 139, 50, 62, 103, 26, 103, 34, 18, 148, 93, 247, 105, 3, 251, 62, 231, + 77, 87, 182, 227, 57, 73, 54, 77, 2, 2, 63, 239, 57, 234, 97, 197, 29, 159, 44, + 55, 7, 79, 74, 155, 172, 66, 5, 175, 61, 67, 150, 139, 155, 77, 111, 212, 151, 165, + 34, 153, 167, 98, 137, 225, 77, 234, 166, 107, 138, 211, 163, 145, 34, 237, 45, 206, 47, + 50, 126, 108, 117, 21, 248, 17, 98, 103, 230, 249, 12, 9, 147, 179, 107, 29, 149, 185, + 7, 59, 37, 146, 14, 200, 35, 49, 182, 80, 0, 230, 130, 126, 83, 248, 148, 75, 9, + 247, 178, 240, 240, 190, 249, 132, 114, 101, 161, 7, 30, 169, 67, 68, 59, 82, 12, 95, + 131, 195, 176, 131, 169, 51, 2, 252, 44, 150, 72, 54, 141, 250, 38, 126, 185, 31, 3, + 44, 132, 165, 52, 163, 78, 120, 231, 138, 202, 244, 234, 77, 183, 155, 209, 97, 207, 212, + 94, 251, 107, 166, 49, 249, 161, 88, 120, 91, 120, 123, 135, 253, 33, 188, 160, 112, 52, + 136, 250, 254, 125, 229, 76, 53, 128, 30, 150, 79, 243, 244, 75, 95, 155, 125, 88, 60, + 213, 209, 152, 78, 77, 32, 75, 110, 220, 236, 222, 17, 117, 217, 15, 242, 190, 92, 39, + 63, 123, 190, 143, 111, 178, 219, 206, 78, 88, 38, 138, 46, 247, 34, 124, 69, 66, 199, + 179, 31, 179, 145, 48, 41, 106, 64, 27, 41, 157, 67, 105, 24, 1, 249, 135, 179, 212, + 86, 1, 44, 124, 140, 91, 116, 175, 215, 185, 242, 159, 108, 17, 83, 254, 66, 124, 105, + 131, 151, 146, 32, 218, 252, 57, 219, 245, 193, 143, 201, 23, 145, 246, 148, 30, 82, 8, + 206, 41, 194, 192, 201, 47, 210, 28, 46, 20, 152, 151, 151, 48, 42, 184, 11, 38, 241, + 231, 28, 179, 119, 230, 202, 8, 220, 94, 39, 46, 103, 245, 88, 42, 181, 33, 90, 136, + 62, 136, 156, 214, 31, 52, 7, 74, 237, 19, 113, 223, 250, 141, 146, 113, 115, 92, 122, + 80, 187, 161, 126, 35, 150, 215, 78, 76, 249, 168, 212, 55, 48, 113, 14, 80, 166, 21, + 154, 147, 40, 12, 114, 35, 153, 5, 148, 12, 98, 15, 92, 29, 176, 219, 65, 71, 179, + 143, 147, 172, 56, 104, 227, 104, 218, 241, 185, 128, 7, 84, 20, 47, 96, 135, 82, 249, + 140, 231, 6, 238, 246, 99, 12, 167, 63, 77, 238, 242, 221, 130, 158, 21, 235, 129, 126, + 197, 114, 56, 69, 121, 140, 90, 169, 237, 225, 252, 231, 109, 228, 237, 91, 219, 81, 104, + 130, 144, 181, 113, 130, 147, 244, 32, 169, 223, 162, 39, 164, 21, 95, 234, 143, 236, 68, + 57, 217, 37, 53, 192, 147, 25, 174, 239, 245, 0, 87, 119, 144, 13, 232, 19, 160, 220, + 51, 73, 188, 214, 113, 96, 235, 209, 75, 122, 190, 144, 179, 151, 181, 233, 88, 73, 3, + 7, 56, 248, 7, 143, 112, 152, 156, 89, 171, 61, 53, 223, 135, 242, 181, 248, 83, 161, + 202, 158, 28, 136, 46, 208, 32, 228, 186, 121, 45, 189, 128, 102, 182, 136, 246, 38, 32, + 147, 127, 204, 208, 181, 171, 87, 167, 97, 80, 250, 2, 26, 153, 31, 163, 200, 239, 195, + 172, 169, 60, 218, 103, 188, 65, 30, 69, 55, 68, 102, 202, 196, 50, 154, 121, 221, 242, + 33, 63, 67, 28, 66, 93, 181, 97, 0, 126, 81, 196, 43, 251, 0, 5, 98, 189, 70, + 128, 3, 126, 197, 105, 72, 137, 155, 227, 3, 121, 214, 36, 184, 25, 65, 250, 118, 247, + 91, 119, 117, 173, 60, 160, 168, 60, 166, 10, 250, 237, 139, 253, 107, 80, 102, 180, 217, + 2, 151, 221, 123, 109, 1, 52, 134, 66, 46, 253, 57, 138, 117, 175, 55, 178, 79, 223, + 239, 245, 234, 233, 226, 117, 231, 78, 198, 78, 2, 159, 80, 154, 124, 204, 7, 126, 0, + 142, 193, 47, 140, 251, 185, 2, 170, 241, 180, 249, 208, 163, 239, 186, 141, 210, 48, 116, + 32, 246, 195, 34, 150, 19, 188, 19, 224, 196, 146, 224, 83, 83, 15, 224, 78, 201, 226, + 249, 186, 151, 243, 139, 58, 226, 70, 199, 181, 118, 60, 213, 109, 255, 248, 3, 19, 181, + 23, 243, 122, 169, 212, 205, 252, 228, 173, 75, 173, 144, 68, 104, 39, 55, 243, 98, 26, + 57, 41, 207, 175, 102, 165, 29, 102, 158, 32, 121, 83, 56, 109, 205, 225, 66, 155, 222, + 38, 73, 42, 212, 218, 110, 60, 1, 166, 48, 99, 193, 105, 141, 145, 25, 244, 54, 54, + 90, 213, 87, 212, 40, 143, 66, 246, 112, 132, 146, 79, 171, 220, 121, 128, 182, 232, 189, + 184, 143, 237, 27, 80, 86, 169, 226, 112, 158, 25, 166, 248, 238, 253, 204, 23, 141, 15, + 13, 254, 147, 160, 77, 63, 124, 199, 191, 50, 175, 124, 234, 62, 105, 6, 143, 192, 176, + 113, 48, 78, 139, 215, 71, 121, 213, 20, 144, 98, 35, 158, 96, 183, 62, 174, 246, 187, + 117, 182, 237, 37, 50, 216, 99, 156, 223, 243, 93, 143, 101, 142, 222, 240, 101, 37, 106, + 58, 57, 250, 157, 93, 153, 254, 20, 216, 172, 10, 147, 34, 192, 129, 71, 243, 90, 171, + 144, 57, 159, 238, 201, 4, 124, 167, 244, 225, 205, 95, 28, 7, 89, 185, 100, 243, 184, + 121, 203, 100, 131, 95, 135, 68, 224, 207, 56, 58, 122, 201, 115, 25, 183, 61, 30, 51, + 229, 18, 21, 178, 113, 49, 186, 203, 235, 31, 191, 163, 152, 138, 8, 28, 233, 143, 97, + 202, 95, 153, 4, 217, 98, 120, 243, 26, 182, 17, 77, 155, 36, 99, 78, 150, 149, 8, + 98, 128, 39, 33, 36, 192, 172, 45, 220, 149, 189, 61, 96, 28, 215, 100, 246, 58, 221, + 233, 84, 147, 251, 162, 47, 31, 5, 125, 181, 154, 134, 23, 27, 174, 57, 64, 110, 229, + 109, 75, 123, 43, 136, 219, 71, 95, 64, 61, 154, 29, 39, 238, 177, 34, 145, 225, 65, + 150, 94, 247, 49, 229, 15, 77, 147, 72, 141, 2, 45, 251, 77, 169, 38, 213, 132, 110, + 53, 196, 172, 207, 226, 212, 190, 148, 246, 79, 117, 56, 230, 212, 48, 23, 185, 63, 100, + 76, 136, 242, 78, 181, 237, 156, 95, 20, 113, 227, 131, 167, 168, 47, 119, 139, 3, 53, + 31, 250, 133, 149, 50, 107, 105, 99, 130, 34, 162, 231, 111, 42, 217, 190, 224, 199, 90, + 63, 220, 204, 35, 95, 115, 203, 143, 234, 86, 147, 32, 118, 141, 165, 11, 192, 16, 117, + 35, 147, 152, 198, 123, 7, 240, 84, 198, 209, 28, 33, 17, 248, 237, 52, 88, 97, 255, + 231, 76, 86, 122, 109, 204, 8, 18, 216, 201, 35, 77, 237, 183, 229, 179, 50, 237, 164, + 135, 179, 118, 164, 213, 135, 157, 195, 187, 245, 36, 187, 220, 113, 18, 87, 222, 222, 96, + 241, 183, 42, 21, 4, 23, 205, 233, 203, 0, 214, 112, 136, 138, 230, 44, 95, 110, 201, + 34, 41, 191, 71, 229, 155, 185, 247, 243, 151, 214, 84, 137, 141, 126, 159, 146, 149, 108, + 124, 97, 109, 82, 209, 245, 221, 183, 34, 60, 37, 236, 95, 79, 171, 167, 53, 71, 96, + 45, 58, 248, 3, 142, 129, 145, 12, 33, 36, 162, 142, 160, 3, 251, 243, 213, 240, 208, + 141, 19, 13, 178, 255, 109, 2, 170, 20, 55, 241, 116, 101, 44, 108, 105, 186, 238, 251, + 199, 15, 31, 106, 157, 191, 110, 152, 178, 67, 137, 131, 208, 156, 144, 131, 155, 253, 134, + 70, 18, 190, 55, 134, 35, 99, 243, 140, 30, 225, 135, 230, 240, 166, 81, 142, 102, 191, + 39, 25, 3, 177, 156, 211, 77, 45, 87, 233, 43, 221, 48, 61, 155, 103, 195, 191, 203, + 182, 75, 233, 152, 211, 208, 136, 121, 33, 23, 224, 224, 62, 249, 227, 239, 149, 183, 61, + 195, 15, 39, 238, 236, 87, 43, 136, 191, 239, 71, 138, 166, 147, 116, 62, 102, 68, 199, + 224, 101, 223, 193, 70, 29, 186, 42, 13, 80, 225, 75, 19, 241, 115, 1, 221, 202, 45, + 102, 137, 29, 174, 20, 195, 66, 136, 2, 168, 205, 201, 137, 50, 168, 74, 121, 198, 4, + 163, 212, 85, 133, 31, 105, 118, 146, 106, 84, 93, 152, 187, 231, 181, 105, 251, 121, 171, + 132, 123, 84, 81, 69, 221, 132, 238, 40, 253, 181, 45, 161, 137, 130, 39, 169, 235, 158, + 59, 86, 242, 153, 239, 173, 128, 165, 23, 123, 30, 195, 0, 154, 23, 81, 224, 245, 214, + 206, 30, 212, 131, 75, 117, 12, 206, 157, 181, 186, 59, 241, 17, 45, 138, 0, 219, 11, + 165, 243, 135, 196, 182, 135, 95, 205, 217, 63, 195, 175, 14, 225, 131, 145, 45, 249, 158, + 251, 150, 84, 182, 209, 70, 199, 255, 209, 199, 219, 220, 109, 206, 99, 50, 132, 234, 146, + 82, 195, 209, 22, 114, 223, 247, 246, 113, 37, 239, 16, 33, 134, 100, 215, 88, 170, 158, + 87, 123, 102, 50, 88, 211, 1, 187, 6, 134, 165, 152, 216, 105, 106, 239, 220, 74, 231, + 210, 187, 12, 194, 204, 45, 72, 49, 4, 160, 219, 162, 248, 87, 8, 43, 176, 220, 44, + 107, 227, 178, 17, 124, 139, 122, 230, 122, 87, 48, 97, 42, 236, 110, 236, 185, 155, 53, + 234, 159, 214, 198, 66, 206, 30, 75, 249, 206, 40, 38, 57, 11, 217, 74, 136, 100, 197, + 110, 223, 29, 159, 65, 71, 140, 175, 51, 69, 74, 105, 48, 234, 63, 246, 45, 13, 20, + 121, 7, 226, 161, 46, 28, 173, 7, 103, 53, 108, 45, 164, 76, 74, 68, 141, 145, 208, + 61, 197, 22, 136, 46, 70, 115, 110, 60, 161, 124, 81, 26, 132, 51, 188, 178, 79, 106, + 186, 183, 160, 39, 228, 68, 115, 46, 136, 1, 192, 89, 62, 133, 112, 198, 180, 182, 58, + 34, 243, 219, 158, 69, 245, 34, 120, 178, 213, 200, 28, 143, 128, 188, 182, 100, 1, 41, + 146, 137, 43, 82, 227, 105, 216, 83, 48, 140, 10, 106, 175, 254, 70, 77, 67, 59, 112, + 188, 237, 69, 133, 10, 212, 5, 198, 138, 105, 199, 180, 252, 81, 223, 79, 53, 73, 39, + 137, 121, 180, 148, 228, 99, 146, 42, 177, 214, 102, 33, 147, 84, 102, 25, 94, 59, 31, + 37, 197, 137, 237, 122, 133, 63, 90, 213, 116, 163, 253, 253, 29, 177, 145, 2, 21, 36, + 45, 198, 251, 147, 231, 143, 232, 78, 168, 71, 137, 199, 108, 79, 80, 90, 201, 214, 153, + 35, 172, 13, 199, 169, 11, 228, 91, 157, 231, 112, 193, 20, 54, 189, 167, 30, 77, 144, + 108, 245, 215, 246, 189, 68, 69, 14, 158, 14, 228, 55, 50, 145, 69, 249, 58, 80, 222, + 149, 237, 198, 5, 175, 218, 60, 109, 130, 91, 186, 18, 200, 175, 234, 190, 109, 46, 3, + 123, 204, 18, 96, 4, 68, 241, 73, 62, 44, 154, 29, 193, 136, 227, 199, 55, 189, 4, + 164, 64, 95, 95, 82, 39, 15, 60, 230, 124, 107, 233, 248, 55, 251, 89, 60, 63, 75, + 134, 126, 119, 32, 156, 57, 168, 127, 0, 224, 61, 5, 133, 125, 100, 228, 208, 140, 243, + 12, 114, 111, 119, 92, 104, 175, 87, 193, 236, 151, 13, 114, 21, 132, 146, 177, 189, 59, + 49, 190, 27, 110, 195, 160, 236, 40, 132, 188, 181, 120, 201, 40, 232, 65, 132, 80, 241, + 220, 18, 221, 115, 31, 79, 137, 164, 226, 58, 98, 29, 108, 32, 57, 219, 228, 218, 199, + 13, 95, 132, 195, 215, 77, 235, 191, 143, 112, 16, 128, 76, 35, 93, 191, 66, 173, 73, + 231, 143, 132, 73, 173, 240, 106, 231, 203, 78, 193, 147, 92, 33, 23, 31, 248, 100, 11, + 184, 243, 123, 201, 115, 200, 236, 209, 135, 47, 126, 209, 22, 14, 85, 95, 188, 69, 202, + 163, 17, 24, 101, 164, 117, 134, 187, 148, 127, 31, 159, 55, 19, 27, 1, 135, 227, 237, + 89, 107, 28, 216, 60, 51, 230, 145, 147, 163, 215, 93, 70, 232, 118, 172, 140, 235, 50, + 71, 128, 177, 103, 32, 233, 123, 60, 234, 2, 31, 216, 91, 139, 244, 52, 200, 40, 26, + 90, 188, 189, 49, 25, 4, 25, 144, 176, 166, 124, 227, 237, 252, 148, 85, 29, 125, 208, + 89, 104, 210, 121, 64, 46, 4, 53, 99, 204, 93, 125, 38, 25, 59, 88, 51, 64, 113, + 195, 241, 23, 64, 212, 5, 60, 104, 90, 90, 230, 42, 179, 78, 253, 44, 143, 44, 49, + 196, 143, 254, 34, 13, 36, 60, 73, 125, 112, 137, 239, 52, 122, 7, 116, 79, 12, 177, + 183, 103, 11, 158, 146, 190, 237, 143, 235, 124, 188, 28, 65, 76, 26, 100, 89, 63, 160, + 163, 188, 17, 44, 172, 69, 167, 179, 185, 246, 191, 107, 174, 38, 118, 76, 184, 53, 58, + 72, 32, 182, 5, 61, 248, 81, 88, 92, 170, 152, 253, 77, 84, 14, 122, 1, 83, 34, + 180, 13, 25, 115, 120, 199, 154, 238, 20, 83, 36, 79, 155, 68, 5, 160, 130, 254, 242, + 218, 90, 156, 114, 87, 234, 199, 101, 101, 200, 185, 135, 124, 198, 160, 240, 62, 104, 138, + 45, 125, 222, 81, 204, 122, 150, 210, 26, 24, 208, 12, 242, 42, 169, 101, 130, 148, 44, + 232, 249, 245, 161, 128, 113, 103, 33, 98, 166, 137, 236, 212, 7, 202, 38, 211, 69, 188, + 165, 95, 212, 118, 108, 199, 161, 22, 45, 35, 170, 90, 11, 163, 79, 173, 36, 193, 20, + 69, 35, 187, 207, 16, 144, 214, 219, 182, 170, 32, 114, 79, 128, 71, 198, 237, 15, 103, + 4, 60, 139, 175, 150, 151, 82, 230, 68, 119, 168, 89, 188, 204, 20, 140, 220, 165, 98, + 184, 91, 12, 217, 205, 92, 90, 20, 35, 71, 36, 138, 76, 96, 22, 251, 247, 173, 78, + 222, 241, 197, 134, 75, 130, 83, 96, 14, 47, 5, 113, 232, 96, 126, 193, 45, 218, 28, + 66, 253, 99, 103, 136, 176, 200, 158, 171, 191, 76, 249, 158, 62, 190, 37, 137, 65, 120, + 233, 80, 168, 238, 193, 145, 79, 63, 82, 125, 26, 111, 191, 24, 210, 39, 161, 131, 239, + 64, 46, 175, 140, 39, 77, 202, 230, 115, 84, 40, 235, 62, 120, 148, 45, 57, 37, 124, + 121, 120, 249, 148, 231, 185, 172, 186, 224, 77, 61, 207, 141, 107, 126, 26, 147, 204, 229, + 121, 63, 58, 161, 43, 120, 25, 191, 165, 83, 228, 34, 205, 92, 27, 97, 67, 213, 13, + 253, 182, 91, 59, 133, 233, 166, 4, 4, 57, 209, 233, 179, 16, 35, 85, 59, 155, 111, + 250, 65, 194, 223, 99, 144, 59, 127, 241, 127, 85, 255, 125, 11, 90, 184, 145, 68, 95, + 150, 72, 153, 103, 49, 76, 120, 85, 161, 179, 241, 16, 174, 51, 211, 142, 150, 99, 201, + 22, 85, 73, 108, 84, 199, 120, 175, 128, 9, 243, 223, 160, 59, 120, 8, 109, 197, 128, + 194, 103, 52, 180, 119, 227, 231, 75, 113, 126, 175, 59, 148, 4, 132, 1, 89, 75, 121, + 8, 204, 131, 251, 171, 36, 55, 36, 44, 165, 233, 172, 103, 80, 224, 28, 200, 195, 3, + 20, 53, 129, 195, 112, 22, 200, 244, 23, 34, 64, 145, 42, 12, 20, 38, 184, 56, 94, + 220, 101, 3, 198, 17, 107, 22, 242, 135, 222, 182, 138, 243, 235, 11, 182, 91, 34, 127, + 80, 58, 161, 145, 203, 204, 158, 224, 242, 86, 24, 81, 51, 126, 84, 249, 143, 191, 15, + 130, 70, 238, 57, 209, 225, 36, 221, 152, 128, 255, 24, 208, 57, 186, 97, 4, 134, 255, + 229, 121, 86, 254, 202, 137, 124, 31, 130, 12, 222, 146, 142, 37, 129, 199, 247, 98, 236, + 212, 251, 108, 211, 20, 60, 13, 206, 158, 18, 84}; + +SimpleConvolution::SimpleConvolution() { + width_ = 64; + height_ = 64; + mask_width_ = 3; + mask_height_ = mask_width_; + randomize_seed_ = 0; + + if (!IsPowerOf2(width_)) { + width_ = RoundToPowerOf2(width_); + } + + if (!IsPowerOf2(height_)) { + height_ = RoundToPowerOf2(height_); + } + + if (!(mask_width_ % 2)) { + mask_width_++; + } + + if (!(mask_height_ % 2)) { + mask_height_++; + } + + if (width_ * height_ < 256) { + width_ = 64; + height_ = 64; + } + + const uint32_t input_size_bytes = width_ * height_ * sizeof(uint32_t); + const uint32_t mask_size_bytes = mask_width_ * mask_height_ * sizeof(float); + + SetSysDescr(KERNARG_DES_ID, sizeof(kernel_args_t)); + SetSysDescr(INPUT_DES_ID, input_size_bytes); + SetSysDescr(OUTPUT_DES_ID, input_size_bytes); + SetLocalDescr(LOCAL_DES_ID, input_size_bytes); + SetSysDescr(MASK_DES_ID, mask_size_bytes); + SetSysDescr(REFOUT_DES_ID, input_size_bytes); + + if (!randomize_seed_) TEST_ASSERT(sizeof(input_data_) <= input_size_bytes); +} + +void SimpleConvolution::Init() { + std::clog << "SimpleConvolution::init :" << std::endl; + + mem_descr_t input_des = GetDescr(INPUT_DES_ID); + mem_descr_t local_des = GetDescr(LOCAL_DES_ID); + mem_descr_t mask_des = GetDescr(MASK_DES_ID); + mem_descr_t refout_des = GetDescr(REFOUT_DES_ID); + mem_descr_t kernarg_des = GetDescr(KERNARG_DES_ID); + + uint32_t* input = (uint32_t*)input_des.ptr; + uint32_t* output_local = (uint32_t*)local_des.ptr; + float* mask = (float*)mask_des.ptr; + kernel_args_t* kernel_args = (kernel_args_t*)kernarg_des.ptr; + + if (randomize_seed_) { + // random initialisation of input + FillRandom(input, width_, height_, 0, 255, randomize_seed_); + } else { + // initialization with preset values + memcpy(input, input_data_, width_ * height_ * sizeof(uint32_t)); + } + + // Fill a blurr filter or some other filter of your choice + const float val = 1.0f / (mask_width_ * 2.0f - 1.0f); + for (uint32_t i = 0; i < (mask_width_ * mask_height_); i++) { + mask[i] = 0; + } + for (uint32_t i = 0; i < mask_width_; i++) { + uint32_t y = mask_height_ / 2; + mask[y * mask_width_ + i] = val; + } + for (uint32_t i = 0; i < mask_height_; i++) { + uint32_t x = mask_width_ / 2; + mask[i * mask_width_ + x] = val; + } + + // Print the INPUT array. + std::clog << std::dec; + PrintArray("> Input[0]", input, width_, 1); + PrintArray("> Mask", mask, mask_width_, mask_height_); + + // Fill the kernel args + kernel_args->arg1 = output_local; + kernel_args->arg2 = input; + kernel_args->arg3 = mask; + kernel_args->arg4 = width_; + kernel_args->arg41 = height_; + kernel_args->arg5 = mask_width_; + kernel_args->arg51 = mask_height_; + + // Calculate the reference output + memset(refout_des.ptr, 0, refout_des.size); + ReferenceImplementation(reinterpret_cast(refout_des.ptr), input, mask, width_, height_, + mask_width_, mask_height_); +} + +void SimpleConvolution::PrintOutput() const { + PrintArray("> Output[0]", reinterpret_cast(GetOutputPtr()), width_, 1); +} + +bool SimpleConvolution::ReferenceImplementation(uint32_t* output, const uint32_t* input, + const float* mask, const uint32_t width, + const uint32_t height, const uint32_t mask_width, + const uint32_t mask_height) { + const uint32_t vstep = (mask_width - 1) / 2; + const uint32_t hstep = (mask_height - 1) / 2; + + // for each pixel in the input + for (uint32_t x = 0; x < width; x++) { + for (uint32_t y = 0; y < height; y++) { + // find the left, right, top and bottom indices such that + // the indices do not go beyond image boundaires + const uint32_t left = (x < vstep) ? 0 : (x - vstep); + const uint32_t right = ((x + vstep) >= width) ? width - 1 : (x + vstep); + const uint32_t top = (y < hstep) ? 0 : (y - hstep); + const uint32_t bottom = ((y + hstep) >= height) ? height - 1 : (y + hstep); + + // initializing wighted sum value + float sum_fx = 0; + for (uint32_t i = left; i <= right; ++i) { + for (uint32_t j = top; j <= bottom; ++j) { + // performing wighted sum within the mask boundaries + uint32_t mask_idx = (j - (y - hstep)) * mask_width + (i - (x - vstep)); + uint32_t index = j * width + i; + + // to round to the nearest integer + sum_fx += ((float)input[index] * mask[mask_idx]); + } + } + sum_fx += 0.5f; + output[y * width + x] = uint32_t(sum_fx); + } + } + + return true; +} diff --git a/test/simple_convolution/simple_convolution.h b/test/simple_convolution/simple_convolution.h new file mode 100644 index 0000000000..a5b75a6c30 --- /dev/null +++ b/test/simple_convolution/simple_convolution.h @@ -0,0 +1,96 @@ +/****************************************************************************** + +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +*******************************************************************************/ + +#ifndef TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_ +#define TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_ + +#include +#include + +#include "ctrl/test_kernel.h" + +// Class implements SimpleConvolution kernel parameters +class SimpleConvolution : public TestKernel { + public: + // Constructor + SimpleConvolution(); + + // Initialize method + void Init(); + + // Return compute grid size + uint32_t GetGridSize() const { return width_ * height_; } + + // Print output + void PrintOutput() const; + + // Return name + std::string Name() const { return std::string("SimpleConvolution"); } + + private: + // Local kernel arguments declaration + struct kernel_args_t { + void* arg1; + void* arg2; + void* arg3; + uint32_t arg4; + uint32_t arg41; + uint32_t arg5; + uint32_t arg51; + }; + + // Reference CPU implementation of Simple Convolution + // @param output Output matrix after performing convolution + // @param input Input matrix on which convolution is to be performed + // @param mask mask matrix using which convolution was to be performed + // @param input_dimensions dimensions of the input matrix + // @param mask_dimensions dimensions of the mask matrix + // @return bool true on success and false on failure + bool ReferenceImplementation(uint32_t* output, const uint32_t* input, const float* mask, + const uint32_t width, const uint32_t height, + const uint32_t maskWidth, const uint32_t maskHeight); + + // Width of the Input array + uint32_t width_; + + // Height of the Input array + uint32_t height_; + + // Mask dimensions + uint32_t mask_width_; + + // Mask dimensions + uint32_t mask_height_; + + // Randomize input data + unsigned randomize_seed_; + + // Input data + static const uint32_t input_data_[]; +}; + +#endif // TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_ diff --git a/test/simple_convolution/simple_convolution.hsail b/test/simple_convolution/simple_convolution.hsail new file mode 100644 index 0000000000..223ef8eddb --- /dev/null +++ b/test/simple_convolution/simple_convolution.hsail @@ -0,0 +1,154 @@ +module &m:1:0:$full:$large:$default; +extension "amd:gcn"; +extension "IMAGE"; + +decl prog function &abort()(); + +prog kernel &__OpenCL_SimpleConvolution(kernarg_u64 %__global_offset_0, + kernarg_u64 %output, + kernarg_u64 %input, + kernarg_u64 %mask, + kernarg_u32 %inputDimensions[2], + kernarg_u32 %maskDimensions[2]) { + + pragma "AMD RTI", "ARGSTART:__OpenCL_SimpleConvolution"; + pragma "AMD RTI", "version:3:1:104"; + pragma "AMD RTI", "device:generic"; + pragma "AMD RTI", "uniqueid:1024"; + pragma "AMD RTI", "memory:private:0"; + pragma "AMD RTI", "memory:region:0"; + pragma "AMD RTI", "memory:local:0"; + pragma "AMD RTI", "value:__global_offset_0:u64:1:1:0"; + pragma "AMD RTI", "pointer:output:u32:1:1:96:uav:7:4:RW:0:0:0"; + pragma "AMD RTI", "pointer:input:u32:1:1:112:uav:7:4:RW:0:0:0"; + pragma "AMD RTI", "pointer:mask:float:1:1:128:uav:7:4:RW:0:0:0"; + pragma "AMD RTI", "value:inputDimensions:u32:2:1:144"; + pragma "AMD RTI", "constarg:4:inputDimensions"; + pragma "AMD RTI", "value:maskDimensions:u32:2:1:160"; + pragma "AMD RTI", "constarg:5:maskDimensions"; + pragma "AMD RTI", "function:1:0"; + pragma "AMD RTI", "memory:64bitABI"; + pragma "AMD RTI", "privateid:8"; + pragma "AMD RTI", "enqueue_kernel:0"; + pragma "AMD RTI", "kernel_index:0"; + pragma "AMD RTI", "reflection:0:size_t"; + pragma "AMD RTI", "reflection:1:uint*"; + pragma "AMD RTI", "reflection:2:uint*"; + pragma "AMD RTI", "reflection:3:float*"; + pragma "AMD RTI", "reflection:4:uint2"; + pragma "AMD RTI", "reflection:5:uint2"; + pragma "AMD RTI", "ARGEND:__OpenCL_SimpleConvolution"; + + @__OpenCL_SimpleConvolution_Entry: + + // BB#0: // %entry + + workitemabsid_u32 $s6, 0; + cvt_u64_u32 $d0, $s6; + ld_kernarg_align(8)_width(all)_u64 $d4, [%__global_offset_0]; + add_u64 $d0, $d0, $d4; + cvt_u32_u64 $s5, $d0; + ld_v2_kernarg_align(4)_width(all)_u32 ($s0, $s4), [%inputDimensions]; + ld_v2_kernarg_align(4)_width(all)_u32 ($s1, $s9), [%maskDimensions]; + rem_u32 $s7, $s5, $s0; + add_u32 $s2, $s1, 4294967295; + shr_u32 $s8, $s2, 1; + add_u32 $s2, $s7, $s8; + add_u32 $s3, $s0, 4294967295; + cmp_ge_b1_u32 $c0, $s2, $s0; + cmov_b32 $s2, $c0, $s3, $s2; + sub_u32 $s3, $s7, $s8; + cmp_lt_b1_u32 $c0, $s7, $s8; + cmov_b32 $s3, $c0, 0, $s3; + ld_kernarg_align(8)_width(all)_u64 $d1, [%output]; + cmp_le_b1_u32 $c0, $s3, $s2; + cbr_b1 $c0, @BB0_2; + + // BB#1: + + mov_b32 $s6, 0; + br @BB0_6; + + // @BB0_2: // %for.cond32.preheader.lr.ph + + @BB0_2: + + div_u32 $s5, $s5, $s0; + add_u32 $s9, $s9, 4294967295; + shr_u32 $s9, $s9, 1; + add_u32 $s10, $s5, $s9; + add_u32 $s11, $s4, 4294967295; + cmp_ge_b1_u32 $c0, $s10, $s4; + cmov_b32 $s4, $c0, $s11, $s10; + sub_u32 $s10, $s5, $s9; + cmp_lt_b1_u32 $c0, $s5, $s9; + cmov_b32 $s5, $c0, 0, $s10; + ld_kernarg_align(8)_width(all)_u64 $d2, [%mask]; + ld_kernarg_align(8)_width(all)_u64 $d3, [%input]; + cvt_u64_u32 $d5, $s6; + add_u64 $d4, $d4, $d5; + cvt_u32_u64 $s6, $d4; + div_u32 $s6, $s6, $s0; + max_u32 $s10, $s9, $s6; + sub_u32 $s12, $s10, $s6; + max_u32 $s11, $s7, $s8; + mov_b32 $s6, 0; + mad_u32 $s12, $s1, $s12, $s11; + sub_u32 $s7, $s12, $s7; + sub_u32 $s9, $s10, $s9; + mad_u32 $s9, $s0, $s9, $s11; + sub_u32 $s8, $s9, $s8; + + // @BB0_3: // %for.cond32.preheader + + @BB0_3: + + cmp_gt_b1_u32 $c0, $s5, $s4; + mov_b32 $s9, $s7; + mov_b32 $s10, $s8; + mov_b32 $s11, $s5; + cbr_b1 $c0, @BB0_5; + + // @BB0_4: // %for.body35 + + @BB0_4: + + cvt_u64_u32 $d4, $s9; + shl_u64 $d4, $d4, 2; + add_u64 $d4, $d2, $d4; + ld_global_align(4)_f32 $s12, [$d4]; + cvt_u64_u32 $d4, $s10; + shl_u64 $d4, $d4, 2; + add_u64 $d4, $d3, $d4; + ld_global_align(4)_u32 $s13, [$d4]; + cvt_f32_u32 $s13, $s13; + mul_ftz_f32 $s12, $s13, $s12; + add_u32 $s9, $s9, $s1; + add_u32 $s10, $s10, $s0; + add_u32 $s11, $s11, 1; + add_ftz_f32 $s6, $s6, $s12; + cmp_le_b1_u32 $c0, $s11, $s4; + cbr_b1 $c0, @BB0_4; + + // @BB0_5: // %for.inc48 + + @BB0_5: + + add_u32 $s7, $s7, 1; + add_u32 $s8, $s8, 1; + add_u32 $s3, $s3, 1; + cmp_le_b1_u32 $c0, $s3, $s2; + cbr_b1 $c0, @BB0_3; + + // @BB0_6: // %for.end50 + + @BB0_6: + + and_b64 $d0, $d0, 4294967295; + shl_u64 $d0, $d0, 2; + add_u64 $d0, $d1, $d0; + add_ftz_f32 $s0, $s6, 0F3f000000; + cvt_ftz_u32_f32 $s0, $s0; + st_global_align(4)_u32 $s0, [$d0]; + ret; +}; diff --git a/test/util/helper_funcs.h b/test/util/helper_funcs.h new file mode 100644 index 0000000000..758e3d874f --- /dev/null +++ b/test/util/helper_funcs.h @@ -0,0 +1,87 @@ +/********************************************************************** +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted +provided that the following conditions are met: + +• Redistributions of source code must retain the above copyright notice, this list of +conditions and the following disclaimer. +• Redistributions in binary form must reproduce the above copyright notice, this list of +conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +********************************************************************/ +#ifndef TEST_UTIL_HELPER_FUNCS_H_ +#define TEST_UTIL_HELPER_FUNCS_H_ + +#include +#include +#include +#include +#include + +static inline void Error(std::string error_msg) { + std::cerr << "Error: " << error_msg << std::endl; +} + +template +void PrintArray(const std::string header, const T* data, const int width, const int height) { + std::clog << header << " :\n"; + for (int i = 0; i < height; i++) { + std::clog << "> "; + for (int j = 0; j < width; j++) { + std::clog << data[i * width + j] << " "; + } + std::clog << "\n"; + } +} + +template +bool FillRandom(T* array_ptr, const int width, const int height, const T range_min, + const T range_max, unsigned int seed = 123) { + if (!array_ptr) { + Error("Cannot fill array. NULL pointer."); + return false; + } + + if (!seed) seed = (unsigned int)time(NULL); + + srand(seed); + double range = double(range_max - range_min) + 1.0; + + /* random initialisation of input */ + for (int i = 0; i < height; i++) + for (int j = 0; j < width; j++) { + int index = i * width + j; + array_ptr[index] = range_min + T(range * rand() / (RAND_MAX + 1.0)); + } + + return true; +} + +template T RoundToPowerOf2(T val) { + int bytes = sizeof(T); + + val--; + for (int i = 0; i < bytes; i++) val |= val >> (1 << i); + val++; + + return val; +} + +template bool IsPowerOf2(T val) { + long long long_val = val; + return (((long_val & (-long_val)) - long_val == 0) && (long_val != 0)); +} + +#endif // TEST_UTIL_HELPER_FUNCS_H_ diff --git a/test/util/hsa_rsrc_factory.cpp b/test/util/hsa_rsrc_factory.cpp new file mode 100644 index 0000000000..139b618de9 --- /dev/null +++ b/test/util/hsa_rsrc_factory.cpp @@ -0,0 +1,372 @@ +/********************************************************************** +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted +provided that the following conditions are met: + +<95> Redistributions of source code must retain the above copyright notice, this list of +conditions and the following disclaimer. +<95> Redistributions in binary form must reproduce the above copyright notice, this list of +conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +********************************************************************/ + +#include "util/hsa_rsrc_factory.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +// Callback function to find and bind kernarg region of an agent +static hsa_status_t FindMemRegionsCallback(hsa_region_t region, void* data) { + hsa_region_global_flag_t flags; + hsa_region_segment_t segment_id; + + hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id); + if (segment_id != HSA_REGION_SEGMENT_GLOBAL) { + return HSA_STATUS_SUCCESS; + } + + AgentInfo* agent_info = (AgentInfo*)data; + hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags); + if (flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) { + agent_info->coarse_region = region; + } + + if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) { + agent_info->kernarg_region = region; + } + + return HSA_STATUS_SUCCESS; +} + +// Callback function to get the number of agents +static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data) { + // Copy handle of agent and increment number of agents reported + HsaRsrcFactory* rsrcFactory = reinterpret_cast(data); + + // Determine if device is a Gpu agent + hsa_status_t status; + hsa_device_type_t type; + status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type); + CHECK_STATUS("Error Calling hsa_agent_get_info", status); + if (type == HSA_DEVICE_TYPE_DSP) { + return HSA_STATUS_SUCCESS; + } + + if (type == HSA_DEVICE_TYPE_CPU) { + AgentInfo* agent_info = reinterpret_cast(malloc(sizeof(AgentInfo))); + agent_info->dev_id = agent; + agent_info->dev_type = HSA_DEVICE_TYPE_CPU; + rsrcFactory->AddAgentInfo(agent_info, false); + return HSA_STATUS_SUCCESS; + } + + // Device is a Gpu agent, build an instance of AgentInfo + AgentInfo* agent_info = reinterpret_cast(malloc(sizeof(AgentInfo))); + agent_info->dev_id = agent; + agent_info->dev_type = HSA_DEVICE_TYPE_GPU; + hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name); + agent_info->max_wave_size = 0; + hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size); + agent_info->max_queue_size = 0; + hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size); + agent_info->profile = hsa_profile_t(108); + hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile); + + // Initialize memory regions to zero + agent_info->kernarg_region.handle = 0; + agent_info->coarse_region.handle = 0; + + // Find and Bind Memory regions of the Gpu agent + hsa_agent_iterate_regions(agent, FindMemRegionsCallback, agent_info); + + // Save the instance of AgentInfo + rsrcFactory->AddAgentInfo(agent_info, true); + + return HSA_STATUS_SUCCESS; +} + +// Constructor of the class +HsaRsrcFactory::HsaRsrcFactory() { + // Initialize the Hsa Runtime + hsa_status_t status = hsa_init(); + CHECK_STATUS("Error in hsa_init", status); + + // Discover the set of Gpu devices available on the platform + status = hsa_iterate_agents(GetHsaAgentsCallback, this); + CHECK_STATUS("Error Calling hsa_iterate_agents", status); +} + +// Destructor of the class +HsaRsrcFactory::~HsaRsrcFactory() { + hsa_status_t status = hsa_shut_down(); + CHECK_STATUS("Error in hsa_shut_down", status); +} + +// Get the count of Hsa Gpu Agents available on the platform +// +// @return uint32_t Number of Gpu agents on platform +// +uint32_t HsaRsrcFactory::GetCountOfGpuAgents() { return uint32_t(gpu_list_.size()); } + +// Get the count of Hsa Cpu Agents available on the platform +// +// @return uint32_t Number of Cpu agents on platform +// +uint32_t HsaRsrcFactory::GetCountOfCpuAgents() { return uint32_t(cpu_list_.size()); } + +// Get the AgentInfo handle of a Gpu device +// +// @param idx Gpu Agent at specified index +// +// @param agent_info Output parameter updated with AgentInfo +// +// @return bool true if successful, false otherwise +// +bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, AgentInfo** agent_info) { + // Determine if request is valid + uint32_t size = uint32_t(gpu_list_.size()); + if (idx >= size) { + return false; + } + + // Copy AgentInfo from specified index + *agent_info = gpu_list_[idx]; + return true; +} + +// Get the AgentInfo handle of a Cpu device +// +// @param idx Cpu Agent at specified index +// +// @param agent_info Output parameter updated with AgentInfo +// +// @return bool true if successful, false otherwise +// +bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, AgentInfo** agent_info) { + // Determine if request is valid + uint32_t size = uint32_t(cpu_list_.size()); + if (idx >= size) { + return false; + } + + // Copy AgentInfo from specified index + *agent_info = cpu_list_[idx]; + return true; +} + +// Create a Queue object and return its handle. The queue object is expected +// to support user requested number of Aql dispatch packets. +// +// @param agent_info Gpu Agent on which to create a queue object +// +// @param num_Pkts Number of packets to be held by queue +// +// @param queue Output parameter updated with handle of queue object +// +// @return bool true if successful, false otherwise +// +bool HsaRsrcFactory::CreateQueue(AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue) { + hsa_status_t status; + status = hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL, + UINT32_MAX, UINT32_MAX, queue); + return (status == HSA_STATUS_SUCCESS); +} + +// Create a Signal object and return its handle. +// +// @param value Initial value of signal object +// +// @param signal Output parameter updated with handle of signal object +// +// @return bool true if successful, false otherwise +// +bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) { + hsa_status_t status; + status = hsa_signal_create(value, 0, NULL, signal); + return (status == HSA_STATUS_SUCCESS); +} + +// Allocate memory for use by a kernel of specified size in specified +// agent's memory region. Currently supports Global segment whose Kernarg +// flag set. +// +// @param agent_info Agent from whose memory region to allocate +// +// @param size Size of memory in terms of bytes +// +// @return uint8_t* Pointer to buffer, null if allocation fails. +// +uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) { + hsa_status_t status; + uint8_t* buffer = NULL; + size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; + + if (agent_info->coarse_region.handle != 0) { + // Allocate in local memory if it is available + status = hsa_memory_allocate(agent_info->coarse_region, size, (void**)&buffer); + if (status == HSA_STATUS_SUCCESS) { + status = hsa_memory_assign_agent(buffer, agent_info->dev_id, HSA_ACCESS_PERMISSION_RW); + } + } else { + // Allocate in system memory if local memory is not available + status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer); + } + + return (status == HSA_STATUS_SUCCESS) ? buffer : NULL; +} + +// Allocate memory tp pass kernel parameters. +// +// @param agent_info Agent from whose memory region to allocate +// +// @param size Size of memory in terms of bytes +// +// @return uint8_t* Pointer to buffer, null if allocation fails. +// +uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) { + hsa_status_t status; + size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; + + uint8_t* buffer = NULL; + status = hsa_memory_allocate(agent_info->kernarg_region, size, (void**)&buffer); + return (status == HSA_STATUS_SUCCESS) ? buffer : NULL; +} + +// Transfer data method +bool HsaRsrcFactory::TransferData(void* dest_buff, void* src_buff, uint32_t length, + bool host_to_dev) { + hsa_status_t status; + status = hsa_memory_copy(dest_buff, src_buff, length); + return (status == HSA_STATUS_SUCCESS); +} + +// Loads an Assembled Brig file and Finalizes it into Device Isa +// +// @param agent_info Gpu device for which to finalize +// +// @param brig_path File path of the Assembled Brig file +// +// @param kernel_name Name of the kernel to finalize +// +// @param code_desc Handle of finalized Code Descriptor that could +// be used to submit for execution +// +// @return bool true if successful, false otherwise +// +bool HsaRsrcFactory::LoadAndFinalize(AgentInfo* agent_info, const char* brig_path, + char* kernel_name, hsa_executable_symbol_t* code_desc) { + // Finalize the Hsail object into code object + hsa_status_t status; + hsa_code_object_t code_object; + + // Build the code object filename + std::string filename(brig_path); + std::clog << "Code object filename: " << filename << std::endl; + + // Open the file containing code object + std::ifstream codeStream(filename.c_str(), std::ios::binary | std::ios::ate); + if (!codeStream) { + std::cerr << "Error: failed to load " << filename << std::endl; + assert(false); + return false; + } + + // Allocate memory to read in code object from file + size_t size = std::string::size_type(codeStream.tellg()); + char* codeBuff = (char*)AllocateSysMemory(agent_info, size); + if (!codeBuff) { + std::cerr << "Error: failed to allocate memory for code object." << std::endl; + assert(false); + return false; + } + + // Read the code object into allocated memory + codeStream.seekg(0, std::ios::beg); + std::copy(std::istreambuf_iterator(codeStream), std::istreambuf_iterator(), codeBuff); + + // De-Serialize the code object that has been read into memory + status = hsa_code_object_deserialize(codeBuff, size, NULL, &code_object); + if (status != HSA_STATUS_SUCCESS) { + std::cerr << "Failed to deserialize code object" << std::endl; + return false; + } + + // Create executable. + hsa_executable_t hsaExecutable; + status = + hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, "", &hsaExecutable); + CHECK_STATUS("Error in creating executable object", status); + + // Load code object. + status = hsa_executable_load_code_object(hsaExecutable, agent_info->dev_id, code_object, ""); + CHECK_STATUS("Error in loading executable object", status); + + // Freeze executable. + status = hsa_executable_freeze(hsaExecutable, ""); + CHECK_STATUS("Error in freezing executable object", status); + + // Get symbol handle. + hsa_executable_symbol_t kernelSymbol; + status = hsa_executable_get_symbol(hsaExecutable, NULL, kernel_name, agent_info->dev_id, 0, + &kernelSymbol); + CHECK_STATUS("Error in looking up kernel symbol", status); + + // Update output parameter + *code_desc = kernelSymbol; + return true; +} + +// Add an instance of AgentInfo representing a Hsa Gpu agent +void HsaRsrcFactory::AddAgentInfo(AgentInfo* agent_info, bool gpu) { + // Add input to Gpu list + if (gpu) { + gpu_list_.push_back(agent_info); + return; + } + + // Add input to Cpu list + cpu_list_.push_back(agent_info); +} + +// Print the various fields of Hsa Gpu Agents +bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) { + std::clog << header << " :" << std::endl; + + AgentInfo* agent_info; + int size = uint32_t(gpu_list_.size()); + for (int idx = 0; idx < size; idx++) { + agent_info = gpu_list_[idx]; + + std::clog << "> agent[" << idx << "] :" << std::endl; + std::clog << ">> Name : " << agent_info->name << std::endl; + std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl; + std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl; + std::clog << ">> Kernarg Region Id : " << agent_info->coarse_region.handle << std::endl; + } + return true; +} + +HsaRsrcFactory* HsaRsrcFactory::instance_ = NULL; +HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_; diff --git a/test/util/hsa_rsrc_factory.h b/test/util/hsa_rsrc_factory.h new file mode 100644 index 0000000000..88235e62bf --- /dev/null +++ b/test/util/hsa_rsrc_factory.h @@ -0,0 +1,234 @@ +/********************************************************************** +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted +provided that the following conditions are met: + +<95> Redistributions of source code must retain the above copyright notice, this list of +conditions and the following disclaimer. +<95> Redistributions in binary form must reproduce the above copyright notice, this list of +conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +********************************************************************/ + +#ifndef TEST_UTIL_HSA_RSRC_FACTORY_H_ +#define TEST_UTIL_HSA_RSRC_FACTORY_H_ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define HSA_ARGUMENT_ALIGN_BYTES 16 +#define HSA_QUEUE_ALIGN_BYTES 64 +#define HSA_PACKET_ALIGN_BYTES 64 + +#define CHECK_STATUS(msg, status) \ + if (status != HSA_STATUS_SUCCESS) { \ + const char* emsg = 0; \ + hsa_status_string(status, &emsg); \ + printf("%s: %s\n", msg, emsg ? emsg : ""); \ + exit(1); \ + } + +static const unsigned MEM_PAGE_BYTES = 0x1000; +static const unsigned MEM_PAGE_MASK = MEM_PAGE_BYTES - 1; + +// Encapsulates information about a Hsa Agent such as its +// handle, name, max queue size, max wavefront size, etc. +typedef struct { + // Handle of Agent + hsa_agent_t dev_id; + + // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2 + uint32_t dev_type; + + // Name of Agent whose length is less than 64 + char name[64]; + + // Max size of Wavefront size + uint32_t max_wave_size; + + // Max size of Queue buffer + uint32_t max_queue_size; + + // Hsail profile supported by agent + hsa_profile_t profile; + + // Memory region supporting kernel parameters + hsa_region_t coarse_region; + + // Memory region supporting kernel arguments + hsa_region_t kernarg_region; + +} AgentInfo; + +class HsaRsrcFactory { + public: + typedef std::recursive_mutex mutex_t; + + static HsaRsrcFactory* Create() { + std::lock_guard lck(mutex_); + if (HsaRsrcFactory::instance_ == NULL) { + HsaRsrcFactory::instance_ = new HsaRsrcFactory(); + } + return instance_; + } + + static void Destroy() { + std::lock_guard lck(mutex_); + if (instance_) delete instance_; + instance_ = NULL; + } + + static HsaRsrcFactory& Instance() { + hsa_status_t status = (instance_ != NULL) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; + CHECK_STATUS("HsaRsrcFactory::Instance()", status); + return *instance_; + } + + // Get the count of Hsa Gpu Agents available on the platform + // + // @return uint32_t Number of Gpu agents on platform + // + uint32_t GetCountOfGpuAgents(); + + // Get the count of Hsa Cpu Agents available on the platform + // + // @return uint32_t Number of Cpu agents on platform + // + uint32_t GetCountOfCpuAgents(); + + // Get the AgentInfo handle of a Gpu device + // + // @param idx Gpu Agent at specified index + // + // @param agent_info Output parameter updated with AgentInfo + // + // @return bool true if successful, false otherwise + // + bool GetGpuAgentInfo(uint32_t idx, AgentInfo** agent_info); + + // Get the AgentInfo handle of a Cpu device + // + // @param idx Cpu Agent at specified index + // + // @param agent_info Output parameter updated with AgentInfo + // + // @return bool true if successful, false otherwise + // + bool GetCpuAgentInfo(uint32_t idx, AgentInfo** agent_info); + + // Create a Queue object and return its handle. The queue object is expected + // to support user requested number of Aql dispatch packets. + // + // @param agent_info Gpu Agent on which to create a queue object + // + // @param num_Pkts Number of packets to be held by queue + // + // @param queue Output parameter updated with handle of queue object + // + // @return bool true if successful, false otherwise + // + bool CreateQueue(AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue); + + // Create a Signal object and return its handle. + // + // @param value Initial value of signal object + // + // @param signal Output parameter updated with handle of signal object + // + // @return bool true if successful, false otherwise + // + bool CreateSignal(uint32_t value, hsa_signal_t* signal); + + // Allocate memory for use by a kernel of specified size in specified + // agent's memory region. Currently supports Global segment whose Kernarg + // flag set. + // + // @param agent_info Agent from whose memory region to allocate + // + // @param size Size of memory in terms of bytes + // + // @return uint8_t* Pointer to buffer, null if allocation fails. + // + uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size); + + // Allocate memory tp pass kernel parameters. + // + // @param agent_info Agent from whose memory region to allocate + // + // @param size Size of memory in terms of bytes + // + // @return uint8_t* Pointer to buffer, null if allocation fails. + // + uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size); + + // Transfer data method + bool TransferData(void* dest_buff, void* src_buff, uint32_t length, bool host_to_dev); + + // Loads an Assembled Brig file and Finalizes it into Device Isa + // + // @param agent_info Gpu device for which to finalize + // + // @param brig_path File path of the Assembled Brig file + // + // @param kernel_name Name of the kernel to finalize + // + // @param code_desc Handle of finalized Code Descriptor that could + // be used to submit for execution + // + // @return bool true if successful, false otherwise + // + bool LoadAndFinalize(AgentInfo* agent_info, const char* brig_path, char* kernel_name, + hsa_executable_symbol_t* code_desc); + + // Add an instance of AgentInfo representing a Hsa Gpu agent + void AddAgentInfo(AgentInfo* agent_info, bool gpu); + + // Print the various fields of Hsa Gpu Agents + bool PrintGpuAgents(const std::string& header); + + private: + // Constructor of the class. Will initialize the Hsa Runtime and + // query the system topology to get the list of Cpu and Gpu devices + HsaRsrcFactory(); + + // Destructor of the class + ~HsaRsrcFactory(); + + static HsaRsrcFactory* instance_; + static mutex_t mutex_; + + // Used to maintain a list of Hsa Queue handles + std::vector queue_list_; + + // Used to maintain a list of Hsa Signal handles + std::vector signal_list_; + + // Used to maintain a list of Hsa Gpu Agent Info + std::vector gpu_list_; + + // Used to maintain a list of Hsa Cpu Agent Info + std::vector cpu_list_; +}; + +#endif // TEST_UTIL_HSA_RSRC_FACTORY_H_ diff --git a/test/util/perf_timer.cpp b/test/util/perf_timer.cpp new file mode 100644 index 0000000000..591e4d5801 --- /dev/null +++ b/test/util/perf_timer.cpp @@ -0,0 +1,181 @@ +/********************************************************************** +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted +provided that the following conditions are met: + +<95> Redistributions of source code must retain the above copyright notice, this list of +conditions and the following disclaimer. +<95> Redistributions in binary form must reproduce the above copyright notice, this list of +conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +********************************************************************/ + +#include "util/perf_timer.h" + +PerfTimer::PerfTimer() { freq_in_100mhz_ = MeasureTSCFreqHz(); } + +PerfTimer::~PerfTimer() { + while (!timers_.empty()) { + Timer* temp = timers_.back(); + timers_.pop_back(); + delete temp; + } +} + +// New cretaed timer instantance index will be returned +int PerfTimer::CreateTimer() { + Timer* newTimer = new Timer; + newTimer->start = 0; + newTimer->clocks = 0; + +#ifdef _WIN32 + QueryPerformanceFrequency((LARGE_INTEGER*)&newTimer->freq); +#else + newTimer->freq = (long long)1.0E3; +#endif + + /* Push back the address of new Timer instance created */ + timers_.push_back(newTimer); + return (int)(timers_.size() - 1); +} + +int PerfTimer::StartTimer(int index) { + if (index >= (int)timers_.size()) { + Error("Cannot reset timer. Invalid handle."); + return FAILURE; + } + +#ifdef _WIN32 +// General Windows timing method +#ifndef _AMD + long long tmpStart; + QueryPerformanceCounter((LARGE_INTEGER*)&(tmpStart)); + timers_[index]->start = (double)tmpStart; +#else +// AMD Windows timing method +#endif +#else +// General Linux timing method +#ifndef _AMD + struct timeval s; + gettimeofday(&s, 0); + timers_[index]->start = s.tv_sec * 1.0E3 + ((double)(s.tv_usec / 1.0E3)); +#else + // AMD timing method + unsigned int unused; + timers_[index]->start = __rdtscp(&unused); +#endif +#endif + + return SUCCESS; +} + + +int PerfTimer::StopTimer(int index) { + double n = 0; + if (index >= (int)timers_.size()) { + Error("Cannot reset timer. Invalid handle."); + return FAILURE; + } +#ifdef _WIN32 +#ifndef _AMD + long long n1; + QueryPerformanceCounter((LARGE_INTEGER*)&(n1)); + n = (double)n1; +#else +// AMD Window Timing +#endif + +#else +// General Linux timing method +#ifndef _AMD + struct timeval s; + gettimeofday(&s, 0); + n = s.tv_sec * 1.0E3 + (double)(s.tv_usec / 1.0E3); +#else + // AMD Linux timing + unsigned int unused; + n = __rdtscp(&unused); +#endif +#endif + + n -= timers_[index]->start; + timers_[index]->start = 0; + +#ifndef _AMD + timers_[index]->clocks += n; +#else + // timers_[index]->clocks += 10 * n / freq_in_100mhz_; // unit is ns + timers_[index]->clocks += 1.0E-6 * 10 * n / freq_in_100mhz_; // convert to ms +#endif + + return SUCCESS; +} + +void PerfTimer::Error(std::string str) { std::cout << str << std::endl; } + + +double PerfTimer::ReadTimer(int index) { + if (index >= (int)timers_.size()) { + Error("Cannot read timer. Invalid handle."); + return FAILURE; + } + + double reading = double(timers_[index]->clocks); + + reading = double(reading / timers_[index]->freq); + + return reading; +} + + +uint64_t PerfTimer::CoarseTimestampUs() { +#ifdef _WIN32 + uint64_t freqHz, ticks; + QueryPerformanceFrequency((LARGE_INTEGER*)&freqHz); + QueryPerformanceCounter((LARGE_INTEGER*)&ticks); + + // Scale numerator and divisor until (ticks * 1000000) fits in uint64_t. + while (ticks > (1ULL << 44)) { + ticks /= 16; + freqHz /= 16; + } + + return (ticks * 1000000) / freqHz; +#else + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + return uint64_t(ts.tv_sec) * 1000000 + ts.tv_nsec / 1000; +#endif +} + +uint64_t PerfTimer::MeasureTSCFreqHz() { + // Make a coarse interval measurement of TSC ticks for 1 gigacycles. + unsigned int unused; + uint64_t tscTicksEnd; + + uint64_t coarseBeginUs = CoarseTimestampUs(); + uint64_t tscTicksBegin = __rdtscp(&unused); + do { + tscTicksEnd = __rdtscp(&unused); + } while (tscTicksEnd - tscTicksBegin < 1000000000); + + uint64_t coarseEndUs = CoarseTimestampUs(); + + // Compute the TSC frequency and round to nearest 100MHz. + uint64_t coarseIntervalNs = (coarseEndUs - coarseBeginUs) * 1000; + uint64_t tscIntervalTicks = tscTicksEnd - tscTicksBegin; + return (tscIntervalTicks * 10 + (coarseIntervalNs / 2)) / coarseIntervalNs; +} diff --git a/test/util/perf_timer.h b/test/util/perf_timer.h new file mode 100644 index 0000000000..5c77246c1c --- /dev/null +++ b/test/util/perf_timer.h @@ -0,0 +1,85 @@ +/********************************************************************** +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted +provided that the following conditions are met: + +<95> Redistributions of source code must retain the above copyright notice, this list of +conditions and the following disclaimer. +<95> Redistributions in binary form must reproduce the above copyright notice, this list of +conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +********************************************************************/ + +#ifndef TEST_UTIL_PERF_TIMER_H_ +#define TEST_UTIL_PERF_TIMER_H_ + +// Will use AMD timer or general Linux timer based on compilation flag +// Need to consider platform is Windows or Linux + +#include +#include +#include + +#if defined(_MSC_VER) +#include +#include +#include +#else +#if defined(__GNUC__) +#include +#include +#endif // __GNUC__ +#endif // _MSC_VER + +#include +#include +#include + +class PerfTimer { + public: + enum { SUCCESS = 0, FAILURE = 1 }; + + PerfTimer(); + ~PerfTimer(); + + // General Linux timing method + int CreateTimer(); + int StartTimer(int index); + int StopTimer(int index); + + // retrieve time + double ReadTimer(int index); + // write into a file + double WriteTimer(int index); + + private: + struct Timer { + std::string name; /* name of time object */ + long long freq; /* frequency */ + double clocks; /* number of ticks at end */ + double start; /* start point ticks */ + }; + + std::vector timers_; /* vector to Timer objects */ + double freq_in_100mhz_; + + // AMD timing method + uint64_t CoarseTimestampUs(); + uint64_t MeasureTSCFreqHz(); + + void Error(std::string str); +}; + +#endif // TEST_UTIL_PERF_TIMER_H_ diff --git a/test/util/test_assert.h b/test/util/test_assert.h new file mode 100644 index 0000000000..101dc03434 --- /dev/null +++ b/test/util/test_assert.h @@ -0,0 +1,52 @@ +/****************************************************************************** + +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list +of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +*******************************************************************************/ + +#ifndef TEST_CTRL_TEST_ASSERT_H_ +#define TEST_CTRL_TEST_ASSERT_H_ + +#define TEST_ASSERT(cond) \ + { \ + if (!(cond)) { \ + std::cerr << "Assert failed(" << #cond << ") at " << __FILE__ << ", line " << __LINE__ \ + << std::endl; \ + exit(-1); \ + } \ + } + +#define TEST_STATUS(cond) \ + { \ + if (!(cond)) { \ + std::cerr << "Test error at " << __FILE__ << ", line " << __LINE__ \ + << std::endl; \ + const char* message; \ + rocprofiler_error_string(&message); \ + std::cerr << "ERROR: " << message << std::endl; \ + exit(-1); \ + } \ + } + +#endif // TEST_CTRL_TEST_ASSERT_H_ diff --git a/test/util/xml.h b/test/util/xml.h new file mode 100644 index 0000000000..ef84fb4e36 --- /dev/null +++ b/test/util/xml.h @@ -0,0 +1,221 @@ +#ifndef TEST_UTIL_XML_H_ +#define TEST_UTIL_XML_H_ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace xml { + +class Xml { + public: + typedef std::vector token_t; + struct level_t { + std::string tag; + std::vector nodes; + std::map opts; + }; + typedef std::vector nodes_vec_t; + + enum { + DECL_STATE, + BODY_STATE + }; + + Xml(const char* file_name) : + file_name_(file_name), + file_line_(0), + data_size_(0), + index_(0), + state_(BODY_STATE), + level_(NULL), + comment_(false) + { + AddLevel("top"); + + fd_ = open(file_name, O_RDONLY); + if (fd_ == -1) { + std::cout << "XML file not found: " << file_name << std::endl; + return; + } + + token_t remainder; + while (1) { + token_t token = (remainder.size()) ? remainder : NextToken(); + remainder.clear(); + // End of file + if (token.size() == 0) break; + +// token_t token1 = token; +// token1.push_back('\0'); +// std::cout << "> " << &token1[0] << std::endl; + + switch(state_) { + case BODY_STATE: + if (token[0] == '<') { + bool node_begin = true; + unsigned ind = 1; + if (token[1] == '/') { + node_begin = false; + ++ind; + } + + unsigned i = ind; + while (i < token.size()) { if (token[i] == '>') break; ++i; } + for (unsigned j = i + 1; j < token.size(); ++j) remainder.push_back(token[j]); + + if (i == token.size()) { + if (node_begin) state_ = DECL_STATE; + else BadFormat(token); + token.push_back('\0'); + } else token[i] = '\0'; + + const char* tag = strdup(&token[ind]); + if (node_begin) { + AddLevel(tag); + } else { + if (strncmp(CurrentLevel().c_str(), tag, strlen(tag))) { + token.back() = '>'; + BadFormat(token); + } + UpLevel(); + } + } else BadFormat(token); + break; + case DECL_STATE: + if (token[0] == '>') { + state_ = BODY_STATE; + for (unsigned j = 1; j < token.size(); ++j) remainder.push_back(token[j]); + continue; + } else { + token.push_back('\0'); + unsigned j = 0; + for (j = 0; j < token.size(); ++j) if (token[j] == '=') break; + if (j == token.size()) BadFormat(token); + token[j] = '\0'; + const char* key = &token[0]; + const char* value = &token[j + 1]; + AddOption(key, value); + } + break; + default: + std::cout << "Wrong state: " << state_ << std::endl; + exit(1); + } + } + } + + std::vector GetNodes(std::string global_tag) { + return map_[global_tag]; + } + + void Print() const { + for(auto& elem : map_) { + for (auto node : elem.second) { + if (node->opts.size()) { + std::cout << elem.first << ":" << std::endl; + for (auto& opt : node->opts) { + std::cout << " " << opt.first << " = " << opt.second << std::endl; + } + } + } + } + } + + private: + bool LineEndCheck() { + bool found = false; + if (buffer_[index_] == '\n') { + buffer_[index_] = ' '; + ++file_line_; + found = true; + comment_ = false; + } else if (comment_ || (buffer_[index_] == '#')) { + found = true; + comment_ = true; + } + return found; + } + + token_t NextToken() { + token_t token; + + while (1) { + if (data_size_ == 0) { + data_size_ = read(fd_, buffer_, buf_size_); + if (data_size_ <= 0) break; + } + if (token.empty()) while ((index_ < data_size_) && ((buffer_[index_] == ' ') || LineEndCheck())) { + ++index_; + } + while ((index_ < data_size_) && (buffer_[index_] != ' ') && !LineEndCheck()) { + token.push_back(buffer_[index_++]); + } + if (index_ == data_size_) { + index_ = 0; + data_size_ = 0; + } else break; + } + + return token; + } + + void BadFormat(token_t token) { + token.push_back('\0'); + std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '" << &token[0] << "'" << std::endl; + exit(1); + } + + void AddLevel(const std::string& tag) { + level_t* level = new level_t; + level->tag = tag; + if (level_) { + level_->nodes.push_back(level); + stack_.push_back(level_); + } + level_ = level; + + std::string global_tag; + for (level_t* level : stack_) { global_tag += level->tag + "."; } + global_tag += tag; + map_[global_tag].push_back(level_); + } + + void UpLevel() { + level_ = stack_.back(); + stack_.pop_back(); + } + + std::string CurrentLevel() const { + return level_->tag; + } + + void AddOption(const std::string& key, const std::string& value) { + level_->opts[key] = value; + } + + const char* file_name_; + unsigned file_line_; + int fd_; + static const unsigned buf_size_ = 256; + char buffer_[buf_size_]; + unsigned data_size_; + unsigned index_; + unsigned state_; + level_t* level_; + std::vector stack_; + std::map map_; + bool comment_; +}; + +} // namespace xml + +#endif // TEST_UTIL_XML_H_