moving hsa-amd-aqlprofile to ssh://gerritgit/hsa/ec/aqlprofile
Change-Id: Ic42752ca41f877db02aa5a5d8d617cd67cce8956
This commit is contained in:
@@ -1,30 +0,0 @@
|
||||
#
|
||||
# Minimum version of cmake required
|
||||
#
|
||||
cmake_minimum_required ( VERSION 3.5.0 )
|
||||
|
||||
#
|
||||
# Setup flag to be verbose or not
|
||||
#
|
||||
set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE )
|
||||
|
||||
set ( ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
|
||||
set ( PROJ_DIR ${ROOT_DIR}/src )
|
||||
set ( TEST_DIR ${ROOT_DIR}/test )
|
||||
|
||||
#
|
||||
# Build sources
|
||||
#
|
||||
include ( ${PROJ_DIR}/CMakeLists.txt )
|
||||
|
||||
#
|
||||
# Build tests
|
||||
#
|
||||
add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test )
|
||||
|
||||
#
|
||||
# Style format
|
||||
#
|
||||
# Runs clang-format (style taken from the nearest .clang-format file) over every
# '*.cpp' file found under PROJ_DIR and TEST_DIR.
# NOTE(review): execute_process runs while CMake configures, and clang-format is
# invoked with -i, so this command (like the two that follow it for '*.hpp' and
# '*.h') rewrites files in the source tree as a side effect of configuring.
# The tool paths /usr/bin/find and /usr/bin/clang-format are hard-coded and the
# command result is not checked.
execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} -name '*.cpp' -exec /usr/bin/clang-format -i -style=file \{\} \;" )
|
||||
execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} -name '*.hpp' -exec /usr/bin/clang-format -i -style=file \{\} \;" )
|
||||
execute_process ( COMMAND sh -xc "/usr/bin/find ${PROJ_DIR} ${TEST_DIR} -name '*.h' -exec /usr/bin/clang-format -i -style=file \{\} \;" )
|
||||
@@ -1,44 +0,0 @@
|
||||
HSA extension AMD AQL profile library.
|
||||
Provides AQL packets helper methods for
|
||||
perfcounters (PMC) and SQ threadtraces (SQTT).
|
||||
|
||||
Current library implementation supports only GFX9.
|
||||
The library source tree:
|
||||
- doc - Documentation, the API specification and the presentation
|
||||
- inc - Public API
|
||||
- hsa_ven_amd_aqlprofile.h - AMD AQL profile library public API
|
||||
- src - AMD AQL profile library sources
|
||||
- core - the library sources
|
||||
- commandwriter - PM4 command writer originated from 'hsa-runtime/tools'
|
||||
- perfcounter - PM4 perfcounter manager originated from 'hsa-runtime/tools'
|
||||
- threadtrace - PM4 threadtrace manager originated from 'hsa-runtime/tools'
|
||||
- test - the library test suite
|
||||
- ctrl - Test control
|
||||
- util - Test utils
|
||||
- SimpleConvolution - Simple convolution test
|
||||
|
||||
To build the library:
|
||||
|
||||
$ cd .../hsa-amd-aqlprofile
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ cmake ..
|
||||
$ make
|
||||
|
||||
To run the test:
|
||||
|
||||
$ cd .../hsa-amd-aqlprofile/build
|
||||
$ export LD_LIBRARY_PATH=$PWD
|
||||
$ ./test/ctrl
|
||||
|
||||
To enable PMC profiling:
|
||||
|
||||
$ export ROCR_ENABLE_PMC=1
|
||||
|
||||
To enable SQTT profiling:
|
||||
|
||||
$ export ROCR_ENABLE_SQTT=1
|
||||
|
||||
Or to use the script:
|
||||
|
||||
$ ./run.sh
|
||||
@@ -1,66 +0,0 @@
|
||||
#
|
||||
# Compiler Preprocessor definitions.
|
||||
#
|
||||
# Directory-scoped preprocessor symbols, applied to every target configured
# after this point. HSA_LARGE_MODEL and HSA_DEPRECATED are defined with an
# empty value.
add_definitions (
  -D__linux__
  -DUNIX_OS
  -DLINUX
  -D__AMD64__
  -D__x86_64__
  -DAMD_INTERNAL_BUILD
  -DLITTLEENDIAN_CPU=1
  -DHSA_LARGE_MODEL=
  -DHSA_DEPRECATED=
)
|
||||
|
||||
#
|
||||
# Linux Compiler options
|
||||
#
|
||||
# Reset the C++ flag string to the language standard; any value that
# CMAKE_CXX_FLAGS held before this line is discarded.
set ( CMAKE_CXX_FLAGS "-std=c++11" )

# Append the remaining options in their original order. Each fragment carries
# its own leading separator so the resulting string is exactly the one the
# step-by-step concatenation produced.
string ( APPEND CMAKE_CXX_FLAGS
  # Warnings are errors, with selected diagnostics demoted or silenced below.
  " -Werror"
  " -Werror=return-type"
  " -fexceptions"
  " -fvisibility=hidden"
  " -Wno-error=sign-compare"
  " -Wno-error=enum-compare"
  " -Wno-error=comment "
  " -Wno-error=pointer-arith"
  " -Wno-comment"
  " -Wno-sign-compare"
  " -Wno-pointer-arith"
  " -Wno-write-strings"
  " -Wno-conversion-null"
  " -Wno-deprecated-declarations"
  # Code-generation options.
  " -fno-rtti"
  " -fno-math-errno"
  " -fno-threadsafe-statics"
  " -fms-extensions"
  " -fmerge-all-constants"
  " -fPIC"
)
|
||||
|
||||
#
|
||||
# Extend Compiler flags based on build type
|
||||
#
|
||||
# Copy the project-level BUILD_TYPE into CMake's own CMAKE_BUILD_TYPE.
# BUILD_TYPE is expected to have been set before this file is processed.
set ( CMAKE_BUILD_TYPE ${BUILD_TYPE} )

# Debug builds additionally request GDB-oriented debug information.
# The literal is quoted so if() compares against the string "Debug" and can
# never dereference a variable that happens to be named Debug.
if ( "${CMAKE_BUILD_TYPE}" STREQUAL "Debug" )
  set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb" )
endif ()
|
||||
|
||||
#
|
||||
# Extend Compiler flags based on Processor architecture
|
||||
#
|
||||
# Pick the target word size from the processor CMake reports:
# "x86_64" gets a 64-bit build with SSE/SSE2, "x86" gets a 32-bit build,
# any other value leaves the flags untouched.
if ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" )
  string ( APPEND CMAKE_CXX_FLAGS " -m64 -msse -msse2" )
elseif ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86" )
  string ( APPEND CMAKE_CXX_FLAGS " -m32" )
endif ()
|
||||
|
||||
#
|
||||
# Basic Tool Chain Information
|
||||
#
|
||||
message ( "-------------IS64BIT: " ${IS64BIT} )
|
||||
message ( "-----------BuildType: " ${BUILD_TYPE} )
|
||||
message ( " -----------Compiler: " ${CMAKE_CXX_COMPILER} )
|
||||
message ( " ------------Version: " ${CMAKE_CXX_COMPILER_VERSION} )
|
||||
message ( " ------------ProjDir: " ${PROJ_DIR} )
|
||||
# Report the test directory (previously printed PROJ_DIR under this label).
message ( " ------------TestDir: " ${TEST_DIR} )
|
||||
message ( "------HSA-RuntimeDir: " ${HSA_RUNTIME_DIR} )
|
||||
message ( " -----------CoreUtil: " ${CORE_UTIL_DIR} )
|
||||
@@ -1,52 +0,0 @@
|
||||
#
|
||||
# Build is not supported on the Windows platform
|
||||
#
|
||||
if ( WIN32 )
|
||||
message ( FATAL_ERROR "Windows build is not supported." )
|
||||
endif ()
|
||||
|
||||
#
|
||||
# External dependencies for Rocr Header files
|
||||
#
|
||||
# ROCR_INC_DIR must be present in the environment when CMake configures.
# message(FATAL_ERROR) stops processing on its own, so no return() follows it.
if ( NOT DEFINED ENV{ROCR_INC_DIR} )
  message ( FATAL_ERROR "ERROR: Environment variable ROCR_INC_DIR is not set" )
endif ()
|
||||
|
||||
#
|
||||
# External dependencies for Rocr Library files
|
||||
#
|
||||
# ROCR_LIB_DIR must be present in the environment when CMake configures.
# message(FATAL_ERROR) stops processing on its own, so no return() follows it.
if ( NOT DEFINED ENV{ROCR_LIB_DIR} )
  message ( FATAL_ERROR "ERROR: Environment variable ROCR_LIB_DIR is not set" )
endif ()
|
||||
|
||||
#
|
||||
# Process Env to determine build type
|
||||
#
|
||||
# Derive the build type from the ROCR_BLD_TYPE environment variable, matched
# case-insensitively: "debug" selects Debug; any other value, or an unset
# variable, selects Release.
string ( TOLOWER "$ENV{ROCR_BLD_TYPE}" type )

# The literal is quoted so if() compares against the string "debug" and can
# never dereference a variable that happens to be named debug.
if ( "${type}" STREQUAL "debug" )
  set ( ISDEBUG 1 )
  set ( BUILD_TYPE "Debug" )
else ()
  set ( ISDEBUG 0 )
  set ( BUILD_TYPE "Release" )
endif ()
|
||||
|
||||
#
|
||||
# Determine build is 32-bit or 64-bit
|
||||
# @note: By default it is not set
|
||||
#
|
||||
# Select a 32-bit build only when ROCR_BLD_BITS is exactly "32"; any other
# value, or an unset variable, selects a 64-bit build.
# ONLY64STR is "64" for 64-bit builds and empty for 32-bit builds.
# The literal is quoted so if() always treats it as a plain string.
if ( "$ENV{ROCR_BLD_BITS}" STREQUAL "32" )
  set ( ONLY64STR "" )
  set ( IS64BIT 0 )
else ()
  set ( ONLY64STR "64" )
  set ( IS64BIT 1 )
endif ()
|
||||
|
||||
#
|
||||
# Build information
|
||||
#
|
||||
message ( "---------ROCR-HdrDir: " $ENV{ROCR_INC_DIR} )
|
||||
message ( "---------ROCR-LibDir: " $ENV{ROCR_LIB_DIR} )
|
||||
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -1,954 +0,0 @@
|
||||
#ifndef hainan____GPU_FEATURES_H__
|
||||
#define hainan____GPU_FEATURES_H__
|
||||
#define hainan__GPU__BIF__VC_PRESENT 0
|
||||
#define hainan__GPU__BIF__VC_PRESENT__0 1
|
||||
#define hainan__GPU__BIF__PCIEGEN2_MCB_DEPTH 96
|
||||
#define hainan__GPU__BIF__PCIEGEN2_MCB_DEPTH__96 1
|
||||
#define hainan__GPU__BIF__CLKBUF_PRESENT 1
|
||||
#define hainan__GPU__BIF__CLKBUF_PRESENT__1 1
|
||||
#define hainan__GPU__XSP__PRESENT 0
|
||||
#define hainan__GPU__XSP__PRESENT__0 1
|
||||
#define hainan__GPU__CHIP__DFS 1
|
||||
#define hainan__GPU__CHIP__DFS__1 1
|
||||
#define hainan__GPU__CHIP__TECH tsmc28hp
|
||||
#define hainan__GPU__CHIP__TECH__TSMC28HP 1
|
||||
#define hainan__GPU__CHIP__TECHVER B .0.0
|
||||
#define hainan__GPU__CHIP__TECHVER__B_0_0 1
|
||||
#define hainan__TOOLS__GUTS__TECHNM tsmc28hp
|
||||
#define hainan__TOOLS__GUTS__TECHNM__TSMC28HP 1
|
||||
#define hainan__TOOLS__GUTS__MEMTECH 28nm
|
||||
#define hainan__TOOLS__GUTS__MEMTECH__28NM 1
|
||||
#define hainan__TOOLS__GUTS__LARRVENDOR AMD
|
||||
#define hainan__TOOLS__GUTS__LARRVENDOR__AMD 1
|
||||
#define hainan__TOOLS__GUTS__MEMFABTECH TSMC28
|
||||
#define hainan__TOOLS__GUTS__MEMFABTECH__TSMC28 1
|
||||
#define hainan__TOOLS__GUTS__MEMVENDOR Virage
|
||||
#define hainan__TOOLS__GUTS__MEMVENDOR__VIRAGE 1
|
||||
#define hainan__TOOLS__GUTS__MEMTYPE slow
|
||||
#define hainan__TOOLS__GUTS__MEMTYPE__SLOW 1
|
||||
#define hainan__TOOLS__GUTS__MEMVER 1_0
|
||||
#define hainan__TOOLS__GUTS__MEMVER__1_0 1
|
||||
#define hainan__TOOLS__GUTS__LARRTYPE default
|
||||
#define hainan__TOOLS__GUTS__LARRTYPE__DEFAULT 1
|
||||
#define hainan__TOOLS__GUTS__LARRVER 0_6han
|
||||
#define hainan__TOOLS__GUTS__LARRVER__0_6HAN 1
|
||||
#define hainan__TOOLS__GUTS__TECHVER B .0.0
|
||||
#define hainan__TOOLS__GUTS__TECHVER__B_0_0 1
|
||||
#define hainan__TOOLS__GUTS__MEMVIEWVER 0_1
|
||||
#define hainan__TOOLS__GUTS__MEMVIEWVER__0_1 1
|
||||
#define hainan__GPU__CHIP__MEMTECH 28nm
|
||||
#define hainan__GPU__CHIP__MEMTECH__28NM 1
|
||||
#define hainan__GPU__CHIP__MEMVIEWVER 0_1
|
||||
#define hainan__GPU__CHIP__MEMVIEWVER__0_1 1
|
||||
#define hainan__GPU__CHIP__MEM virage
|
||||
#define hainan__GPU__CHIP__MEM__VIRAGE 1
|
||||
#define hainan__GPU__CHIP__MEMVENDOR Virage
|
||||
#define hainan__GPU__CHIP__MEMVENDOR__VIRAGE 1
|
||||
#define hainan__GPU__CHIP__SRAM_MEMFABTECH TSMC28
|
||||
#define hainan__GPU__CHIP__SRAM_MEMFABTECH__TSMC28 1
|
||||
#define hainan__GPU__CHIP__LARR_MEMWRAPPERVER 0_1
|
||||
#define hainan__GPU__CHIP__LARR_MEMWRAPPERVER__0_1 1
|
||||
#define hainan__GPU__CHIP__SRAM_MEMWRAPPERVER 0_1
|
||||
#define hainan__GPU__CHIP__SRAM_MEMWRAPPERVER__0_1 1
|
||||
#define hainan__GPU__CHIP__SRAM_TIMING slow
|
||||
#define hainan__GPU__CHIP__SRAM_TIMING__SLOW 1
|
||||
#define hainan__GPU__CHIP__SRAM_MEMVER 1_0_1
|
||||
#define hainan__GPU__CHIP__SRAM_MEMVER__1_0_1 1
|
||||
#define hainan__GPU__CHIP__LARRVENDOR AMD
|
||||
#define hainan__GPU__CHIP__LARRVENDOR__AMD 1
|
||||
#define hainan__GPU__CHIP__LARR_MEMFABTECH TSMC28
|
||||
#define hainan__GPU__CHIP__LARR_MEMFABTECH__TSMC28 1
|
||||
#define hainan__GPU__CHIP__LARR_TIMING default
|
||||
#define hainan__GPU__CHIP__LARR_TIMING__DEFAULT 1
|
||||
#define hainan__GPU__CHIP__LARR_MEMVER 0_6han
|
||||
#define hainan__GPU__CHIP__LARR_MEMVER__0_6HAN 1
|
||||
#define hainan__GPU__CHIP__MEMFABTECH TSMC28
|
||||
#define hainan__GPU__CHIP__MEMFABTECH__TSMC28 1
|
||||
#define hainan__GPU__CHIP__MEMVER 1_0
|
||||
#define hainan__GPU__CHIP__MEMVER__1_0 1
|
||||
#define hainan__GPU__CHIP__MEMTYPE slow
|
||||
#define hainan__GPU__CHIP__MEMTYPE__SLOW 1
|
||||
#define hainan__GPU__CHIP__LARRVER 0_6han
|
||||
#define hainan__GPU__CHIP__LARRVER__0_6HAN 1
|
||||
#define hainan__GPU__CHIP__LARRTYPE default
|
||||
#define hainan__GPU__CHIP__LARRTYPE__DEFAULT 1
|
||||
#define hainan__GPU__CHIP__TILES_PRESENT 0
|
||||
#define hainan__GPU__CHIP__TILES_PRESENT__0 1
|
||||
#define hainan__GPU__CHIP__SMSGCOUNT 2
|
||||
#define hainan__GPU__CHIP__SMSGCOUNT__2 1
|
||||
#define hainan__GPU__CHIP__SMSG_0_PRESENT 1
|
||||
#define hainan__GPU__CHIP__SMSG_0_PRESENT__1 1
|
||||
#define hainan__GPU__CHIP__SMSG_1_PRESENT 1
|
||||
#define hainan__GPU__CHIP__SMSG_1_PRESENT__1 1
|
||||
#define hainan__GPU__CHIP__SMSG_2_PRESENT 0
|
||||
#define hainan__GPU__CHIP__SMSG_2_PRESENT__0 1
|
||||
#define hainan__GPU__CHIP__SMSG_3_PRESENT 0
|
||||
#define hainan__GPU__CHIP__SMSG_3_PRESENT__0 1
|
||||
#define hainan__GPU__CHIP__SMSG_FOR_BL 1
|
||||
#define hainan__GPU__CHIP__SMSG_FOR_BL__1 1
|
||||
#define hainan__GPU__CHIP__SMSG_FOR_TR 0
|
||||
#define hainan__GPU__CHIP__SMSG_FOR_TR__0 1
|
||||
#define hainan__GPU__CHIP__TCB_DEPTH 512
|
||||
#define hainan__GPU__CHIP__TCB_DEPTH__512 1
|
||||
#define hainan__GPU__CHIP__XCLK_MHZ 25
|
||||
#define hainan__GPU__CHIP__XCLK_MHZ__25 1
|
||||
#define hainan__GPU__LBIST__PRESENT 0
|
||||
#define hainan__GPU__LBIST__PRESENT__0 1
|
||||
#define hainan__GPU__CHIP__BACO 1
|
||||
#define hainan__GPU__CHIP__BACO__1 1
|
||||
#define hainan__GPU__CEC__PRESENT 1
|
||||
#define hainan__GPU__CEC__PRESENT__1 1
|
||||
#define hainan__GPU__CHIP__REAL_RDL_READY 1
|
||||
#define hainan__GPU__CHIP__REAL_RDL_READY__1 1
|
||||
#define hainan__GPU__CHIP__INFERRED_REPS 1
|
||||
#define hainan__GPU__CHIP__INFERRED_REPS__1 1
|
||||
#define hainan__GPU__CHIP__DRMDMA_POWERGATE 0
|
||||
#define hainan__GPU__CHIP__DRMDMA_POWERGATE__0 1
|
||||
#define hainan__GPU__CHIP__EDCMEM1 0
|
||||
#define hainan__GPU__CHIP__EDCMEM1__0 1
|
||||
#define hainan__GPU__CHIP__POWERGATE 0
|
||||
#define hainan__GPU__CHIP__POWERGATE__0 1
|
||||
#define hainan__GPU__THM__CMON_PRESENT 1
|
||||
#define hainan__GPU__THM__CMON_PRESENT__1 1
|
||||
#define hainan__GPU__TMON0__LEFT_NUM_RDI 6
|
||||
#define hainan__GPU__TMON0__LEFT_NUM_RDI__6 1
|
||||
#define hainan__GPU__TMON0__RIGHT_NUM_RDI 6
|
||||
#define hainan__GPU__TMON0__RIGHT_NUM_RDI__6 1
|
||||
#define hainan__GPU__DFT__IBIZA_TMON 1
|
||||
#define hainan__GPU__DFT__IBIZA_TMON__1 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL 17
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL__17 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_LS 0
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_LS__0 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_DS_D 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_DS_D__1 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_DS_M 2
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_DS_M__2 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_SD_D 3
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_SD_D__3 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_SD_M 4
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_SD_M__4 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_DS 5
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_DS__5 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_SD 6
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_SD__6 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_FISO 7
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_FISO__7 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_START 8
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_START__8 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_END 16
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_END__16 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_START 8
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_START__8 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_END 30
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_END__30 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RME 8
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RME__8 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RM_START 9
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RM_START__9 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RM_END 10
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RM_END__10 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RME 11
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RME__11 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RM_START 12
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RM_START__12 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RM_END 13
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RM_END__13 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RME 14
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RME__14 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RM_START 15
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RM_START__15 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RM_END 16
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RM_END__16 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RME 8
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RME__8 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RM_START 9
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RM_START__9 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RM_END 17
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RM_END__17 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RME 18
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RME__18 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RM_START 19
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RM_START__19 1
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RM_END 30
|
||||
#define hainan__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RM_END__30 1
|
||||
#define hainan__GPU__TSS__NUM_TILES 5
|
||||
#define hainan__GPU__TSS__NUM_TILES__5 1
|
||||
#define hainan__GPU__TSS__TSS0_TILE 1
|
||||
#define hainan__GPU__TSS__TSS0_TILE__1 1
|
||||
#define hainan__GPU__TSS__TSS1_TILE 1
|
||||
#define hainan__GPU__TSS__TSS1_TILE__1 1
|
||||
#define hainan__GPU__TSS__TSS2_TILE 1
|
||||
#define hainan__GPU__TSS__TSS2_TILE__1 1
|
||||
#define hainan__GPU__TSS__TSS3_TILE 1
|
||||
#define hainan__GPU__TSS__TSS3_TILE__1 1
|
||||
#define hainan__GPU__TSS__TSS4_TILE 1
|
||||
#define hainan__GPU__TSS__TSS4_TILE__1 1
|
||||
#define hainan__GPU__TSS__TSS4_AS_ADC 1
|
||||
#define hainan__GPU__TSS__TSS4_AS_ADC__1 1
|
||||
#define hainan__GPU__RCU__PROGRAMMABLE_RMBITS 1
|
||||
#define hainan__GPU__RCU__PROGRAMMABLE_RMBITS__1 1
|
||||
#define hainan__GPU__CGTT_TILE__PDLY 1
|
||||
#define hainan__GPU__CGTT_TILE__PDLY__1 1
|
||||
#define hainan__GPU__PDLY_TILE__PDLY 1
|
||||
#define hainan__GPU__PDLY_TILE__PDLY__1 1
|
||||
#define hainan__GPU__PDLY_TILE__CLKGATE 0
|
||||
#define hainan__GPU__PDLY_TILE__CLKGATE__0 1
|
||||
#define hainan__GPU__CG__SMC_SCRATCH_REGS 1
|
||||
#define hainan__GPU__CG__SMC_SCRATCH_REGS__1 1
|
||||
#define hainan__GPU__CG__CG_DLL_PDNB 1
|
||||
#define hainan__GPU__CG__CG_DLL_PDNB__1 1
|
||||
#define hainan__GPU__SMU__USE_HW_VBI 1
|
||||
#define hainan__GPU__SMU__USE_HW_VBI__1 1
|
||||
#define hainan__GPU__SMU__NUM_CAC_MGR_4 1
|
||||
#define hainan__GPU__SMU__NUM_CAC_MGR_4__1 1
|
||||
#define hainan__GPU__PDMA__PRESENT 0
|
||||
#define hainan__GPU__PDMA__PRESENT__0 1
|
||||
#define hainan__GPU__DRMDMA__DUAL_DRMDMA_PRESENT 1
|
||||
#define hainan__GPU__DRMDMA__DUAL_DRMDMA_PRESENT__1 1
|
||||
#define hainan__GPU__DRM__BGAES_OFF 1
|
||||
#define hainan__GPU__DRM__BGAES_OFF__1 1
|
||||
#define hainan__GPU__DLB__SLEW 1
|
||||
#define hainan__GPU__DLB__SLEW__1 1
|
||||
#define hainan__GPU__ROM__EXT_CS_EN 1
|
||||
#define hainan__GPU__ROM__EXT_CS_EN__1 1
|
||||
#define hainan__GPU__CPL__GPIO_23_PRESENT 0
|
||||
#define hainan__GPU__CPL__GPIO_23_PRESENT__0 1
|
||||
#define hainan__GPU__CPL__GPIO_24_PRESENT 0
|
||||
#define hainan__GPU__CPL__GPIO_24_PRESENT__0 1
|
||||
#define hainan__GPU__CPL__GPIO_25_PRESENT 0
|
||||
#define hainan__GPU__CPL__GPIO_25_PRESENT__0 1
|
||||
#define hainan__GPU__CPL__GPIO_26_PRESENT 0
|
||||
#define hainan__GPU__CPL__GPIO_26_PRESENT__0 1
|
||||
#define hainan__GPU__CPL__GPIO_27_PRESENT 0
|
||||
#define hainan__GPU__CPL__GPIO_27_PRESENT__0 1
|
||||
#define hainan__GPU__CPL__MLPS_0_PRESENT 1
|
||||
#define hainan__GPU__CPL__MLPS_0_PRESENT__1 1
|
||||
#define hainan__GPU__CPL__MLPS_1_PRESENT 1
|
||||
#define hainan__GPU__CPL__MLPS_1_PRESENT__1 1
|
||||
#define hainan__GPU__CPL__MLPS_2_PRESENT 1
|
||||
#define hainan__GPU__CPL__MLPS_2_PRESENT__1 1
|
||||
#define hainan__GPU__CPL__MLPS_3_PRESENT 1
|
||||
#define hainan__GPU__CPL__MLPS_3_PRESENT__1 1
|
||||
#define hainan__GPU__CPL__SX_0_PRESENT 1
|
||||
#define hainan__GPU__CPL__SX_0_PRESENT__1 1
|
||||
#define hainan__GPU__SMC__TAP_FED_PRESENT 1
|
||||
#define hainan__GPU__SMC__TAP_FED_PRESENT__1 1
|
||||
#define hainan__GPU__CPL__PG_CODE_ENABLE 1
|
||||
#define hainan__GPU__CPL__PG_CODE_ENABLE__1 1
|
||||
#define hainan__GPU__CPL__PG_CODE_GPG 1
|
||||
#define hainan__GPU__CPL__PG_CODE_GPG__1 1
|
||||
#define hainan__GPU__AVP__MC_IF 1
|
||||
#define hainan__GPU__AVP__MC_IF__1 1
|
||||
#define hainan__GPU__AVP__UVD_RLC_CMC_IF 1
|
||||
#define hainan__GPU__AVP__UVD_RLC_CMC_IF__1 1
|
||||
#define hainan__GPU__DC__TMDS_LINK tmds_link_dual
|
||||
#define hainan__GPU__DC__TMDS_LINK__TMDS_LINK_DUAL 1
|
||||
#define hainan__GPU__DC__NUM_DDC_PAIRS 6
|
||||
#define hainan__GPU__DC__NUM_DDC_PAIRS__6 1
|
||||
#define hainan__GPU__DC__NUM_DDC_PAIRS__0_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_DDC_PAIRS__1_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_DDC_PAIRS__2_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_DDC_PAIRS__3_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_DDC_PAIRS__4_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_DDC_PAIRS__5_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_HPD 6
|
||||
#define hainan__GPU__DC__NUM_HPD__6 1
|
||||
#define hainan__GPU__DC__NUM_HPD__0_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_HPD__1_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_HPD__2_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_HPD__3_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_HPD__4_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_HPD__5_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_PIPE_PAIRS 3
|
||||
#define hainan__GPU__DC__NUM_PIPE_PAIRS__3 1
|
||||
#define hainan__GPU__DC__NUM_PIPE_PAIRS__0_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_PIPE_PAIRS__1_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_PIPE_PAIRS__2_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_PIPES 6
|
||||
#define hainan__GPU__DC__NUM_PIPES__6 1
|
||||
#define hainan__GPU__DC__NUM_PIPES__0_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_PIPES__1_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_PIPES__2_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_PIPES__3_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_PIPES__4_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_PIPES__5_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_DIG 6
|
||||
#define hainan__GPU__DC__NUM_DIG__6 1
|
||||
#define hainan__GPU__DC__NUM_DIG__0_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_DIG__1_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_DIG__2_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_DIG__3_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_DIG__4_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_DIG__5_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_AUX 6
|
||||
#define hainan__GPU__DC__NUM_AUX__6 1
|
||||
#define hainan__GPU__DC__NUM_AUX__0_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_AUX__1_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_AUX__2_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_AUX__3_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_AUX__4_PRESENT 1
|
||||
#define hainan__GPU__DC__NUM_AUX__5_PRESENT 1
|
||||
#define hainan__GPU__DISPPLL__MACRO walden
|
||||
#define hainan__GPU__DISPPLL__MACRO__WALDEN 1
|
||||
#define hainan__GPU__TMDPA__MACRO walden
|
||||
#define hainan__GPU__TMDPA__MACRO__WALDEN 1
|
||||
#define hainan__GPU__TMDPB__MACRO walden
|
||||
#define hainan__GPU__TMDPB__MACRO__WALDEN 1
|
||||
#define hainan__GPU__LVTMDP__MACRO walden
|
||||
#define hainan__GPU__LVTMDP__MACRO__WALDEN 1
|
||||
#define hainan__GPU__DACA__MACRO walden
|
||||
#define hainan__GPU__DACA__MACRO__WALDEN 1
|
||||
#define hainan__GPU__DACB__MACRO walden
|
||||
#define hainan__GPU__DACB__MACRO__WALDEN 1
|
||||
#define hainan__GPU__DC__VIP_PRESENT 1
|
||||
#define hainan__GPU__DC__VIP_PRESENT__1 1
|
||||
#define hainan__GPU__DC__ABM_PRESENT 1
|
||||
#define hainan__GPU__DC__ABM_PRESENT__1 1
|
||||
#define hainan__GPU__DC__DMCU_PRESENT 1
|
||||
#define hainan__GPU__DC__DMCU_PRESENT__1 1
|
||||
#define hainan__GPU__DC__DVO_PRESENT 1
|
||||
#define hainan__GPU__DC__DVO_PRESENT__1 1
|
||||
#define hainan__GPU__DC__SDVO_PRESENT 1
|
||||
#define hainan__GPU__DC__SDVO_PRESENT__1 1
|
||||
#define hainan__GPU__DC__LVDS_PRESENT 1
|
||||
#define hainan__GPU__DC__LVDS_PRESENT__1 1
|
||||
#define hainan__GPU__UNIPHYAB__PRESENT 1
|
||||
#define hainan__GPU__UNIPHYAB__PRESENT__1 1
|
||||
#define hainan__GPU__UNIPHYCD__PRESENT 1
|
||||
#define hainan__GPU__UNIPHYCD__PRESENT__1 1
|
||||
#define hainan__GPU__UNIPHYEF__PRESENT 1
|
||||
#define hainan__GPU__UNIPHYEF__PRESENT__1 1
|
||||
#define hainan__GPU__UNIPHYAB__TYPE lvtmdp
|
||||
#define hainan__GPU__UNIPHYAB__TYPE__LVTMDP 1
|
||||
#define hainan__GPU__UNIPHYCD__TYPE tmdpa
|
||||
#define hainan__GPU__UNIPHYCD__TYPE__TMDPA 1
|
||||
#define hainan__GPU__UNIPHYEF__TYPE tmdpb
|
||||
#define hainan__GPU__UNIPHYEF__TYPE__TMDPB 1
|
||||
#define hainan__GPU__UNIPHYAB__LVTMDP 1
|
||||
#define hainan__GPU__UNIPHYAB__LVTMDP__1 1
|
||||
#define hainan__GPU__DC__DACA_PRESENT 1
|
||||
#define hainan__GPU__DC__DACA_PRESENT__1 1
|
||||
#define hainan__GPU__DC__DACB_PRESENT 1
|
||||
#define hainan__GPU__DC__DACB_PRESENT__1 1
|
||||
#define hainan__GPU__DC__TVOUT_PRESENT 1
|
||||
#define hainan__GPU__DC__TVOUT_PRESENT__1 1
|
||||
#define hainan__GPU__DC__MVP_PRESENT 1
|
||||
#define hainan__GPU__DC__MVP_PRESENT__1 1
|
||||
#define hainan__GPU__DC__DENTIST_INTERFACE_PRESENT 0
|
||||
#define hainan__GPU__DC__DENTIST_INTERFACE_PRESENT__0 1
|
||||
#define hainan__GPU__DC__DDC1AUX1 dual_mode
|
||||
#define hainan__GPU__DC__DDC1AUX1__DUAL_MODE 1
|
||||
#define hainan__GPU__DC__DDC2AUX2 dual_mode
|
||||
#define hainan__GPU__DC__DDC2AUX2__DUAL_MODE 1
|
||||
#define hainan__GPU__DC__DDC3AUX3 dual_mode
|
||||
#define hainan__GPU__DC__DDC3AUX3__DUAL_MODE 1
|
||||
#define hainan__GPU__DC__DDC4AUX4 dual_mode
|
||||
#define hainan__GPU__DC__DDC4AUX4__DUAL_MODE 1
|
||||
#define hainan__GPU__DC__DDC5AUX5 dual_mode
|
||||
#define hainan__GPU__DC__DDC5AUX5__DUAL_MODE 1
|
||||
#define hainan__GPU__DC__DDC6AUX6 dual_mode
|
||||
#define hainan__GPU__DC__DDC6AUX6__DUAL_MODE 1
|
||||
#define hainan__GPU__DC__AUX1_PRESENT 1
|
||||
#define hainan__GPU__DC__AUX1_PRESENT__1 1
|
||||
#define hainan__GPU__DC__AUX2_PRESENT 1
|
||||
#define hainan__GPU__DC__AUX2_PRESENT__1 1
|
||||
#define hainan__GPU__DC__AUX3_PRESENT 1
|
||||
#define hainan__GPU__DC__AUX3_PRESENT__1 1
|
||||
#define hainan__GPU__DC__AUX4_PRESENT 1
|
||||
#define hainan__GPU__DC__AUX4_PRESENT__1 1
|
||||
#define hainan__GPU__DC__AUX5_PRESENT 1
|
||||
#define hainan__GPU__DC__AUX5_PRESENT__1 1
|
||||
#define hainan__GPU__DC__AUX6_PRESENT 1
|
||||
#define hainan__GPU__DC__AUX6_PRESENT__1 1
|
||||
#define hainan__GPU__DC__DENTIST_PRESENT 0
|
||||
#define hainan__GPU__DC__DENTIST_PRESENT__0 1
|
||||
#define hainan__GPU__DC__GENERICA_PRESENT 1
|
||||
#define hainan__GPU__DC__GENERICA_PRESENT__1 1
|
||||
#define hainan__GPU__DC__GENERICB_PRESENT 1
|
||||
#define hainan__GPU__DC__GENERICB_PRESENT__1 1
|
||||
#define hainan__GPU__DC__GENERICC_PRESENT 1
|
||||
#define hainan__GPU__DC__GENERICC_PRESENT__1 1
|
||||
#define hainan__GPU__DC__GENERICD_PRESENT 1
|
||||
#define hainan__GPU__DC__GENERICD_PRESENT__1 1
|
||||
#define hainan__GPU__DC__GENERICE_PRESENT 1
|
||||
#define hainan__GPU__DC__GENERICE_PRESENT__1 1
|
||||
#define hainan__GPU__DC__GENERICF_PRESENT 1
|
||||
#define hainan__GPU__DC__GENERICF_PRESENT__1 1
|
||||
#define hainan__GPU__DC__GENERICG_PRESENT 1
|
||||
#define hainan__GPU__DC__GENERICG_PRESENT__1 1
|
||||
#define hainan__GPU__DC__BLON_TYPE 0
|
||||
#define hainan__GPU__DC__BLON_TYPE__0 1
|
||||
#define hainan__GPU__DC__NB_STUTTER_MODE_PRESENT 0
|
||||
#define hainan__GPU__DC__NB_STUTTER_MODE_PRESENT__0 1
|
||||
#define hainan__GPU__DC__PCIE_REFCLK_TEST_MODE_MUX_PRESENT 0
|
||||
#define hainan__GPU__DC__PCIE_REFCLK_TEST_MODE_MUX_PRESENT__0 1
|
||||
#define hainan__GPU__DC__REFCLK_TEST_MODE_MUX_PRESENT 0
|
||||
#define hainan__GPU__DC__REFCLK_TEST_MODE_MUX_PRESENT__0 1
|
||||
#define hainan__GPU__DC__PIXCLK_TEST_MODE_MUX_PRESENT 0
|
||||
#define hainan__GPU__DC__PIXCLK_TEST_MODE_MUX_PRESENT__0 1
|
||||
#define hainan__GPU__DC__SYMCLK_TEST_MODE_MUX_PRESENT 0
|
||||
#define hainan__GPU__DC__SYMCLK_TEST_MODE_MUX_PRESENT__0 1
|
||||
#define hainan__GPU__GC__NUM_SE 1
|
||||
#define hainan__GPU__GC__NUM_SE__1 1
|
||||
#define hainan__GPU__GC__NUM_SE__0_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_SH_PER_SE 1
|
||||
#define hainan__GPU__GC__NUM_SH_PER_SE__1 1
|
||||
#define hainan__GPU__GC__NUM_SH_PER_SE__0_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_RB_PER_SE 1
|
||||
#define hainan__GPU__GC__NUM_RB_PER_SE__1 1
|
||||
#define hainan__GPU__GC__NUM_RB_PER_SE__0_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_CU_PER_SH 5
|
||||
#define hainan__GPU__GC__NUM_CU_PER_SH__5 1
|
||||
#define hainan__GPU__GC__NUM_CU_PER_SH__0_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_CU_PER_SH__1_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_CU_PER_SH__2_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_CU_PER_SH__3_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_CU_PER_SH__4_PRESENT 1
|
||||
#define hainan__GPU__GC__WAVE_SIZE 64
|
||||
#define hainan__GPU__GC__WAVE_SIZE__64 1
|
||||
#define hainan__GPU__GC__NUM_CP_RINGS 3
|
||||
#define hainan__GPU__GC__NUM_CP_RINGS__3 1
|
||||
#define hainan__GPU__GC__NUM_CP_RINGS__0_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_CP_RINGS__1_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_CP_RINGS__2_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_SC_PER_SE 1
|
||||
#define hainan__GPU__GC__NUM_SC_PER_SE__1 1
|
||||
#define hainan__GPU__GC__NUM_SC_PER_SE__0_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_BCI_PER_SE 1
|
||||
#define hainan__GPU__GC__NUM_BCI_PER_SE__1 1
|
||||
#define hainan__GPU__GC__NUM_BCI_PER_SE__0_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_RB_PER_SC 1
|
||||
#define hainan__GPU__GC__NUM_RB_PER_SC__1 1
|
||||
#define hainan__GPU__GC__NUM_RB_PER_SC__0_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_RB_PER_PACKER 1
|
||||
#define hainan__GPU__GC__NUM_RB_PER_PACKER__1 1
|
||||
#define hainan__GPU__GC__NUM_RB_PER_PACKER__0_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_PACKER_PER_SC 1
|
||||
#define hainan__GPU__GC__NUM_PACKER_PER_SC__1 1
|
||||
#define hainan__GPU__GC__NUM_PACKER_PER_SC__0_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_DB_PER_PACKER 1
|
||||
#define hainan__GPU__GC__NUM_DB_PER_PACKER__1 1
|
||||
#define hainan__GPU__GC__NUM_DB_PER_PACKER__0_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_PACKER_PER_SE 1
|
||||
#define hainan__GPU__GC__NUM_PACKER_PER_SE__1 1
|
||||
#define hainan__GPU__GC__NUM_PACKER_PER_SE__0_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_RB_PER_SX 1
|
||||
#define hainan__GPU__GC__NUM_RB_PER_SX__1 1
|
||||
#define hainan__GPU__GC__NUM_RB_PER_SX__0_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_CU_PER_SE 5
|
||||
#define hainan__GPU__GC__NUM_CU_PER_SE__5 1
|
||||
#define hainan__GPU__GC__NUM_CU_PER_SE__0_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_CU_PER_SE__1_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_CU_PER_SE__2_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_CU_PER_SE__3_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_CU_PER_SE__4_PRESENT 1
|
||||
#define hainan__GPU__GC__MAX_NUMBER_WAVES 200
|
||||
#define hainan__GPU__GC__MAX_NUMBER_WAVES__200 1
|
||||
#define hainan__GPU__GC__MAX_NUMBER_WAVES_PER_PACKER 200
|
||||
#define hainan__GPU__GC__MAX_NUMBER_WAVES_PER_PACKER__200 1
|
||||
#define hainan__GPU__SQ__NUM_WAVES_PER_SIMD 10
|
||||
#define hainan__GPU__SQ__NUM_WAVES_PER_SIMD__10 1
|
||||
#define hainan__GPU__SQ__THREAD_GROUPS_PER_CU 16
|
||||
#define hainan__GPU__SQ__THREAD_GROUPS_PER_CU__16 1
|
||||
#define hainan__GPU__SQ__NUM_PERF_CNTRS 8
|
||||
#define hainan__GPU__SQ__NUM_PERF_CNTRS__8 1
|
||||
#define hainan__GPU__SQ__NUM_PERF_CNTRS__0_PRESENT 1
|
||||
#define hainan__GPU__SQ__NUM_PERF_CNTRS__1_PRESENT 1
|
||||
#define hainan__GPU__SQ__NUM_PERF_CNTRS__2_PRESENT 1
|
||||
#define hainan__GPU__SQ__NUM_PERF_CNTRS__3_PRESENT 1
|
||||
#define hainan__GPU__SQ__NUM_PERF_CNTRS__4_PRESENT 1
|
||||
#define hainan__GPU__SQ__NUM_PERF_CNTRS__5_PRESENT 1
|
||||
#define hainan__GPU__SQ__NUM_PERF_CNTRS__6_PRESENT 1
|
||||
#define hainan__GPU__SQ__NUM_PERF_CNTRS__7_PRESENT 1
|
||||
#define hainan__GPU__SQ__NUM_SGPR_PER_SIMD 512
|
||||
#define hainan__GPU__SQ__NUM_SGPR_PER_SIMD__512 1
|
||||
#define hainan__GPU__SQ__P2_IS_P1 1
|
||||
#define hainan__GPU__SQ__P2_IS_P1__1 1
|
||||
#define hainan__GPU__SQ__USE_SV_PACKAGES 0
|
||||
#define hainan__GPU__SQ__USE_SV_PACKAGES__0 1
|
||||
#define hainan__GPU__SQ__BUG_307568_FIXED 1
|
||||
#define hainan__GPU__SQ__BUG_307568_FIXED__1 1
|
||||
#define hainan__GPU__SQC__NUM_SQC 2
|
||||
#define hainan__GPU__SQC__NUM_SQC__2 1
|
||||
#define hainan__GPU__SQC__NUM_SQC__0_PRESENT 1
|
||||
#define hainan__GPU__SQC__NUM_SQC__1_PRESENT 1
|
||||
#define hainan__GPU__SQC__NUM_SQC_PER_SH 2
|
||||
#define hainan__GPU__SQC__NUM_SQC_PER_SH__2 1
|
||||
#define hainan__GPU__SQC__NUM_SQC_PER_SH__0_PRESENT 1
|
||||
#define hainan__GPU__SQC__NUM_SQC_PER_SH__1_PRESENT 1
|
||||
#define hainan__GPU__SQC__IDENTICAL_NAMES 0
|
||||
#define hainan__GPU__SQC__IDENTICAL_NAMES__0 1
|
||||
#define hainan__GPU__SQC__SH_SQC0_POSN_AFTER_SQ 0
|
||||
#define hainan__GPU__SQC__SH_SQC0_POSN_AFTER_SQ__0 1
|
||||
#define hainan__GPU__SQC__SH_SQC0_FIRST_CONNECTED_SQ 0
|
||||
#define hainan__GPU__SQC__SH_SQC0_FIRST_CONNECTED_SQ__0 1
|
||||
#define hainan__GPU__SQC__SH_SQC0_NUM_CU 3
|
||||
#define hainan__GPU__SQC__SH_SQC0_NUM_CU__3 1
|
||||
#define hainan__GPU__SQC__SH_SQC0_NUM_CU__0_PRESENT 1
|
||||
#define hainan__GPU__SQC__SH_SQC0_NUM_CU__1_PRESENT 1
|
||||
#define hainan__GPU__SQC__SH_SQC0_NUM_CU__2_PRESENT 1
|
||||
#define hainan__GPU__SQC__SH_SQC0_NUM_BANK 4
|
||||
#define hainan__GPU__SQC__SH_SQC0_NUM_BANK__4 1
|
||||
#define hainan__GPU__SQC__SH_SQC0_NUM_BANK__0_PRESENT 1
|
||||
#define hainan__GPU__SQC__SH_SQC0_NUM_BANK__1_PRESENT 1
|
||||
#define hainan__GPU__SQC__SH_SQC0_NUM_BANK__2_PRESENT 1
|
||||
#define hainan__GPU__SQC__SH_SQC0_NUM_BANK__3_PRESENT 1
|
||||
#define hainan__GPU__SQC__SH_SQC0_BANK_INST_CACHE_SIZE_KBYTES 8
|
||||
#define hainan__GPU__SQC__SH_SQC0_BANK_INST_CACHE_SIZE_KBYTES__8 1
|
||||
#define hainan__GPU__SQC__SH_SQC0_BANK_DATA_CACHE_SIZE_KBYTES 4
|
||||
#define hainan__GPU__SQC__SH_SQC0_BANK_DATA_CACHE_SIZE_KBYTES__4 1
|
||||
#define hainan__GPU__SQC__SH_SQC1_POSN_AFTER_SQ 3
|
||||
#define hainan__GPU__SQC__SH_SQC1_POSN_AFTER_SQ__3 1
|
||||
#define hainan__GPU__SQC__SH_SQC1_FIRST_CONNECTED_SQ 3
|
||||
#define hainan__GPU__SQC__SH_SQC1_FIRST_CONNECTED_SQ__3 1
|
||||
#define hainan__GPU__SQC__SH_SQC1_NUM_CU 2
|
||||
#define hainan__GPU__SQC__SH_SQC1_NUM_CU__2 1
|
||||
#define hainan__GPU__SQC__SH_SQC1_NUM_CU__0_PRESENT 1
|
||||
#define hainan__GPU__SQC__SH_SQC1_NUM_CU__1_PRESENT 1
|
||||
#define hainan__GPU__SQC__SH_SQC1_NUM_BANK 2
|
||||
#define hainan__GPU__SQC__SH_SQC1_NUM_BANK__2 1
|
||||
#define hainan__GPU__SQC__SH_SQC1_NUM_BANK__0_PRESENT 1
|
||||
#define hainan__GPU__SQC__SH_SQC1_NUM_BANK__1_PRESENT 1
|
||||
#define hainan__GPU__SQC__SH_SQC1_BANK_INST_CACHE_SIZE_KBYTES 16
|
||||
#define hainan__GPU__SQC__SH_SQC1_BANK_INST_CACHE_SIZE_KBYTES__16 1
|
||||
#define hainan__GPU__SQC__SH_SQC1_BANK_DATA_CACHE_SIZE_KBYTES 8
|
||||
#define hainan__GPU__SQC__SH_SQC1_BANK_DATA_CACHE_SIZE_KBYTES__8 1
|
||||
#define hainan__GPU__SQC__SH_SQC2_POSN_AFTER_SQ 0
|
||||
#define hainan__GPU__SQC__SH_SQC2_POSN_AFTER_SQ__0 1
|
||||
#define hainan__GPU__SQC__SH_SQC2_FIRST_CONNECTED_SQ 0
|
||||
#define hainan__GPU__SQC__SH_SQC2_FIRST_CONNECTED_SQ__0 1
|
||||
#define hainan__GPU__SQC__SH_SQC2_NUM_CU 0
|
||||
#define hainan__GPU__SQC__SH_SQC2_NUM_CU__0 1
|
||||
#define hainan__GPU__SQC__SH_SQC2_NUM_BANK 0
|
||||
#define hainan__GPU__SQC__SH_SQC2_NUM_BANK__0 1
|
||||
#define hainan__GPU__SQC__SH_SQC2_BANK_INST_CACHE_SIZE_KBYTES 0
|
||||
#define hainan__GPU__SQC__SH_SQC2_BANK_INST_CACHE_SIZE_KBYTES__0 1
|
||||
#define hainan__GPU__SQC__SH_SQC2_BANK_DATA_CACHE_SIZE_KBYTES 0
|
||||
#define hainan__GPU__SQC__SH_SQC2_BANK_DATA_CACHE_SIZE_KBYTES__0 1
|
||||
#define hainan__GPU__SQC__P2_IS_P1 1
|
||||
#define hainan__GPU__SQC__P2_IS_P1__1 1
|
||||
#define hainan__GPU__SQC__BUG_303685_EXISTS 1
|
||||
#define hainan__GPU__SQC__BUG_303685_EXISTS__1 1
|
||||
#define hainan__GPU__GC__GDS_EXISTS 1
|
||||
#define hainan__GPU__GC__GDS_EXISTS__1 1
|
||||
#define hainan__GPU__GC__RB_REDUNDANCY 0
|
||||
#define hainan__GPU__GC__RB_REDUNDANCY__0 1
|
||||
#define hainan__GPU__GC__SC_DOES_RB_REDUNDANCY 0
|
||||
#define hainan__GPU__GC__SC_DOES_RB_REDUNDANCY__0 1
|
||||
#define hainan__GPU__GC__MEM_ADDR_BITS 40
|
||||
#define hainan__GPU__GC__MEM_ADDR_BITS__40 1
|
||||
#define hainan__GPU__GC__NEW_VERTEX_VECTOR_ORDER 0
|
||||
#define hainan__GPU__GC__NEW_VERTEX_VECTOR_ORDER__0 1
|
||||
#define hainan__GPU__GC__NUM_INTERPS 1
|
||||
#define hainan__GPU__GC__NUM_INTERPS__1 1
|
||||
#define hainan__GPU__GC__HZ_PRESENT 1
|
||||
#define hainan__GPU__GC__HZ_PRESENT__1 1
|
||||
#define hainan__GPU__GC__NUM_CLKS_PER_PRIM 1
|
||||
#define hainan__GPU__GC__NUM_CLKS_PER_PRIM__1 1
|
||||
#define hainan__GPU__GC__NUM_INTERP_PRIM_PER_CLK 2
|
||||
#define hainan__GPU__GC__NUM_INTERP_PRIM_PER_CLK__2 1
|
||||
#define hainan__GPU__GC__ATTR_BUS_PRIM_PER_CLK 2
|
||||
#define hainan__GPU__GC__ATTR_BUS_PRIM_PER_CLK__2 1
|
||||
#define hainan__GPU__GC__NUM_MAX_GS_THDS 16
|
||||
#define hainan__GPU__GC__NUM_MAX_GS_THDS__16 1
|
||||
#define hainan__GPU__GC__NUM_MIN_GS_THDS 4
|
||||
#define hainan__GPU__GC__NUM_MIN_GS_THDS__4 1
|
||||
#define hainan__GPU__GC__NUM_STATES 8
|
||||
#define hainan__GPU__GC__NUM_STATES__8 1
|
||||
#define hainan__GPU__GC__NUM_STATES__0_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_STATES__1_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_STATES__2_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_STATES__3_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_STATES__4_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_STATES__5_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_STATES__6_PRESENT 1
|
||||
#define hainan__GPU__GC__NUM_STATES__7_PRESENT 1
|
||||
#define hainan__GPU__GC__STWTPTR_WIDTH 3
|
||||
#define hainan__GPU__GC__STWTPTR_WIDTH__3 1
|
||||
#define hainan__GPU__SH__DOUBLE_FLOAT_PRESENT 1
|
||||
#define hainan__GPU__SH__DOUBLE_FLOAT_PRESENT__1 1
|
||||
#define hainan__GPU__SH__NUM_DOUBLE_VSPS_PER_SIMD 1
|
||||
#define hainan__GPU__SH__NUM_DOUBLE_VSPS_PER_SIMD__1 1
|
||||
#define hainan__GPU__SH__NUM_DOUBLE_VSPS_PER_SIMD__0_PRESENT 1
|
||||
#define hainan__GPU__SH__NORM_SIN_COS 1
|
||||
#define hainan__GPU__SH__NORM_SIN_COS__1 1
|
||||
#define hainan__GPU__SH__MICROCODE_LEVEL 10
|
||||
#define hainan__GPU__SH__MICROCODE_LEVEL__10 1
|
||||
#define hainan__GPU__SH__NUM_EXPREQ_PER_CU 12
|
||||
#define hainan__GPU__SH__NUM_EXPREQ_PER_CU__12 1
|
||||
#define hainan__GPU__GC__GLOBAL_VGT_PA 0
|
||||
#define hainan__GPU__GC__GLOBAL_VGT_PA__0 1
|
||||
#define hainan__GPU__GC__NUM_FRONTEND 1
|
||||
#define hainan__GPU__GC__NUM_FRONTEND__1 1
|
||||
#define hainan__GPU__GC__NUM_FRONTEND__0_PRESENT 1
|
||||
#define hainan__GPU__GC__COALESCED_READ_PRESENT 1
|
||||
#define hainan__GPU__GC__COALESCED_READ_PRESENT__1 1
|
||||
#define hainan__GPU__GC__NUM_CLKS_PER_TILE 1
|
||||
#define hainan__GPU__GC__NUM_CLKS_PER_TILE__1 1
|
||||
#define hainan__GPU__GC__DBSC_TRUE_QUAD_INTF 1
|
||||
#define hainan__GPU__GC__DBSC_TRUE_QUAD_INTF__1 1
|
||||
#define hainan__GPU__GC__ASYNC_DISPATCH 1
|
||||
#define hainan__GPU__GC__ASYNC_DISPATCH__1 1
|
||||
#define hainan__GPU__GC__VMID_PORTS_EXISTS 1
|
||||
#define hainan__GPU__GC__VMID_PORTS_EXISTS__1 1
|
||||
#define hainan__GPU__GC__NUM_EXPORT_BUS 2
|
||||
#define hainan__GPU__GC__NUM_EXPORT_BUS__2 1
|
||||
#define hainan__GPU__GC__TILING_CONFIG_TABLE 1
|
||||
#define hainan__GPU__GC__TILING_CONFIG_TABLE__1 1
|
||||
#define hainan__GPU__GC__FMASK_TILING_CONFIG_TABLE 1
|
||||
#define hainan__GPU__GC__FMASK_TILING_CONFIG_TABLE__1 1
|
||||
#define hainan__GPU__GC__NEW_SRC_COLOR_FORMAT 1
|
||||
#define hainan__GPU__GC__NEW_SRC_COLOR_FORMAT__1 1
|
||||
#define hainan__GPU__SP__NUM_GPRS 256
|
||||
#define hainan__GPU__SP__NUM_GPRS__256 1
|
||||
#define hainan__GPU__SP__GPR_ADDR_WIDTH 8
|
||||
#define hainan__GPU__SP__GPR_ADDR_WIDTH__8 1
|
||||
#define hainan__GPU__SP__WIDTH_GPRS 128
|
||||
#define hainan__GPU__SP__WIDTH_GPRS__128 1
|
||||
#define hainan__GPU__SPI__TMP_SCBD_SLOTS_PER_CU 32
|
||||
#define hainan__GPU__SPI__TMP_SCBD_SLOTS_PER_CU__32 1
|
||||
#define hainan__GPU__VGT__GSPRIM_BUFF_DEPTH 768
|
||||
#define hainan__GPU__VGT__GSPRIM_BUFF_DEPTH__768 1
|
||||
#define hainan__GPU__VGT__GS_TABLE_DEPTH 16
|
||||
#define hainan__GPU__VGT__GS_TABLE_DEPTH__16 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_DEPTH 512
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_DEPTH__512 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH 16
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__16 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__0_PRESENT 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__1_PRESENT 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__2_PRESENT 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__3_PRESENT 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__4_PRESENT 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__5_PRESENT 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__6_PRESENT 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__7_PRESENT 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__8_PRESENT 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__9_PRESENT 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__10_PRESENT 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__11_PRESENT 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__12_PRESENT 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__13_PRESENT 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__14_PRESENT 1
|
||||
#define hainan__GPU__SX__PARAMETER_CACHE_WIDTH__15_PRESENT 1
|
||||
#define hainan__GPU__SX__COLOR_SCOREBOARD_SLOTS 64
|
||||
#define hainan__GPU__SX__COLOR_SCOREBOARD_SLOTS__64 1
|
||||
#define hainan__GPU__SX__POS_SCOREBOARD_SLOTS 16
|
||||
#define hainan__GPU__SX__POS_SCOREBOARD_SLOTS__16 1
|
||||
#define hainan__GPU__SX__COLOR_EXPORT_BUFFER_SIZE 256
|
||||
#define hainan__GPU__SX__COLOR_EXPORT_BUFFER_SIZE__256 1
|
||||
#define hainan__GPU__SX__POS_EXPORT_BUFFER_SIZE 256
|
||||
#define hainan__GPU__SX__POS_EXPORT_BUFFER_SIZE__256 1
|
||||
#define hainan__GPU__SX__COLOR_EXPORT_REG_BUFFER_SIZE 1024
|
||||
#define hainan__GPU__SX__COLOR_EXPORT_REG_BUFFER_SIZE__1024 1
|
||||
#define hainan__GPU__SX__POS_EXPORT_REG_BUFFER_SIZE 1024
|
||||
#define hainan__GPU__SX__POS_EXPORT_REG_BUFFER_SIZE__1024 1
|
||||
#define hainan__GPU__SX__PIXEL_FIFO_DEPTH 32
|
||||
#define hainan__GPU__SX__PIXEL_FIFO_DEPTH__32 1
|
||||
#define hainan__GPU__PA__PRIM_BUFF_DEPTH 1536
|
||||
#define hainan__GPU__PA__PRIM_BUFF_DEPTH__1536 1
|
||||
#define hainan__GPU__PA__NUM_CLIPPERS 4
|
||||
#define hainan__GPU__PA__NUM_CLIPPERS__4 1
|
||||
#define hainan__GPU__PA__LOG2_MAX_SAMPLES 3
|
||||
#define hainan__GPU__PA__LOG2_MAX_SAMPLES__3 1
|
||||
#define hainan__GPU__TA__GRBM_INTF_RESET_FIX 1
|
||||
#define hainan__GPU__TA__GRBM_INTF_RESET_FIX__1 1
|
||||
#define hainan__GPU__TC__TCC_PRESENT 1
|
||||
#define hainan__GPU__TC__TCC_PRESENT__1 1
|
||||
#define hainan__GPU__TC__TCR_TCA_REQ_CREDITS 32
|
||||
#define hainan__GPU__TC__TCR_TCA_REQ_CREDITS__32 1
|
||||
#define hainan__GPU__TC__TA_HANDLE_BASEADDR 1
|
||||
#define hainan__GPU__TC__TA_HANDLE_BASEADDR__1 1
|
||||
#define hainan__GPU__TC__TCP_L1_SIZE 16
|
||||
#define hainan__GPU__TC__TCP_L1_SIZE__16 1
|
||||
#define hainan__GPU__TC__NUM_TCPS 5
|
||||
#define hainan__GPU__TC__NUM_TCPS__5 1
|
||||
#define hainan__GPU__TC__NUM_TCPS__0_PRESENT 1
|
||||
#define hainan__GPU__TC__NUM_TCPS__1_PRESENT 1
|
||||
#define hainan__GPU__TC__NUM_TCPS__2_PRESENT 1
|
||||
#define hainan__GPU__TC__NUM_TCPS__3_PRESENT 1
|
||||
#define hainan__GPU__TC__NUM_TCPS__4_PRESENT 1
|
||||
#define hainan__GPU__TC__NUM_TCCS 2
|
||||
#define hainan__GPU__TC__NUM_TCCS__2 1
|
||||
#define hainan__GPU__TC__NUM_TCCS__0_PRESENT 1
|
||||
#define hainan__GPU__TC__NUM_TCCS__1_PRESENT 1
|
||||
#define hainan__GPU__TC__NUM_TCAS 2
|
||||
#define hainan__GPU__TC__NUM_TCAS__2 1
|
||||
#define hainan__GPU__TC__NUM_TCAS__0_PRESENT 1
|
||||
#define hainan__GPU__TC__NUM_TCAS__1_PRESENT 1
|
||||
#define hainan__GPU__TC__NUM_TCIRS 3
|
||||
#define hainan__GPU__TC__NUM_TCIRS__3 1
|
||||
#define hainan__GPU__TC__NUM_TCIRS__0_PRESENT 1
|
||||
#define hainan__GPU__TC__NUM_TCIRS__1_PRESENT 1
|
||||
#define hainan__GPU__TC__NUM_TCIRS__2_PRESENT 1
|
||||
#define hainan__GPU__TC__NUM_TCIWS 1
|
||||
#define hainan__GPU__TC__NUM_TCIWS__1 1
|
||||
#define hainan__GPU__TC__NUM_TCIWS__0_PRESENT 1
|
||||
#define hainan__GPU__TC__CLIENT_TCI_REQ_CREDITS 8
|
||||
#define hainan__GPU__TC__CLIENT_TCI_REQ_CREDITS__8 1
|
||||
#define hainan__GPU__TC__VGT_TCI_REQ_CREDITS 8
|
||||
#define hainan__GPU__TC__VGT_TCI_REQ_CREDITS__8 1
|
||||
#define hainan__GPU__TC__SQC_TCI_REQ_CREDITS 8
|
||||
#define hainan__GPU__TC__SQC_TCI_REQ_CREDITS__8 1
|
||||
#define hainan__GPU__TC__CP_TCI_REQ_CREDITS 8
|
||||
#define hainan__GPU__TC__CP_TCI_REQ_CREDITS__8 1
|
||||
#define hainan__GPU__TC__NUM_TCIS 4
|
||||
#define hainan__GPU__TC__NUM_TCIS__4 1
|
||||
#define hainan__GPU__TC__NUM_TCIS__0_PRESENT 1
|
||||
#define hainan__GPU__TC__NUM_TCIS__1_PRESENT 1
|
||||
#define hainan__GPU__TC__NUM_TCIS__2_PRESENT 1
|
||||
#define hainan__GPU__TC__NUM_TCIS__3_PRESENT 1
|
||||
#define hainan__GPU__TC__TCC_NUM_LINES 2048
|
||||
#define hainan__GPU__TC__TCC_NUM_LINES__2048 1
|
||||
#define hainan__GPU__TC__TCA_PHASE 1
|
||||
#define hainan__GPU__TC__TCA_PHASE__1 1
|
||||
#define hainan__GPU__TC__TCA_RTN_ARB_IO_PIPELINING 0
|
||||
#define hainan__GPU__TC__TCA_RTN_ARB_IO_PIPELINING__0 1
|
||||
#define hainan__GPU__TC__CP_VGT_TCI_ABOVE_SH0 0
|
||||
#define hainan__GPU__TC__CP_VGT_TCI_ABOVE_SH0__0 1
|
||||
#define hainan__GPU__DB__TB_USES_EMULATOR_MODE 0
|
||||
#define hainan__GPU__DB__TB_USES_EMULATOR_MODE__0 1
|
||||
#define hainan__GPU__DB__USE_ADDRRAXX_LIB 1
|
||||
#define hainan__GPU__DB__USE_ADDRRAXX_LIB__1 1
|
||||
#define hainan__GPU__DB__LEGACY_TILE_MODE_ASSERTS 1
|
||||
#define hainan__GPU__DB__LEGACY_TILE_MODE_ASSERTS__1 1
|
||||
#define hainan__GPU__DB__SUBBLOCK_GATES_PRESENT 1
|
||||
#define hainan__GPU__DB__SUBBLOCK_GATES_PRESENT__1 1
|
||||
#define hainan__GPU__CB__BLENDER_NUM_PIXELS 4
|
||||
#define hainan__GPU__CB__BLENDER_NUM_PIXELS__4 1
|
||||
#define hainan__GPU__CB__BLENDER_NUM_FP32_COMPS 4
|
||||
#define hainan__GPU__CB__BLENDER_NUM_FP32_COMPS__4 1
|
||||
#define hainan__GPU__CB__COMPRESSION 1
|
||||
#define hainan__GPU__CB__COMPRESSION__1 1
|
||||
#define hainan__GPU__LDS__SIZE 64
|
||||
#define hainan__GPU__LDS__SIZE__64 1
|
||||
#define hainan__GPU__LDS__NUM_PIXELS 32
|
||||
#define hainan__GPU__LDS__NUM_PIXELS__32 1
|
||||
#define hainan__GPU__LDS__NUM_BANKS 32
|
||||
#define hainan__GPU__LDS__NUM_BANKS__32 1
|
||||
#define hainan__GPU__GDS__SIZE 64
|
||||
#define hainan__GPU__GDS__SIZE__64 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS 16
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__16 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__0_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__1_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__2_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__3_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__4_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__5_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__6_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__7_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__8_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__9_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__10_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__11_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__12_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__13_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__14_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_PIXELS__15_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS 16
|
||||
#define hainan__GPU__GDS__NUM_BANKS__16 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__0_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__1_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__2_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__3_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__4_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__5_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__6_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__7_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__8_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__9_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__10_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__11_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__12_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__13_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__14_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_BANKS__15_PRESENT 1
|
||||
#define hainan__GPU__GDS__NUM_OA_COUNTERS 4
|
||||
#define hainan__GPU__GDS__NUM_OA_COUNTERS__4 1
|
||||
#define hainan__GPU__RLC__LARGE_UCODE_RAM 1
|
||||
#define hainan__GPU__RLC__LARGE_UCODE_RAM__1 1
|
||||
#define hainan__GPU__RLC__LARGE_SCRATCH_RAM 1
|
||||
#define hainan__GPU__RLC__LARGE_SCRATCH_RAM__1 1
|
||||
#define hainan__GPU__RLC__GFX_POWER_GATING 0
|
||||
#define hainan__GPU__RLC__GFX_POWER_GATING__0 1
|
||||
#define hainan__GPU__GC__SC_BCI_16_SAMPLE_PER_PIXEL 1
|
||||
#define hainan__GPU__GC__SC_BCI_16_SAMPLE_PER_PIXEL__1 1
|
||||
#define hainan__GPU__GC__TMP_USE_RASTER_CONFIG 1
|
||||
#define hainan__GPU__GC__TMP_USE_RASTER_CONFIG__1 1
|
||||
#define hainan__GPU__GC__FLT_NORM_0_6 0
|
||||
#define hainan__GPU__GC__FLT_NORM_0_6__0 1
|
||||
#define hainan__GPU__IO__PCIE_PHY falcon65g16x
|
||||
#define hainan__GPU__IO__PCIE_PHY__FALCON65G16X 1
|
||||
#define hainan__GPU__IO__DVP_SUBMOD io_r
|
||||
#define hainan__GPU__IO__DVP_SUBMOD__IO_R 1
|
||||
#define hainan__GPU__IO__SYNC_SUBMOD io_b
|
||||
#define hainan__GPU__IO__SYNC_SUBMOD__IO_B 1
|
||||
#define hainan__GPU__IO__GENERICA_SUBMOD io_b
|
||||
#define hainan__GPU__IO__GENERICA_SUBMOD__IO_B 1
|
||||
#define hainan__GPU__IO__GENERICB_SUBMOD io_b
|
||||
#define hainan__GPU__IO__GENERICB_SUBMOD__IO_B 1
|
||||
#define hainan__GPU__IO__GENERICC_SUBMOD io_b
|
||||
#define hainan__GPU__IO__GENERICC_SUBMOD__IO_B 1
|
||||
#define hainan__GPU__IO__GENERICD_SUBMOD io_b
|
||||
#define hainan__GPU__IO__GENERICD_SUBMOD__IO_B 1
|
||||
#define hainan__GPU__IO__GENERICE_SUBMOD io_b
|
||||
#define hainan__GPU__IO__GENERICE_SUBMOD__IO_B 1
|
||||
#define hainan__GPU__IO__GENERICF_SUBMOD io_b
|
||||
#define hainan__GPU__IO__GENERICF_SUBMOD__IO_B 1
|
||||
#define hainan__GPU__IO__GENERICG_SUBMOD io_b
|
||||
#define hainan__GPU__IO__GENERICG_SUBMOD__IO_B 1
|
||||
#define hainan__GPU__IO__VID_SUBMOD io_r
|
||||
#define hainan__GPU__IO__VID_SUBMOD__IO_R 1
|
||||
#define hainan__GPU__IO__GPIO_SUBMOD io_b
|
||||
#define hainan__GPU__IO__GPIO_SUBMOD__IO_B 1
|
||||
#define hainan__GPU__IO__PLL_SUBMOD io_b
|
||||
#define hainan__GPU__IO__PLL_SUBMOD__IO_B 1
|
||||
#define hainan__GPU__IO__SPLL_SUBMOD io_b
|
||||
#define hainan__GPU__IO__SPLL_SUBMOD__IO_B 1
|
||||
#define hainan__GPU__IO__UPLL_SUBMOD io_b
|
||||
#define hainan__GPU__IO__UPLL_SUBMOD__IO_B 1
|
||||
#define hainan__GPU__IO__HPD_SUBMOD io_b
|
||||
#define hainan__GPU__IO__HPD_SUBMOD__IO_B 1
|
||||
#define hainan__GPU__IO__I2C_SUBMOD io_b
|
||||
#define hainan__GPU__IO__I2C_SUBMOD__IO_B 1
|
||||
#define hainan__GPU__IO__ASAT_45_PLL 1
|
||||
#define hainan__GPU__IO__ASAT_45_PLL__1 1
|
||||
#define hainan__GPU__IO__PWRGOOD 1
|
||||
#define hainan__GPU__IO__PWRGOOD__1 1
|
||||
#define hainan__GPU__IO__NUM_MPLL 2
|
||||
#define hainan__GPU__IO__NUM_MPLL__2 1
|
||||
#define hainan__GPU__IO__READY 1
|
||||
#define hainan__GPU__IO__READY__1 1
|
||||
#define hainan__GPU__MC__NUM_MCB_BLOCKS 1
|
||||
#define hainan__GPU__MC__NUM_MCB_BLOCKS__1 1
|
||||
#define hainan__GPU__MC__NUM_MCB_BLOCKS__0_PRESENT 1
|
||||
#define hainan__GPU__MC__NUM_MCB_TILES 1
|
||||
#define hainan__GPU__MC__NUM_MCB_TILES__1 1
|
||||
#define hainan__GPU__MC__NUM_MCB_TILES__0_PRESENT 1
|
||||
#define hainan__GPU__MC__NUM_MCD_BLOCKS 1
|
||||
#define hainan__GPU__MC__NUM_MCD_BLOCKS__1 1
|
||||
#define hainan__GPU__MC__NUM_MCD_BLOCKS__0_PRESENT 1
|
||||
#define hainan__GPU__MC__NUM_MCC_BLOCKS 1
|
||||
#define hainan__GPU__MC__NUM_MCC_BLOCKS__1 1
|
||||
#define hainan__GPU__MC__NUM_MCC_BLOCKS__0_PRESENT 1
|
||||
#define hainan__GPU__MC__NUM_MCT_TILES 1
|
||||
#define hainan__GPU__MC__NUM_MCT_TILES__1 1
|
||||
#define hainan__GPU__MC__NUM_IO_CHNLS 2
|
||||
#define hainan__GPU__MC__NUM_IO_CHNLS__2 1
|
||||
#define hainan__GPU__MC__NUM_IO_CHNLS__0_PRESENT 1
|
||||
#define hainan__GPU__MC__NUM_IO_CHNLS__1_PRESENT 1
|
||||
#define hainan__GPU__MC__CDRRDBK 6
|
||||
#define hainan__GPU__MC__CDRRDBK__6 1
|
||||
#define hainan__GPU__MC__NUM_RPB_EFF_QUEUES 2
|
||||
#define hainan__GPU__MC__NUM_RPB_EFF_QUEUES__2 1
|
||||
#define hainan__GPU__MC__MCD0_BLOCK 1
|
||||
#define hainan__GPU__MC__MCD0_BLOCK__1 1
|
||||
#define hainan__GPU__MC__MCC0_BLOCK 1
|
||||
#define hainan__GPU__MC__MCC0_BLOCK__1 1
|
||||
#define hainan__GPU__MC__MCB_BLOCK 1
|
||||
#define hainan__GPU__MC__MCB_BLOCK__1 1
|
||||
#define hainan__GPU__MC__ALLOW_LARRAY 0
|
||||
#define hainan__GPU__MC__ALLOW_LARRAY__0 1
|
||||
#define hainan__GPU__MC__MCD_SRBM_PRESENT 1
|
||||
#define hainan__GPU__MC__MCD_SRBM_PRESENT__1 1
|
||||
#define hainan__GPU__MC__HDP_RD_ON_GBL1 1
|
||||
#define hainan__GPU__MC__HDP_RD_ON_GBL1__1 1
|
||||
#define hainan__GPU__MC__TWO_GBL0_RDRET 1
|
||||
#define hainan__GPU__MC__TWO_GBL0_RDRET__1 1
|
||||
#define hainan__GPU__MC__NUM_OF_RB_PER_MCD 1
|
||||
#define hainan__GPU__MC__NUM_OF_RB_PER_MCD__1 1
|
||||
#define hainan__GPU__MC__NUM_TC_PER_MCD 2
|
||||
#define hainan__GPU__MC__NUM_TC_PER_MCD__2 1
|
||||
#define hainan__GPU__MC__NUM_TCCS 2
|
||||
#define hainan__GPU__MC__NUM_TCCS__2 1
|
||||
#define hainan__GPU__MC__NUM_MCD_POW2 1
|
||||
#define hainan__GPU__MC__NUM_MCD_POW2__1 1
|
||||
#define hainan__GPU__MC__MCD0_IO0_REP 1
|
||||
#define hainan__GPU__MC__MCD0_IO0_REP__1 1
|
||||
#define hainan__GPU__MC__MCD0_IO1_REP 1
|
||||
#define hainan__GPU__MC__MCD0_IO1_REP__1 1
|
||||
#define hainan__GPU__MC__SIMPLIFIED_BLACKOUT 1
|
||||
#define hainan__GPU__MC__SIMPLIFIED_BLACKOUT__1 1
|
||||
#define hainan__GPU__MC__DDR5_MCLK_DEFAULT 5
|
||||
#define hainan__GPU__MC__DDR5_MCLK_DEFAULT__5 1
|
||||
#define hainan__GPU__MC__XBAR_REMAP 0
|
||||
#define hainan__GPU__MC__XBAR_REMAP__0 1
|
||||
#define hainan__GPU__MC__GPU_VIRTUAL_ADDRESS_WIDTH 40
|
||||
#define hainan__GPU__MC__GPU_VIRTUAL_ADDRESS_WIDTH__40 1
|
||||
#define hainan__GPU__MC__GPU_PHYSICAL_ADDRESS_WIDTH 40
|
||||
#define hainan__GPU__MC__GPU_PHYSICAL_ADDRESS_WIDTH__40 1
|
||||
#define hainan__GPU__MC__PCIE_VIRTUAL_ADDRESS_WIDTH 48
|
||||
#define hainan__GPU__MC__PCIE_VIRTUAL_ADDRESS_WIDTH__48 1
|
||||
#define hainan__GPU__MC__PCIE_PHYSICAL_ADDRESS_WIDTH 48
|
||||
#define hainan__GPU__MC__PCIE_PHYSICAL_ADDRESS_WIDTH__48 1
|
||||
#define hainan__GPU__MC__SPLIT_TILES 1
|
||||
#define hainan__GPU__MC__SPLIT_TILES__1 1
|
||||
#define hainan__GPU__MC__FUSION_FEATURE_ONLY 0
|
||||
#define hainan__GPU__MC__FUSION_FEATURE_ONLY__0 1
|
||||
#define hainan__GPU__MC__POWER_GATING 1
|
||||
#define hainan__GPU__MC__POWER_GATING__1 1
|
||||
#define hainan__GPU__MC__NUM_PGFSM_BLOCKS 3
|
||||
#define hainan__GPU__MC__NUM_PGFSM_BLOCKS__3 1
|
||||
#define hainan__GPU__MC__PHY_POWER_GATING 1
|
||||
#define hainan__GPU__MC__PHY_POWER_GATING__1 1
|
||||
#define hainan__GPU__MC__LOWSPEED_MEMPHY 1
|
||||
#define hainan__GPU__MC__LOWSPEED_MEMPHY__1 1
|
||||
#define hainan__GPU__MC__PAB_EXISTS 0
|
||||
#define hainan__GPU__MC__PAB_EXISTS__0 1
|
||||
#define hainan__GPU__VID__PRESENT 0
|
||||
#define hainan__GPU__VID__PRESENT__0 1
|
||||
#define hainan__GPU__DC__PRESENT 0
|
||||
#define hainan__GPU__DC__PRESENT__0 1
|
||||
#define hainan__GPU__AVP__PRESENT 0
|
||||
#define hainan__GPU__AVP__PRESENT__0 1
|
||||
#define hainan__GPU__UVD__PRESENT 0
|
||||
#define hainan__GPU__UVD__PRESENT__0 1
|
||||
#define hainan__ENV__GPU__UVD__HAVE_RTL 0
|
||||
#define hainan__ENV__GPU__UVD__HAVE_RTL__0 1
|
||||
#define hainan__ENV__GPU__MC__HAVE_BFM 1
|
||||
#define hainan__ENV__GPU__MC__HAVE_BFM__1 1
|
||||
#define hainan__ENV__GPU__MC__HAVE_RTL 0
|
||||
#define hainan__ENV__GPU__MC__HAVE_RTL__0 1
|
||||
#define hainan__GPU__UVD__PROJ_LARK 1
|
||||
#define hainan__GPU__UVD__PROJ_LARK__1 1
|
||||
#define hainan__GPU__UVD__CTX_ENABLE 1
|
||||
#define hainan__GPU__UVD__CTX_ENABLE__1 1
|
||||
#define hainan__GPU__UVD__MC_7XX 1
|
||||
#define hainan__GPU__UVD__MC_7XX__1 1
|
||||
#define hainan__GPU__UVD__CGC_CGTT_LOCAL_CLOCK_GATER 1
|
||||
#define hainan__GPU__UVD__CGC_CGTT_LOCAL_CLOCK_GATER__1 1
|
||||
#define hainan__GPU__MC__ARB_VM_CREDITS 32
|
||||
#define hainan__GPU__MC__ARB_VM_CREDITS__32 1
|
||||
#define hainan__GPU__MC__MCD_TLBS 4
|
||||
#define hainan__GPU__MC__MCD_TLBS__4 1
|
||||
#define hainan__GPU__MC__MCB_TLBS 3
|
||||
#define hainan__GPU__MC__MCB_TLBS__3 1
|
||||
#define hainan__GPU__MC__NO_STALL_ON_FAULT 1
|
||||
#define hainan__GPU__MC__NO_STALL_ON_FAULT__1 1
|
||||
#define hainan__GPU__MC__VMC_CACHES 2
|
||||
#define hainan__GPU__MC__VMC_CACHES__2 1
|
||||
#define hainan__GPU__MC__BIGK_CACHE_SIZE 4
|
||||
#define hainan__GPU__MC__BIGK_CACHE_SIZE__4 1
|
||||
#define hainan__GPU__MC__MCB_TLB0_CAM 5
|
||||
#define hainan__GPU__MC__MCB_TLB0_CAM__5 1
|
||||
#define hainan__GPU__MC__MCB_TLB1_CAM 4
|
||||
#define hainan__GPU__MC__MCB_TLB1_CAM__4 1
|
||||
#define hainan__GPU__MC__MCB_TLB2_CAM 4
|
||||
#define hainan__GPU__MC__MCB_TLB2_CAM__4 1
|
||||
#define hainan__GPU__MC__MCD_TLB0_CAM 4
|
||||
#define hainan__GPU__MC__MCD_TLB0_CAM__4 1
|
||||
#define hainan__GPU__MC__MCD_TLB1_CAM 4
|
||||
#define hainan__GPU__MC__MCD_TLB1_CAM__4 1
|
||||
#define hainan__GPU__MC__MCD_TLB2_CAM 4
|
||||
#define hainan__GPU__MC__MCD_TLB2_CAM__4 1
|
||||
#define hainan__GPU__MC__MCD_TLB3_CAM 4
|
||||
#define hainan__GPU__MC__MCD_TLB3_CAM__4 1
|
||||
#define hainan__GPU__MC__SEND_FREE_AT_RTN 1
|
||||
#define hainan__GPU__MC__SEND_FREE_AT_RTN__1 1
|
||||
#define hainan__GPU__MC__CONTEXT_WIDTH 3
|
||||
#define hainan__GPU__MC__CONTEXT_WIDTH__3 1
|
||||
#define hainan__GPU__MC__BUG_159204_EXISTS 1
|
||||
#define hainan__GPU__MC__BUG_159204_EXISTS__1 1
|
||||
#endif
|
||||
@@ -1,979 +0,0 @@
|
||||
#ifndef oland____GPU_FEATURES_H__
|
||||
#define oland____GPU_FEATURES_H__
|
||||
#define oland__GPU__BIF__VC_PRESENT 0
|
||||
#define oland__GPU__BIF__VC_PRESENT__0 1
|
||||
#define oland__GPU__BIF__PCIEGEN2_MCB_DEPTH 96
|
||||
#define oland__GPU__BIF__PCIEGEN2_MCB_DEPTH__96 1
|
||||
#define oland__GPU__BIF__CLKBUF_PRESENT 1
|
||||
#define oland__GPU__BIF__CLKBUF_PRESENT__1 1
|
||||
#define oland__GPU__XSP__PRESENT 0
|
||||
#define oland__GPU__XSP__PRESENT__0 1
|
||||
#define oland__GPU__CHIP__DFS 1
|
||||
#define oland__GPU__CHIP__DFS__1 1
|
||||
#define oland__GPU__CHIP__TECH tsmc28hp
|
||||
#define oland__GPU__CHIP__TECH__TSMC28HP 1
|
||||
#define oland__GPU__CHIP__TECHVER B .0.5
|
||||
#define oland__GPU__CHIP__TECHVER__B_0_5 1
|
||||
#define oland__TOOLS__GUTS__TECHNM tsmc28hp
|
||||
#define oland__TOOLS__GUTS__TECHNM__TSMC28HP 1
|
||||
#define oland__TOOLS__GUTS__MEMTECH 28nm
|
||||
#define oland__TOOLS__GUTS__MEMTECH__28NM 1
|
||||
#define oland__TOOLS__GUTS__LARRVENDOR AMD
|
||||
#define oland__TOOLS__GUTS__LARRVENDOR__AMD 1
|
||||
#define oland__TOOLS__GUTS__MEMFABTECH TSMC28
|
||||
#define oland__TOOLS__GUTS__MEMFABTECH__TSMC28 1
|
||||
#define oland__TOOLS__GUTS__MEMVENDOR Virage
|
||||
#define oland__TOOLS__GUTS__MEMVENDOR__VIRAGE 1
|
||||
#define oland__TOOLS__GUTS__MEMTYPE slow
|
||||
#define oland__TOOLS__GUTS__MEMTYPE__SLOW 1
|
||||
#define oland__TOOLS__GUTS__MEMVER 1_0
|
||||
#define oland__TOOLS__GUTS__MEMVER__1_0 1
|
||||
#define oland__TOOLS__GUTS__LARRTYPE default
|
||||
#define oland__TOOLS__GUTS__LARRTYPE__DEFAULT 1
|
||||
#define oland__TOOLS__GUTS__LARRVER 0_6ola
|
||||
#define oland__TOOLS__GUTS__LARRVER__0_6OLA 1
|
||||
#define oland__TOOLS__GUTS__TECHVER B .0.5
|
||||
#define oland__TOOLS__GUTS__TECHVER__B_0_5 1
|
||||
#define oland__TOOLS__GUTS__MEMVIEWVER 0_2
|
||||
#define oland__TOOLS__GUTS__MEMVIEWVER__0_2 1
|
||||
#define oland__GPU__CHIP__MEMTECH 28nm
|
||||
#define oland__GPU__CHIP__MEMTECH__28NM 1
|
||||
#define oland__GPU__CHIP__MEMVIEWVER 0_2
|
||||
#define oland__GPU__CHIP__MEMVIEWVER__0_2 1
|
||||
#define oland__GPU__CHIP__MEM virage
|
||||
#define oland__GPU__CHIP__MEM__VIRAGE 1
|
||||
#define oland__GPU__CHIP__MEMVENDOR Virage
|
||||
#define oland__GPU__CHIP__MEMVENDOR__VIRAGE 1
|
||||
#define oland__GPU__CHIP__SRAM_MEMFABTECH TSMC28
|
||||
#define oland__GPU__CHIP__SRAM_MEMFABTECH__TSMC28 1
|
||||
#define oland__GPU__CHIP__LARR_MEMWRAPPERVER 0_1
|
||||
#define oland__GPU__CHIP__LARR_MEMWRAPPERVER__0_1 1
|
||||
#define oland__GPU__CHIP__SRAM_MEMWRAPPERVER 0_1
|
||||
#define oland__GPU__CHIP__SRAM_MEMWRAPPERVER__0_1 1
|
||||
#define oland__GPU__CHIP__SRAM_TIMING slow
|
||||
#define oland__GPU__CHIP__SRAM_TIMING__SLOW 1
|
||||
#define oland__GPU__CHIP__SRAM_MEMVER 1_0_1
|
||||
#define oland__GPU__CHIP__SRAM_MEMVER__1_0_1 1
|
||||
#define oland__GPU__CHIP__LARRVENDOR AMD
|
||||
#define oland__GPU__CHIP__LARRVENDOR__AMD 1
|
||||
#define oland__GPU__CHIP__LARR_MEMFABTECH TSMC28
|
||||
#define oland__GPU__CHIP__LARR_MEMFABTECH__TSMC28 1
|
||||
#define oland__GPU__CHIP__LARR_TIMING default
|
||||
#define oland__GPU__CHIP__LARR_TIMING__DEFAULT 1
|
||||
#define oland__GPU__CHIP__LARR_MEMVER 0_6ola
|
||||
#define oland__GPU__CHIP__LARR_MEMVER__0_6OLA 1
|
||||
#define oland__GPU__CHIP__MEMFABTECH TSMC28
|
||||
#define oland__GPU__CHIP__MEMFABTECH__TSMC28 1
|
||||
#define oland__GPU__CHIP__MEMVER 1_0
|
||||
#define oland__GPU__CHIP__MEMVER__1_0 1
|
||||
#define oland__GPU__CHIP__MEMTYPE slow
|
||||
#define oland__GPU__CHIP__MEMTYPE__SLOW 1
|
||||
#define oland__GPU__CHIP__LARRVER 0_6ola
|
||||
#define oland__GPU__CHIP__LARRVER__0_6OLA 1
|
||||
#define oland__GPU__CHIP__LARRTYPE default
|
||||
#define oland__GPU__CHIP__LARRTYPE__DEFAULT 1
|
||||
#define oland__GPU__CHIP__TILES_PRESENT 0
|
||||
#define oland__GPU__CHIP__TILES_PRESENT__0 1
|
||||
#define oland__GPU__CHIP__SMSGCOUNT 2
|
||||
#define oland__GPU__CHIP__SMSGCOUNT__2 1
|
||||
#define oland__GPU__CHIP__SMSG_0_PRESENT 1
|
||||
#define oland__GPU__CHIP__SMSG_0_PRESENT__1 1
|
||||
#define oland__GPU__CHIP__SMSG_1_PRESENT 1
|
||||
#define oland__GPU__CHIP__SMSG_1_PRESENT__1 1
|
||||
#define oland__GPU__CHIP__SMSG_2_PRESENT 0
|
||||
#define oland__GPU__CHIP__SMSG_2_PRESENT__0 1
|
||||
#define oland__GPU__CHIP__SMSG_3_PRESENT 0
|
||||
#define oland__GPU__CHIP__SMSG_3_PRESENT__0 1
|
||||
#define oland__GPU__CHIP__SMSG_FOR_BL 1
|
||||
#define oland__GPU__CHIP__SMSG_FOR_BL__1 1
|
||||
#define oland__GPU__CHIP__SMSG_FOR_TR 0
|
||||
#define oland__GPU__CHIP__SMSG_FOR_TR__0 1
|
||||
#define oland__GPU__CHIP__TCB_DEPTH 512
|
||||
#define oland__GPU__CHIP__TCB_DEPTH__512 1
|
||||
#define oland__GPU__CHIP__XCLK_MHZ 25
|
||||
#define oland__GPU__CHIP__XCLK_MHZ__25 1
|
||||
#define oland__GPU__LBIST__PRESENT 0
|
||||
#define oland__GPU__LBIST__PRESENT__0 1
|
||||
#define oland__GPU__CHIP__BACO 1
|
||||
#define oland__GPU__CHIP__BACO__1 1
|
||||
#define oland__GPU__CEC__PRESENT 1
|
||||
#define oland__GPU__CEC__PRESENT__1 1
|
||||
#define oland__GPU__CHIP__REAL_RDL_READY 1
|
||||
#define oland__GPU__CHIP__REAL_RDL_READY__1 1
|
||||
#define oland__GPU__CHIP__INFERRED_REPS 1
|
||||
#define oland__GPU__CHIP__INFERRED_REPS__1 1
|
||||
#define oland__GPU__CHIP__DRMDMA_POWERGATE 0
|
||||
#define oland__GPU__CHIP__DRMDMA_POWERGATE__0 1
|
||||
#define oland__GPU__CHIP__EDCMEM1 0
|
||||
#define oland__GPU__CHIP__EDCMEM1__0 1
|
||||
#define oland__GPU__CHIP__POWERGATE 0
|
||||
#define oland__GPU__CHIP__POWERGATE__0 1
|
||||
#define oland__GPU__THM__CMON_PRESENT 1
|
||||
#define oland__GPU__THM__CMON_PRESENT__1 1
|
||||
#define oland__GPU__TMON0__LEFT_NUM_RDI 6
|
||||
#define oland__GPU__TMON0__LEFT_NUM_RDI__6 1
|
||||
#define oland__GPU__TMON0__RIGHT_NUM_RDI 6
|
||||
#define oland__GPU__TMON0__RIGHT_NUM_RDI__6 1
|
||||
#define oland__GPU__DFT__IBIZA_TMON 1
|
||||
#define oland__GPU__DFT__IBIZA_TMON__1 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL 17
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL__17 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_LS 0
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_LS__0 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_DS_D 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_DS_D__1 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_DS_M 2
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_DS_M__2 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_SD_D 3
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_SD_D__3 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_SD_M 4
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_SD_M__4 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_DS 5
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_DS__5 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_SD 6
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_SD__6 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_FISO 7
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_FISO__7 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_START 8
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_START__8 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_END 16
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_END__16 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_START 8
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_START__8 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_END 30
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_END__30 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RME 8
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RME__8 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RM_START 9
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RM_START__9 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RM_END 10
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RM_END__10 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RME 11
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RME__11 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RM_START 12
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RM_START__12 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RM_END 13
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RM_END__13 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RME 14
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RME__14 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RM_START 15
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RM_START__15 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RM_END 16
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RM_END__16 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RME 8
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RME__8 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RM_START 9
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RM_START__9 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RM_END 17
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RM_END__17 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RME 18
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RME__18 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RM_START 19
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RM_START__19 1
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RM_END 30
|
||||
#define oland__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RM_END__30 1
|
||||
#define oland__GPU__TSS__NUM_TILES 5
|
||||
#define oland__GPU__TSS__NUM_TILES__5 1
|
||||
#define oland__GPU__TSS__TSS0_TILE 1
|
||||
#define oland__GPU__TSS__TSS0_TILE__1 1
|
||||
#define oland__GPU__TSS__TSS1_TILE 1
|
||||
#define oland__GPU__TSS__TSS1_TILE__1 1
|
||||
#define oland__GPU__TSS__TSS2_TILE 1
|
||||
#define oland__GPU__TSS__TSS2_TILE__1 1
|
||||
#define oland__GPU__TSS__TSS3_TILE 1
|
||||
#define oland__GPU__TSS__TSS3_TILE__1 1
|
||||
#define oland__GPU__TSS__TSS4_TILE 1
|
||||
#define oland__GPU__TSS__TSS4_TILE__1 1
|
||||
#define oland__GPU__TSS__TSS4_AS_ADC 1
|
||||
#define oland__GPU__TSS__TSS4_AS_ADC__1 1
|
||||
#define oland__GPU__RCU__PROGRAMMABLE_RMBITS 1
|
||||
#define oland__GPU__RCU__PROGRAMMABLE_RMBITS__1 1
|
||||
#define oland__GPU__CGTT_TILE__PDLY 1
|
||||
#define oland__GPU__CGTT_TILE__PDLY__1 1
|
||||
#define oland__GPU__PDLY_TILE__PDLY 1
|
||||
#define oland__GPU__PDLY_TILE__PDLY__1 1
|
||||
#define oland__GPU__PDLY_TILE__CLKGATE 0
|
||||
#define oland__GPU__PDLY_TILE__CLKGATE__0 1
|
||||
#define oland__GPU__CG__SMC_SCRATCH_REGS 1
|
||||
#define oland__GPU__CG__SMC_SCRATCH_REGS__1 1
|
||||
#define oland__GPU__CG__CG_DLL_PDNB 1
|
||||
#define oland__GPU__CG__CG_DLL_PDNB__1 1
|
||||
#define oland__GPU__SMU__USE_HW_VBI 1
|
||||
#define oland__GPU__SMU__USE_HW_VBI__1 1
|
||||
#define oland__GPU__SMU__NUM_CAC_MGR_4 1
|
||||
#define oland__GPU__SMU__NUM_CAC_MGR_4__1 1
|
||||
#define oland__GPU__PDMA__PRESENT 0
|
||||
#define oland__GPU__PDMA__PRESENT__0 1
|
||||
#define oland__GPU__DRMDMA__DUAL_DRMDMA_PRESENT 1
|
||||
#define oland__GPU__DRMDMA__DUAL_DRMDMA_PRESENT__1 1
|
||||
#define oland__GPU__DRM__BGAES_OFF 1
|
||||
#define oland__GPU__DRM__BGAES_OFF__1 1
|
||||
#define oland__GPU__DLB__SLEW 1
|
||||
#define oland__GPU__DLB__SLEW__1 1
|
||||
#define oland__GPU__ROM__EXT_CS_EN 1
|
||||
#define oland__GPU__ROM__EXT_CS_EN__1 1
|
||||
#define oland__GPU__CPL__GPIO_23_PRESENT 0
|
||||
#define oland__GPU__CPL__GPIO_23_PRESENT__0 1
|
||||
#define oland__GPU__CPL__GPIO_24_PRESENT 0
|
||||
#define oland__GPU__CPL__GPIO_24_PRESENT__0 1
|
||||
#define oland__GPU__CPL__GPIO_25_PRESENT 0
|
||||
#define oland__GPU__CPL__GPIO_25_PRESENT__0 1
|
||||
#define oland__GPU__CPL__GPIO_26_PRESENT 0
|
||||
#define oland__GPU__CPL__GPIO_26_PRESENT__0 1
|
||||
#define oland__GPU__CPL__GPIO_27_PRESENT 0
|
||||
#define oland__GPU__CPL__GPIO_27_PRESENT__0 1
|
||||
#define oland__GPU__CPL__MLPS_0_PRESENT 1
|
||||
#define oland__GPU__CPL__MLPS_0_PRESENT__1 1
|
||||
#define oland__GPU__CPL__MLPS_1_PRESENT 1
|
||||
#define oland__GPU__CPL__MLPS_1_PRESENT__1 1
|
||||
#define oland__GPU__CPL__MLPS_2_PRESENT 1
|
||||
#define oland__GPU__CPL__MLPS_2_PRESENT__1 1
|
||||
#define oland__GPU__CPL__MLPS_3_PRESENT 1
|
||||
#define oland__GPU__CPL__MLPS_3_PRESENT__1 1
|
||||
#define oland__GPU__CPL__SX_0_PRESENT 1
|
||||
#define oland__GPU__CPL__SX_0_PRESENT__1 1
|
||||
#define oland__GPU__SMC__TAP_FED_PRESENT 1
|
||||
#define oland__GPU__SMC__TAP_FED_PRESENT__1 1
|
||||
#define oland__GPU__CPL__PG_CODE_ENABLE 1
|
||||
#define oland__GPU__CPL__PG_CODE_ENABLE__1 1
|
||||
#define oland__GPU__CPL__PG_CODE_GPG 1
|
||||
#define oland__GPU__CPL__PG_CODE_GPG__1 1
|
||||
#define oland__GPU__AVP__MC_IF 1
|
||||
#define oland__GPU__AVP__MC_IF__1 1
|
||||
#define oland__GPU__AVP__UVD_RLC_CMC_IF 1
|
||||
#define oland__GPU__AVP__UVD_RLC_CMC_IF__1 1
|
||||
#define oland__GPU__DC__TMDS_LINK tmds_link_dual
|
||||
#define oland__GPU__DC__TMDS_LINK__TMDS_LINK_DUAL 1
|
||||
#define oland__GPU__DC__NUM_DDC_PAIRS 6
|
||||
#define oland__GPU__DC__NUM_DDC_PAIRS__6 1
|
||||
#define oland__GPU__DC__NUM_DDC_PAIRS__0_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_DDC_PAIRS__1_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_DDC_PAIRS__2_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_DDC_PAIRS__3_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_DDC_PAIRS__4_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_DDC_PAIRS__5_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_HPD 6
|
||||
#define oland__GPU__DC__NUM_HPD__6 1
|
||||
#define oland__GPU__DC__NUM_HPD__0_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_HPD__1_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_HPD__2_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_HPD__3_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_HPD__4_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_HPD__5_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_PIPE_PAIRS 3
|
||||
#define oland__GPU__DC__NUM_PIPE_PAIRS__3 1
|
||||
#define oland__GPU__DC__NUM_PIPE_PAIRS__0_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_PIPE_PAIRS__1_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_PIPE_PAIRS__2_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_PIPES 6
|
||||
#define oland__GPU__DC__NUM_PIPES__6 1
|
||||
#define oland__GPU__DC__NUM_PIPES__0_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_PIPES__1_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_PIPES__2_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_PIPES__3_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_PIPES__4_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_PIPES__5_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_DIG 6
|
||||
#define oland__GPU__DC__NUM_DIG__6 1
|
||||
#define oland__GPU__DC__NUM_DIG__0_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_DIG__1_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_DIG__2_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_DIG__3_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_DIG__4_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_DIG__5_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_AUX 6
|
||||
#define oland__GPU__DC__NUM_AUX__6 1
|
||||
#define oland__GPU__DC__NUM_AUX__0_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_AUX__1_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_AUX__2_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_AUX__3_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_AUX__4_PRESENT 1
|
||||
#define oland__GPU__DC__NUM_AUX__5_PRESENT 1
|
||||
#define oland__GPU__DISPPLL__MACRO walden
|
||||
#define oland__GPU__DISPPLL__MACRO__WALDEN 1
|
||||
#define oland__GPU__TMDPA__MACRO walden
|
||||
#define oland__GPU__TMDPA__MACRO__WALDEN 1
|
||||
#define oland__GPU__TMDPB__MACRO walden
|
||||
#define oland__GPU__TMDPB__MACRO__WALDEN 1
|
||||
#define oland__GPU__LVTMDP__MACRO walden
|
||||
#define oland__GPU__LVTMDP__MACRO__WALDEN 1
|
||||
#define oland__GPU__DACA__MACRO walden
|
||||
#define oland__GPU__DACA__MACRO__WALDEN 1
|
||||
#define oland__GPU__DACB__MACRO walden
|
||||
#define oland__GPU__DACB__MACRO__WALDEN 1
|
||||
#define oland__GPU__DC__VIP_PRESENT 1
|
||||
#define oland__GPU__DC__VIP_PRESENT__1 1
|
||||
#define oland__GPU__DC__ABM_PRESENT 1
|
||||
#define oland__GPU__DC__ABM_PRESENT__1 1
|
||||
#define oland__GPU__DC__DMCU_PRESENT 1
|
||||
#define oland__GPU__DC__DMCU_PRESENT__1 1
|
||||
#define oland__GPU__DC__DVO_PRESENT 1
|
||||
#define oland__GPU__DC__DVO_PRESENT__1 1
|
||||
#define oland__GPU__DC__SDVO_PRESENT 1
|
||||
#define oland__GPU__DC__SDVO_PRESENT__1 1
|
||||
#define oland__GPU__DC__LVDS_PRESENT 1
|
||||
#define oland__GPU__DC__LVDS_PRESENT__1 1
|
||||
#define oland__GPU__UNIPHYAB__PRESENT 1
|
||||
#define oland__GPU__UNIPHYAB__PRESENT__1 1
|
||||
#define oland__GPU__UNIPHYCD__PRESENT 1
|
||||
#define oland__GPU__UNIPHYCD__PRESENT__1 1
|
||||
#define oland__GPU__UNIPHYEF__PRESENT 1
|
||||
#define oland__GPU__UNIPHYEF__PRESENT__1 1
|
||||
#define oland__GPU__UNIPHYAB__TYPE lvtmdp
|
||||
#define oland__GPU__UNIPHYAB__TYPE__LVTMDP 1
|
||||
#define oland__GPU__UNIPHYCD__TYPE tmdpa
|
||||
#define oland__GPU__UNIPHYCD__TYPE__TMDPA 1
|
||||
#define oland__GPU__UNIPHYEF__TYPE tmdpb
|
||||
#define oland__GPU__UNIPHYEF__TYPE__TMDPB 1
|
||||
#define oland__GPU__UNIPHYAB__LVTMDP 1
|
||||
#define oland__GPU__UNIPHYAB__LVTMDP__1 1
|
||||
#define oland__GPU__DC__DACA_PRESENT 1
|
||||
#define oland__GPU__DC__DACA_PRESENT__1 1
|
||||
#define oland__GPU__DC__DACB_PRESENT 1
|
||||
#define oland__GPU__DC__DACB_PRESENT__1 1
|
||||
#define oland__GPU__DC__TVOUT_PRESENT 1
|
||||
#define oland__GPU__DC__TVOUT_PRESENT__1 1
|
||||
#define oland__GPU__DC__MVP_PRESENT 1
|
||||
#define oland__GPU__DC__MVP_PRESENT__1 1
|
||||
#define oland__GPU__DC__DENTIST_INTERFACE_PRESENT 0
|
||||
#define oland__GPU__DC__DENTIST_INTERFACE_PRESENT__0 1
|
||||
#define oland__GPU__DC__DDC1AUX1 dual_mode
|
||||
#define oland__GPU__DC__DDC1AUX1__DUAL_MODE 1
|
||||
#define oland__GPU__DC__DDC2AUX2 dual_mode
|
||||
#define oland__GPU__DC__DDC2AUX2__DUAL_MODE 1
|
||||
#define oland__GPU__DC__DDC3AUX3 dual_mode
|
||||
#define oland__GPU__DC__DDC3AUX3__DUAL_MODE 1
|
||||
#define oland__GPU__DC__DDC4AUX4 dual_mode
|
||||
#define oland__GPU__DC__DDC4AUX4__DUAL_MODE 1
|
||||
#define oland__GPU__DC__DDC5AUX5 dual_mode
|
||||
#define oland__GPU__DC__DDC5AUX5__DUAL_MODE 1
|
||||
#define oland__GPU__DC__DDC6AUX6 dual_mode
|
||||
#define oland__GPU__DC__DDC6AUX6__DUAL_MODE 1
|
||||
#define oland__GPU__DC__AUX1_PRESENT 1
|
||||
#define oland__GPU__DC__AUX1_PRESENT__1 1
|
||||
#define oland__GPU__DC__AUX2_PRESENT 1
|
||||
#define oland__GPU__DC__AUX2_PRESENT__1 1
|
||||
#define oland__GPU__DC__AUX3_PRESENT 1
|
||||
#define oland__GPU__DC__AUX3_PRESENT__1 1
|
||||
#define oland__GPU__DC__AUX4_PRESENT 1
|
||||
#define oland__GPU__DC__AUX4_PRESENT__1 1
|
||||
#define oland__GPU__DC__AUX5_PRESENT 1
|
||||
#define oland__GPU__DC__AUX5_PRESENT__1 1
|
||||
#define oland__GPU__DC__AUX6_PRESENT 1
|
||||
#define oland__GPU__DC__AUX6_PRESENT__1 1
|
||||
#define oland__GPU__DC__DENTIST_PRESENT 0
|
||||
#define oland__GPU__DC__DENTIST_PRESENT__0 1
|
||||
#define oland__GPU__DC__GENERICA_PRESENT 1
|
||||
#define oland__GPU__DC__GENERICA_PRESENT__1 1
|
||||
#define oland__GPU__DC__GENERICB_PRESENT 1
|
||||
#define oland__GPU__DC__GENERICB_PRESENT__1 1
|
||||
#define oland__GPU__DC__GENERICC_PRESENT 1
|
||||
#define oland__GPU__DC__GENERICC_PRESENT__1 1
|
||||
#define oland__GPU__DC__GENERICD_PRESENT 1
|
||||
#define oland__GPU__DC__GENERICD_PRESENT__1 1
|
||||
#define oland__GPU__DC__GENERICE_PRESENT 1
|
||||
#define oland__GPU__DC__GENERICE_PRESENT__1 1
|
||||
#define oland__GPU__DC__GENERICF_PRESENT 1
|
||||
#define oland__GPU__DC__GENERICF_PRESENT__1 1
|
||||
#define oland__GPU__DC__GENERICG_PRESENT 1
|
||||
#define oland__GPU__DC__GENERICG_PRESENT__1 1
|
||||
#define oland__GPU__DC__BLON_TYPE 0
|
||||
#define oland__GPU__DC__BLON_TYPE__0 1
|
||||
#define oland__GPU__DC__NB_STUTTER_MODE_PRESENT 0
|
||||
#define oland__GPU__DC__NB_STUTTER_MODE_PRESENT__0 1
|
||||
#define oland__GPU__DC__PCIE_REFCLK_TEST_MODE_MUX_PRESENT 0
|
||||
#define oland__GPU__DC__PCIE_REFCLK_TEST_MODE_MUX_PRESENT__0 1
|
||||
#define oland__GPU__DC__REFCLK_TEST_MODE_MUX_PRESENT 0
|
||||
#define oland__GPU__DC__REFCLK_TEST_MODE_MUX_PRESENT__0 1
|
||||
#define oland__GPU__DC__PIXCLK_TEST_MODE_MUX_PRESENT 0
|
||||
#define oland__GPU__DC__PIXCLK_TEST_MODE_MUX_PRESENT__0 1
|
||||
#define oland__GPU__DC__SYMCLK_TEST_MODE_MUX_PRESENT 0
|
||||
#define oland__GPU__DC__SYMCLK_TEST_MODE_MUX_PRESENT__0 1
|
||||
#define oland__GPU__GC__NUM_SE 1
|
||||
#define oland__GPU__GC__NUM_SE__1 1
|
||||
#define oland__GPU__GC__NUM_SE__0_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_SH_PER_SE 1
|
||||
#define oland__GPU__GC__NUM_SH_PER_SE__1 1
|
||||
#define oland__GPU__GC__NUM_SH_PER_SE__0_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_RB_PER_SE 2
|
||||
#define oland__GPU__GC__NUM_RB_PER_SE__2 1
|
||||
#define oland__GPU__GC__NUM_RB_PER_SE__0_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_RB_PER_SE__1_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_CU_PER_SH 6
|
||||
#define oland__GPU__GC__NUM_CU_PER_SH__6 1
|
||||
#define oland__GPU__GC__NUM_CU_PER_SH__0_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_CU_PER_SH__1_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_CU_PER_SH__2_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_CU_PER_SH__3_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_CU_PER_SH__4_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_CU_PER_SH__5_PRESENT 1
|
||||
#define oland__GPU__GC__WAVE_SIZE 64
|
||||
#define oland__GPU__GC__WAVE_SIZE__64 1
|
||||
#define oland__GPU__GC__NUM_CP_RINGS 3
|
||||
#define oland__GPU__GC__NUM_CP_RINGS__3 1
|
||||
#define oland__GPU__GC__NUM_CP_RINGS__0_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_CP_RINGS__1_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_CP_RINGS__2_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_SC_PER_SE 1
|
||||
#define oland__GPU__GC__NUM_SC_PER_SE__1 1
|
||||
#define oland__GPU__GC__NUM_SC_PER_SE__0_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_BCI_PER_SE 1
|
||||
#define oland__GPU__GC__NUM_BCI_PER_SE__1 1
|
||||
#define oland__GPU__GC__NUM_BCI_PER_SE__0_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_RB_PER_SC 2
|
||||
#define oland__GPU__GC__NUM_RB_PER_SC__2 1
|
||||
#define oland__GPU__GC__NUM_RB_PER_SC__0_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_RB_PER_SC__1_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_RB_PER_PACKER 2
|
||||
#define oland__GPU__GC__NUM_RB_PER_PACKER__2 1
|
||||
#define oland__GPU__GC__NUM_RB_PER_PACKER__0_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_RB_PER_PACKER__1_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_PACKER_PER_SC 1
|
||||
#define oland__GPU__GC__NUM_PACKER_PER_SC__1 1
|
||||
#define oland__GPU__GC__NUM_PACKER_PER_SC__0_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_DB_PER_PACKER 2
|
||||
#define oland__GPU__GC__NUM_DB_PER_PACKER__2 1
|
||||
#define oland__GPU__GC__NUM_DB_PER_PACKER__0_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_DB_PER_PACKER__1_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_PACKER_PER_SE 1
|
||||
#define oland__GPU__GC__NUM_PACKER_PER_SE__1 1
|
||||
#define oland__GPU__GC__NUM_PACKER_PER_SE__0_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_RB_PER_SX 2
|
||||
#define oland__GPU__GC__NUM_RB_PER_SX__2 1
|
||||
#define oland__GPU__GC__NUM_RB_PER_SX__0_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_RB_PER_SX__1_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_CU_PER_SE 6
|
||||
#define oland__GPU__GC__NUM_CU_PER_SE__6 1
|
||||
#define oland__GPU__GC__NUM_CU_PER_SE__0_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_CU_PER_SE__1_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_CU_PER_SE__2_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_CU_PER_SE__3_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_CU_PER_SE__4_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_CU_PER_SE__5_PRESENT 1
|
||||
#define oland__GPU__GC__MAX_NUMBER_WAVES 240
|
||||
#define oland__GPU__GC__MAX_NUMBER_WAVES__240 1
|
||||
#define oland__GPU__GC__MAX_NUMBER_WAVES_PER_PACKER 240
|
||||
#define oland__GPU__GC__MAX_NUMBER_WAVES_PER_PACKER__240 1
|
||||
#define oland__GPU__SQ__NUM_WAVES_PER_SIMD 10
|
||||
#define oland__GPU__SQ__NUM_WAVES_PER_SIMD__10 1
|
||||
#define oland__GPU__SQ__THREAD_GROUPS_PER_CU 16
|
||||
#define oland__GPU__SQ__THREAD_GROUPS_PER_CU__16 1
|
||||
#define oland__GPU__SQ__NUM_PERF_CNTRS 8
|
||||
#define oland__GPU__SQ__NUM_PERF_CNTRS__8 1
|
||||
#define oland__GPU__SQ__NUM_PERF_CNTRS__0_PRESENT 1
|
||||
#define oland__GPU__SQ__NUM_PERF_CNTRS__1_PRESENT 1
|
||||
#define oland__GPU__SQ__NUM_PERF_CNTRS__2_PRESENT 1
|
||||
#define oland__GPU__SQ__NUM_PERF_CNTRS__3_PRESENT 1
|
||||
#define oland__GPU__SQ__NUM_PERF_CNTRS__4_PRESENT 1
|
||||
#define oland__GPU__SQ__NUM_PERF_CNTRS__5_PRESENT 1
|
||||
#define oland__GPU__SQ__NUM_PERF_CNTRS__6_PRESENT 1
|
||||
#define oland__GPU__SQ__NUM_PERF_CNTRS__7_PRESENT 1
|
||||
#define oland__GPU__SQ__NUM_SGPR_PER_SIMD 512
|
||||
#define oland__GPU__SQ__NUM_SGPR_PER_SIMD__512 1
|
||||
#define oland__GPU__SQ__P2_IS_P1 1
|
||||
#define oland__GPU__SQ__P2_IS_P1__1 1
|
||||
#define oland__GPU__SQ__USE_SV_PACKAGES 0
|
||||
#define oland__GPU__SQ__USE_SV_PACKAGES__0 1
|
||||
#define oland__GPU__SQ__BUG_307568_FIXED 1
|
||||
#define oland__GPU__SQ__BUG_307568_FIXED__1 1
|
||||
#define oland__GPU__SQC__NUM_SQC 2
|
||||
#define oland__GPU__SQC__NUM_SQC__2 1
|
||||
#define oland__GPU__SQC__NUM_SQC__0_PRESENT 1
|
||||
#define oland__GPU__SQC__NUM_SQC__1_PRESENT 1
|
||||
#define oland__GPU__SQC__NUM_SQC_PER_SH 2
|
||||
#define oland__GPU__SQC__NUM_SQC_PER_SH__2 1
|
||||
#define oland__GPU__SQC__NUM_SQC_PER_SH__0_PRESENT 1
|
||||
#define oland__GPU__SQC__NUM_SQC_PER_SH__1_PRESENT 1
|
||||
#define oland__GPU__SQC__IDENTICAL_NAMES 1
|
||||
#define oland__GPU__SQC__IDENTICAL_NAMES__1 1
|
||||
#define oland__GPU__SQC__SH_SQC0_POSN_AFTER_SQ 0
|
||||
#define oland__GPU__SQC__SH_SQC0_POSN_AFTER_SQ__0 1
|
||||
#define oland__GPU__SQC__SH_SQC0_FIRST_CONNECTED_SQ 0
|
||||
#define oland__GPU__SQC__SH_SQC0_FIRST_CONNECTED_SQ__0 1
|
||||
#define oland__GPU__SQC__SH_SQC0_NUM_CU 3
|
||||
#define oland__GPU__SQC__SH_SQC0_NUM_CU__3 1
|
||||
#define oland__GPU__SQC__SH_SQC0_NUM_CU__0_PRESENT 1
|
||||
#define oland__GPU__SQC__SH_SQC0_NUM_CU__1_PRESENT 1
|
||||
#define oland__GPU__SQC__SH_SQC0_NUM_CU__2_PRESENT 1
|
||||
#define oland__GPU__SQC__SH_SQC0_NUM_BANK 4
|
||||
#define oland__GPU__SQC__SH_SQC0_NUM_BANK__4 1
|
||||
#define oland__GPU__SQC__SH_SQC0_NUM_BANK__0_PRESENT 1
|
||||
#define oland__GPU__SQC__SH_SQC0_NUM_BANK__1_PRESENT 1
|
||||
#define oland__GPU__SQC__SH_SQC0_NUM_BANK__2_PRESENT 1
|
||||
#define oland__GPU__SQC__SH_SQC0_NUM_BANK__3_PRESENT 1
|
||||
#define oland__GPU__SQC__SH_SQC0_BANK_INST_CACHE_SIZE_KBYTES 8
|
||||
#define oland__GPU__SQC__SH_SQC0_BANK_INST_CACHE_SIZE_KBYTES__8 1
|
||||
#define oland__GPU__SQC__SH_SQC0_BANK_DATA_CACHE_SIZE_KBYTES 4
|
||||
#define oland__GPU__SQC__SH_SQC0_BANK_DATA_CACHE_SIZE_KBYTES__4 1
|
||||
#define oland__GPU__SQC__SH_SQC1_POSN_AFTER_SQ 3
|
||||
#define oland__GPU__SQC__SH_SQC1_POSN_AFTER_SQ__3 1
|
||||
#define oland__GPU__SQC__SH_SQC1_FIRST_CONNECTED_SQ 3
|
||||
#define oland__GPU__SQC__SH_SQC1_FIRST_CONNECTED_SQ__3 1
|
||||
#define oland__GPU__SQC__SH_SQC1_NUM_CU 3
|
||||
#define oland__GPU__SQC__SH_SQC1_NUM_CU__3 1
|
||||
#define oland__GPU__SQC__SH_SQC1_NUM_CU__0_PRESENT 1
|
||||
#define oland__GPU__SQC__SH_SQC1_NUM_CU__1_PRESENT 1
|
||||
#define oland__GPU__SQC__SH_SQC1_NUM_CU__2_PRESENT 1
|
||||
#define oland__GPU__SQC__SH_SQC1_NUM_BANK 4
|
||||
#define oland__GPU__SQC__SH_SQC1_NUM_BANK__4 1
|
||||
#define oland__GPU__SQC__SH_SQC1_NUM_BANK__0_PRESENT 1
|
||||
#define oland__GPU__SQC__SH_SQC1_NUM_BANK__1_PRESENT 1
|
||||
#define oland__GPU__SQC__SH_SQC1_NUM_BANK__2_PRESENT 1
|
||||
#define oland__GPU__SQC__SH_SQC1_NUM_BANK__3_PRESENT 1
|
||||
#define oland__GPU__SQC__SH_SQC1_BANK_INST_CACHE_SIZE_KBYTES 8
|
||||
#define oland__GPU__SQC__SH_SQC1_BANK_INST_CACHE_SIZE_KBYTES__8 1
|
||||
#define oland__GPU__SQC__SH_SQC1_BANK_DATA_CACHE_SIZE_KBYTES 4
|
||||
#define oland__GPU__SQC__SH_SQC1_BANK_DATA_CACHE_SIZE_KBYTES__4 1
|
||||
#define oland__GPU__SQC__SH_SQC2_POSN_AFTER_SQ 0
|
||||
#define oland__GPU__SQC__SH_SQC2_POSN_AFTER_SQ__0 1
|
||||
#define oland__GPU__SQC__SH_SQC2_FIRST_CONNECTED_SQ 0
|
||||
#define oland__GPU__SQC__SH_SQC2_FIRST_CONNECTED_SQ__0 1
|
||||
#define oland__GPU__SQC__SH_SQC2_NUM_CU 0
|
||||
#define oland__GPU__SQC__SH_SQC2_NUM_CU__0 1
|
||||
#define oland__GPU__SQC__SH_SQC2_NUM_BANK 0
|
||||
#define oland__GPU__SQC__SH_SQC2_NUM_BANK__0 1
|
||||
#define oland__GPU__SQC__SH_SQC2_BANK_INST_CACHE_SIZE_KBYTES 0
|
||||
#define oland__GPU__SQC__SH_SQC2_BANK_INST_CACHE_SIZE_KBYTES__0 1
|
||||
#define oland__GPU__SQC__SH_SQC2_BANK_DATA_CACHE_SIZE_KBYTES 0
|
||||
#define oland__GPU__SQC__SH_SQC2_BANK_DATA_CACHE_SIZE_KBYTES__0 1
|
||||
#define oland__GPU__SQC__P2_IS_P1 1
|
||||
#define oland__GPU__SQC__P2_IS_P1__1 1
|
||||
#define oland__GPU__SQC__BUG_303685_EXISTS 1
|
||||
#define oland__GPU__SQC__BUG_303685_EXISTS__1 1
|
||||
#define oland__GPU__GC__GDS_EXISTS 1
|
||||
#define oland__GPU__GC__GDS_EXISTS__1 1
|
||||
#define oland__GPU__GC__RB_REDUNDANCY 0
|
||||
#define oland__GPU__GC__RB_REDUNDANCY__0 1
|
||||
#define oland__GPU__GC__SC_DOES_RB_REDUNDANCY 0
|
||||
#define oland__GPU__GC__SC_DOES_RB_REDUNDANCY__0 1
|
||||
#define oland__GPU__GC__MEM_ADDR_BITS 40
|
||||
#define oland__GPU__GC__MEM_ADDR_BITS__40 1
|
||||
#define oland__GPU__GC__NEW_VERTEX_VECTOR_ORDER 0
|
||||
#define oland__GPU__GC__NEW_VERTEX_VECTOR_ORDER__0 1
|
||||
#define oland__GPU__GC__NUM_INTERPS 1
|
||||
#define oland__GPU__GC__NUM_INTERPS__1 1
|
||||
#define oland__GPU__GC__HZ_PRESENT 1
|
||||
#define oland__GPU__GC__HZ_PRESENT__1 1
|
||||
#define oland__GPU__GC__NUM_CLKS_PER_PRIM 1
|
||||
#define oland__GPU__GC__NUM_CLKS_PER_PRIM__1 1
|
||||
#define oland__GPU__GC__NUM_INTERP_PRIM_PER_CLK 2
|
||||
#define oland__GPU__GC__NUM_INTERP_PRIM_PER_CLK__2 1
|
||||
#define oland__GPU__GC__ATTR_BUS_PRIM_PER_CLK 2
|
||||
#define oland__GPU__GC__ATTR_BUS_PRIM_PER_CLK__2 1
|
||||
#define oland__GPU__GC__NUM_MAX_GS_THDS 16
|
||||
#define oland__GPU__GC__NUM_MAX_GS_THDS__16 1
|
||||
#define oland__GPU__GC__NUM_MIN_GS_THDS 4
|
||||
#define oland__GPU__GC__NUM_MIN_GS_THDS__4 1
|
||||
#define oland__GPU__GC__NUM_STATES 8
|
||||
#define oland__GPU__GC__NUM_STATES__8 1
|
||||
#define oland__GPU__GC__NUM_STATES__0_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_STATES__1_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_STATES__2_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_STATES__3_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_STATES__4_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_STATES__5_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_STATES__6_PRESENT 1
|
||||
#define oland__GPU__GC__NUM_STATES__7_PRESENT 1
|
||||
#define oland__GPU__GC__STWTPTR_WIDTH 3
|
||||
#define oland__GPU__GC__STWTPTR_WIDTH__3 1
|
||||
#define oland__GPU__SH__DOUBLE_FLOAT_PRESENT 1
|
||||
#define oland__GPU__SH__DOUBLE_FLOAT_PRESENT__1 1
|
||||
#define oland__GPU__SH__NUM_DOUBLE_VSPS_PER_SIMD 1
|
||||
#define oland__GPU__SH__NUM_DOUBLE_VSPS_PER_SIMD__1 1
|
||||
#define oland__GPU__SH__NUM_DOUBLE_VSPS_PER_SIMD__0_PRESENT 1
|
||||
#define oland__GPU__SH__NORM_SIN_COS 1
|
||||
#define oland__GPU__SH__NORM_SIN_COS__1 1
|
||||
#define oland__GPU__SH__MICROCODE_LEVEL 10
|
||||
#define oland__GPU__SH__MICROCODE_LEVEL__10 1
|
||||
#define oland__GPU__SH__NUM_EXPREQ_PER_CU 12
|
||||
#define oland__GPU__SH__NUM_EXPREQ_PER_CU__12 1
|
||||
#define oland__GPU__GC__GLOBAL_VGT_PA 0
|
||||
#define oland__GPU__GC__GLOBAL_VGT_PA__0 1
|
||||
#define oland__GPU__GC__NUM_FRONTEND 1
|
||||
#define oland__GPU__GC__NUM_FRONTEND__1 1
|
||||
#define oland__GPU__GC__NUM_FRONTEND__0_PRESENT 1
|
||||
#define oland__GPU__GC__COALESCED_READ_PRESENT 1
|
||||
#define oland__GPU__GC__COALESCED_READ_PRESENT__1 1
|
||||
#define oland__GPU__GC__NUM_CLKS_PER_TILE 1
|
||||
#define oland__GPU__GC__NUM_CLKS_PER_TILE__1 1
|
||||
#define oland__GPU__GC__DBSC_TRUE_QUAD_INTF 1
|
||||
#define oland__GPU__GC__DBSC_TRUE_QUAD_INTF__1 1
|
||||
#define oland__GPU__GC__ASYNC_DISPATCH 1
|
||||
#define oland__GPU__GC__ASYNC_DISPATCH__1 1
|
||||
#define oland__GPU__GC__VMID_PORTS_EXISTS 1
|
||||
#define oland__GPU__GC__VMID_PORTS_EXISTS__1 1
|
||||
#define oland__GPU__GC__NUM_EXPORT_BUS 2
|
||||
#define oland__GPU__GC__NUM_EXPORT_BUS__2 1
|
||||
#define oland__GPU__GC__TILING_CONFIG_TABLE 1
|
||||
#define oland__GPU__GC__TILING_CONFIG_TABLE__1 1
|
||||
#define oland__GPU__GC__FMASK_TILING_CONFIG_TABLE 1
|
||||
#define oland__GPU__GC__FMASK_TILING_CONFIG_TABLE__1 1
|
||||
#define oland__GPU__GC__NEW_SRC_COLOR_FORMAT 1
|
||||
#define oland__GPU__GC__NEW_SRC_COLOR_FORMAT__1 1
|
||||
#define oland__GPU__SP__NUM_GPRS 256
|
||||
#define oland__GPU__SP__NUM_GPRS__256 1
|
||||
#define oland__GPU__SP__GPR_ADDR_WIDTH 8
|
||||
#define oland__GPU__SP__GPR_ADDR_WIDTH__8 1
|
||||
#define oland__GPU__SP__WIDTH_GPRS 128
|
||||
#define oland__GPU__SP__WIDTH_GPRS__128 1
|
||||
#define oland__GPU__SPI__TMP_SCBD_SLOTS_PER_CU 32
|
||||
#define oland__GPU__SPI__TMP_SCBD_SLOTS_PER_CU__32 1
|
||||
#define oland__GPU__VGT__GSPRIM_BUFF_DEPTH 768
|
||||
#define oland__GPU__VGT__GSPRIM_BUFF_DEPTH__768 1
|
||||
#define oland__GPU__VGT__GS_TABLE_DEPTH 16
|
||||
#define oland__GPU__VGT__GS_TABLE_DEPTH__16 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_DEPTH 512
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_DEPTH__512 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH 16
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__16 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__0_PRESENT 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__1_PRESENT 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__2_PRESENT 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__3_PRESENT 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__4_PRESENT 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__5_PRESENT 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__6_PRESENT 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__7_PRESENT 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__8_PRESENT 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__9_PRESENT 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__10_PRESENT 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__11_PRESENT 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__12_PRESENT 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__13_PRESENT 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__14_PRESENT 1
|
||||
#define oland__GPU__SX__PARAMETER_CACHE_WIDTH__15_PRESENT 1
|
||||
#define oland__GPU__SX__COLOR_SCOREBOARD_SLOTS 64
|
||||
#define oland__GPU__SX__COLOR_SCOREBOARD_SLOTS__64 1
|
||||
#define oland__GPU__SX__POS_SCOREBOARD_SLOTS 16
|
||||
#define oland__GPU__SX__POS_SCOREBOARD_SLOTS__16 1
|
||||
#define oland__GPU__SX__COLOR_EXPORT_BUFFER_SIZE 256
|
||||
#define oland__GPU__SX__COLOR_EXPORT_BUFFER_SIZE__256 1
|
||||
#define oland__GPU__SX__POS_EXPORT_BUFFER_SIZE 256
|
||||
#define oland__GPU__SX__POS_EXPORT_BUFFER_SIZE__256 1
|
||||
#define oland__GPU__SX__COLOR_EXPORT_REG_BUFFER_SIZE 1024
|
||||
#define oland__GPU__SX__COLOR_EXPORT_REG_BUFFER_SIZE__1024 1
|
||||
#define oland__GPU__SX__POS_EXPORT_REG_BUFFER_SIZE 1024
|
||||
#define oland__GPU__SX__POS_EXPORT_REG_BUFFER_SIZE__1024 1
|
||||
#define oland__GPU__SX__PIXEL_FIFO_DEPTH 32
|
||||
#define oland__GPU__SX__PIXEL_FIFO_DEPTH__32 1
|
||||
#define oland__GPU__PA__PRIM_BUFF_DEPTH 1536
|
||||
#define oland__GPU__PA__PRIM_BUFF_DEPTH__1536 1
|
||||
#define oland__GPU__PA__NUM_CLIPPERS 4
|
||||
#define oland__GPU__PA__NUM_CLIPPERS__4 1
|
||||
#define oland__GPU__PA__LOG2_MAX_SAMPLES 3
|
||||
#define oland__GPU__PA__LOG2_MAX_SAMPLES__3 1
|
||||
#define oland__GPU__TA__GRBM_INTF_RESET_FIX 1
|
||||
#define oland__GPU__TA__GRBM_INTF_RESET_FIX__1 1
|
||||
#define oland__GPU__TC__TCC_PRESENT 1
|
||||
#define oland__GPU__TC__TCC_PRESENT__1 1
|
||||
#define oland__GPU__TC__TCR_TCA_REQ_CREDITS 32
|
||||
#define oland__GPU__TC__TCR_TCA_REQ_CREDITS__32 1
|
||||
#define oland__GPU__TC__TA_HANDLE_BASEADDR 1
|
||||
#define oland__GPU__TC__TA_HANDLE_BASEADDR__1 1
|
||||
#define oland__GPU__TC__TCP_L1_SIZE 16
|
||||
#define oland__GPU__TC__TCP_L1_SIZE__16 1
|
||||
#define oland__GPU__TC__NUM_TCPS 6
|
||||
#define oland__GPU__TC__NUM_TCPS__6 1
|
||||
#define oland__GPU__TC__NUM_TCPS__0_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCPS__1_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCPS__2_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCPS__3_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCPS__4_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCPS__5_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCCS 4
|
||||
#define oland__GPU__TC__NUM_TCCS__4 1
|
||||
#define oland__GPU__TC__NUM_TCCS__0_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCCS__1_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCCS__2_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCCS__3_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCAS 2
|
||||
#define oland__GPU__TC__NUM_TCAS__2 1
|
||||
#define oland__GPU__TC__NUM_TCAS__0_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCAS__1_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCIRS 3
|
||||
#define oland__GPU__TC__NUM_TCIRS__3 1
|
||||
#define oland__GPU__TC__NUM_TCIRS__0_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCIRS__1_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCIRS__2_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCIWS 1
|
||||
#define oland__GPU__TC__NUM_TCIWS__1 1
|
||||
#define oland__GPU__TC__NUM_TCIWS__0_PRESENT 1
|
||||
#define oland__GPU__TC__CLIENT_TCI_REQ_CREDITS 8
|
||||
#define oland__GPU__TC__CLIENT_TCI_REQ_CREDITS__8 1
|
||||
#define oland__GPU__TC__VGT_TCI_REQ_CREDITS 8
|
||||
#define oland__GPU__TC__VGT_TCI_REQ_CREDITS__8 1
|
||||
#define oland__GPU__TC__SQC_TCI_REQ_CREDITS 8
|
||||
#define oland__GPU__TC__SQC_TCI_REQ_CREDITS__8 1
|
||||
#define oland__GPU__TC__CP_TCI_REQ_CREDITS 8
|
||||
#define oland__GPU__TC__CP_TCI_REQ_CREDITS__8 1
|
||||
#define oland__GPU__TC__NUM_TCIS 4
|
||||
#define oland__GPU__TC__NUM_TCIS__4 1
|
||||
#define oland__GPU__TC__NUM_TCIS__0_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCIS__1_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCIS__2_PRESENT 1
|
||||
#define oland__GPU__TC__NUM_TCIS__3_PRESENT 1
|
||||
#define oland__GPU__TC__TCC_NUM_LINES 1024
|
||||
#define oland__GPU__TC__TCC_NUM_LINES__1024 1
|
||||
#define oland__GPU__TC__TCA_PHASE 0
|
||||
#define oland__GPU__TC__TCA_PHASE__0 1
|
||||
#define oland__GPU__TC__TCA_RTN_ARB_IO_PIPELINING 0
|
||||
#define oland__GPU__TC__TCA_RTN_ARB_IO_PIPELINING__0 1
|
||||
#define oland__GPU__TC__CP_VGT_TCI_ABOVE_SH0 0
|
||||
#define oland__GPU__TC__CP_VGT_TCI_ABOVE_SH0__0 1
|
||||
#define oland__GPU__DB__TB_USES_EMULATOR_MODE 0
|
||||
#define oland__GPU__DB__TB_USES_EMULATOR_MODE__0 1
|
||||
#define oland__GPU__DB__USE_ADDRRAXX_LIB 1
|
||||
#define oland__GPU__DB__USE_ADDRRAXX_LIB__1 1
|
||||
#define oland__GPU__DB__LEGACY_TILE_MODE_ASSERTS 1
|
||||
#define oland__GPU__DB__LEGACY_TILE_MODE_ASSERTS__1 1
|
||||
#define oland__GPU__DB__SUBBLOCK_GATES_PRESENT 0
|
||||
#define oland__GPU__DB__SUBBLOCK_GATES_PRESENT__0 1
|
||||
#define oland__GPU__CB__BLENDER_NUM_PIXELS 4
|
||||
#define oland__GPU__CB__BLENDER_NUM_PIXELS__4 1
|
||||
#define oland__GPU__CB__BLENDER_NUM_FP32_COMPS 4
|
||||
#define oland__GPU__CB__BLENDER_NUM_FP32_COMPS__4 1
|
||||
#define oland__GPU__CB__COMPRESSION 1
|
||||
#define oland__GPU__CB__COMPRESSION__1 1
|
||||
#define oland__GPU__LDS__SIZE 64
|
||||
#define oland__GPU__LDS__SIZE__64 1
|
||||
#define oland__GPU__LDS__NUM_PIXELS 32
|
||||
#define oland__GPU__LDS__NUM_PIXELS__32 1
|
||||
#define oland__GPU__LDS__NUM_BANKS 32
|
||||
#define oland__GPU__LDS__NUM_BANKS__32 1
|
||||
#define oland__GPU__GDS__SIZE 64
|
||||
#define oland__GPU__GDS__SIZE__64 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS 16
|
||||
#define oland__GPU__GDS__NUM_PIXELS__16 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__0_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__1_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__2_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__3_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__4_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__5_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__6_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__7_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__8_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__9_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__10_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__11_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__12_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__13_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__14_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_PIXELS__15_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS 16
|
||||
#define oland__GPU__GDS__NUM_BANKS__16 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__0_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__1_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__2_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__3_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__4_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__5_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__6_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__7_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__8_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__9_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__10_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__11_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__12_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__13_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__14_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_BANKS__15_PRESENT 1
|
||||
#define oland__GPU__GDS__NUM_OA_COUNTERS 4
|
||||
#define oland__GPU__GDS__NUM_OA_COUNTERS__4 1
|
||||
#define oland__GPU__RLC__LARGE_UCODE_RAM 1
|
||||
#define oland__GPU__RLC__LARGE_UCODE_RAM__1 1
|
||||
#define oland__GPU__RLC__LARGE_SCRATCH_RAM 1
|
||||
#define oland__GPU__RLC__LARGE_SCRATCH_RAM__1 1
|
||||
#define oland__GPU__RLC__GFX_POWER_GATING 0
|
||||
#define oland__GPU__RLC__GFX_POWER_GATING__0 1
|
||||
#define oland__GPU__GC__SC_BCI_16_SAMPLE_PER_PIXEL 1
|
||||
#define oland__GPU__GC__SC_BCI_16_SAMPLE_PER_PIXEL__1 1
|
||||
#define oland__GPU__GC__TMP_USE_RASTER_CONFIG 1
|
||||
#define oland__GPU__GC__TMP_USE_RASTER_CONFIG__1 1
|
||||
#define oland__GPU__GC__FLT_NORM_0_6 0
|
||||
#define oland__GPU__GC__FLT_NORM_0_6__0 1
|
||||
#define oland__GPU__IO__PCIE_PHY falcon65g16x
|
||||
#define oland__GPU__IO__PCIE_PHY__FALCON65G16X 1
|
||||
#define oland__GPU__IO__DVP_SUBMOD io_r
|
||||
#define oland__GPU__IO__DVP_SUBMOD__IO_R 1
|
||||
#define oland__GPU__IO__SYNC_SUBMOD io_b
|
||||
#define oland__GPU__IO__SYNC_SUBMOD__IO_B 1
|
||||
#define oland__GPU__IO__GENERICA_SUBMOD io_b
|
||||
#define oland__GPU__IO__GENERICA_SUBMOD__IO_B 1
|
||||
#define oland__GPU__IO__GENERICB_SUBMOD io_b
|
||||
#define oland__GPU__IO__GENERICB_SUBMOD__IO_B 1
|
||||
#define oland__GPU__IO__GENERICC_SUBMOD io_b
|
||||
#define oland__GPU__IO__GENERICC_SUBMOD__IO_B 1
|
||||
#define oland__GPU__IO__GENERICD_SUBMOD io_b
|
||||
#define oland__GPU__IO__GENERICD_SUBMOD__IO_B 1
|
||||
#define oland__GPU__IO__GENERICE_SUBMOD io_b
|
||||
#define oland__GPU__IO__GENERICE_SUBMOD__IO_B 1
|
||||
#define oland__GPU__IO__GENERICF_SUBMOD io_b
|
||||
#define oland__GPU__IO__GENERICF_SUBMOD__IO_B 1
|
||||
#define oland__GPU__IO__GENERICG_SUBMOD io_b
|
||||
#define oland__GPU__IO__GENERICG_SUBMOD__IO_B 1
|
||||
#define oland__GPU__IO__VID_SUBMOD io_r
|
||||
#define oland__GPU__IO__VID_SUBMOD__IO_R 1
|
||||
#define oland__GPU__IO__GPIO_SUBMOD io_b
|
||||
#define oland__GPU__IO__GPIO_SUBMOD__IO_B 1
|
||||
#define oland__GPU__IO__PLL_SUBMOD io_b
|
||||
#define oland__GPU__IO__PLL_SUBMOD__IO_B 1
|
||||
#define oland__GPU__IO__SPLL_SUBMOD io_b
|
||||
#define oland__GPU__IO__SPLL_SUBMOD__IO_B 1
|
||||
#define oland__GPU__IO__UPLL_SUBMOD io_b
|
||||
#define oland__GPU__IO__UPLL_SUBMOD__IO_B 1
|
||||
#define oland__GPU__IO__HPD_SUBMOD io_b
|
||||
#define oland__GPU__IO__HPD_SUBMOD__IO_B 1
|
||||
#define oland__GPU__IO__I2C_SUBMOD io_b
|
||||
#define oland__GPU__IO__I2C_SUBMOD__IO_B 1
|
||||
#define oland__GPU__IO__ASAT_45_PLL 1
|
||||
#define oland__GPU__IO__ASAT_45_PLL__1 1
|
||||
#define oland__GPU__IO__PWRGOOD 1
|
||||
#define oland__GPU__IO__PWRGOOD__1 1
|
||||
#define oland__GPU__IO__NUM_MPLL 2
|
||||
#define oland__GPU__IO__NUM_MPLL__2 1
|
||||
#define oland__GPU__IO__READY 1
|
||||
#define oland__GPU__IO__READY__1 1
|
||||
#define oland__GPU__MC__NUM_MCB_BLOCKS 1
|
||||
#define oland__GPU__MC__NUM_MCB_BLOCKS__1 1
|
||||
#define oland__GPU__MC__NUM_MCB_BLOCKS__0_PRESENT 1
|
||||
#define oland__GPU__MC__NUM_MCB_TILES 1
|
||||
#define oland__GPU__MC__NUM_MCB_TILES__1 1
|
||||
#define oland__GPU__MC__NUM_MCB_TILES__0_PRESENT 1
|
||||
#define oland__GPU__MC__NUM_MCD_BLOCKS 2
|
||||
#define oland__GPU__MC__NUM_MCD_BLOCKS__2 1
|
||||
#define oland__GPU__MC__NUM_MCD_BLOCKS__0_PRESENT 1
|
||||
#define oland__GPU__MC__NUM_MCD_BLOCKS__1_PRESENT 1
|
||||
#define oland__GPU__MC__NUM_MCC_BLOCKS 2
|
||||
#define oland__GPU__MC__NUM_MCC_BLOCKS__2 1
|
||||
#define oland__GPU__MC__NUM_MCC_BLOCKS__0_PRESENT 1
|
||||
#define oland__GPU__MC__NUM_MCC_BLOCKS__1_PRESENT 1
|
||||
#define oland__GPU__MC__NUM_MCT_TILES 2
|
||||
#define oland__GPU__MC__NUM_MCT_TILES__2 1
|
||||
#define oland__GPU__MC__NUM_IO_CHNLS 4
|
||||
#define oland__GPU__MC__NUM_IO_CHNLS__4 1
|
||||
#define oland__GPU__MC__NUM_IO_CHNLS__0_PRESENT 1
|
||||
#define oland__GPU__MC__NUM_IO_CHNLS__1_PRESENT 1
|
||||
#define oland__GPU__MC__NUM_IO_CHNLS__2_PRESENT 1
|
||||
#define oland__GPU__MC__NUM_IO_CHNLS__3_PRESENT 1
|
||||
#define oland__GPU__MC__CDRRDBK 6
|
||||
#define oland__GPU__MC__CDRRDBK__6 1
|
||||
#define oland__GPU__MC__NUM_RPB_EFF_QUEUES 2
|
||||
#define oland__GPU__MC__NUM_RPB_EFF_QUEUES__2 1
|
||||
#define oland__GPU__MC__MCD0_BLOCK 1
|
||||
#define oland__GPU__MC__MCD0_BLOCK__1 1
|
||||
#define oland__GPU__MC__MCD1_BLOCK 1
|
||||
#define oland__GPU__MC__MCD1_BLOCK__1 1
|
||||
#define oland__GPU__MC__MCC0_BLOCK 1
|
||||
#define oland__GPU__MC__MCC0_BLOCK__1 1
|
||||
#define oland__GPU__MC__MCC1_BLOCK 1
|
||||
#define oland__GPU__MC__MCC1_BLOCK__1 1
|
||||
#define oland__GPU__MC__MCB_BLOCK 1
|
||||
#define oland__GPU__MC__MCB_BLOCK__1 1
|
||||
#define oland__GPU__MC__ALLOW_LARRAY 0
|
||||
#define oland__GPU__MC__ALLOW_LARRAY__0 1
|
||||
#define oland__GPU__MC__MCD_SRBM_PRESENT 1
|
||||
#define oland__GPU__MC__MCD_SRBM_PRESENT__1 1
|
||||
#define oland__GPU__MC__HDP_RD_ON_GBL1 1
|
||||
#define oland__GPU__MC__HDP_RD_ON_GBL1__1 1
|
||||
#define oland__GPU__MC__TWO_GBL0_RDRET 1
|
||||
#define oland__GPU__MC__TWO_GBL0_RDRET__1 1
|
||||
#define oland__GPU__MC__NUM_OF_RB_PER_MCD 1
|
||||
#define oland__GPU__MC__NUM_OF_RB_PER_MCD__1 1
|
||||
#define oland__GPU__MC__NUM_TC_PER_MCD 2
|
||||
#define oland__GPU__MC__NUM_TC_PER_MCD__2 1
|
||||
#define oland__GPU__MC__NUM_TCCS 4
|
||||
#define oland__GPU__MC__NUM_TCCS__4 1
|
||||
#define oland__GPU__MC__NUM_MCD_POW2 1
|
||||
#define oland__GPU__MC__NUM_MCD_POW2__1 1
|
||||
#define oland__GPU__MC__MCD0_IO0_REP 6
|
||||
#define oland__GPU__MC__MCD0_IO0_REP__6 1
|
||||
#define oland__GPU__MC__MCD0_IO1_REP 3
|
||||
#define oland__GPU__MC__MCD0_IO1_REP__3 1
|
||||
#define oland__GPU__MC__MCD1_IO0_REP 5
|
||||
#define oland__GPU__MC__MCD1_IO0_REP__5 1
|
||||
#define oland__GPU__MC__MCD1_IO1_REP 3
|
||||
#define oland__GPU__MC__MCD1_IO1_REP__3 1
|
||||
#define oland__GPU__MC__SIMPLIFIED_BLACKOUT 1
|
||||
#define oland__GPU__MC__SIMPLIFIED_BLACKOUT__1 1
|
||||
#define oland__GPU__MC__DDR5_MCLK_DEFAULT 5
|
||||
#define oland__GPU__MC__DDR5_MCLK_DEFAULT__5 1
|
||||
#define oland__GPU__MC__XBAR_REMAP 0
|
||||
#define oland__GPU__MC__XBAR_REMAP__0 1
|
||||
#define oland__GPU__MC__GPU_VIRTUAL_ADDRESS_WIDTH 40
|
||||
#define oland__GPU__MC__GPU_VIRTUAL_ADDRESS_WIDTH__40 1
|
||||
#define oland__GPU__MC__GPU_PHYSICAL_ADDRESS_WIDTH 40
|
||||
#define oland__GPU__MC__GPU_PHYSICAL_ADDRESS_WIDTH__40 1
|
||||
#define oland__GPU__MC__PCIE_VIRTUAL_ADDRESS_WIDTH 48
|
||||
#define oland__GPU__MC__PCIE_VIRTUAL_ADDRESS_WIDTH__48 1
|
||||
#define oland__GPU__MC__PCIE_PHYSICAL_ADDRESS_WIDTH 48
|
||||
#define oland__GPU__MC__PCIE_PHYSICAL_ADDRESS_WIDTH__48 1
|
||||
#define oland__GPU__MC__SPLIT_TILES 1
|
||||
#define oland__GPU__MC__SPLIT_TILES__1 1
|
||||
#define oland__GPU__MC__PAB_EXISTS 0
|
||||
#define oland__GPU__MC__PAB_EXISTS__0 1
|
||||
#define oland__GPU__MC__FUSION_FEATURE_ONLY 0
|
||||
#define oland__GPU__MC__FUSION_FEATURE_ONLY__0 1
|
||||
#define oland__GPU__MC__POWER_GATING 1
|
||||
#define oland__GPU__MC__POWER_GATING__1 1
|
||||
#define oland__GPU__MC__NUM_PGFSM_BLOCKS 3
|
||||
#define oland__GPU__MC__NUM_PGFSM_BLOCKS__3 1
|
||||
#define oland__GPU__MC__PHY_POWER_GATING 1
|
||||
#define oland__GPU__MC__PHY_POWER_GATING__1 1
|
||||
#define oland__GPU__MC__LOWSPEED_MEMPHY 1
|
||||
#define oland__GPU__MC__LOWSPEED_MEMPHY__1 1
|
||||
#define oland__GPU__VID__PRESENT 0
|
||||
#define oland__GPU__VID__PRESENT__0 1
|
||||
#define oland__GPU__DC__PRESENT 0
|
||||
#define oland__GPU__DC__PRESENT__0 1
|
||||
#define oland__GPU__AVP__PRESENT 0
|
||||
#define oland__GPU__AVP__PRESENT__0 1
|
||||
#define oland__GPU__UVD__PRESENT 0
|
||||
#define oland__GPU__UVD__PRESENT__0 1
|
||||
#define oland__ENV__GPU__UVD__HAVE_RTL 0
|
||||
#define oland__ENV__GPU__UVD__HAVE_RTL__0 1
|
||||
#define oland__ENV__GPU__MC__HAVE_BFM 1
|
||||
#define oland__ENV__GPU__MC__HAVE_BFM__1 1
|
||||
#define oland__ENV__GPU__MC__HAVE_RTL 0
|
||||
#define oland__ENV__GPU__MC__HAVE_RTL__0 1
|
||||
#define oland__GPU__UVD__PROJ_LARK 1
|
||||
#define oland__GPU__UVD__PROJ_LARK__1 1
|
||||
#define oland__GPU__UVD__CTX_ENABLE 1
|
||||
#define oland__GPU__UVD__CTX_ENABLE__1 1
|
||||
#define oland__GPU__UVD__MC_7XX 1
|
||||
#define oland__GPU__UVD__MC_7XX__1 1
|
||||
#define oland__GPU__UVD__CGC_CGTT_LOCAL_CLOCK_GATER 1
|
||||
#define oland__GPU__UVD__CGC_CGTT_LOCAL_CLOCK_GATER__1 1
|
||||
#define oland__GPU__MC__ARB_VM_CREDITS 32
|
||||
#define oland__GPU__MC__ARB_VM_CREDITS__32 1
|
||||
#define oland__GPU__MC__MCD_TLBS 4
|
||||
#define oland__GPU__MC__MCD_TLBS__4 1
|
||||
#define oland__GPU__MC__MCB_TLBS 3
|
||||
#define oland__GPU__MC__MCB_TLBS__3 1
|
||||
#define oland__GPU__MC__NO_STALL_ON_FAULT 1
|
||||
#define oland__GPU__MC__NO_STALL_ON_FAULT__1 1
|
||||
#define oland__GPU__MC__VMC_CACHES 2
|
||||
#define oland__GPU__MC__VMC_CACHES__2 1
|
||||
#define oland__GPU__MC__BIGK_CACHE_SIZE 4
|
||||
#define oland__GPU__MC__BIGK_CACHE_SIZE__4 1
|
||||
#define oland__GPU__MC__MCB_TLB0_CAM 5
|
||||
#define oland__GPU__MC__MCB_TLB0_CAM__5 1
|
||||
#define oland__GPU__MC__MCB_TLB1_CAM 4
|
||||
#define oland__GPU__MC__MCB_TLB1_CAM__4 1
|
||||
#define oland__GPU__MC__MCB_TLB2_CAM 4
|
||||
#define oland__GPU__MC__MCB_TLB2_CAM__4 1
|
||||
#define oland__GPU__MC__MCD_TLB0_CAM 4
|
||||
#define oland__GPU__MC__MCD_TLB0_CAM__4 1
|
||||
#define oland__GPU__MC__MCD_TLB1_CAM 4
|
||||
#define oland__GPU__MC__MCD_TLB1_CAM__4 1
|
||||
#define oland__GPU__MC__MCD_TLB2_CAM 4
|
||||
#define oland__GPU__MC__MCD_TLB2_CAM__4 1
|
||||
#define oland__GPU__MC__MCD_TLB3_CAM 4
|
||||
#define oland__GPU__MC__MCD_TLB3_CAM__4 1
|
||||
#define oland__GPU__MC__SEND_FREE_AT_RTN 1
|
||||
#define oland__GPU__MC__SEND_FREE_AT_RTN__1 1
|
||||
#define oland__GPU__MC__CONTEXT_WIDTH 3
|
||||
#define oland__GPU__MC__CONTEXT_WIDTH__3 1
|
||||
#define oland__GPU__MC__BUG_159204_EXISTS 1
|
||||
#define oland__GPU__MC__BUG_159204_EXISTS__1 1
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,962 +0,0 @@
|
||||
#ifndef verde____GPU_FEATURES_H__
|
||||
#define verde____GPU_FEATURES_H__
|
||||
#define verde__GPU__BIF__VC_PRESENT 0
|
||||
#define verde__GPU__BIF__VC_PRESENT__0 1
|
||||
#define verde__GPU__BIF__PCIEGEN2_MCB_DEPTH 96
|
||||
#define verde__GPU__BIF__PCIEGEN2_MCB_DEPTH__96 1
|
||||
#define verde__GPU__BIF__CLKBUF_PRESENT 1
|
||||
#define verde__GPU__BIF__CLKBUF_PRESENT__1 1
|
||||
#define verde__GPU__XSP__PRESENT 0
|
||||
#define verde__GPU__XSP__PRESENT__0 1
|
||||
#define verde__GPU__CHIP__DFS 1
|
||||
#define verde__GPU__CHIP__DFS__1 1
|
||||
#define verde__GPU__CHIP__TECH tsmc28hp
|
||||
#define verde__GPU__CHIP__TECH__TSMC28HP 1
|
||||
#define verde__GPU__CHIP__TECHVER 0.0.1e
|
||||
#define verde__GPU__CHIP__TECHVER__0_0_1E 1
|
||||
#define verde__TOOLS__GUTS__TECHNM tsmc28hp
|
||||
#define verde__TOOLS__GUTS__TECHNM__TSMC28HP 1
|
||||
#define verde__TOOLS__GUTS__MEMVENDOR Virage
|
||||
#define verde__TOOLS__GUTS__MEMVENDOR__VIRAGE 1
|
||||
#define verde__TOOLS__GUTS__MEMTECH 28nm
|
||||
#define verde__TOOLS__GUTS__MEMTECH__28NM 1
|
||||
#define verde__TOOLS__GUTS__LARRVENDOR AMD
|
||||
#define verde__TOOLS__GUTS__LARRVENDOR__AMD 1
|
||||
#define verde__TOOLS__GUTS__LARRTYPE default
|
||||
#define verde__TOOLS__GUTS__LARRTYPE__DEFAULT 1
|
||||
#define verde__TOOLS__GUTS__LARRVER 0_1
|
||||
#define verde__TOOLS__GUTS__LARRVER__0_1 1
|
||||
#define verde__TOOLS__GUTS__MEMFABTECH TSMC28
|
||||
#define verde__TOOLS__GUTS__MEMFABTECH__TSMC28 1
|
||||
#define verde__TOOLS__GUTS__MEMVER 0_1
|
||||
#define verde__TOOLS__GUTS__MEMVER__0_1 1
|
||||
#define verde__TOOLS__GUTS__MEMTYPE slow
|
||||
#define verde__TOOLS__GUTS__MEMTYPE__SLOW 1
|
||||
#define verde__GPU__CHIP__MEMTECH 28nm
|
||||
#define verde__GPU__CHIP__MEMTECH__28NM 1
|
||||
#define verde__GPU__CHIP__MEMVIEWVER 0_5
|
||||
#define verde__GPU__CHIP__MEMVIEWVER__0_5 1
|
||||
#define verde__GPU__CHIP__MEM virage
|
||||
#define verde__GPU__CHIP__MEM__VIRAGE 1
|
||||
#define verde__GPU__CHIP__MEMVENDOR Virage
|
||||
#define verde__GPU__CHIP__MEMVENDOR__VIRAGE 1
|
||||
#define verde__GPU__CHIP__SRAM_MEMFABTECH TSMC28
|
||||
#define verde__GPU__CHIP__SRAM_MEMFABTECH__TSMC28 1
|
||||
#define verde__GPU__CHIP__SRAM_TIMING slow
|
||||
#define verde__GPU__CHIP__SRAM_TIMING__SLOW 1
|
||||
#define verde__GPU__CHIP__SRAM_MEMVER 0_5_1
|
||||
#define verde__GPU__CHIP__SRAM_MEMVER__0_5_1 1
|
||||
#define verde__GPU__CHIP__LARRVENDOR AMD
|
||||
#define verde__GPU__CHIP__LARRVENDOR__AMD 1
|
||||
#define verde__GPU__CHIP__LARR_MEMFABTECH TSMC28
|
||||
#define verde__GPU__CHIP__LARR_MEMFABTECH__TSMC28 1
|
||||
#define verde__GPU__CHIP__LARR_TIMING default
|
||||
#define verde__GPU__CHIP__LARR_TIMING__DEFAULT 1
|
||||
#define verde__GPU__CHIP__LARR_MEMVER 0_3
|
||||
#define verde__GPU__CHIP__LARR_MEMVER__0_3 1
|
||||
#define verde__GPU__CHIP__TILES_PRESENT 0
|
||||
#define verde__GPU__CHIP__TILES_PRESENT__0 1
|
||||
#define verde__GPU__CHIP__SMSGCOUNT 4
|
||||
#define verde__GPU__CHIP__SMSGCOUNT__4 1
|
||||
#define verde__GPU__CHIP__SMSG_0_PRESENT 1
|
||||
#define verde__GPU__CHIP__SMSG_0_PRESENT__1 1
|
||||
#define verde__GPU__CHIP__SMSG_1_PRESENT 1
|
||||
#define verde__GPU__CHIP__SMSG_1_PRESENT__1 1
|
||||
#define verde__GPU__CHIP__SMSG_2_PRESENT 1
|
||||
#define verde__GPU__CHIP__SMSG_2_PRESENT__1 1
|
||||
#define verde__GPU__CHIP__SMSG_3_PRESENT 1
|
||||
#define verde__GPU__CHIP__SMSG_3_PRESENT__1 1
|
||||
#define verde__GPU__CHIP__XCLK_MHZ 25
|
||||
#define verde__GPU__CHIP__XCLK_MHZ__25 1
|
||||
#define verde__GPU__CHIP__POWERGATE 0
|
||||
#define verde__GPU__CHIP__POWERGATE__0 1
|
||||
#define verde__GPU__LBIST__PRESENT 0
|
||||
#define verde__GPU__LBIST__PRESENT__0 1
|
||||
#define verde__GPU__THM__TMON1_PRESENT 1
|
||||
#define verde__GPU__THM__TMON1_PRESENT__1 1
|
||||
#define verde__GPU__THM__TMON2_PRESENT 1
|
||||
#define verde__GPU__THM__TMON2_PRESENT__1 1
|
||||
#define verde__GPU__THM__TMON3_PRESENT 1
|
||||
#define verde__GPU__THM__TMON3_PRESENT__1 1
|
||||
#define verde__GPU__TMON0__LEFT_NUM_RDI 4
|
||||
#define verde__GPU__TMON0__LEFT_NUM_RDI__4 1
|
||||
#define verde__GPU__TMON0__RIGHT_NUM_RDI 4
|
||||
#define verde__GPU__TMON0__RIGHT_NUM_RDI__4 1
|
||||
#define verde__GPU__TMON1__LEFT_NUM_RDI 4
|
||||
#define verde__GPU__TMON1__LEFT_NUM_RDI__4 1
|
||||
#define verde__GPU__TMON1__RIGHT_NUM_RDI 4
|
||||
#define verde__GPU__TMON1__RIGHT_NUM_RDI__4 1
|
||||
#define verde__GPU__TMON2__LEFT_NUM_RDI 4
|
||||
#define verde__GPU__TMON2__LEFT_NUM_RDI__4 1
|
||||
#define verde__GPU__TMON2__RIGHT_NUM_RDI 4
|
||||
#define verde__GPU__TMON2__RIGHT_NUM_RDI__4 1
|
||||
#define verde__GPU__TMON3__LEFT_NUM_RDI 4
|
||||
#define verde__GPU__TMON3__LEFT_NUM_RDI__4 1
|
||||
#define verde__GPU__TMON3__RIGHT_NUM_RDI 4
|
||||
#define verde__GPU__TMON3__RIGHT_NUM_RDI__4 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL 17
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL__17 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_LS 0
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_LS__0 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_DS_D 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_DS_D__1 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_DS_M 2
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_DS_M__2 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_SD_D 3
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_SD_D__3 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_SD_M 4
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_SD_M__4 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_DS 5
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_DS__5 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_SD 6
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_SD__6 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_FISO 7
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_FISO__7 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_START 8
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_START__8 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_END 16
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_END__16 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_START 8
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_START__8 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_END 30
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_END__30 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RME 8
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RME__8 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RM_START 9
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RM_START__9 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RM_END 10
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_RF_RM_END__10 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RME 11
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RME__11 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RM_START 12
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RM_START__12 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RM_END 13
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_PDP_RM_END__13 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RME 14
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RME__14 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RM_START 15
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RM_START__15 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RM_END 16
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_V_RM_HD_RM_END__16 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RME 8
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RME__8 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RM_START 9
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RM_START__9 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RM_END 17
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_RF_RM_END__17 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RME 18
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RME__18 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RM_START 19
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RM_START__19 1
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RM_END 30
|
||||
#define verde__GPU__CHIP__MEM_POWER_CTRL_A_RM_PDP_RM_END__30 1
|
||||
#define verde__GPU__TSS__NUM_TILES 5
|
||||
#define verde__GPU__TSS__NUM_TILES__5 1
|
||||
#define verde__GPU__TSS__TSS0_TILE 1
|
||||
#define verde__GPU__TSS__TSS0_TILE__1 1
|
||||
#define verde__GPU__TSS__TSS1_TILE 1
|
||||
#define verde__GPU__TSS__TSS1_TILE__1 1
|
||||
#define verde__GPU__TSS__TSS2_TILE 1
|
||||
#define verde__GPU__TSS__TSS2_TILE__1 1
|
||||
#define verde__GPU__TSS__TSS3_TILE 1
|
||||
#define verde__GPU__TSS__TSS3_TILE__1 1
|
||||
#define verde__GPU__TSS__TSS4_TILE 1
|
||||
#define verde__GPU__TSS__TSS4_TILE__1 1
|
||||
#define verde__GPU__TSS__TSS4_AS_ADC 1
|
||||
#define verde__GPU__TSS__TSS4_AS_ADC__1 1
|
||||
#define verde__GPU__RCU__PROGRAMMABLE_RMBITS 1
|
||||
#define verde__GPU__RCU__PROGRAMMABLE_RMBITS__1 1
|
||||
#define verde__GPU__CGTT_TILE__PDLY 1
|
||||
#define verde__GPU__CGTT_TILE__PDLY__1 1
|
||||
#define verde__GPU__PDLY_TILE__PDLY 1
|
||||
#define verde__GPU__PDLY_TILE__PDLY__1 1
|
||||
#define verde__GPU__PDLY_TILE__CLKGATE 0
|
||||
#define verde__GPU__PDLY_TILE__CLKGATE__0 1
|
||||
#define verde__GPU__CG__SMC_SCRATCH_REGS 1
|
||||
#define verde__GPU__CG__SMC_SCRATCH_REGS__1 1
|
||||
#define verde__GPU__CG__CG_DLL_PDNB 1
|
||||
#define verde__GPU__CG__CG_DLL_PDNB__1 1
|
||||
#define verde__GPU__SMU__USE_HW_VBI 1
|
||||
#define verde__GPU__SMU__USE_HW_VBI__1 1
|
||||
#define verde__GPU__SMU__NUM_CAC_MGR_4 1
|
||||
#define verde__GPU__SMU__NUM_CAC_MGR_4__1 1
|
||||
#define verde__GPU__PDMA__PRESENT 0
|
||||
#define verde__GPU__PDMA__PRESENT__0 1
|
||||
#define verde__GPU__DRMDMA__DUAL_DRMDMA_PRESENT 1
|
||||
#define verde__GPU__DRMDMA__DUAL_DRMDMA_PRESENT__1 1
|
||||
#define verde__GPU__DRM__BGAES_OFF 1
|
||||
#define verde__GPU__DRM__BGAES_OFF__1 1
|
||||
#define verde__GPU__DLB__SLEW 1
|
||||
#define verde__GPU__DLB__SLEW__1 1
|
||||
#define verde__GPU__ROM__EXT_CS_EN 1
|
||||
#define verde__GPU__ROM__EXT_CS_EN__1 1
|
||||
#define verde__GPU__CPL__GPIO_23_PRESENT 0
|
||||
#define verde__GPU__CPL__GPIO_23_PRESENT__0 1
|
||||
#define verde__GPU__CPL__GPIO_24_PRESENT 0
|
||||
#define verde__GPU__CPL__GPIO_24_PRESENT__0 1
|
||||
#define verde__GPU__CPL__GPIO_25_PRESENT 0
|
||||
#define verde__GPU__CPL__GPIO_25_PRESENT__0 1
|
||||
#define verde__GPU__CPL__GPIO_26_PRESENT 0
|
||||
#define verde__GPU__CPL__GPIO_26_PRESENT__0 1
|
||||
#define verde__GPU__CPL__GPIO_27_PRESENT 0
|
||||
#define verde__GPU__CPL__GPIO_27_PRESENT__0 1
|
||||
#define verde__GPU__CPL__MLPS_0_PRESENT 1
|
||||
#define verde__GPU__CPL__MLPS_0_PRESENT__1 1
|
||||
#define verde__GPU__CPL__MLPS_1_PRESENT 1
|
||||
#define verde__GPU__CPL__MLPS_1_PRESENT__1 1
|
||||
#define verde__GPU__CPL__MLPS_2_PRESENT 1
|
||||
#define verde__GPU__CPL__MLPS_2_PRESENT__1 1
|
||||
#define verde__GPU__CPL__MLPS_3_PRESENT 1
|
||||
#define verde__GPU__CPL__MLPS_3_PRESENT__1 1
|
||||
#define verde__GPU__CPL__SX_0_PRESENT 1
|
||||
#define verde__GPU__CPL__SX_0_PRESENT__1 1
|
||||
#define verde__GPU__SMC__TAP_FED_PRESENT 1
|
||||
#define verde__GPU__SMC__TAP_FED_PRESENT__1 1
|
||||
#define verde__GPU__CPL__PG_CODE_ENABLE 1
|
||||
#define verde__GPU__CPL__PG_CODE_ENABLE__1 1
|
||||
#define verde__GPU__CPL__PG_CODE_GPG 1
|
||||
#define verde__GPU__CPL__PG_CODE_GPG__1 1
|
||||
#define verde__GPU__AVP__MC_IF 1
|
||||
#define verde__GPU__AVP__MC_IF__1 1
|
||||
#define verde__GPU__AVP__UVD_RLC_CMC_IF 1
|
||||
#define verde__GPU__AVP__UVD_RLC_CMC_IF__1 1
|
||||
#define verde__GPU__DC__TMDS_LINK tmds_link_dual
|
||||
#define verde__GPU__DC__TMDS_LINK__TMDS_LINK_DUAL 1
|
||||
#define verde__GPU__DC__NUM_DDC_PAIRS 6
|
||||
#define verde__GPU__DC__NUM_DDC_PAIRS__6 1
|
||||
#define verde__GPU__DC__NUM_DDC_PAIRS__0_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_DDC_PAIRS__1_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_DDC_PAIRS__2_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_DDC_PAIRS__3_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_DDC_PAIRS__4_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_DDC_PAIRS__5_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_HPD 6
|
||||
#define verde__GPU__DC__NUM_HPD__6 1
|
||||
#define verde__GPU__DC__NUM_HPD__0_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_HPD__1_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_HPD__2_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_HPD__3_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_HPD__4_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_HPD__5_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_PIPE_PAIRS 3
|
||||
#define verde__GPU__DC__NUM_PIPE_PAIRS__3 1
|
||||
#define verde__GPU__DC__NUM_PIPE_PAIRS__0_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_PIPE_PAIRS__1_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_PIPE_PAIRS__2_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_PIPES 6
|
||||
#define verde__GPU__DC__NUM_PIPES__6 1
|
||||
#define verde__GPU__DC__NUM_PIPES__0_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_PIPES__1_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_PIPES__2_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_PIPES__3_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_PIPES__4_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_PIPES__5_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_DIG 6
|
||||
#define verde__GPU__DC__NUM_DIG__6 1
|
||||
#define verde__GPU__DC__NUM_DIG__0_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_DIG__1_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_DIG__2_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_DIG__3_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_DIG__4_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_DIG__5_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_AUX 6
|
||||
#define verde__GPU__DC__NUM_AUX__6 1
|
||||
#define verde__GPU__DC__NUM_AUX__0_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_AUX__1_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_AUX__2_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_AUX__3_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_AUX__4_PRESENT 1
|
||||
#define verde__GPU__DC__NUM_AUX__5_PRESENT 1
|
||||
#define verde__GPU__DISPPLL__MACRO walden
|
||||
#define verde__GPU__DISPPLL__MACRO__WALDEN 1
|
||||
#define verde__GPU__TMDPA__MACRO walden
|
||||
#define verde__GPU__TMDPA__MACRO__WALDEN 1
|
||||
#define verde__GPU__TMDPB__MACRO walden
|
||||
#define verde__GPU__TMDPB__MACRO__WALDEN 1
|
||||
#define verde__GPU__LVTMDP__MACRO walden
|
||||
#define verde__GPU__LVTMDP__MACRO__WALDEN 1
|
||||
#define verde__GPU__DACA__MACRO walden
|
||||
#define verde__GPU__DACA__MACRO__WALDEN 1
|
||||
#define verde__GPU__DACB__MACRO walden
|
||||
#define verde__GPU__DACB__MACRO__WALDEN 1
|
||||
#define verde__GPU__DC__VIP_PRESENT 1
|
||||
#define verde__GPU__DC__VIP_PRESENT__1 1
|
||||
#define verde__GPU__DC__ABM_PRESENT 1
|
||||
#define verde__GPU__DC__ABM_PRESENT__1 1
|
||||
#define verde__GPU__DC__DMCU_PRESENT 1
|
||||
#define verde__GPU__DC__DMCU_PRESENT__1 1
|
||||
#define verde__GPU__DC__DVO_PRESENT 1
|
||||
#define verde__GPU__DC__DVO_PRESENT__1 1
|
||||
#define verde__GPU__DC__SDVO_PRESENT 1
|
||||
#define verde__GPU__DC__SDVO_PRESENT__1 1
|
||||
#define verde__GPU__DC__LVDS_PRESENT 1
|
||||
#define verde__GPU__DC__LVDS_PRESENT__1 1
|
||||
#define verde__GPU__UNIPHYAB__PRESENT 1
|
||||
#define verde__GPU__UNIPHYAB__PRESENT__1 1
|
||||
#define verde__GPU__UNIPHYCD__PRESENT 1
|
||||
#define verde__GPU__UNIPHYCD__PRESENT__1 1
|
||||
#define verde__GPU__UNIPHYEF__PRESENT 1
|
||||
#define verde__GPU__UNIPHYEF__PRESENT__1 1
|
||||
#define verde__GPU__UNIPHYAB__TYPE lvtmdp
|
||||
#define verde__GPU__UNIPHYAB__TYPE__LVTMDP 1
|
||||
#define verde__GPU__UNIPHYCD__TYPE tmdpa
|
||||
#define verde__GPU__UNIPHYCD__TYPE__TMDPA 1
|
||||
#define verde__GPU__UNIPHYEF__TYPE tmdpb
|
||||
#define verde__GPU__UNIPHYEF__TYPE__TMDPB 1
|
||||
#define verde__GPU__UNIPHYAB__LVTMDP 1
|
||||
#define verde__GPU__UNIPHYAB__LVTMDP__1 1
|
||||
#define verde__GPU__DC__DACA_PRESENT 1
|
||||
#define verde__GPU__DC__DACA_PRESENT__1 1
|
||||
#define verde__GPU__DC__DACB_PRESENT 1
|
||||
#define verde__GPU__DC__DACB_PRESENT__1 1
|
||||
#define verde__GPU__DC__TVOUT_PRESENT 1
|
||||
#define verde__GPU__DC__TVOUT_PRESENT__1 1
|
||||
#define verde__GPU__DC__MVP_PRESENT 1
|
||||
#define verde__GPU__DC__MVP_PRESENT__1 1
|
||||
#define verde__GPU__DC__DENTIST_INTERFACE_PRESENT 0
|
||||
#define verde__GPU__DC__DENTIST_INTERFACE_PRESENT__0 1
|
||||
#define verde__GPU__DC__DDC1AUX1 dual_mode
|
||||
#define verde__GPU__DC__DDC1AUX1__DUAL_MODE 1
|
||||
#define verde__GPU__DC__DDC2AUX2 dual_mode
|
||||
#define verde__GPU__DC__DDC2AUX2__DUAL_MODE 1
|
||||
#define verde__GPU__DC__DDC3AUX3 dual_mode
|
||||
#define verde__GPU__DC__DDC3AUX3__DUAL_MODE 1
|
||||
#define verde__GPU__DC__DDC4AUX4 dual_mode
|
||||
#define verde__GPU__DC__DDC4AUX4__DUAL_MODE 1
|
||||
#define verde__GPU__DC__DDC5AUX5 dual_mode
|
||||
#define verde__GPU__DC__DDC5AUX5__DUAL_MODE 1
|
||||
#define verde__GPU__DC__DDC6AUX6 dual_mode
|
||||
#define verde__GPU__DC__DDC6AUX6__DUAL_MODE 1
|
||||
#define verde__GPU__DC__AUX1_PRESENT 1
|
||||
#define verde__GPU__DC__AUX1_PRESENT__1 1
|
||||
#define verde__GPU__DC__AUX2_PRESENT 1
|
||||
#define verde__GPU__DC__AUX2_PRESENT__1 1
|
||||
#define verde__GPU__DC__AUX3_PRESENT 1
|
||||
#define verde__GPU__DC__AUX3_PRESENT__1 1
|
||||
#define verde__GPU__DC__AUX4_PRESENT 1
|
||||
#define verde__GPU__DC__AUX4_PRESENT__1 1
|
||||
#define verde__GPU__DC__AUX5_PRESENT 1
|
||||
#define verde__GPU__DC__AUX5_PRESENT__1 1
|
||||
#define verde__GPU__DC__AUX6_PRESENT 1
|
||||
#define verde__GPU__DC__AUX6_PRESENT__1 1
|
||||
#define verde__GPU__DC__DENTIST_PRESENT 0
|
||||
#define verde__GPU__DC__DENTIST_PRESENT__0 1
|
||||
#define verde__GPU__DC__GENERICA_PRESENT 1
|
||||
#define verde__GPU__DC__GENERICA_PRESENT__1 1
|
||||
#define verde__GPU__DC__GENERICB_PRESENT 1
|
||||
#define verde__GPU__DC__GENERICB_PRESENT__1 1
|
||||
#define verde__GPU__DC__GENERICC_PRESENT 1
|
||||
#define verde__GPU__DC__GENERICC_PRESENT__1 1
|
||||
#define verde__GPU__DC__GENERICD_PRESENT 1
|
||||
#define verde__GPU__DC__GENERICD_PRESENT__1 1
|
||||
#define verde__GPU__DC__GENERICE_PRESENT 1
|
||||
#define verde__GPU__DC__GENERICE_PRESENT__1 1
|
||||
#define verde__GPU__DC__GENERICF_PRESENT 1
|
||||
#define verde__GPU__DC__GENERICF_PRESENT__1 1
|
||||
#define verde__GPU__DC__GENERICG_PRESENT 1
|
||||
#define verde__GPU__DC__GENERICG_PRESENT__1 1
|
||||
#define verde__GPU__DC__BLON_TYPE 0
|
||||
#define verde__GPU__DC__BLON_TYPE__0 1
|
||||
#define verde__GPU__DC__NB_STUTTER_MODE_PRESENT 0
|
||||
#define verde__GPU__DC__NB_STUTTER_MODE_PRESENT__0 1
|
||||
#define verde__GPU__DC__PCIE_REFCLK_TEST_MODE_MUX_PRESENT 0
|
||||
#define verde__GPU__DC__PCIE_REFCLK_TEST_MODE_MUX_PRESENT__0 1
|
||||
#define verde__GPU__DC__REFCLK_TEST_MODE_MUX_PRESENT 0
|
||||
#define verde__GPU__DC__REFCLK_TEST_MODE_MUX_PRESENT__0 1
|
||||
#define verde__GPU__DC__PIXCLK_TEST_MODE_MUX_PRESENT 0
|
||||
#define verde__GPU__DC__PIXCLK_TEST_MODE_MUX_PRESENT__0 1
|
||||
#define verde__GPU__DC__SYMCLK_TEST_MODE_MUX_PRESENT 0
|
||||
#define verde__GPU__DC__SYMCLK_TEST_MODE_MUX_PRESENT__0 1
|
||||
#define verde__GPU__GC__NUM_SE 1
|
||||
#define verde__GPU__GC__NUM_SE__1 1
|
||||
#define verde__GPU__GC__NUM_SE__0_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_SH_PER_SE 2
|
||||
#define verde__GPU__GC__NUM_SH_PER_SE__2 1
|
||||
#define verde__GPU__GC__NUM_SH_PER_SE__0_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_SH_PER_SE__1_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_RB_PER_SE 4
|
||||
#define verde__GPU__GC__NUM_RB_PER_SE__4 1
|
||||
#define verde__GPU__GC__NUM_RB_PER_SE__0_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_RB_PER_SE__1_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_RB_PER_SE__2_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_RB_PER_SE__3_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_CU_PER_SH 5
|
||||
#define verde__GPU__GC__NUM_CU_PER_SH__5 1
|
||||
#define verde__GPU__GC__NUM_CU_PER_SH__0_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_CU_PER_SH__1_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_CU_PER_SH__2_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_CU_PER_SH__3_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_CU_PER_SH__4_PRESENT 1
|
||||
#define verde__GPU__GC__WAVE_SIZE 64
|
||||
#define verde__GPU__GC__WAVE_SIZE__64 1
|
||||
#define verde__GPU__GC__NUM_CP_RINGS 3
|
||||
#define verde__GPU__GC__NUM_CP_RINGS__3 1
|
||||
#define verde__GPU__GC__NUM_CP_RINGS__0_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_CP_RINGS__1_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_CP_RINGS__2_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_SC_PER_SE 1
|
||||
#define verde__GPU__GC__NUM_SC_PER_SE__1 1
|
||||
#define verde__GPU__GC__NUM_SC_PER_SE__0_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_BCI_PER_SE 1
|
||||
#define verde__GPU__GC__NUM_BCI_PER_SE__1 1
|
||||
#define verde__GPU__GC__NUM_BCI_PER_SE__0_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_RB_PER_SC 2
|
||||
#define verde__GPU__GC__NUM_RB_PER_SC__2 1
|
||||
#define verde__GPU__GC__NUM_RB_PER_SC__0_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_RB_PER_SC__1_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_RB_PER_PACKER 2
|
||||
#define verde__GPU__GC__NUM_RB_PER_PACKER__2 1
|
||||
#define verde__GPU__GC__NUM_RB_PER_PACKER__0_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_RB_PER_PACKER__1_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_PACKER_PER_SC 1
|
||||
#define verde__GPU__GC__NUM_PACKER_PER_SC__1 1
|
||||
#define verde__GPU__GC__NUM_PACKER_PER_SC__0_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_DB_PER_PACKER 2
|
||||
#define verde__GPU__GC__NUM_DB_PER_PACKER__2 1
|
||||
#define verde__GPU__GC__NUM_DB_PER_PACKER__0_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_DB_PER_PACKER__1_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_PACKER_PER_SE 1
|
||||
#define verde__GPU__GC__NUM_PACKER_PER_SE__1 1
|
||||
#define verde__GPU__GC__NUM_PACKER_PER_SE__0_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_RB_PER_SX 2
|
||||
#define verde__GPU__GC__NUM_RB_PER_SX__2 1
|
||||
#define verde__GPU__GC__NUM_RB_PER_SX__0_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_RB_PER_SX__1_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_CU_PER_SE 8
|
||||
#define verde__GPU__GC__NUM_CU_PER_SE__8 1
|
||||
#define verde__GPU__GC__NUM_CU_PER_SE__0_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_CU_PER_SE__1_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_CU_PER_SE__2_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_CU_PER_SE__3_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_CU_PER_SE__4_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_CU_PER_SE__5_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_CU_PER_SE__6_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_CU_PER_SE__7_PRESENT 1
|
||||
#define verde__GPU__GC__MAX_NUMBER_WAVES 320
|
||||
#define verde__GPU__GC__MAX_NUMBER_WAVES__320 1
|
||||
#define verde__GPU__GC__MAX_NUMBER_WAVES_PER_PACKER 320
|
||||
#define verde__GPU__GC__MAX_NUMBER_WAVES_PER_PACKER__320 1
|
||||
#define verde__GPU__SQ__NEW_MTBUF_DSTSEL 1
|
||||
#define verde__GPU__SQ__NEW_MTBUF_DSTSEL__1 1
|
||||
#define verde__GPU__SQ__NUM_WAVES_PER_SIMD 10
|
||||
#define verde__GPU__SQ__NUM_WAVES_PER_SIMD__10 1
|
||||
#define verde__GPU__SQ__THREAD_GROUPS_PER_CU 16
|
||||
#define verde__GPU__SQ__THREAD_GROUPS_PER_CU__16 1
|
||||
#define verde__GPU__SQ__NUM_PERF_CNTRS 8
|
||||
#define verde__GPU__SQ__NUM_PERF_CNTRS__8 1
|
||||
#define verde__GPU__SQ__NUM_PERF_CNTRS__0_PRESENT 1
|
||||
#define verde__GPU__SQ__NUM_PERF_CNTRS__1_PRESENT 1
|
||||
#define verde__GPU__SQ__NUM_PERF_CNTRS__2_PRESENT 1
|
||||
#define verde__GPU__SQ__NUM_PERF_CNTRS__3_PRESENT 1
|
||||
#define verde__GPU__SQ__NUM_PERF_CNTRS__4_PRESENT 1
|
||||
#define verde__GPU__SQ__NUM_PERF_CNTRS__5_PRESENT 1
|
||||
#define verde__GPU__SQ__NUM_PERF_CNTRS__6_PRESENT 1
|
||||
#define verde__GPU__SQ__NUM_PERF_CNTRS__7_PRESENT 1
|
||||
#define verde__GPU__SQ__NUM_SGPR_PER_SIMD 512
|
||||
#define verde__GPU__SQ__NUM_SGPR_PER_SIMD__512 1
|
||||
#define verde__GPU__SQ__P2_IS_P1 1
|
||||
#define verde__GPU__SQ__P2_IS_P1__1 1
|
||||
#define verde__GPU__SQ__USE_SV_PACKAGES 0
|
||||
#define verde__GPU__SQ__USE_SV_PACKAGES__0 1
|
||||
#define verde__GPU__SQC__NUM_SQC 2
|
||||
#define verde__GPU__SQC__NUM_SQC__2 1
|
||||
#define verde__GPU__SQC__NUM_SQC__0_PRESENT 1
|
||||
#define verde__GPU__SQC__NUM_SQC__1_PRESENT 1
|
||||
#define verde__GPU__SQC__NUM_SQC_PER_SH 2
|
||||
#define verde__GPU__SQC__NUM_SQC_PER_SH__2 1
|
||||
#define verde__GPU__SQC__NUM_SQC_PER_SH__0_PRESENT 1
|
||||
#define verde__GPU__SQC__NUM_SQC_PER_SH__1_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC0_POSN_AFTER_SQ 1
|
||||
#define verde__GPU__SQC__SH_SQC0_POSN_AFTER_SQ__1 1
|
||||
#define verde__GPU__SQC__SH_SQC0_NUM_CU 4
|
||||
#define verde__GPU__SQC__SH_SQC0_NUM_CU__4 1
|
||||
#define verde__GPU__SQC__SH_SQC0_NUM_CU__0_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC0_NUM_CU__1_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC0_NUM_CU__2_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC0_NUM_CU__3_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC0_NUM_BANK 4
|
||||
#define verde__GPU__SQC__SH_SQC0_NUM_BANK__4 1
|
||||
#define verde__GPU__SQC__SH_SQC0_NUM_BANK__0_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC0_NUM_BANK__1_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC0_NUM_BANK__2_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC0_NUM_BANK__3_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC0_BANK_INST_CACHE_SIZE_KBYTES 8
|
||||
#define verde__GPU__SQC__SH_SQC0_BANK_INST_CACHE_SIZE_KBYTES__8 1
|
||||
#define verde__GPU__SQC__SH_SQC0_BANK_DATA_CACHE_SIZE_KBYTES 4
|
||||
#define verde__GPU__SQC__SH_SQC0_BANK_DATA_CACHE_SIZE_KBYTES__4 1
|
||||
#define verde__GPU__SQC__SH_SQC1_POSN_AFTER_SQ 5
|
||||
#define verde__GPU__SQC__SH_SQC1_POSN_AFTER_SQ__5 1
|
||||
#define verde__GPU__SQC__SH_SQC1_NUM_CU 4
|
||||
#define verde__GPU__SQC__SH_SQC1_NUM_CU__4 1
|
||||
#define verde__GPU__SQC__SH_SQC1_NUM_CU__0_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC1_NUM_CU__1_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC1_NUM_CU__2_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC1_NUM_CU__3_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC1_NUM_BANK 4
|
||||
#define verde__GPU__SQC__SH_SQC1_NUM_BANK__4 1
|
||||
#define verde__GPU__SQC__SH_SQC1_NUM_BANK__0_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC1_NUM_BANK__1_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC1_NUM_BANK__2_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC1_NUM_BANK__3_PRESENT 1
|
||||
#define verde__GPU__SQC__SH_SQC1_BANK_INST_CACHE_SIZE_KBYTES 8
|
||||
#define verde__GPU__SQC__SH_SQC1_BANK_INST_CACHE_SIZE_KBYTES__8 1
|
||||
#define verde__GPU__SQC__SH_SQC1_BANK_DATA_CACHE_SIZE_KBYTES 4
|
||||
#define verde__GPU__SQC__SH_SQC1_BANK_DATA_CACHE_SIZE_KBYTES__4 1
|
||||
#define verde__GPU__SQC__SH_SQC2_POSN_AFTER_SQ 0
|
||||
#define verde__GPU__SQC__SH_SQC2_POSN_AFTER_SQ__0 1
|
||||
#define verde__GPU__SQC__SH_SQC2_NUM_CU 0
|
||||
#define verde__GPU__SQC__SH_SQC2_NUM_CU__0 1
|
||||
#define verde__GPU__SQC__SH_SQC2_NUM_BANK 0
|
||||
#define verde__GPU__SQC__SH_SQC2_NUM_BANK__0 1
|
||||
#define verde__GPU__SQC__SH_SQC2_BANK_INST_CACHE_SIZE_KBYTES 0
|
||||
#define verde__GPU__SQC__SH_SQC2_BANK_INST_CACHE_SIZE_KBYTES__0 1
|
||||
#define verde__GPU__SQC__SH_SQC2_BANK_DATA_CACHE_SIZE_KBYTES 0
|
||||
#define verde__GPU__SQC__SH_SQC2_BANK_DATA_CACHE_SIZE_KBYTES__0 1
|
||||
#define verde__GPU__SQC__P2_IS_P1 1
|
||||
#define verde__GPU__SQC__P2_IS_P1__1 1
|
||||
#define verde__GPU__GC__GDS_EXISTS 1
|
||||
#define verde__GPU__GC__GDS_EXISTS__1 1
|
||||
#define verde__GPU__GC__RB_REDUNDANCY 0
|
||||
#define verde__GPU__GC__RB_REDUNDANCY__0 1
|
||||
#define verde__GPU__GC__SC_DOES_RB_REDUNDANCY 0
|
||||
#define verde__GPU__GC__SC_DOES_RB_REDUNDANCY__0 1
|
||||
#define verde__GPU__GC__MEM_ADDR_BITS 40
|
||||
#define verde__GPU__GC__MEM_ADDR_BITS__40 1
|
||||
#define verde__GPU__GC__NEW_VERTEX_VECTOR_ORDER 0
|
||||
#define verde__GPU__GC__NEW_VERTEX_VECTOR_ORDER__0 1
|
||||
#define verde__GPU__GC__NUM_INTERPS 1
|
||||
#define verde__GPU__GC__NUM_INTERPS__1 1
|
||||
#define verde__GPU__GC__HZ_PRESENT 1
|
||||
#define verde__GPU__GC__HZ_PRESENT__1 1
|
||||
#define verde__GPU__GC__NUM_CLKS_PER_PRIM 1
|
||||
#define verde__GPU__GC__NUM_CLKS_PER_PRIM__1 1
|
||||
#define verde__GPU__GC__NUM_INTERP_PRIM_PER_CLK 2
|
||||
#define verde__GPU__GC__NUM_INTERP_PRIM_PER_CLK__2 1
|
||||
#define verde__GPU__GC__ATTR_BUS_PRIM_PER_CLK 2
|
||||
#define verde__GPU__GC__ATTR_BUS_PRIM_PER_CLK__2 1
|
||||
#define verde__GPU__GC__NUM_MAX_GS_THDS 32
|
||||
#define verde__GPU__GC__NUM_MAX_GS_THDS__32 1
|
||||
#define verde__GPU__GC__NUM_MIN_GS_THDS 4
|
||||
#define verde__GPU__GC__NUM_MIN_GS_THDS__4 1
|
||||
#define verde__GPU__GC__NUM_STATES 8
|
||||
#define verde__GPU__GC__NUM_STATES__8 1
|
||||
#define verde__GPU__GC__NUM_STATES__0_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_STATES__1_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_STATES__2_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_STATES__3_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_STATES__4_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_STATES__5_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_STATES__6_PRESENT 1
|
||||
#define verde__GPU__GC__NUM_STATES__7_PRESENT 1
|
||||
#define verde__GPU__GC__STWTPTR_WIDTH 3
|
||||
#define verde__GPU__GC__STWTPTR_WIDTH__3 1
|
||||
#define verde__GPU__SH__DOUBLE_FLOAT_PRESENT 1
|
||||
#define verde__GPU__SH__DOUBLE_FLOAT_PRESENT__1 1
|
||||
#define verde__GPU__SH__NUM_DOUBLE_VSPS_PER_SIMD 1
|
||||
#define verde__GPU__SH__NUM_DOUBLE_VSPS_PER_SIMD__1 1
|
||||
#define verde__GPU__SH__NUM_DOUBLE_VSPS_PER_SIMD__0_PRESENT 1
|
||||
#define verde__GPU__SH__NORM_SIN_COS 1
|
||||
#define verde__GPU__SH__NORM_SIN_COS__1 1
|
||||
#define verde__GPU__SH__MICROCODE_LEVEL 10
|
||||
#define verde__GPU__SH__MICROCODE_LEVEL__10 1
|
||||
#define verde__GPU__SH__NUM_EXPREQ_PER_CU 12
|
||||
#define verde__GPU__SH__NUM_EXPREQ_PER_CU__12 1
|
||||
#define verde__GPU__GC__GLOBAL_VGT_PA 0
|
||||
#define verde__GPU__GC__GLOBAL_VGT_PA__0 1
|
||||
#define verde__GPU__GC__NUM_FRONTEND 1
|
||||
#define verde__GPU__GC__NUM_FRONTEND__1 1
|
||||
#define verde__GPU__GC__NUM_FRONTEND__0_PRESENT 1
|
||||
#define verde__GPU__GC__COALESCED_READ_PRESENT 1
|
||||
#define verde__GPU__GC__COALESCED_READ_PRESENT__1 1
|
||||
#define verde__GPU__GC__NUM_CLKS_PER_TILE 1
|
||||
#define verde__GPU__GC__NUM_CLKS_PER_TILE__1 1
|
||||
#define verde__GPU__GC__DBSC_TRUE_QUAD_INTF 1
|
||||
#define verde__GPU__GC__DBSC_TRUE_QUAD_INTF__1 1
|
||||
#define verde__GPU__GC__ASYNC_DISPATCH 1
|
||||
#define verde__GPU__GC__ASYNC_DISPATCH__1 1
|
||||
#define verde__GPU__GC__VMID_PORTS_EXISTS 1
|
||||
#define verde__GPU__GC__VMID_PORTS_EXISTS__1 1
|
||||
#define verde__GPU__GC__NUM_EXPORT_BUS 2
|
||||
#define verde__GPU__GC__NUM_EXPORT_BUS__2 1
|
||||
#define verde__GPU__GC__TILING_CONFIG_TABLE 1
|
||||
#define verde__GPU__GC__TILING_CONFIG_TABLE__1 1
|
||||
#define verde__GPU__GC__FMASK_TILING_CONFIG_TABLE 1
|
||||
#define verde__GPU__GC__FMASK_TILING_CONFIG_TABLE__1 1
|
||||
#define verde__GPU__GC__NEW_SRC_COLOR_FORMAT 1
|
||||
#define verde__GPU__GC__NEW_SRC_COLOR_FORMAT__1 1
|
||||
#define verde__GPU__SP__NUM_GPRS 256
|
||||
#define verde__GPU__SP__NUM_GPRS__256 1
|
||||
#define verde__GPU__SP__GPR_ADDR_WIDTH 8
|
||||
#define verde__GPU__SP__GPR_ADDR_WIDTH__8 1
|
||||
#define verde__GPU__SP__WIDTH_GPRS 128
|
||||
#define verde__GPU__SP__WIDTH_GPRS__128 1
|
||||
#define verde__GPU__SPI__TMP_SCBD_SLOTS_PER_CU 32
|
||||
#define verde__GPU__SPI__TMP_SCBD_SLOTS_PER_CU__32 1
|
||||
#define verde__GPU__VGT__GSPRIM_BUFF_DEPTH 768
|
||||
#define verde__GPU__VGT__GSPRIM_BUFF_DEPTH__768 1
|
||||
#define verde__GPU__VGT__GS_TABLE_DEPTH 16
|
||||
#define verde__GPU__VGT__GS_TABLE_DEPTH__16 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_DEPTH 512
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_DEPTH__512 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH 16
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__16 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__0_PRESENT 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__1_PRESENT 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__2_PRESENT 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__3_PRESENT 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__4_PRESENT 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__5_PRESENT 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__6_PRESENT 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__7_PRESENT 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__8_PRESENT 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__9_PRESENT 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__10_PRESENT 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__11_PRESENT 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__12_PRESENT 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__13_PRESENT 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__14_PRESENT 1
|
||||
#define verde__GPU__SX__PARAMETER_CACHE_WIDTH__15_PRESENT 1
|
||||
#define verde__GPU__SX__COLOR_SCOREBOARD_SLOTS 64
|
||||
#define verde__GPU__SX__COLOR_SCOREBOARD_SLOTS__64 1
|
||||
#define verde__GPU__SX__POS_SCOREBOARD_SLOTS 16
|
||||
#define verde__GPU__SX__POS_SCOREBOARD_SLOTS__16 1
|
||||
#define verde__GPU__SX__COLOR_EXPORT_BUFFER_SIZE 256
|
||||
#define verde__GPU__SX__COLOR_EXPORT_BUFFER_SIZE__256 1
|
||||
#define verde__GPU__SX__POS_EXPORT_BUFFER_SIZE 256
|
||||
#define verde__GPU__SX__POS_EXPORT_BUFFER_SIZE__256 1
|
||||
#define verde__GPU__SX__COLOR_EXPORT_REG_BUFFER_SIZE 1024
|
||||
#define verde__GPU__SX__COLOR_EXPORT_REG_BUFFER_SIZE__1024 1
|
||||
#define verde__GPU__SX__POS_EXPORT_REG_BUFFER_SIZE 1024
|
||||
#define verde__GPU__SX__POS_EXPORT_REG_BUFFER_SIZE__1024 1
|
||||
#define verde__GPU__SX__PIXEL_FIFO_DEPTH 32
|
||||
#define verde__GPU__SX__PIXEL_FIFO_DEPTH__32 1
|
||||
#define verde__GPU__PA__PRIM_BUFF_DEPTH 1536
|
||||
#define verde__GPU__PA__PRIM_BUFF_DEPTH__1536 1
|
||||
#define verde__GPU__PA__NUM_CLIPPERS 4
|
||||
#define verde__GPU__PA__NUM_CLIPPERS__4 1
|
||||
#define verde__GPU__PA__LOG2_MAX_SAMPLES 3
|
||||
#define verde__GPU__PA__LOG2_MAX_SAMPLES__3 1
|
||||
#define verde__GPU__TC__TCC_PRESENT 1
|
||||
#define verde__GPU__TC__TCC_PRESENT__1 1
|
||||
#define verde__GPU__TC__TCR_TCA_REQ_CREDITS 16
|
||||
#define verde__GPU__TC__TCR_TCA_REQ_CREDITS__16 1
|
||||
#define verde__GPU__TC__TA_HANDLE_BASEADDR 1
|
||||
#define verde__GPU__TC__TA_HANDLE_BASEADDR__1 1
|
||||
#define verde__GPU__TC__TCP_L1_SIZE 16
|
||||
#define verde__GPU__TC__TCP_L1_SIZE__16 1
|
||||
#define verde__GPU__TC__NUM_TCPS 8
|
||||
#define verde__GPU__TC__NUM_TCPS__8 1
|
||||
#define verde__GPU__TC__NUM_TCPS__0_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCPS__1_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCPS__2_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCPS__3_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCPS__4_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCPS__5_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCPS__6_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCPS__7_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCCS 4
|
||||
#define verde__GPU__TC__NUM_TCCS__4 1
|
||||
#define verde__GPU__TC__NUM_TCCS__0_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCCS__1_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCCS__2_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCCS__3_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCAS 2
|
||||
#define verde__GPU__TC__NUM_TCAS__2 1
|
||||
#define verde__GPU__TC__NUM_TCAS__0_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCAS__1_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCIRS 3
|
||||
#define verde__GPU__TC__NUM_TCIRS__3 1
|
||||
#define verde__GPU__TC__NUM_TCIRS__0_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCIRS__1_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCIRS__2_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCIWS 1
|
||||
#define verde__GPU__TC__NUM_TCIWS__1 1
|
||||
#define verde__GPU__TC__NUM_TCIWS__0_PRESENT 1
|
||||
#define verde__GPU__TC__CLIENT_TCI_REQ_CREDITS 8
|
||||
#define verde__GPU__TC__CLIENT_TCI_REQ_CREDITS__8 1
|
||||
#define verde__GPU__TC__VGT_TCI_REQ_CREDITS 8
|
||||
#define verde__GPU__TC__VGT_TCI_REQ_CREDITS__8 1
|
||||
#define verde__GPU__TC__SQC_TCI_REQ_CREDITS 8
|
||||
#define verde__GPU__TC__SQC_TCI_REQ_CREDITS__8 1
|
||||
#define verde__GPU__TC__CP_TCI_REQ_CREDITS 8
|
||||
#define verde__GPU__TC__CP_TCI_REQ_CREDITS__8 1
|
||||
#define verde__GPU__TC__NUM_TCIS 4
|
||||
#define verde__GPU__TC__NUM_TCIS__4 1
|
||||
#define verde__GPU__TC__NUM_TCIS__0_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCIS__1_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCIS__2_PRESENT 1
|
||||
#define verde__GPU__TC__NUM_TCIS__3_PRESENT 1
|
||||
#define verde__GPU__TC__TCC_NUM_LINES 2048
|
||||
#define verde__GPU__TC__TCC_NUM_LINES__2048 1
|
||||
#define verde__GPU__TC__TCA_PHASE 1
|
||||
#define verde__GPU__TC__TCA_PHASE__1 1
|
||||
#define verde__GPU__TC__TCA_RTN_ARB_IO_PIPELINING 0
|
||||
#define verde__GPU__TC__TCA_RTN_ARB_IO_PIPELINING__0 1
|
||||
#define verde__GPU__TC__CP_VGT_TCI_ABOVE_SH0 0
|
||||
#define verde__GPU__TC__CP_VGT_TCI_ABOVE_SH0__0 1
|
||||
#define verde__GPU__DB__TB_USES_EMULATOR_MODE 0
|
||||
#define verde__GPU__DB__TB_USES_EMULATOR_MODE__0 1
|
||||
#define verde__GPU__DB__USE_ADDRRAXX_LIB 1
|
||||
#define verde__GPU__DB__USE_ADDRRAXX_LIB__1 1
|
||||
#define verde__GPU__DB__LEGACY_TILE_MODE_ASSERTS 1
|
||||
#define verde__GPU__DB__LEGACY_TILE_MODE_ASSERTS__1 1
|
||||
#define verde__GPU__DB__SUBBLOCK_GATES_PRESENT 0
|
||||
#define verde__GPU__DB__SUBBLOCK_GATES_PRESENT__0 1
|
||||
#define verde__GPU__CB__BLENDER_NUM_PIXELS 4
|
||||
#define verde__GPU__CB__BLENDER_NUM_PIXELS__4 1
|
||||
#define verde__GPU__CB__BLENDER_NUM_FP32_COMPS 4
|
||||
#define verde__GPU__CB__BLENDER_NUM_FP32_COMPS__4 1
|
||||
#define verde__GPU__CB__COMPRESSION 1
|
||||
#define verde__GPU__CB__COMPRESSION__1 1
|
||||
#define verde__GPU__LDS__SIZE 64
|
||||
#define verde__GPU__LDS__SIZE__64 1
|
||||
#define verde__GPU__LDS__NUM_PIXELS 32
|
||||
#define verde__GPU__LDS__NUM_PIXELS__32 1
|
||||
#define verde__GPU__LDS__NUM_BANKS 32
|
||||
#define verde__GPU__LDS__NUM_BANKS__32 1
|
||||
#define verde__GPU__GDS__SIZE 64
|
||||
#define verde__GPU__GDS__SIZE__64 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS 16
|
||||
#define verde__GPU__GDS__NUM_PIXELS__16 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__0_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__1_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__2_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__3_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__4_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__5_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__6_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__7_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__8_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__9_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__10_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__11_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__12_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__13_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__14_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_PIXELS__15_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS 16
|
||||
#define verde__GPU__GDS__NUM_BANKS__16 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__0_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__1_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__2_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__3_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__4_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__5_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__6_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__7_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__8_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__9_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__10_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__11_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__12_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__13_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__14_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_BANKS__15_PRESENT 1
|
||||
#define verde__GPU__GDS__NUM_OA_COUNTERS 4
|
||||
#define verde__GPU__GDS__NUM_OA_COUNTERS__4 1
|
||||
#define verde__GPU__RLC__LARGE_UCODE_RAM 1
|
||||
#define verde__GPU__RLC__LARGE_UCODE_RAM__1 1
|
||||
#define verde__GPU__RLC__LARGE_SCRATCH_RAM 1
|
||||
#define verde__GPU__RLC__LARGE_SCRATCH_RAM__1 1
|
||||
#define verde__GPU__RLC__GFX_POWER_GATING 1
|
||||
#define verde__GPU__RLC__GFX_POWER_GATING__1 1
|
||||
#define verde__GPU__GC__SC_BCI_16_SAMPLE_PER_PIXEL 1
|
||||
#define verde__GPU__GC__SC_BCI_16_SAMPLE_PER_PIXEL__1 1
|
||||
#define verde__GPU__GC__TMP_USE_RASTER_CONFIG 1
|
||||
#define verde__GPU__GC__TMP_USE_RASTER_CONFIG__1 1
|
||||
#define verde__GPU__GC__FLT_NORM_0_6 0
|
||||
#define verde__GPU__GC__FLT_NORM_0_6__0 1
|
||||
#define verde__GPU__IO__PCIE_PHY falcon65g16x
|
||||
#define verde__GPU__IO__PCIE_PHY__FALCON65G16X 1
|
||||
#define verde__GPU__IO__DVP_SUBMOD io_r
|
||||
#define verde__GPU__IO__DVP_SUBMOD__IO_R 1
|
||||
#define verde__GPU__IO__SYNC_SUBMOD io_b
|
||||
#define verde__GPU__IO__SYNC_SUBMOD__IO_B 1
|
||||
#define verde__GPU__IO__GENERICA_SUBMOD io_b
|
||||
#define verde__GPU__IO__GENERICA_SUBMOD__IO_B 1
|
||||
#define verde__GPU__IO__GENERICB_SUBMOD io_b
|
||||
#define verde__GPU__IO__GENERICB_SUBMOD__IO_B 1
|
||||
#define verde__GPU__IO__GENERICC_SUBMOD io_b
|
||||
#define verde__GPU__IO__GENERICC_SUBMOD__IO_B 1
|
||||
#define verde__GPU__IO__GENERICD_SUBMOD io_b
|
||||
#define verde__GPU__IO__GENERICD_SUBMOD__IO_B 1
|
||||
#define verde__GPU__IO__GENERICE_SUBMOD io_b
|
||||
#define verde__GPU__IO__GENERICE_SUBMOD__IO_B 1
|
||||
#define verde__GPU__IO__GENERICF_SUBMOD io_b
|
||||
#define verde__GPU__IO__GENERICF_SUBMOD__IO_B 1
|
||||
#define verde__GPU__IO__GENERICG_SUBMOD io_b
|
||||
#define verde__GPU__IO__GENERICG_SUBMOD__IO_B 1
|
||||
#define verde__GPU__IO__VID_SUBMOD io_r
|
||||
#define verde__GPU__IO__VID_SUBMOD__IO_R 1
|
||||
#define verde__GPU__IO__GPIO_SUBMOD io_b
|
||||
#define verde__GPU__IO__GPIO_SUBMOD__IO_B 1
|
||||
#define verde__GPU__IO__PLL_SUBMOD io_b
|
||||
#define verde__GPU__IO__PLL_SUBMOD__IO_B 1
|
||||
#define verde__GPU__IO__SPLL_SUBMOD io_b
|
||||
#define verde__GPU__IO__SPLL_SUBMOD__IO_B 1
|
||||
#define verde__GPU__IO__UPLL_SUBMOD io_b
|
||||
#define verde__GPU__IO__UPLL_SUBMOD__IO_B 1
|
||||
#define verde__GPU__IO__HPD_SUBMOD io_b
|
||||
#define verde__GPU__IO__HPD_SUBMOD__IO_B 1
|
||||
#define verde__GPU__IO__I2C_SUBMOD io_b
|
||||
#define verde__GPU__IO__I2C_SUBMOD__IO_B 1
|
||||
#define verde__GPU__IO__ASAT_45_PLL 1
|
||||
#define verde__GPU__IO__ASAT_45_PLL__1 1
|
||||
#define verde__GPU__IO__PWRGOOD 1
|
||||
#define verde__GPU__IO__PWRGOOD__1 1
|
||||
#define verde__GPU__IO__NUM_MPLL 2
|
||||
#define verde__GPU__IO__NUM_MPLL__2 1
|
||||
#define verde__GPU__IO__READY 1
|
||||
#define verde__GPU__IO__READY__1 1
|
||||
#define verde__GPU__MC__NUM_MCB_BLOCKS 1
|
||||
#define verde__GPU__MC__NUM_MCB_BLOCKS__1 1
|
||||
#define verde__GPU__MC__NUM_MCB_BLOCKS__0_PRESENT 1
|
||||
#define verde__GPU__MC__NUM_MCB_TILES 1
|
||||
#define verde__GPU__MC__NUM_MCB_TILES__1 1
|
||||
#define verde__GPU__MC__NUM_MCB_TILES__0_PRESENT 1
|
||||
#define verde__GPU__MC__NUM_MCD_BLOCKS 3
|
||||
#define verde__GPU__MC__NUM_MCD_BLOCKS__3 1
|
||||
#define verde__GPU__MC__NUM_MCD_BLOCKS__0_PRESENT 1
|
||||
#define verde__GPU__MC__NUM_MCD_BLOCKS__1_PRESENT 1
|
||||
#define verde__GPU__MC__NUM_MCD_BLOCKS__2_PRESENT 1
|
||||
#define verde__GPU__MC__NUM_MCC_BLOCKS 2
|
||||
#define verde__GPU__MC__NUM_MCC_BLOCKS__2 1
|
||||
#define verde__GPU__MC__NUM_MCC_BLOCKS__0_PRESENT 1
|
||||
#define verde__GPU__MC__NUM_MCC_BLOCKS__1_PRESENT 1
|
||||
#define verde__GPU__MC__NUM_MCT_TILES 3
|
||||
#define verde__GPU__MC__NUM_MCT_TILES__3 1
|
||||
#define verde__GPU__MC__NUM_IO_CHNLS 6
|
||||
#define verde__GPU__MC__NUM_IO_CHNLS__6 1
|
||||
#define verde__GPU__MC__NUM_IO_CHNLS__0_PRESENT 1
|
||||
#define verde__GPU__MC__NUM_IO_CHNLS__1_PRESENT 1
|
||||
#define verde__GPU__MC__NUM_IO_CHNLS__2_PRESENT 1
|
||||
#define verde__GPU__MC__NUM_IO_CHNLS__3_PRESENT 1
|
||||
#define verde__GPU__MC__NUM_IO_CHNLS__4_PRESENT 1
|
||||
#define verde__GPU__MC__NUM_IO_CHNLS__5_PRESENT 1
|
||||
#define verde__GPU__MC__CDRRDBK 6
|
||||
#define verde__GPU__MC__CDRRDBK__6 1
|
||||
#define verde__GPU__MC__RPB_NEW_STREAM 1
|
||||
#define verde__GPU__MC__RPB_NEW_STREAM__1 1
|
||||
#define verde__GPU__MC__MCD0_BLOCK 1
|
||||
#define verde__GPU__MC__MCD0_BLOCK__1 1
|
||||
#define verde__GPU__MC__MCD1_BLOCK 1
|
||||
#define verde__GPU__MC__MCD1_BLOCK__1 1
|
||||
#define verde__GPU__MC__MCD2_BLOCK 1
|
||||
#define verde__GPU__MC__MCD2_BLOCK__1 1
|
||||
#define verde__GPU__MC__MCC0_BLOCK 1
|
||||
#define verde__GPU__MC__MCC0_BLOCK__1 1
|
||||
#define verde__GPU__MC__MCC1_BLOCK 1
|
||||
#define verde__GPU__MC__MCC1_BLOCK__1 1
|
||||
#define verde__GPU__MC__MCB_BLOCK 1
|
||||
#define verde__GPU__MC__MCB_BLOCK__1 1
|
||||
#define verde__GPU__MC__RB_REDUNDANCY 0
|
||||
#define verde__GPU__MC__RB_REDUNDANCY__0 1
|
||||
#define verde__GPU__MC__ALLOW_LARRAY 0
|
||||
#define verde__GPU__MC__ALLOW_LARRAY__0 1
|
||||
#define verde__GPU__MC__MCD_SRBM_PRESENT 1
|
||||
#define verde__GPU__MC__MCD_SRBM_PRESENT__1 1
|
||||
#define verde__GPU__MC__HDP_RD_ON_GBL1 1
|
||||
#define verde__GPU__MC__HDP_RD_ON_GBL1__1 1
|
||||
#define verde__GPU__MC__TWO_GBL0_RDRET 1
|
||||
#define verde__GPU__MC__TWO_GBL0_RDRET__1 1
|
||||
#define verde__GPU__MC__TWO_RB_PER_MCD 1
|
||||
#define verde__GPU__MC__TWO_RB_PER_MCD__1 1
|
||||
#define verde__GPU__MC__NUM_OF_RB_PER_MCD 2
|
||||
#define verde__GPU__MC__NUM_OF_RB_PER_MCD__2 1
|
||||
#define verde__GPU__MC__NUM_TC_PER_MCD 3
|
||||
#define verde__GPU__MC__NUM_TC_PER_MCD__3 1
|
||||
#define verde__GPU__MC__NUM_TCCS 6
|
||||
#define verde__GPU__MC__NUM_TCCS__6 1
|
||||
#define verde__GPU__MC__MCD0_IO0_REP 1
|
||||
#define verde__GPU__MC__MCD0_IO0_REP__1 1
|
||||
#define verde__GPU__MC__MCD0_IO1_REP 1
|
||||
#define verde__GPU__MC__MCD0_IO1_REP__1 1
|
||||
#define verde__GPU__MC__MCD1_IO0_REP 1
|
||||
#define verde__GPU__MC__MCD1_IO0_REP__1 1
|
||||
#define verde__GPU__MC__MCD1_IO1_REP 1
|
||||
#define verde__GPU__MC__MCD1_IO1_REP__1 1
|
||||
#define verde__GPU__MC__MCD2_IO0_REP 1
|
||||
#define verde__GPU__MC__MCD2_IO0_REP__1 1
|
||||
#define verde__GPU__MC__MCD2_IO1_REP 1
|
||||
#define verde__GPU__MC__MCD2_IO1_REP__1 1
|
||||
#define verde__GPU__MC__SIMPLIFIED_BLACKOUT 1
|
||||
#define verde__GPU__MC__SIMPLIFIED_BLACKOUT__1 1
|
||||
#define verde__GPU__MC__DDR5_MCLK_DEFAULT 5
|
||||
#define verde__GPU__MC__DDR5_MCLK_DEFAULT__5 1
|
||||
#define verde__GPU__MC__XBAR_REMAP 1
|
||||
#define verde__GPU__MC__XBAR_REMAP__1 1
|
||||
#define verde__GPU__MC__PAB_EXISTS 0
|
||||
#define verde__GPU__MC__PAB_EXISTS__0 1
|
||||
#define verde__GPU__MC__GPU_VIRTUAL_ADDRESS_WIDTH 40
|
||||
#define verde__GPU__MC__GPU_VIRTUAL_ADDRESS_WIDTH__40 1
|
||||
#define verde__GPU__MC__GPU_PHYSICAL_ADDRESS_WIDTH 40
|
||||
#define verde__GPU__MC__GPU_PHYSICAL_ADDRESS_WIDTH__40 1
|
||||
#define verde__GPU__MC__PCIE_VIRTUAL_ADDRESS_WIDTH 48
|
||||
#define verde__GPU__MC__PCIE_VIRTUAL_ADDRESS_WIDTH__48 1
|
||||
#define verde__GPU__MC__PCIE_PHYSICAL_ADDRESS_WIDTH 48
|
||||
#define verde__GPU__MC__PCIE_PHYSICAL_ADDRESS_WIDTH__48 1
|
||||
#define verde__GPU__MC__SPLIT_TILES 1
|
||||
#define verde__GPU__MC__SPLIT_TILES__1 1
|
||||
#define verde__GPU__MC__FUSION_FEATURE_ONLY 0
|
||||
#define verde__GPU__MC__FUSION_FEATURE_ONLY__0 1
|
||||
#define verde__GPU__MC__POWER_GATING 1
|
||||
#define verde__GPU__MC__POWER_GATING__1 1
|
||||
#define verde__GPU__MC__NUM_PGFSM_BLOCKS 3
|
||||
#define verde__GPU__MC__NUM_PGFSM_BLOCKS__3 1
|
||||
#define verde__GPU__MC__PHY_POWER_GATING 1
|
||||
#define verde__GPU__MC__PHY_POWER_GATING__1 1
|
||||
#define verde__GPU__VID__PRESENT 0
|
||||
#define verde__GPU__VID__PRESENT__0 1
|
||||
#define verde__GPU__DC__PRESENT 0
|
||||
#define verde__GPU__DC__PRESENT__0 1
|
||||
#define verde__GPU__AVP__PRESENT 0
|
||||
#define verde__GPU__AVP__PRESENT__0 1
|
||||
#define verde__GPU__UVD__PRESENT 0
|
||||
#define verde__GPU__UVD__PRESENT__0 1
|
||||
#define verde__ENV__GPU__UVD__HAVE_RTL 0
|
||||
#define verde__ENV__GPU__UVD__HAVE_RTL__0 1
|
||||
#define verde__ENV__GPU__MC__HAVE_BFM 1
|
||||
#define verde__ENV__GPU__MC__HAVE_BFM__1 1
|
||||
#define verde__ENV__GPU__MC__HAVE_RTL 0
|
||||
#define verde__ENV__GPU__MC__HAVE_RTL__0 1
|
||||
#define verde__GPU__UVD__PROJ_LARK 1
|
||||
#define verde__GPU__UVD__PROJ_LARK__1 1
|
||||
#define verde__GPU__UVD__CTX_ENABLE 1
|
||||
#define verde__GPU__UVD__CTX_ENABLE__1 1
|
||||
#define verde__GPU__UVD__MC_7XX 1
|
||||
#define verde__GPU__UVD__MC_7XX__1 1
|
||||
#define verde__GPU__UVD__CGC_CGTT_LOCAL_CLOCK_GATER 1
|
||||
#define verde__GPU__UVD__CGC_CGTT_LOCAL_CLOCK_GATER__1 1
|
||||
#define verde__GPU__MC__ARB_VM_CREDITS 32
|
||||
#define verde__GPU__MC__ARB_VM_CREDITS__32 1
|
||||
#define verde__GPU__MC__MCD_TLBS 4
|
||||
#define verde__GPU__MC__MCD_TLBS__4 1
|
||||
#define verde__GPU__MC__MCB_TLBS 3
|
||||
#define verde__GPU__MC__MCB_TLBS__3 1
|
||||
#define verde__GPU__MC__NO_STALL_ON_FAULT 1
|
||||
#define verde__GPU__MC__NO_STALL_ON_FAULT__1 1
|
||||
#define verde__GPU__MC__VMC_CACHES 2
|
||||
#define verde__GPU__MC__VMC_CACHES__2 1
|
||||
#define verde__GPU__MC__BIGK_CACHE_SIZE 4
|
||||
#define verde__GPU__MC__BIGK_CACHE_SIZE__4 1
|
||||
#define verde__GPU__MC__MCB_TLB0_CAM 5
|
||||
#define verde__GPU__MC__MCB_TLB0_CAM__5 1
|
||||
#define verde__GPU__MC__MCB_TLB1_CAM 4
|
||||
#define verde__GPU__MC__MCB_TLB1_CAM__4 1
|
||||
#define verde__GPU__MC__MCB_TLB2_CAM 4
|
||||
#define verde__GPU__MC__MCB_TLB2_CAM__4 1
|
||||
#define verde__GPU__MC__MCD_TLB0_CAM 4
|
||||
#define verde__GPU__MC__MCD_TLB0_CAM__4 1
|
||||
#define verde__GPU__MC__MCD_TLB1_CAM 4
|
||||
#define verde__GPU__MC__MCD_TLB1_CAM__4 1
|
||||
#define verde__GPU__MC__MCD_TLB2_CAM 4
|
||||
#define verde__GPU__MC__MCD_TLB2_CAM__4 1
|
||||
#define verde__GPU__MC__MCD_TLB3_CAM 4
|
||||
#define verde__GPU__MC__MCD_TLB3_CAM__4 1
|
||||
#define verde__GPU__MC__SEND_FREE_AT_RTN 1
|
||||
#define verde__GPU__MC__SEND_FREE_AT_RTN__1 1
|
||||
#define verde__GPU__MC__CONTEXT_WIDTH 3
|
||||
#define verde__GPU__MC__CONTEXT_WIDTH__3 1
|
||||
#define verde__GPU__MC__BUG_159204_EXISTS 1
|
||||
#define verde__GPU__MC__BUG_159204_EXISTS__1 1
|
||||
#endif
|
||||
@@ -1,98 +0,0 @@
|
||||
// Common header file for Si and Ci CommandWriter implementations
|
||||
|
||||
#ifndef _GFX8_UTILS_H_
|
||||
#define _GFX8_UTILS_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
namespace gfx8 {
|
||||
|
||||
static const uint8_t EventTypeToIndexTable[] = {
|
||||
0, // Reserved_0x00 0x00000000
|
||||
EVENT_WRITE_INDEX_SAMPLE_STREAMOUTSTATS, // SAMPLE_STREAMOUTSTATS1
|
||||
// 0x00000001
|
||||
EVENT_WRITE_INDEX_SAMPLE_STREAMOUTSTATS, // SAMPLE_STREAMOUTSTATS2
|
||||
// 0x00000002
|
||||
EVENT_WRITE_INDEX_SAMPLE_STREAMOUTSTATS, // SAMPLE_STREAMOUTSTATS3
|
||||
// 0x00000003
|
||||
EVENT_WRITE_INDEX_ANY_EOP_TIMESTAMP, // CACHE_FLUSH_TS 0x00000004
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // CONTEXT_DONE 0x00000005
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // CACHE_FLUSH 0x00000006
|
||||
EVENT_WRITE_INDEX_VS_PS_PARTIAL_FLUSH, // CS_PARTIAL_FLUSH 0x00000007
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // VGT_STREAMOUT_SYNC 0x00000008
|
||||
0, // Reserved_0x09 0x00000009
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // VGT_STREAMOUT_RESET 0x0000000a
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // END_OF_PIPE_INCR_DE 0x0000000b
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // END_OF_PIPE_IB_END 0x0000000c
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // RST_PIX_CNT 0x0000000d
|
||||
0, // Reserved_0x0E 0x0000000e
|
||||
EVENT_WRITE_INDEX_VS_PS_PARTIAL_FLUSH, // VS_PARTIAL_FLUSH 0x0000000f
|
||||
EVENT_WRITE_INDEX_VS_PS_PARTIAL_FLUSH, // PS_PARTIAL_FLUSH 0x00000010
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // FLUSH_HS_OUTPUT 0x00000011
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // FLUSH_LS_OUTPUT 0x00000012
|
||||
0, // Reserved_0x13 0x00000013
|
||||
EVENT_WRITE_INDEX_ANY_EOP_TIMESTAMP, // CACHE_FLUSH_AND_INV_TS_EVENT
|
||||
// 0x00000014
|
||||
EVENT_WRITE_INDEX_ZPASS_DONE, // ZPASS_DONE 0x00000015
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // CACHE_FLUSH_AND_INV_EVENT
|
||||
// 0x00000016
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // PERFCOUNTER_START 0x00000017
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // PERFCOUNTER_STOP 0x00000018
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // PIPELINESTAT_START 0x00000019
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // PIPELINESTAT_STOP 0x0000001a
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // PERFCOUNTER_SAMPLE 0x0000001b
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // FLUSH_ES_OUTPUT 0x0000001c
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // FLUSH_GS_OUTPUT 0x0000001d
|
||||
EVENT_WRITE_INDEX_SAMPLE_PIPELINESTAT, // SAMPLE_PIPELINESTAT 0x0000001e
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // SO_VGTSTREAMOUT_FLUSH 0x0000001f
|
||||
EVENT_WRITE_INDEX_SAMPLE_STREAMOUTSTATS, // SAMPLE_STREAMOUTSTATS
|
||||
// 0x00000020
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // RESET_VTX_CNT 0x00000021
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // BLOCK_CONTEXT_DONE 0x00000022
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // CS_CONTEXT_DONE 0x00000023
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // VGT_FLUSH 0x00000024
|
||||
0, // Reserved_0x25 0x00000025
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // SQ_NON_EVENT 0x00000026
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // SC_SEND_DB_VPZ 0x00000027
|
||||
EVENT_WRITE_INDEX_ANY_EOP_TIMESTAMP, // BOTTOM_OF_PIPE_TS 0x00000028
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // FLUSH_SX_TS 0x00000029
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // DB_CACHE_FLUSH_AND_INV 0x0000002a
|
||||
EVENT_WRITE_INDEX_ANY_EOP_TIMESTAMP, // FLUSH_AND_INV_DB_DATA_TS 0x0000002b
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // FLUSH_AND_INV_DB_META 0x0000002c
|
||||
EVENT_WRITE_INDEX_ANY_EOP_TIMESTAMP, // FLUSH_AND_INV_CB_DATA_TS 0x0000002d
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // FLUSH_AND_INV_CB_META 0x0000002e
|
||||
EVENT_WRITE_EOS_INDEX_CSDONE_PSDONE, // CS_DONE 0x0000002f
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // PS_DONE 0x00000030
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // FLUSH_AND_INV_CB_PIXEL_DATA
|
||||
// 0x00000031
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // SX_CB_RAT_ACK_REQUEST 0x00000032
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // THREAD_TRACE_START 0x00000033
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // THREAD_TRACE_STOP 0x00000034
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // THREAD_TRACE_MARKER 0x00000035
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // THREAD_TRACE_FLUSH 0x00000036
|
||||
EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP, // THREAD_TRACE_FINISH 0x00000037
|
||||
};
|
||||
|
||||
/// @brief Enum specifying the size of elements of a buffer
|
||||
enum BufElementSize {
  // Encoded values run 0..3 in declaration order; the numeric suffix of
  // each name is the element size that encoding stands for.
  kBufElementSize2 = 0,
  kBufElementSize4,
  kBufElementSize8,
  kBufElementSize16
};
|
||||
|
||||
/// @brief Enum specifying the striding of a buffer
|
||||
enum BufIndexStride {
  // Encoded values run 0..3 in declaration order; the numeric suffix of
  // each name is the index stride that encoding stands for.
  kBufIndexStride8 = 0,
  kBufIndexStride16,
  kBufIndexStride32,
  kBufIndexStride64
};
|
||||
|
||||
} // gfx8
|
||||
|
||||
} // pm4_profile
|
||||
|
||||
#endif // _GFX8_UTILS_H_
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,141 +0,0 @@
|
||||
//////////////////////////////////////////////////////////////////////////////////
|
||||
// THIS FILE IS AUTO-GENERATED BY PITGEN (vA)
|
||||
// !!!! DO NOT EDIT BY HAND !!!!
|
||||
//////////////////////////////////////////////////////////////////////////////////
|
||||
// Project: 10xx or later
|
||||
// Description:
|
||||
//
|
||||
// PM4 PacketType3 IT_OpCode Definitions
|
||||
// Extracted From ME and PFP F32 Microcode Jump Tables:
|
||||
//
|
||||
//////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Trade secret of ATI Technologies, Inc.
|
||||
// Copyright 1999, ATI Technologies, Inc., (unpublished)
|
||||
//
|
||||
// All rights reserved. This notice is intended as a precaution against
|
||||
// inadvertent publication and does not imply publication or any waiver
|
||||
// of confidentiality. The year included in the foregoing notice is the
|
||||
// year of creation of the work.
|
||||
//////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef PM4_IT_OPCODES_H
|
||||
#define PM4_IT_OPCODES_H
|
||||
|
||||
// PM4 PacketType3 IT opcode values. Several enumerators intentionally share
// a numeric value (for example IT_ATOMIC__SI__VI and IT_ATOMIC_MEM__CI).
enum IT_OpCodeType {
  IT_NOP                            = 0x10,
  IT_SET_BASE                       = 0x11,
  IT_CLEAR_STATE                    = 0x12,
  IT_INDEX_BUFFER_SIZE              = 0x13,
  IT_DISPATCH_DIRECT                = 0x15,
  IT_DISPATCH_INDIRECT              = 0x16,
  IT_INDIRECT_BUFFER_END            = 0x17,
  IT_INDIRECT_BUFFER_CNST_END       = 0x19,
  IT_ALLOC_GDS__SI                  = 0x1B,
  IT_WRITE_GDS_RAM__SI              = 0x1C,
  IT_ATOMIC_GDS                     = 0x1D,
  IT_ATOMIC__SI__VI                 = 0x1E,
  IT_OCCLUSION_QUERY                = 0x1F,
  IT_SET_PREDICATION                = 0x20,
  IT_REG_RMW                        = 0x21,
  IT_COND_EXEC                      = 0x22,
  IT_PRED_EXEC                      = 0x23,
  IT_DRAW_INDIRECT                  = 0x24,
  IT_DRAW_INDEX_INDIRECT            = 0x25,
  IT_INDEX_BASE                     = 0x26,
  IT_DRAW_INDEX_2                   = 0x27,
  IT_CONTEXT_CONTROL                = 0x28,
  IT_INDEX_TYPE                     = 0x2A,
  IT_DRAW_INDIRECT_MULTI            = 0x2C,
  IT_DRAW_INDEX_AUTO                = 0x2D,
  IT_DRAW_INDEX_IMMD__SI            = 0x2E,
  IT_NUM_INSTANCES                  = 0x2F,
  IT_DRAW_INDEX_MULTI_AUTO          = 0x30,
  IT_INDIRECT_BUFFER_CNST_PRIV__SI  = 0x31,
  IT_INDIRECT_BUFFER_PRIV           = 0x32,
  IT_INDIRECT_BUFFER_CNST           = 0x33,
  IT_STRMOUT_BUFFER_UPDATE          = 0x34,
  IT_DRAW_INDEX_OFFSET_2            = 0x35,
  IT_WRITE_DATA                     = 0x37,
  IT_DRAW_INDEX_INDIRECT_MULTI      = 0x38,
  IT_MEM_SEMAPHORE                  = 0x39,
  IT_MPEG_INDEX__SI                 = 0x3A,
  IT_COPY_DW__SI__CI                = 0x3B,
  IT_WAIT_REG_MEM                   = 0x3C,
  IT_MEM_WRITE__SI                  = 0x3D,
  IT_INDIRECT_BUFFER                = 0x3F,
  IT_COPY_DATA                      = 0x40,
  IT_CP_DMA                         = 0x41,
  IT_PFP_SYNC_ME                    = 0x42,
  IT_SURFACE_SYNC                   = 0x43,
  IT_ME_INITIALIZE                  = 0x44,
  IT_COND_WRITE                     = 0x45,
  IT_EVENT_WRITE                    = 0x46,
  IT_EVENT_WRITE_EOP                = 0x47,
  IT_EVENT_WRITE_EOS                = 0x48,
  IT_PREAMBLE_CNTL                  = 0x4A,
  IT_GFX_CNTX_UPDATE                = 0x52,
  IT_BLK_CNTX_UPDATE                = 0x53,
  IT_INCR_UPDT_STATE                = 0x55,
  IT_ONE_REG_WRITE__SI              = 0x57,
  IT_LOAD_SH_REG                    = 0x5F,
  IT_LOAD_CONFIG_REG                = 0x60,
  IT_LOAD_CONTEXT_REG               = 0x61,
  IT_SET_CONFIG_REG                 = 0x68,
  IT_SET_CONTEXT_REG                = 0x69,
  IT_SET_SH_REG_DI                  = 0x72,
  IT_SET_CONTEXT_REG_INDIRECT       = 0x73,
  IT_SET_SH_REG                     = 0x76,
  IT_SET_SH_REG_OFFSET              = 0x77,
  IT_ME_WRITE__SI                   = 0x7A,
  IT_PFP_WRITE__SI                  = 0x7B,
  IT_SCRATCH_RAM_WRITE              = 0x7D,
  IT_SCRATCH_RAM_READ               = 0x7E,
  IT_CE_WRITE__SI                   = 0x7F,
  IT_LOAD_CONST_RAM                 = 0x80,
  IT_WRITE_CONST_RAM                = 0x81,
  IT_WRITE_CONST_RAM_OFFSET__SI     = 0x82,
  IT_DUMP_CONST_RAM                 = 0x83,
  IT_INCREMENT_CE_COUNTER           = 0x84,
  IT_INCREMENT_DE_COUNTER           = 0x85,
  IT_WAIT_ON_CE_COUNTER             = 0x86,
  IT_WAIT_ON_DE_COUNTER__SI         = 0x87,
  IT_WAIT_ON_DE_COUNTER_DIFF        = 0x88,
  IT_SET_CE_DE_COUNTERS__SI         = 0x89,
  IT_WAIT_ON_AVAIL_BUFFER__SI       = 0x8A,
  IT_SWITCH_BUFFER                  = 0x8B,
  IT_FORWARD_HEADER                 = 0x7C,

  // Enumerators whose names carry a __CI or __CI__VI suffix.
  IT_ATOMIC_MEM__CI                 = 0x1E,
  IT_DRAW_PREAMBLE__CI__VI          = 0x36,
  IT_RELEASE_MEM__CI__VI            = 0x49,
  IT_DMA_DATA__CI__VI               = 0x50,
  IT_ACQUIRE_MEM__CI__VI            = 0x58,
  IT_REWIND__CI__VI                 = 0x59,
  IT_INTERRUPT__CI__VI              = 0x5A,
  IT_LOAD_UCONFIG_REG__CI__VI       = 0x5E,
  IT_SET_QUEUE_REG__CI__VI          = 0x78,
  IT_SET_UCONFIG_REG__CI__VI        = 0x79,
  IT_EOP_BUFFER_END__CI__VI         = 0x18,
  IT_INTR_BUFFER_END__CI__VI        = 0x1A,
  IT_RUN_LIST__CI                   = 0x3E,
  IT_SET_RESOURCES__CI__VI          = 0xA0,
  IT_MAP_PROCESS__CI__VI            = 0xA1,
  IT_MAP_QUEUES__CI__VI             = 0xA2,
  IT_QUERY_STATUS__CI               = 0xA3,
  IT_UNMAP_QUEUES__CI               = 0xA4,

  // Enumerators whose names carry only a __VI suffix.
  IT_COND_PREEMPT__VI               = 0x8E,
  IT_DISPATCH_DRAW_PREAMBLE__VI     = 0x8C,
  IT_DISPATCH_DRAW__VI              = 0x8D,
  IT_DISPATCH_DRAW_PREAMBLE_ACE__VI = 0x8C,
  IT_DISPATCH_DRAW_ACE__VI          = 0x8D,
  IT_PRIME_ATCL2__VI                = 0x8E,
  IT_UNMAP_QUEUES__VI               = 0xA3,
  IT_QUERY_STATUS__VI               = 0xA4,
  IT_RUN_LIST__VI                   = 0xA5
};

// Numeric PM4 packet type codes.
#define PM4_TYPE_0 0
#define PM4_TYPE_2 2
#define PM4_TYPE_3 3
|
||||
|
||||
#endif // PM4_IT_OPCODES_H
|
||||
@@ -1,79 +0,0 @@
|
||||
/*
|
||||
***************************************************************************************************
|
||||
*
|
||||
* Trade secret of Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2010 Advanced Micro Devices, Inc. (unpublished)
|
||||
*
|
||||
* All rights reserved. This notice is intended as a precaution against inadvertent publication and
|
||||
* does not imply publication or any waiver of confidentiality. The year included in the foregoing
|
||||
* notice is the year of creation of the work.
|
||||
*
|
||||
***************************************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _SI_CI_VI_PM4CMDS_H_
|
||||
#define _SI_CI_VI_PM4CMDS_H_
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* si_ci_vi_merged_pm4cmds.h
|
||||
*
|
||||
* SI PM4 definitions, typedefs, and enumerations.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include "si_pm4defs.h"
|
||||
#include "si_ci_vi_merged_pm4_it_opcodes.h"
|
||||
|
||||
// Wrapper on the new header-generation macro: forwards `op` and `count` to
// PM4_TYPE_3_HDR with the ShaderGraphics and PredDisable arguments fixed.
#define PM4_CMD(op, count) PM4_TYPE_3_HDR(op, count, ShaderGraphics, PredDisable)

// Draw packets. IT_DRAW_INDEX is replaced by IT_DRAW_INDEX_2.
#define PM4_CMD_DRAW_INDEX_2(count) PM4_CMD(IT_DRAW_INDEX_2, count)
#define PM4_CMD_DRAW_INDEX_AUTO(count) PM4_CMD(IT_DRAW_INDEX_AUTO, count)
#define PM4_CMD_DRAW_INDEX_IMMD_SI(count) PM4_CMD(IT_DRAW_INDEX_IMMD__SI, count)
#define PM4_CMD_DRAW_INDEX_TYPE(count) PM4_CMD(IT_INDEX_TYPE, count)
#define PM4_CMD_DRAW_NUM_INSTANCES(count) PM4_CMD(IT_NUM_INSTANCES, count)
#define PM4_CMD_DRAW_PREAMBLE(count) PM4_CMD(IT_DRAW_PREAMBLE__CI__VI, count)

#define PM4_CMD_WAIT_REG_MEM(count) PM4_CMD(IT_WAIT_REG_MEM, count)
// The merged opcode enum spells this opcode IT_MEM_WRITE__SI; the unsuffixed
// name used previously is not an enumerator there.
#define PM4_CMD_MEM_WRITE(count) PM4_CMD(IT_MEM_WRITE__SI, count)
#define PM4_CMD_EVENT_WRITE(count) PM4_CMD(IT_EVENT_WRITE, count)
#define PM4_CMD_EVENT_WRITE_EOP(count) PM4_CMD(IT_EVENT_WRITE_EOP, count)
#define PM4_CMD_STRMOUT_BUFFER_UPDATE(count) PM4_CMD(IT_STRMOUT_BUFFER_UPDATE, count)
#define PM4_CMD_COPY_DATA(count) PM4_CMD(IT_COPY_DATA, count)
#define PM4_CMD_CP_DMA(count) PM4_CMD(IT_CP_DMA, count)
#define PM4_CMD_SET_PREDICATION(count) PM4_CMD(IT_SET_PREDICATION, count)
// NOTE(review): IT_SURFACE_BASE_UPDATE, IT_STRMOUT_BASE_UPDATE,
// IT_START_3D_CMDBUF and IT_ROLL_CONTEXT do not appear to be enumerators of
// the merged opcode enum. Confirm that si_pm4defs.h (or another header)
// provides them; otherwise expanding the next four macros will not compile.
#define PM4_CMD_SURFACE_BASE_UPDATE(count) PM4_CMD(IT_SURFACE_BASE_UPDATE, count)
#define PM4_CMD_STRMOUT_BASE_UPDATE(count) PM4_CMD(IT_STRMOUT_BASE_UPDATE, count)
#define PM4_CMD_START_3D_CMDBUF(count) PM4_CMD(IT_START_3D_CMDBUF, count)
#define PM4_CMD_ROLL_CONTEXT(count) PM4_CMD(IT_ROLL_CONTEXT, count)
#define PM4_CMD_CONTEXT_CTL(count) PM4_CMD(IT_CONTEXT_CONTROL, count)
// Takes no argument: the count is fixed at 2.
#define PM4_CMD_PRED_EXEC PM4_CMD(IT_PRED_EXEC, 2)
#define PM4_CMD_SURFACE_SYNC(count) PM4_CMD(IT_SURFACE_SYNC, count)

// Register load packets.
#define PM4_CMD_LOAD_CONFIG_REG(count) PM4_CMD(IT_LOAD_CONFIG_REG, count)
#define PM4_CMD_LOAD_CONTEXT_REG(count) PM4_CMD(IT_LOAD_CONTEXT_REG, count)
#define PM4_CMD_LOAD_SH_REG(count) PM4_CMD(IT_LOAD_SH_REG, count)

// Register set packets.
#define PM4_CMD_SET_CONFIG_REG(count) PM4_CMD(IT_SET_CONFIG_REG, count)
#define PM4_CMD_SET_CONTEXT_REG(count) PM4_CMD(IT_SET_CONTEXT_REG, count)
#define PM4_CMD_SET_SH_REG(count) PM4_CMD(IT_SET_SH_REG, count)

// Constant indirect-buffer packets.
#define PM4_CMD_INDIRECT_BUFFER_CNST_END(count) PM4_CMD(IT_INDIRECT_BUFFER_CNST_END, count)
// The merged opcode enum spells this opcode IT_INDIRECT_BUFFER_CNST_PRIV__SI.
#define PM4_CMD_INDIRECT_BUFFER_CNST_PRIV(count) PM4_CMD(IT_INDIRECT_BUFFER_CNST_PRIV__SI, count)
#define PM4_CMD_INDIRECT_BUFFER_CNST(count) PM4_CMD(IT_INDIRECT_BUFFER_CNST, count)

// Constant RAM packets.
#define PM4_CMD_LOAD_CONST_RAM(count) PM4_CMD(IT_LOAD_CONST_RAM, count)
#define PM4_CMD_WRITE_CONST_RAM(count) PM4_CMD(IT_WRITE_CONST_RAM, count)
#define PM4_CMD_DUMP_CONST_RAM(count) PM4_CMD(IT_DUMP_CONST_RAM, count)

// CE / DE counter packets.
#define PM4_CMD_INC_CE_COUNTER(count) PM4_CMD(IT_INCREMENT_CE_COUNTER, count)
#define PM4_CMD_INC_DE_COUNTER(count) PM4_CMD(IT_INCREMENT_DE_COUNTER, count)

#define PM4_CMD_WAIT_ON_CE_COUNTER(count) PM4_CMD(IT_WAIT_ON_CE_COUNTER, count)
#define PM4_CMD_WAIT_ON_DE_COUNTER_DIFF(count) PM4_CMD(IT_WAIT_ON_DE_COUNTER_DIFF, count)

#define PM4_CMD_WRITE_DATA(count) PM4_CMD(IT_WRITE_DATA, count)
|
||||
|
||||
#endif // _SI_CI_VI_PM4CMDS_H_
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,676 +0,0 @@
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
//
|
||||
// Trade secret of Advanced Micro Devices, Inc.
|
||||
// Copyright 2014, Advanced Micro Devices, Inc., (unpublished)
|
||||
//
|
||||
// All rights reserved. This notice is intended as a precaution against
|
||||
// inadvertent publication and does not imply publication or any waiver
|
||||
// of confidentiality. The year included in the foregoing notice is the
|
||||
// year of creation of the work.
|
||||
//
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
#ifndef F32_CE_PM4_PACKETS_H
|
||||
#define F32_CE_PM4_PACKETS_H
|
||||
|
||||
namespace pm4_profile {
|
||||
namespace gfx9 {
|
||||
|
||||
|
||||
#ifndef PM4_CE_HEADER_DEFINED
#define PM4_CE_HEADER_DEFINED
// First DWORD of every CE packet below: either the four bit-fields or the
// raw 32-bit value through u32All.
typedef union PM4_CE_TYPE_3_HEADER {
  struct {
    uint32_t reserved1 : 8;  ///< reserved
    uint32_t opcode : 8;     ///< IT opcode
    uint32_t count : 14;     ///< number of DWORDs - 1 in the information body
    uint32_t type : 2;       ///< packet identifier; should be 3 for type 3 packets
  };
  uint32_t u32All;  ///< all 32 header bits as one value
} PM4_CE_TYPE_3_HEADER;
#endif  // PM4_CE_HEADER_DEFINED
|
||||
|
||||
//--------------------CE_COND_EXEC--------------------
|
||||
|
||||
#ifndef PM4_CE_COND_EXEC_DEFINED
|
||||
#define PM4_CE_COND_EXEC_DEFINED
|
||||
|
||||
typedef struct PM4_CE_COND_EXEC {
|
||||
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t reserved1 : 2;
|
||||
uint32_t addr_lo : 30;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
uint32_t addr_hi;
|
||||
|
||||
uint32_t reserved2;
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t exec_count : 14;
|
||||
uint32_t reserved3 : 18;
|
||||
} bitfields5;
|
||||
uint32_t ordinal5;
|
||||
};
|
||||
|
||||
} PM4CE_COND_EXEC, *PPM4CE_COND_EXEC;
|
||||
#endif
|
||||
|
||||
//--------------------CE_CONTEXT_CONTROL--------------------
|
||||
|
||||
#ifndef PM4_CE_CONTEXT_CONTROL_DEFINED
|
||||
#define PM4_CE_CONTEXT_CONTROL_DEFINED
|
||||
|
||||
typedef struct PM4_CE_CONTEXT_CONTROL {
|
||||
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t reserved1 : 28;
|
||||
uint32_t load_ce_ram : 1;
|
||||
uint32_t reserved2 : 2;
|
||||
uint32_t load_enable : 1;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
uint32_t reserved3;
|
||||
|
||||
} PM4CE_CONTEXT_CONTROL, *PPM4CE_CONTEXT_CONTROL;
|
||||
#endif
|
||||
|
||||
//--------------------CE_COPY_DATA--------------------
|
||||
|
||||
#ifndef PM4_CE_COPY_DATA_DEFINED
|
||||
#define PM4_CE_COPY_DATA_DEFINED
|
||||
enum CE_COPY_DATA_src_sel_enum {
|
||||
src_sel__ce_copy_data__mem_mapped_register = 0,
|
||||
src_sel__ce_copy_data__memory = 1,
|
||||
src_sel__ce_copy_data__tc_l2 = 2,
|
||||
src_sel__ce_copy_data__immediate_data = 5
|
||||
};
|
||||
|
||||
enum CE_COPY_DATA_dst_sel_enum {
|
||||
dst_sel__ce_copy_data__mem_mapped_register = 0,
|
||||
dst_sel__ce_copy_data__tc_l2 = 2,
|
||||
dst_sel__ce_copy_data__memory = 5
|
||||
};
|
||||
|
||||
enum CE_COPY_DATA_src_cache_policy_enum {
|
||||
src_cache_policy__ce_copy_data__lru = 0,
|
||||
src_cache_policy__ce_copy_data__stream = 1
|
||||
};
|
||||
|
||||
enum CE_COPY_DATA_count_sel_enum {
|
||||
count_sel__ce_copy_data__32_bits_of_data = 0,
|
||||
count_sel__ce_copy_data__64_bits_of_data = 1
|
||||
};
|
||||
|
||||
enum CE_COPY_DATA_wr_confirm_enum {
|
||||
wr_confirm__ce_copy_data__do_not_wait_for_confirmation = 0,
|
||||
wr_confirm__ce_copy_data__wait_for_confirmation = 1
|
||||
};
|
||||
|
||||
enum CE_COPY_DATA_dst_cache_policy_enum {
|
||||
dst_cache_policy__ce_copy_data__lru = 0,
|
||||
dst_cache_policy__ce_copy_data__stream = 1
|
||||
};
|
||||
|
||||
enum CE_COPY_DATA_engine_sel_enum { engine_sel__ce_copy_data__constant_engine = 2 };
|
||||
|
||||
|
||||
typedef struct PM4_CE_COPY_DATA {
|
||||
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
CE_COPY_DATA_src_sel_enum src_sel : 4;
|
||||
uint32_t reserved1 : 4;
|
||||
CE_COPY_DATA_dst_sel_enum dst_sel : 4;
|
||||
uint32_t reserved2 : 1;
|
||||
CE_COPY_DATA_src_cache_policy_enum src_cache_policy : 2;
|
||||
uint32_t reserved3 : 1;
|
||||
CE_COPY_DATA_count_sel_enum count_sel : 1;
|
||||
uint32_t reserved4 : 3;
|
||||
CE_COPY_DATA_wr_confirm_enum wr_confirm : 1;
|
||||
uint32_t reserved5 : 4;
|
||||
CE_COPY_DATA_dst_cache_policy_enum dst_cache_policy : 2;
|
||||
uint32_t reserved6 : 3;
|
||||
CE_COPY_DATA_engine_sel_enum engine_sel : 2;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t src_reg_offset : 18;
|
||||
uint32_t reserved7 : 14;
|
||||
} bitfields3a;
|
||||
struct {
|
||||
uint32_t reserved8 : 2;
|
||||
uint32_t src_32b_addr_lo : 30;
|
||||
} bitfields3b;
|
||||
struct {
|
||||
uint32_t reserved9 : 3;
|
||||
uint32_t src_64b_addr_lo : 29;
|
||||
} bitfields3c;
|
||||
uint32_t imm_data;
|
||||
|
||||
uint32_t ordinal3;
|
||||
};
|
||||
|
||||
union {
|
||||
uint32_t src_memtc_addr_hi;
|
||||
|
||||
uint32_t src_imm_data;
|
||||
|
||||
uint32_t ordinal4;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t dst_reg_offset : 18;
|
||||
uint32_t reserved10 : 14;
|
||||
} bitfields5a;
|
||||
struct {
|
||||
uint32_t reserved11 : 2;
|
||||
uint32_t dst_32b_addr_lo : 30;
|
||||
} bitfields5b;
|
||||
struct {
|
||||
uint32_t reserved12 : 3;
|
||||
uint32_t dst_64b_addr_lo : 29;
|
||||
} bitfields5c;
|
||||
uint32_t ordinal5;
|
||||
};
|
||||
|
||||
uint32_t dst_addr_hi;
|
||||
|
||||
} PM4CE_COPY_DATA, *PPM4CE_COPY_DATA;
|
||||
#endif
|
||||
|
||||
//--------------------CE_DUMP_CONST_RAM--------------------
|
||||
|
||||
#ifndef PM4_CE_DUMP_CONST_RAM_DEFINED
|
||||
#define PM4_CE_DUMP_CONST_RAM_DEFINED
|
||||
enum CE_DUMP_CONST_RAM_cache_policy_enum {
|
||||
cache_policy__ce_dump_const_ram__lru = 0,
|
||||
cache_policy__ce_dump_const_ram__stream = 1,
|
||||
cache_policy__ce_dump_const_ram__bypass = 2
|
||||
};
|
||||
|
||||
|
||||
typedef struct PM4_CE_DUMP_CONST_RAM {
|
||||
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t offset : 16;
|
||||
uint32_t reserved1 : 9;
|
||||
CE_DUMP_CONST_RAM_cache_policy_enum cache_policy : 2;
|
||||
uint32_t reserved2 : 3;
|
||||
uint32_t increment_cs : 1;
|
||||
uint32_t increment_ce : 1;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t num_dw : 15;
|
||||
uint32_t reserved3 : 17;
|
||||
} bitfields3;
|
||||
uint32_t ordinal3;
|
||||
};
|
||||
|
||||
uint32_t addr_lo;
|
||||
|
||||
uint32_t addr_hi;
|
||||
|
||||
} PM4CE_DUMP_CONST_RAM, *PPM4CE_DUMP_CONST_RAM;
|
||||
#endif
|
||||
|
||||
//--------------------CE_DUMP_CONST_RAM_OFFSET--------------------
|
||||
|
||||
#ifndef PM4_CE_DUMP_CONST_RAM_OFFSET_DEFINED
|
||||
#define PM4_CE_DUMP_CONST_RAM_OFFSET_DEFINED
|
||||
enum CE_DUMP_CONST_RAM_OFFSET_cache_policy_enum {
|
||||
cache_policy__ce_dump_const_ram_offset__lru = 0,
|
||||
cache_policy__ce_dump_const_ram_offset__stream = 1,
|
||||
cache_policy__ce_dump_const_ram_offset__bypass = 2
|
||||
};
|
||||
|
||||
|
||||
typedef struct PM4_CE_DUMP_CONST_RAM_OFFSET {
|
||||
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t offset : 16;
|
||||
uint32_t reserved1 : 9;
|
||||
CE_DUMP_CONST_RAM_OFFSET_cache_policy_enum cache_policy : 2;
|
||||
uint32_t reserved2 : 3;
|
||||
uint32_t increment_cs : 1;
|
||||
uint32_t increment_ce : 1;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t num_dw : 15;
|
||||
uint32_t reserved3 : 17;
|
||||
} bitfields3;
|
||||
uint32_t ordinal3;
|
||||
};
|
||||
|
||||
uint32_t addr_offset;
|
||||
|
||||
} PM4CE_DUMP_CONST_RAM_OFFSET, *PPM4CE_DUMP_CONST_RAM_OFFSET;
|
||||
#endif
|
||||
|
||||
//--------------------CE_FRAME_CONTROL--------------------
|
||||
|
||||
#ifndef PM4_CE_FRAME_CONTROL_DEFINED
|
||||
#define PM4_CE_FRAME_CONTROL_DEFINED
|
||||
enum CE_FRAME_CONTROL_command_enum {
|
||||
command__ce_frame_control__tmz_begin = 0,
|
||||
command__ce_frame_control__tmz_end = 1
|
||||
};
|
||||
|
||||
|
||||
typedef struct PM4_CE_FRAME_CONTROL {
|
||||
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t tmz : 1;
|
||||
uint32_t reserved1 : 27;
|
||||
CE_FRAME_CONTROL_command_enum command : 4;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
} PM4CE_FRAME_CONTROL, *PPM4CE_FRAME_CONTROL;
|
||||
#endif
|
||||
|
||||
//--------------------CE_INCREMENT_CE_COUNTER--------------------
|
||||
|
||||
#ifndef PM4_CE_INCREMENT_CE_COUNTER_DEFINED
|
||||
#define PM4_CE_INCREMENT_CE_COUNTER_DEFINED
|
||||
enum CE_INCREMENT_CE_COUNTER_cntrsel_enum {
|
||||
cntrsel__ce_increment_ce_counter__invalid = 0,
|
||||
cntrsel__ce_increment_ce_counter__increment_ce_counter = 1,
|
||||
cntrsel__ce_increment_ce_counter__increment_cs_counter = 2,
|
||||
cntrsel__ce_increment_ce_counter__increment_ce_and_cs_counters = 3
|
||||
};
|
||||
|
||||
|
||||
typedef struct PM4_CE_INCREMENT_CE_COUNTER {
|
||||
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
CE_INCREMENT_CE_COUNTER_cntrsel_enum cntrsel : 2;
|
||||
uint32_t reserved1 : 30;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
} PM4CE_INCREMENT_CE_COUNTER, *PPM4CE_INCREMENT_CE_COUNTER;
|
||||
#endif
|
||||
|
||||
//--------------------CE_INDIRECT_BUFFER_CONST--------------------
|
||||
|
||||
#ifndef PM4_CE_INDIRECT_BUFFER_CONST_DEFINED
|
||||
#define PM4_CE_INDIRECT_BUFFER_CONST_DEFINED
|
||||
|
||||
typedef struct PM4_CE_INDIRECT_BUFFER_CONST {
|
||||
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t reserved1 : 2;
|
||||
uint32_t ib_base_lo : 30;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
uint32_t ib_base_hi;
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t ib_size : 20;
|
||||
uint32_t chain : 1;
|
||||
uint32_t pre_ena : 1;
|
||||
uint32_t reserved2 : 2;
|
||||
uint32_t vmid : 4;
|
||||
uint32_t cache_policy : 2;
|
||||
uint32_t pre_resume : 1;
|
||||
uint32_t priv : 1;
|
||||
} bitfields4;
|
||||
uint32_t ordinal4;
|
||||
};
|
||||
|
||||
} PM4CE_INDIRECT_BUFFER_CONST, *PPM4CE_INDIRECT_BUFFER_CONST;
|
||||
#endif
|
||||
|
||||
//--------------------CE_LOAD_CONST_RAM--------------------
|
||||
|
||||
#ifndef PM4_CE_LOAD_CONST_RAM_DEFINED
|
||||
#define PM4_CE_LOAD_CONST_RAM_DEFINED
|
||||
enum CE_LOAD_CONST_RAM_cache_policy_enum {
|
||||
cache_policy__ce_load_const_ram__lru = 0,
|
||||
cache_policy__ce_load_const_ram__stream = 1
|
||||
};
|
||||
|
||||
|
||||
typedef struct PM4_CE_LOAD_CONST_RAM {
|
||||
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
uint32_t addr_lo;
|
||||
|
||||
uint32_t addr_hi;
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t num_dw : 15;
|
||||
uint32_t reserved1 : 17;
|
||||
} bitfields4;
|
||||
uint32_t ordinal4;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t start_addr : 16;
|
||||
uint32_t reserved2 : 9;
|
||||
CE_LOAD_CONST_RAM_cache_policy_enum cache_policy : 2;
|
||||
uint32_t reserved3 : 5;
|
||||
} bitfields5;
|
||||
uint32_t ordinal5;
|
||||
};
|
||||
|
||||
} PM4CE_LOAD_CONST_RAM, *PPM4CE_LOAD_CONST_RAM;
|
||||
#endif
|
||||
|
||||
//--------------------CE_NOP--------------------
|
||||
|
||||
#ifndef PM4_CE_NOP_DEFINED
|
||||
#define PM4_CE_NOP_DEFINED
|
||||
|
||||
typedef struct PM4_CE_NOP {
|
||||
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
// uint32_t data_block[]; // N-DWords
|
||||
|
||||
} PM4CE_NOP, *PPM4CE_NOP;
|
||||
#endif
|
||||
|
||||
//--------------------CE_PRIME_UTCL2--------------------
|
||||
|
||||
#ifndef PM4_CE_PRIME_UTCL2_DEFINED
|
||||
#define PM4_CE_PRIME_UTCL2_DEFINED
|
||||
enum CE_PRIME_UTCL2_cache_perm_enum {
|
||||
cache_perm__ce_prime_utcl2__read = 0,
|
||||
cache_perm__ce_prime_utcl2__write = 1,
|
||||
cache_perm__ce_prime_utcl2__execute = 2
|
||||
};
|
||||
|
||||
enum CE_PRIME_UTCL2_prime_mode_enum {
|
||||
prime_mode__ce_prime_utcl2__dont_wait_for_xack = 0,
|
||||
prime_mode__ce_prime_utcl2__wait_for_xack = 1
|
||||
};
|
||||
|
||||
enum CE_PRIME_UTCL2_engine_sel_enum { engine_sel__ce_prime_utcl2__constant_engine = 2 };
|
||||
|
||||
|
||||
typedef struct PM4_CE_PRIME_UTCL2 {
|
||||
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
CE_PRIME_UTCL2_cache_perm_enum cache_perm : 3;
|
||||
CE_PRIME_UTCL2_prime_mode_enum prime_mode : 1;
|
||||
uint32_t reserved1 : 26;
|
||||
CE_PRIME_UTCL2_engine_sel_enum engine_sel : 2;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
uint32_t addr_lo;
|
||||
|
||||
uint32_t addr_hi;
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t requested_pages : 14;
|
||||
uint32_t reserved2 : 18;
|
||||
} bitfields5;
|
||||
uint32_t ordinal5;
|
||||
};
|
||||
|
||||
} PM4CE_PRIME_UTCL2, *PPM4CE_PRIME_UTCL2;
|
||||
#endif
|
||||
|
||||
//--------------------CE_SET_BASE--------------------
|
||||
|
||||
#ifndef PM4_CE_SET_BASE_DEFINED
|
||||
#define PM4_CE_SET_BASE_DEFINED
|
||||
enum CE_SET_BASE_base_index_enum {
|
||||
base_index__ce_set_base__ce_dst_base_addr = 2,
|
||||
base_index__ce_set_base__ce_partition_bases = 3
|
||||
};
|
||||
|
||||
|
||||
typedef struct PM4_CE_SET_BASE {
|
||||
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
CE_SET_BASE_base_index_enum base_index : 4;
|
||||
uint32_t reserved1 : 28;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t reserved2 : 3;
|
||||
uint32_t address_lo : 29;
|
||||
} bitfields3a;
|
||||
struct {
|
||||
uint32_t cs1_index : 16;
|
||||
uint32_t reserved3 : 16;
|
||||
} bitfields3b;
|
||||
uint32_t ordinal3;
|
||||
};
|
||||
|
||||
union {
|
||||
uint32_t address_hi;
|
||||
|
||||
struct {
|
||||
uint32_t cs2_index : 16;
|
||||
uint32_t reserved4 : 16;
|
||||
} bitfields4b;
|
||||
uint32_t ordinal4;
|
||||
};
|
||||
|
||||
} PM4CE_SET_BASE, *PPM4CE_SET_BASE;
|
||||
#endif
|
||||
|
||||
//--------------------CE_SWITCH_BUFFER--------------------
|
||||
|
||||
#ifndef PM4_CE_SWITCH_BUFFER_DEFINED
|
||||
#define PM4_CE_SWITCH_BUFFER_DEFINED
|
||||
|
||||
typedef struct PM4_CE_SWITCH_BUFFER {
|
||||
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t tmz : 1;
|
||||
uint32_t reserved1 : 31;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
} PM4CE_SWITCH_BUFFER, *PPM4CE_SWITCH_BUFFER;
|
||||
#endif
|
||||
|
||||
//--------------------CE_WAIT_ON_DE_COUNTER_DIFF--------------------
|
||||
|
||||
#ifndef PM4_CE_WAIT_ON_DE_COUNTER_DIFF_DEFINED
|
||||
#define PM4_CE_WAIT_ON_DE_COUNTER_DIFF_DEFINED
|
||||
|
||||
typedef struct PM4_CE_WAIT_ON_DE_COUNTER_DIFF {
|
||||
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
uint32_t diff;
|
||||
|
||||
} PM4CE_WAIT_ON_DE_COUNTER_DIFF, *PPM4CE_WAIT_ON_DE_COUNTER_DIFF;
|
||||
#endif
|
||||
|
||||
//--------------------CE_WRITE_CONST_RAM--------------------
|
||||
|
||||
#ifndef PM4_CE_WRITE_CONST_RAM_DEFINED
|
||||
#define PM4_CE_WRITE_CONST_RAM_DEFINED
|
||||
|
||||
// CE WRITE_CONST_RAM packet layout: the fixed two-dword prefix of the packet.
// The variable-length payload is not a member of the struct (see the
// commented-out data[] line below).
typedef struct PM4_CE_WRITE_CONST_RAM {
|
||||
// Ordinal 1: packet header overlaid with its raw dword.
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
// Ordinal 2: 16-bit offset followed by 16 reserved bits.
union {
|
||||
struct {
|
||||
uint32_t offset : 16;
|
||||
uint32_t reserved1 : 16;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
// Payload placeholder: intentionally left commented out, so sizeof() covers
// only the two dwords above.
// uint32_t data[]; // N-DWords
|
||||
|
||||
} PM4CE_WRITE_CONST_RAM, *PPM4CE_WRITE_CONST_RAM;
|
||||
#endif
|
||||
|
||||
//--------------------CE_WRITE_DATA--------------------
|
||||
|
||||
#ifndef PM4_CE_WRITE_DATA_DEFINED
|
||||
#define PM4_CE_WRITE_DATA_DEFINED
|
||||
// Values for the 4-bit dst_sel field of PM4_CE_WRITE_DATA. Only 0, 5 and 8 are
// named here; the numbering is not contiguous.
enum CE_WRITE_DATA_dst_sel_enum {
|
||||
dst_sel__ce_write_data__mem_mapped_register = 0,
|
||||
dst_sel__ce_write_data__memory = 5,
|
||||
dst_sel__ce_write_data__preemption_meta_memory = 8
|
||||
};
|
||||
|
||||
// Values for the 1-bit addr_incr field of PM4_CE_WRITE_DATA.
enum CE_WRITE_DATA_addr_incr_enum {
|
||||
addr_incr__ce_write_data__increment_address = 0,
|
||||
addr_incr__ce_write_data__do_not_increment_address = 1
|
||||
};
|
||||
|
||||
// Values for the 1-bit wr_confirm field of PM4_CE_WRITE_DATA.
enum CE_WRITE_DATA_wr_confirm_enum {
|
||||
wr_confirm__ce_write_data__do_not_wait_for_write_confirmation = 0,
|
||||
wr_confirm__ce_write_data__wait_for_write_confirmation = 1
|
||||
};
|
||||
|
||||
// Values for the 2-bit cache_policy field of PM4_CE_WRITE_DATA; only two of
// the four encodable values are named.
enum CE_WRITE_DATA_cache_policy_enum {
|
||||
cache_policy__ce_write_data__lru = 0,
|
||||
cache_policy__ce_write_data__stream = 1
|
||||
};
|
||||
|
||||
// Value for the 2-bit engine_sel field of PM4_CE_WRITE_DATA; 2 is the only
// enumerator defined.
enum CE_WRITE_DATA_engine_sel_enum { engine_sel__ce_write_data__constant_engine = 2 };
|
||||
|
||||
|
||||
// CE WRITE_DATA packet layout: the fixed four-dword prefix of the packet. The
// variable-length payload is not a member of the struct (see the commented-out
// data[] line at the end).
typedef struct PM4_CE_WRITE_DATA {
|
||||
// Ordinal 1: packet header overlaid with its raw dword.
union {
|
||||
PM4_CE_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
// Ordinal 2: control word. Declared widths, in order:
//   8 + 4 + 4 + 1 + 2 + 1 + 1 + 4 + 2 + 3 + 2 = 32 bits.
// NOTE(review): addr_incr, wr_confirm and engine_sel are enum-typed bit-fields
// exactly as wide as their largest enumerator (1, 1 and 2), so reading them
// back correctly relies on the enum's underlying type being unsigned - confirm
// on every supported compiler.
union {
|
||||
struct {
|
||||
uint32_t reserved1 : 8;
|
||||
CE_WRITE_DATA_dst_sel_enum dst_sel : 4;
|
||||
uint32_t reserved2 : 4;
|
||||
CE_WRITE_DATA_addr_incr_enum addr_incr : 1;
|
||||
uint32_t reserved3 : 2;
|
||||
uint32_t resume_vf : 1;
|
||||
CE_WRITE_DATA_wr_confirm_enum wr_confirm : 1;
|
||||
uint32_t reserved4 : 4;
|
||||
CE_WRITE_DATA_cache_policy_enum cache_policy : 2;
|
||||
uint32_t reserved5 : 3;
|
||||
CE_WRITE_DATA_engine_sel_enum engine_sel : 2;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
// Ordinal 3: two alternative views of the destination dword.
//   bitfields3a - 18-bit dst_mmreg_addr, then 14 reserved bits
//   bitfields3b - 2 reserved bits, then a 30-bit dst_mem_addr_lo
// NOTE(review): presumably the view is selected by dst_sel (register vs.
// memory destination) - confirm against the packet specification.
union {
|
||||
struct {
|
||||
uint32_t dst_mmreg_addr : 18;
|
||||
uint32_t reserved6 : 14;
|
||||
} bitfields3a;
|
||||
struct {
|
||||
uint32_t reserved7 : 2;
|
||||
uint32_t dst_mem_addr_lo : 30;
|
||||
} bitfields3b;
|
||||
uint32_t ordinal3;
|
||||
};
|
||||
|
||||
// Ordinal 4: full-width dword, the _hi counterpart (by name) of dst_mem_addr_lo.
uint32_t dst_mem_addr_hi;
|
||||
|
||||
// Payload placeholder: intentionally left commented out, so sizeof() covers
// only the four dwords above.
// uint32_t data[]; // N-DWords
|
||||
|
||||
} PM4CE_WRITE_DATA, *PPM4CE_WRITE_DATA;
|
||||
#endif
|
||||
|
||||
} // gfx9
|
||||
} // pm4_profile
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,267 +0,0 @@
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
//
|
||||
// f32_aql_mec_packets.h
|
||||
//
|
||||
// Trade secret of Advanced Micro Devices, Inc.
|
||||
// Copyright 2010, Advanced Micro Devices, Inc., (unpublished)
|
||||
//
|
||||
// All rights reserved. This notice is intended as a precaution against
|
||||
// inadvertent publication and does not imply publication or any waiver
|
||||
// of confidentiality. The year included in the foregoing notice is the
|
||||
// year of creation of the work.
|
||||
//
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
#ifndef F32_MEC_AQL_PACKETS_H
|
||||
#define F32_MEC_AQL_PACKETS_H
|
||||
|
||||
namespace pm4_profile {
|
||||
namespace gfx9 {
|
||||
|
||||
//--------------------MEC_AQL_DISPATCH--------------------
|
||||
|
||||
#ifndef AQL_MEC_AQL_DISPATCH_DEFINED
|
||||
#define AQL_MEC_AQL_DISPATCH_DEFINED
|
||||
|
||||
// MEC AQL dispatch packet layout: eight 32-bit dwords followed by four 64-bit
// fields. The members sum to 64 bytes, and the first 64-bit member starts at
// byte 32, so no alignment padding is needed before it.
typedef struct AQL_MEC_AQL_DISPATCH {
|
||||
// Ordinal 1: 16-bit header, 2-bit dimensions, 14 reserved bits.
union {
|
||||
struct {
|
||||
uint32_t header : 16;
|
||||
uint32_t dimensions : 2;
|
||||
uint32_t reserved1 : 14;
|
||||
} bitfields1;
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
// Ordinal 2: work-group size in X and Y, 16 bits each.
union {
|
||||
struct {
|
||||
uint32_t workgroupsizex : 16;
|
||||
uint32_t workgroupsizey : 16;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
// Ordinal 3: work-group size in Z (16 bits) plus 16 reserved bits.
union {
|
||||
struct {
|
||||
uint32_t workgroupsizez : 16;
|
||||
uint32_t reserved2 : 16;
|
||||
} bitfields3;
|
||||
uint32_t ordinal3;
|
||||
};
|
||||
|
||||
// Ordinals 4-8: full-width 32-bit values.
uint32_t gridsizex;
|
||||
|
||||
uint32_t gridsizey;
|
||||
|
||||
uint32_t gridsizez;
|
||||
|
||||
uint32_t privatesegmentsizebytes;
|
||||
|
||||
uint32_t groupsegmentsizebytes;
|
||||
|
||||
// Remaining fields are 64 bits wide.
uint64_t kernelobjectaddress;
|
||||
|
||||
uint64_t kernargaddress;
|
||||
|
||||
uint64_t reserved3;
|
||||
|
||||
uint64_t completionsignal;
|
||||
|
||||
} AQLMEC_AQL_DISPATCH, *PAQLMEC_AQL_DISPATCH;
|
||||
#endif
|
||||
|
||||
//--------------------MEC_AQL_BARRIER--------------------
|
||||
|
||||
#ifndef AQL_MEC_AQL_BARRIER_DEFINED
|
||||
#define AQL_MEC_AQL_BARRIER_DEFINED
|
||||
|
||||
// MEC AQL barrier packet layout: two 32-bit dwords followed by seven 64-bit
// fields (members sum to 64 bytes).
typedef struct AQL_MEC_AQL_BARRIER {
|
||||
// Ordinal 1: 16-bit header and 16-bit type.
union {
|
||||
struct {
|
||||
uint32_t header : 16;
|
||||
uint32_t type : 16;
|
||||
} bitfields1;
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
// Ordinal 2: 16-bit polltime plus 16 reserved bits.
union {
|
||||
struct {
|
||||
uint32_t polltime : 16;
|
||||
uint32_t reserved1 : 16;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
// Five 64-bit barrier slots.
// NOTE(review): presumably dependency-signal handles - confirm against the AQL
// barrier packet definition.
uint64_t barrierfield0;
|
||||
|
||||
uint64_t barrierfield1;
|
||||
|
||||
uint64_t barrierfield2;
|
||||
|
||||
uint64_t barrierfield3;
|
||||
|
||||
uint64_t barrierfield4;
|
||||
|
||||
uint64_t reserved2;
|
||||
|
||||
uint64_t completionsignal;
|
||||
|
||||
} AQLMEC_AQL_BARRIER, *PAQLMEC_AQL_BARRIER;
|
||||
#endif
|
||||
|
||||
//--------------------MEC_AQL_CALL--------------------
|
||||
|
||||
#ifndef AQL_MEC_AQL_CALL_DEFINED
|
||||
#define AQL_MEC_AQL_CALL_DEFINED
|
||||
|
||||
// MEC AQL call packet layout: two 32-bit dwords followed by eight 64-bit
// fields. The member list is identical to AQL_MEC_AQL_JUMP except that `type`
// is a plain uint32_t bit-field here.
// NOTE(review): the members sum to 72 bytes (2 * 4 + 8 * 8), whereas
// AQL_MEC_AQL_DISPATCH and AQL_MEC_AQL_BARRIER in this header sum to 64 -
// confirm the intended packet size.
typedef struct AQL_MEC_AQL_CALL {
|
||||
// Ordinal 1: 16-bit header and 16-bit type.
union {
|
||||
struct {
|
||||
uint32_t header : 16;
|
||||
uint32_t type : 16;
|
||||
} bitfields1;
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
// Ordinal 2: reserved dword; it also brings the following 64-bit members to
// an 8-byte offset.
uint32_t reserved1;
|
||||
|
||||
uint64_t returnlocation;
|
||||
|
||||
// Compare triple: address, mask and reference value, each 64 bits wide.
uint64_t compareaddress;
|
||||
|
||||
uint64_t comparemask;
|
||||
|
||||
uint64_t compareref;
|
||||
|
||||
uint64_t ibbase;
|
||||
|
||||
uint64_t ibsize;
|
||||
|
||||
uint64_t reserved2;
|
||||
|
||||
uint64_t completionsignal;
|
||||
|
||||
} AQLMEC_AQL_CALL, *PAQLMEC_AQL_CALL;
|
||||
#endif
|
||||
|
||||
//--------------------MEC_AQL_DMA--------------------
|
||||
|
||||
#ifndef AQL_MEC_AQL_DMA_DEFINED
|
||||
#define AQL_MEC_AQL_DMA_DEFINED
|
||||
|
||||
// MEC AQL DMA packet layout: header dword, reserved dword, then a mix of
// 32-bit and 64-bit fields.
// NOTE(review): stateobjaddress is a lone 32-bit member placed between 64-bit
// members. The declared members sum to 60 bytes; with natural 8-byte alignment
// of uint64_t a compiler would insert 4 bytes of padding after it (64 bytes
// total). Confirm that this implicit padding matches the intended packet layout.
typedef struct AQL_MEC_AQL_DMA {
|
||||
// Ordinal 1: 16-bit header and 16-bit type.
union {
|
||||
struct {
|
||||
uint32_t header : 16;
|
||||
uint32_t type : 16;
|
||||
} bitfields1;
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
uint32_t reserved1;
|
||||
|
||||
uint64_t returnlocation;
|
||||
|
||||
// 32-bit, unlike the other address fields in this struct.
uint32_t stateobjaddress;
|
||||
|
||||
uint64_t sourceaddress;
|
||||
|
||||
uint64_t destaddress;
|
||||
|
||||
uint64_t size;
|
||||
|
||||
uint64_t reserved2;
|
||||
|
||||
uint64_t completionsignal;
|
||||
|
||||
} AQLMEC_AQL_DMA, *PAQLMEC_AQL_DMA;
|
||||
#endif
|
||||
|
||||
//--------------------MEC_AQL_DRAW--------------------
|
||||
|
||||
#ifndef AQL_MEC_AQL_DRAW_DEFINED
|
||||
#define AQL_MEC_AQL_DRAW_DEFINED
|
||||
|
||||
// MEC AQL draw packet layout: thirteen 32-bit dwords (52 bytes). Every member,
// including the address and signal fields, is 32 bits wide in this struct.
// NOTE(review): kernelobjectaddress, kernargaddress and completionsignal are
// uint64_t in AQL_MEC_AQL_DISPATCH but uint32_t here - confirm this is intended.
typedef struct AQL_MEC_AQL_DRAW {
|
||||
// Ordinal 1: 16-bit header and 16-bit type.
union {
|
||||
struct {
|
||||
uint32_t header : 16;
|
||||
uint32_t type : 16;
|
||||
} bitfields1;
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
// Ordinals 2-4: full-width 32-bit values.
uint32_t maxsize;
|
||||
|
||||
uint32_t indexbase;
|
||||
|
||||
uint32_t indexcount;
|
||||
|
||||
// Ordinal 5: 16-bit indextype and 16-bit primtype.
union {
|
||||
struct {
|
||||
uint32_t indextype : 16;
|
||||
uint32_t primtype : 16;
|
||||
} bitfields5;
|
||||
uint32_t ordinal5;
|
||||
};
|
||||
|
||||
// Ordinals 6-13: full-width 32-bit values.
uint32_t numinstances;
|
||||
|
||||
uint32_t privatesegmentsizebytes;
|
||||
|
||||
uint32_t groupsegmentsizebytes;
|
||||
|
||||
uint32_t kernelobjectaddress;
|
||||
|
||||
uint32_t kernargaddress;
|
||||
|
||||
uint32_t reserved1;
|
||||
|
||||
uint32_t reserved2;
|
||||
|
||||
uint32_t completionsignal;
|
||||
|
||||
} AQLMEC_AQL_DRAW, *PAQLMEC_AQL_DRAW;
|
||||
#endif
|
||||
|
||||
//--------------------MEC_AQL_JUMP--------------------
|
||||
|
||||
#ifndef AQL_MEC_AQL_JUMP_DEFINED
|
||||
#define AQL_MEC_AQL_JUMP_DEFINED
|
||||
// Value for the 16-bit type field of AQL_MEC_AQL_JUMP; 0 is the only
// enumerator defined.
enum MEC_AQL_JUMP_type_enum { type__mec_aql_jump__cond_jump_to_queue_index = 0 };
|
||||
|
||||
|
||||
// MEC AQL jump packet layout: two 32-bit dwords followed by eight 64-bit
// fields. The member list is identical to AQL_MEC_AQL_CALL except that `type`
// is typed with MEC_AQL_JUMP_type_enum.
// NOTE(review): the members sum to 72 bytes (2 * 4 + 8 * 8), whereas
// AQL_MEC_AQL_DISPATCH and AQL_MEC_AQL_BARRIER in this header sum to 64 -
// confirm the intended packet size.
typedef struct AQL_MEC_AQL_JUMP {
|
||||
// Ordinal 1: 16-bit header and 16-bit enum-typed type.
union {
|
||||
struct {
|
||||
uint32_t header : 16;
|
||||
MEC_AQL_JUMP_type_enum type : 16;
|
||||
} bitfields1;
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
// Ordinal 2: reserved dword; it also brings the following 64-bit members to
// an 8-byte offset.
uint32_t reserved1;
|
||||
|
||||
uint64_t returnlocation;
|
||||
|
||||
// Compare triple: address, mask and reference value, each 64 bits wide.
uint64_t compareaddress;
|
||||
|
||||
uint64_t comparemask;
|
||||
|
||||
uint64_t compareref;
|
||||
|
||||
uint64_t ibbase;
|
||||
|
||||
uint64_t ibsize;
|
||||
|
||||
uint64_t reserved2;
|
||||
|
||||
uint64_t completionsignal;
|
||||
|
||||
} AQLMEC_AQL_JUMP, *PAQLMEC_AQL_JUMP;
|
||||
#endif
|
||||
|
||||
} // gfx9
|
||||
} // pm4_profile
|
||||
|
||||
#endif
|
||||
@@ -1,682 +0,0 @@
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
//
|
||||
// f32_mec_cmn_structs.h
|
||||
//
|
||||
// Trade secret of Advanced Micro Devices, Inc.
|
||||
// Copyright 2010, Advanced Micro Devices, Inc., (unpublished)
|
||||
//
|
||||
// All rights reserved. This notice is intended as a precaution against
|
||||
// inadvertent publication and does not imply publication or any waiver
|
||||
// of confidentiality. The year included in the foregoing notice is the
|
||||
// year of creation of the work.
|
||||
//
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
#ifndef F32_MEC_CMN_STRUCTS_H
|
||||
#define F32_MEC_CMN_STRUCTS_H
|
||||
|
||||
namespace pm4_profile {
|
||||
namespace gfx9 {
|
||||
|
||||
//--------------------MEC_SDMA_128DW_MQD--------------------
|
||||
|
||||
#ifndef STR_MEC_SDMA_128DW_MQD_DEFINED
|
||||
#define STR_MEC_SDMA_128DW_MQD_DEFINED
|
||||
|
||||
// SDMA 128-dword MQD structure: 128 consecutive uint32_t members (512 bytes).
// The trailing comment on each member gives its dword index in decimal and hex.
// Dwords 0-21 are named sdmax_rlcx_* fields; dwords 22-127 are reserved.
typedef struct STR_MEC_SDMA_128DW_MQD {
|
||||
uint32_t sdmax_rlcx_rb_cntl; // offset: 0 (0x0)
|
||||
uint32_t sdmax_rlcx_rb_base; // offset: 1 (0x1)
|
||||
uint32_t sdmax_rlcx_rb_base_hi; // offset: 2 (0x2)
|
||||
uint32_t sdmax_rlcx_rb_rptr; // offset: 3 (0x3)
|
||||
uint32_t sdmax_rlcx_rb_wptr; // offset: 4 (0x4)
|
||||
uint32_t sdmax_rlcx_rb_wptr_poll_cntl; // offset: 5 (0x5)
|
||||
// Note: for the wptr-poll and rptr addresses the _hi dword is listed before
// the _lo dword, unlike ib_base further down (lo, then hi).
uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi; // offset: 6 (0x6)
|
||||
uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo; // offset: 7 (0x7)
|
||||
uint32_t sdmax_rlcx_rb_rptr_addr_hi; // offset: 8 (0x8)
|
||||
uint32_t sdmax_rlcx_rb_rptr_addr_lo; // offset: 9 (0x9)
|
||||
uint32_t sdmax_rlcx_ib_cntl; // offset: 10 (0xa)
|
||||
uint32_t sdmax_rlcx_ib_rptr; // offset: 11 (0xb)
|
||||
uint32_t sdmax_rlcx_ib_offset; // offset: 12 (0xc)
|
||||
uint32_t sdmax_rlcx_ib_base_lo; // offset: 13 (0xd)
|
||||
uint32_t sdmax_rlcx_ib_base_hi; // offset: 14 (0xe)
|
||||
uint32_t sdmax_rlcx_ib_size; // offset: 15 (0xf)
|
||||
uint32_t sdmax_rlcx_skip_cntl; // offset: 16 (0x10)
|
||||
uint32_t sdmax_rlcx_context_status; // offset: 17 (0x11)
|
||||
uint32_t sdmax_rlcx_doorbell; // offset: 18 (0x12)
|
||||
uint32_t sdmax_rlcx_virtual_addr; // offset: 19 (0x13)
|
||||
uint32_t sdmax_rlcx_ape1_cntl; // offset: 20 (0x14)
|
||||
uint32_t sdmax_rlcx_doorbell_log; // offset: 21 (0x15)
|
||||
// Dwords 22-127: reserved placeholders that keep the struct at 128 dwords.
uint32_t reserved_22; // offset: 22 (0x16)
|
||||
uint32_t reserved_23; // offset: 23 (0x17)
|
||||
uint32_t reserved_24; // offset: 24 (0x18)
|
||||
uint32_t reserved_25; // offset: 25 (0x19)
|
||||
uint32_t reserved_26; // offset: 26 (0x1A)
|
||||
uint32_t reserved_27; // offset: 27 (0x1B)
|
||||
uint32_t reserved_28; // offset: 28 (0x1C)
|
||||
uint32_t reserved_29; // offset: 29 (0x1D)
|
||||
uint32_t reserved_30; // offset: 30 (0x1E)
|
||||
uint32_t reserved_31; // offset: 31 (0x1F)
|
||||
uint32_t reserved_32; // offset: 32 (0x20)
|
||||
uint32_t reserved_33; // offset: 33 (0x21)
|
||||
uint32_t reserved_34; // offset: 34 (0x22)
|
||||
uint32_t reserved_35; // offset: 35 (0x23)
|
||||
uint32_t reserved_36; // offset: 36 (0x24)
|
||||
uint32_t reserved_37; // offset: 37 (0x25)
|
||||
uint32_t reserved_38; // offset: 38 (0x26)
|
||||
uint32_t reserved_39; // offset: 39 (0x27)
|
||||
uint32_t reserved_40; // offset: 40 (0x28)
|
||||
uint32_t reserved_41; // offset: 41 (0x29)
|
||||
uint32_t reserved_42; // offset: 42 (0x2A)
|
||||
uint32_t reserved_43; // offset: 43 (0x2B)
|
||||
uint32_t reserved_44; // offset: 44 (0x2C)
|
||||
uint32_t reserved_45; // offset: 45 (0x2D)
|
||||
uint32_t reserved_46; // offset: 46 (0x2E)
|
||||
uint32_t reserved_47; // offset: 47 (0x2F)
|
||||
uint32_t reserved_48; // offset: 48 (0x30)
|
||||
uint32_t reserved_49; // offset: 49 (0x31)
|
||||
uint32_t reserved_50; // offset: 50 (0x32)
|
||||
uint32_t reserved_51; // offset: 51 (0x33)
|
||||
uint32_t reserved_52; // offset: 52 (0x34)
|
||||
uint32_t reserved_53; // offset: 53 (0x35)
|
||||
uint32_t reserved_54; // offset: 54 (0x36)
|
||||
uint32_t reserved_55; // offset: 55 (0x37)
|
||||
uint32_t reserved_56; // offset: 56 (0x38)
|
||||
uint32_t reserved_57; // offset: 57 (0x39)
|
||||
uint32_t reserved_58; // offset: 58 (0x3A)
|
||||
uint32_t reserved_59; // offset: 59 (0x3B)
|
||||
uint32_t reserved_60; // offset: 60 (0x3C)
|
||||
uint32_t reserved_61; // offset: 61 (0x3D)
|
||||
uint32_t reserved_62; // offset: 62 (0x3E)
|
||||
uint32_t reserved_63; // offset: 63 (0x3F)
|
||||
uint32_t reserved_64; // offset: 64 (0x40)
|
||||
uint32_t reserved_65; // offset: 65 (0x41)
|
||||
uint32_t reserved_66; // offset: 66 (0x42)
|
||||
uint32_t reserved_67; // offset: 67 (0x43)
|
||||
uint32_t reserved_68; // offset: 68 (0x44)
|
||||
uint32_t reserved_69; // offset: 69 (0x45)
|
||||
uint32_t reserved_70; // offset: 70 (0x46)
|
||||
uint32_t reserved_71; // offset: 71 (0x47)
|
||||
uint32_t reserved_72; // offset: 72 (0x48)
|
||||
uint32_t reserved_73; // offset: 73 (0x49)
|
||||
uint32_t reserved_74; // offset: 74 (0x4A)
|
||||
uint32_t reserved_75; // offset: 75 (0x4B)
|
||||
uint32_t reserved_76; // offset: 76 (0x4C)
|
||||
uint32_t reserved_77; // offset: 77 (0x4D)
|
||||
uint32_t reserved_78; // offset: 78 (0x4E)
|
||||
uint32_t reserved_79; // offset: 79 (0x4F)
|
||||
uint32_t reserved_80; // offset: 80 (0x50)
|
||||
uint32_t reserved_81; // offset: 81 (0x51)
|
||||
uint32_t reserved_82; // offset: 82 (0x52)
|
||||
uint32_t reserved_83; // offset: 83 (0x53)
|
||||
uint32_t reserved_84; // offset: 84 (0x54)
|
||||
uint32_t reserved_85; // offset: 85 (0x55)
|
||||
uint32_t reserved_86; // offset: 86 (0x56)
|
||||
uint32_t reserved_87; // offset: 87 (0x57)
|
||||
uint32_t reserved_88; // offset: 88 (0x58)
|
||||
uint32_t reserved_89; // offset: 89 (0x59)
|
||||
uint32_t reserved_90; // offset: 90 (0x5A)
|
||||
uint32_t reserved_91; // offset: 91 (0x5B)
|
||||
uint32_t reserved_92; // offset: 92 (0x5C)
|
||||
uint32_t reserved_93; // offset: 93 (0x5D)
|
||||
uint32_t reserved_94; // offset: 94 (0x5E)
|
||||
uint32_t reserved_95; // offset: 95 (0x5F)
|
||||
uint32_t reserved_96; // offset: 96 (0x60)
|
||||
uint32_t reserved_97; // offset: 97 (0x61)
|
||||
uint32_t reserved_98; // offset: 98 (0x62)
|
||||
uint32_t reserved_99; // offset: 99 (0x63)
|
||||
uint32_t reserved_100; // offset: 100 (0x64)
|
||||
uint32_t reserved_101; // offset: 101 (0x65)
|
||||
uint32_t reserved_102; // offset: 102 (0x66)
|
||||
uint32_t reserved_103; // offset: 103 (0x67)
|
||||
uint32_t reserved_104; // offset: 104 (0x68)
|
||||
uint32_t reserved_105; // offset: 105 (0x69)
|
||||
uint32_t reserved_106; // offset: 106 (0x6A)
|
||||
uint32_t reserved_107; // offset: 107 (0x6B)
|
||||
uint32_t reserved_108; // offset: 108 (0x6C)
|
||||
uint32_t reserved_109; // offset: 109 (0x6D)
|
||||
uint32_t reserved_110; // offset: 110 (0x6E)
|
||||
uint32_t reserved_111; // offset: 111 (0x6F)
|
||||
uint32_t reserved_112; // offset: 112 (0x70)
|
||||
uint32_t reserved_113; // offset: 113 (0x71)
|
||||
uint32_t reserved_114; // offset: 114 (0x72)
|
||||
uint32_t reserved_115; // offset: 115 (0x73)
|
||||
uint32_t reserved_116; // offset: 116 (0x74)
|
||||
uint32_t reserved_117; // offset: 117 (0x75)
|
||||
uint32_t reserved_118; // offset: 118 (0x76)
|
||||
uint32_t reserved_119; // offset: 119 (0x77)
|
||||
uint32_t reserved_120; // offset: 120 (0x78)
|
||||
uint32_t reserved_121; // offset: 121 (0x79)
|
||||
uint32_t reserved_122; // offset: 122 (0x7A)
|
||||
uint32_t reserved_123; // offset: 123 (0x7B)
|
||||
uint32_t reserved_124; // offset: 124 (0x7C)
|
||||
uint32_t reserved_125; // offset: 125 (0x7D)
|
||||
uint32_t reserved_126; // offset: 126 (0x7E)
|
||||
uint32_t reserved_127; // offset: 127 (0x7F)
|
||||
} STRMEC_SDMA_128DW_MQD, *PSTRMEC_SDMA_128DW_MQD;
|
||||
#endif
|
||||
|
||||
//--------------------MEC_Compute_512DW_MQD--------------------
|
||||
|
||||
#ifndef STR_MEC_COMPUTE_512DW_MQD_DEFINED
|
||||
#define STR_MEC_COMPUTE_512DW_MQD_DEFINED
|
||||
|
||||
typedef struct STR_MEC_COMPUTE_512DW_MQD {
|
||||
uint32_t header; // offset: 0 (0x0)
|
||||
uint32_t compute_dispatch_initiator; // offset: 1 (0x1)
|
||||
uint32_t compute_dim_x; // offset: 2 (0x2)
|
||||
uint32_t compute_dim_y; // offset: 3 (0x3)
|
||||
uint32_t compute_dim_z; // offset: 4 (0x4)
|
||||
uint32_t compute_start_x; // offset: 5 (0x5)
|
||||
uint32_t compute_start_y; // offset: 6 (0x6)
|
||||
uint32_t compute_start_z; // offset: 7 (0x7)
|
||||
uint32_t compute_num_thread_x; // offset: 8 (0x8)
|
||||
uint32_t compute_num_thread_y; // offset: 9 (0x9)
|
||||
uint32_t compute_num_thread_z; // offset: 10 (0xA)
|
||||
uint32_t compute_pipelinestat_enable; // offset: 11 (0xB)
|
||||
uint32_t compute_perfcount_enable; // offset: 12 (0xC)
|
||||
uint32_t compute_pgm_lo; // offset: 13 (0xD)
|
||||
uint32_t compute_pgm_hi; // offset: 14 (0xE)
|
||||
uint32_t compute_tba_lo; // offset: 15 (0xF)
|
||||
uint32_t compute_tba_hi; // offset: 16 (0x10)
|
||||
uint32_t compute_tma_lo; // offset: 17 (0x11)
|
||||
uint32_t compute_tma_hi; // offset: 18 (0x12)
|
||||
uint32_t compute_pgm_rsrc1; // offset: 19 (0x13)
|
||||
uint32_t compute_pgm_rsrc2; // offset: 20 (0x14)
|
||||
uint32_t compute_vmid; // offset: 21 (0x15)
|
||||
uint32_t compute_resource_limits; // offset: 22 (0x16)
|
||||
uint32_t compute_static_thread_mgmt_se0; // offset: 23 (0x17)
|
||||
uint32_t compute_static_thread_mgmt_se1; // offset: 24 (0x18)
|
||||
uint32_t compute_tmpring_size; // offset: 25 (0x19)
|
||||
uint32_t compute_static_thread_mgmt_se2; // offset: 26 (0x1A)
|
||||
uint32_t compute_static_thread_mgmt_se3; // offset: 27 (0x1B)
|
||||
uint32_t compute_restart_x; // offset: 28 (0x1C)
|
||||
uint32_t compute_restart_y; // offset: 29 (0x1D)
|
||||
uint32_t compute_restart_z; // offset: 30 (0x1E)
|
||||
uint32_t compute_thread_trace_enable; // offset: 31 (0x1F)
|
||||
uint32_t compute_misc_reserved; // offset: 32 (0x20)
|
||||
uint32_t compute_dispatch_id; // offset: 33 (0x21)
|
||||
uint32_t compute_threadgroup_id; // offset: 34 (0x22)
|
||||
uint32_t compute_relaunch; // offset: 35 (0x23)
|
||||
uint32_t compute_wave_restore_addr_lo; // offset: 36 (0x24)
|
||||
uint32_t compute_wave_restore_addr_hi; // offset: 37 (0x25)
|
||||
uint32_t compute_wave_restore_control; // offset: 38 (0x26)
|
||||
uint32_t reserved_39; // offset: 39 (0x27)
|
||||
uint32_t reserved_40; // offset: 40 (0x28)
|
||||
uint32_t reserved_41; // offset: 41 (0x29)
|
||||
uint32_t reserved_42; // offset: 42 (0x2A)
|
||||
uint32_t reserved_43; // offset: 43 (0x2B)
|
||||
uint32_t reserved_44; // offset: 44 (0x2C)
|
||||
uint32_t reserved_45; // offset: 45 (0x2D)
|
||||
uint32_t reserved_46; // offset: 46 (0x2E)
|
||||
uint32_t reserved_47; // offset: 47 (0x2F)
|
||||
uint32_t reserved_48; // offset: 48 (0x30)
|
||||
uint32_t reserved_49; // offset: 49 (0x31)
|
||||
uint32_t reserved_50; // offset: 50 (0x32)
|
||||
uint32_t reserved_51; // offset: 51 (0x33)
|
||||
uint32_t reserved_52; // offset: 52 (0x34)
|
||||
uint32_t reserved_53; // offset: 53 (0x35)
|
||||
uint32_t reserved_54; // offset: 54 (0x36)
|
||||
uint32_t reserved_55; // offset: 55 (0x37)
|
||||
uint32_t reserved_56; // offset: 56 (0x38)
|
||||
uint32_t reserved_57; // offset: 57 (0x39)
|
||||
uint32_t reserved_58; // offset: 58 (0x3A)
|
||||
uint32_t reserved_59; // offset: 59 (0x3B)
|
||||
uint32_t reserved_60; // offset: 60 (0x3C)
|
||||
uint32_t reserved_61; // offset: 61 (0x3D)
|
||||
uint32_t reserved_62; // offset: 62 (0x3E)
|
||||
uint32_t reserved_63; // offset: 63 (0x3F)
|
||||
uint32_t reserved_64; // offset: 64 (0x40)
|
||||
uint32_t compute_user_data_0; // offset: 65 (0x41)
|
||||
uint32_t compute_user_data_1; // offset: 66 (0x42)
|
||||
uint32_t compute_user_data_2; // offset: 67 (0x43)
|
||||
uint32_t compute_user_data_3; // offset: 68 (0x44)
|
||||
uint32_t compute_user_data_4; // offset: 69 (0x45)
|
||||
uint32_t compute_user_data_5; // offset: 70 (0x46)
|
||||
uint32_t compute_user_data_6; // offset: 71 (0x47)
|
||||
uint32_t compute_user_data_7; // offset: 72 (0x48)
|
||||
uint32_t compute_user_data_8; // offset: 73 (0x49)
|
||||
uint32_t compute_user_data_9; // offset: 74 (0x4A)
|
||||
uint32_t compute_user_data_10; // offset: 75 (0x4B)
|
||||
uint32_t compute_user_data_11; // offset: 76 (0x4C)
|
||||
uint32_t compute_user_data_12; // offset: 77 (0x4D)
|
||||
uint32_t compute_user_data_13; // offset: 78 (0x4E)
|
||||
uint32_t compute_user_data_14; // offset: 79 (0x4F)
|
||||
uint32_t compute_user_data_15; // offset: 80 (0x50)
|
||||
uint32_t cp_compute_csinvoc_count_lo; // offset: 81 (0x51)
|
||||
uint32_t cp_compute_csinvoc_count_hi; // offset: 82 (0x52)
|
||||
uint32_t reserved_83; // offset: 83 (0x53)
|
||||
uint32_t reserved_84; // offset: 84 (0x54)
|
||||
uint32_t reserved_85; // offset: 85 (0x55)
|
||||
uint32_t cp_mqd_query_time_lo; // offset: 86 (0x56)
|
||||
uint32_t cp_mqd_query_time_hi; // offset: 87 (0x57)
|
||||
uint32_t cp_mqd_connect_start_time_lo; // offset: 88 (0x58)
|
||||
uint32_t cp_mqd_connect_start_time_hi; // offset: 89 (0x59)
|
||||
uint32_t cp_mqd_connect_end_time_lo; // offset: 90 (0x5A)
|
||||
uint32_t cp_mqd_connect_end_time_hi; // offset: 91 (0x5B)
|
||||
uint32_t cp_mqd_connect_end_wf_count; // offset: 92 (0x5C)
|
||||
uint32_t cp_mqd_connect_end_pq_rptr; // offset: 93 (0x5D)
|
||||
uint32_t cp_mqd_connect_end_pq_wptr; // offset: 94 (0x5E)
|
||||
uint32_t cp_mqd_connect_end_ib_rptr; // offset: 95 (0x5F)
|
||||
uint32_t cp_mqd_readindex_lo; // offset: 96 (0x60)
|
||||
uint32_t cp_mqd_readindex_hi; // offset: 97 (0x61)
|
||||
uint32_t cp_mqd_save_start_time_lo; // offset: 98 (0x62)
|
||||
uint32_t cp_mqd_save_start_time_hi; // offset: 99 (0x63)
|
||||
uint32_t cp_mqd_save_end_time_lo; // offset: 100 (0x64)
|
||||
uint32_t cp_mqd_save_end_time_hi; // offset: 101 (0x65)
|
||||
uint32_t cp_mqd_restore_start_time_lo; // offset: 102 (0x66)
|
||||
uint32_t cp_mqd_restore_start_time_hi; // offset: 103 (0x67)
|
||||
uint32_t cp_mqd_restore_end_time_lo; // offset: 104 (0x68)
|
||||
uint32_t cp_mqd_restore_end_time_hi; // offset: 105 (0x69)
|
||||
uint32_t disable_queue; // offset: 106 (0x6A)
|
||||
uint32_t reserved_107; // offset: 107 (0x6B)
|
||||
uint32_t gds_cs_ctxsw_cnt0; // offset: 108 (0x6C)
|
||||
uint32_t gds_cs_ctxsw_cnt1; // offset: 109 (0x6D)
|
||||
uint32_t gds_cs_ctxsw_cnt2; // offset: 110 (0x6E)
|
||||
uint32_t gds_cs_ctxsw_cnt3; // offset: 111 (0x6F)
|
||||
uint32_t reserved_112; // offset: 112 (0x70)
|
||||
uint32_t reserved_113; // offset: 113 (0x71)
|
||||
uint32_t cp_pq_exe_status_lo; // offset: 114 (0x72)
|
||||
uint32_t cp_pq_exe_status_hi; // offset: 115 (0x73)
|
||||
uint32_t cp_packet_id_lo; // offset: 116 (0x74)
|
||||
uint32_t cp_packet_id_hi; // offset: 117 (0x75)
|
||||
uint32_t cp_packet_exe_status_lo; // offset: 118 (0x76)
|
||||
uint32_t cp_packet_exe_status_hi; // offset: 119 (0x77)
|
||||
uint32_t gds_save_base_addr_lo; // offset: 120 (0x78)
|
||||
uint32_t gds_save_base_addr_hi; // offset: 121 (0x79)
|
||||
uint32_t gds_save_mask_lo; // offset: 122 (0x7A)
|
||||
uint32_t gds_save_mask_hi; // offset: 123 (0x7B)
|
||||
uint32_t ctx_save_base_addr_lo; // offset: 124 (0x7C)
|
||||
uint32_t ctx_save_base_addr_hi; // offset: 125 (0x7D)
|
||||
uint32_t reserved_126; // offset: 126 (0x7E)
|
||||
uint32_t reserved_127; // offset: 127 (0x7F)
|
||||
uint32_t cp_mqd_base_addr_lo; // offset: 128 (0x80)
|
||||
uint32_t cp_mqd_base_addr_hi; // offset: 129 (0x81)
|
||||
uint32_t cp_hqd_active; // offset: 130 (0x82)
|
||||
uint32_t cp_hqd_vmid; // offset: 131 (0x83)
|
||||
uint32_t cp_hqd_persistent_state; // offset: 132 (0x84)
|
||||
uint32_t cp_hqd_pipe_priority; // offset: 133 (0x85)
|
||||
uint32_t cp_hqd_queue_priority; // offset: 134 (0x86)
|
||||
uint32_t cp_hqd_quantum; // offset: 135 (0x87)
|
||||
uint32_t cp_hqd_pq_base_lo; // offset: 136 (0x88)
|
||||
uint32_t cp_hqd_pq_base_hi; // offset: 137 (0x89)
|
||||
uint32_t cp_hqd_pq_rptr; // offset: 138 (0x8A)
|
||||
uint32_t cp_hqd_pq_rptr_report_addr_lo; // offset: 139 (0x8B)
|
||||
uint32_t cp_hqd_pq_rptr_report_addr_hi; // offset: 140 (0x8C)
|
||||
uint32_t cp_hqd_pq_wptr_poll_addr_lo; // offset: 141 (0x8D)
|
||||
uint32_t cp_hqd_pq_wptr_poll_addr_hi; // offset: 142 (0x8E)
|
||||
uint32_t cp_hqd_pq_doorbell_control; // offset: 143 (0x8F)
|
||||
uint32_t reserved_144; // offset: 144 (0x90)
|
||||
uint32_t cp_hqd_pq_control; // offset: 145 (0x91)
|
||||
uint32_t cp_hqd_ib_base_addr_lo; // offset: 146 (0x92)
|
||||
uint32_t cp_hqd_ib_base_addr_hi; // offset: 147 (0x93)
|
||||
uint32_t cp_hqd_ib_rptr; // offset: 148 (0x94)
|
||||
uint32_t cp_hqd_ib_control; // offset: 149 (0x95)
|
||||
uint32_t cp_hqd_iq_timer; // offset: 150 (0x96)
|
||||
uint32_t cp_hqd_iq_rptr; // offset: 151 (0x97)
|
||||
uint32_t cp_hqd_dequeue_request; // offset: 152 (0x98)
|
||||
uint32_t cp_hqd_dma_offload; // offset: 153 (0x99)
|
||||
uint32_t cp_hqd_sema_cmd; // offset: 154 (0x9A)
|
||||
uint32_t cp_hqd_msg_type; // offset: 155 (0x9B)
|
||||
uint32_t cp_hqd_atomic0_preop_lo; // offset: 156 (0x9C)
|
||||
uint32_t cp_hqd_atomic0_preop_hi; // offset: 157 (0x9D)
|
||||
uint32_t cp_hqd_atomic1_preop_lo; // offset: 158 (0x9E)
|
||||
uint32_t cp_hqd_atomic1_preop_hi; // offset: 159 (0x9F)
|
||||
uint32_t cp_hqd_hq_status0; // offset: 160 (0xA0)
|
||||
uint32_t cp_hqd_hq_control0; // offset: 161 (0xA1)
|
||||
uint32_t cp_mqd_control; // offset: 162 (0xA2)
|
||||
uint32_t cp_hqd_hq_status1; // offset: 163 (0xA3)
|
||||
uint32_t cp_hqd_hq_control1; // offset: 164 (0xA4)
|
||||
uint32_t cp_hqd_eop_base_addr_lo; // offset: 165 (0xA5)
|
||||
uint32_t cp_hqd_eop_base_addr_hi; // offset: 166 (0xA6)
|
||||
uint32_t cp_hqd_eop_control; // offset: 167 (0xA7)
|
||||
uint32_t cp_hqd_eop_rptr; // offset: 168 (0xA8)
|
||||
uint32_t cp_hqd_eop_wptr; // offset: 169 (0xA9)
|
||||
uint32_t cp_hqd_eop_done_events; // offset: 170 (0xAA)
|
||||
uint32_t cp_hqd_ctx_save_base_addr_lo; // offset: 171 (0xAB)
|
||||
uint32_t cp_hqd_ctx_save_base_addr_hi; // offset: 172 (0xAC)
|
||||
uint32_t cp_hqd_ctx_save_control; // offset: 173 (0xAD)
|
||||
uint32_t cp_hqd_cntl_stack_offset; // offset: 174 (0xAE)
|
||||
uint32_t cp_hqd_cntl_stack_size; // offset: 175 (0xAF)
|
||||
uint32_t cp_hqd_wg_state_offset; // offset: 176 (0xB0)
|
||||
uint32_t cp_hqd_ctx_save_size; // offset: 177 (0xB1)
|
||||
uint32_t cp_hqd_gds_resource_state; // offset: 178 (0xB2)
|
||||
uint32_t cp_hqd_error; // offset: 179 (0xB3)
|
||||
uint32_t cp_hqd_eop_wptr_mem; // offset: 180 (0xB4)
|
||||
uint32_t cp_hqd_aql_control; // offset: 181 (0xB5)
|
||||
uint32_t cp_hqd_pq_wptr_lo; // offset: 182 (0xB6)
|
||||
uint32_t cp_hqd_pq_wptr_hi; // offset: 183 (0xB7)
|
||||
uint32_t reserved_184; // offset: 184 (0xB8)
|
||||
uint32_t reserved_185; // offset: 185 (0xB9)
|
||||
uint32_t reserved_186; // offset: 186 (0xBA)
|
||||
uint32_t reserved_187; // offset: 187 (0xBB)
|
||||
uint32_t reserved_188; // offset: 188 (0xBC)
|
||||
uint32_t reserved_189; // offset: 189 (0xBD)
|
||||
uint32_t reserved_190; // offset: 190 (0xBE)
|
||||
uint32_t reserved_191; // offset: 191 (0xBF)
|
||||
uint32_t iqtimer_pkt_header; // offset: 192 (0xC0)
|
||||
uint32_t iqtimer_pkt_dw0; // offset: 193 (0xC1)
|
||||
uint32_t iqtimer_pkt_dw1; // offset: 194 (0xC2)
|
||||
uint32_t iqtimer_pkt_dw2; // offset: 195 (0xC3)
|
||||
uint32_t iqtimer_pkt_dw3; // offset: 196 (0xC4)
|
||||
uint32_t iqtimer_pkt_dw4; // offset: 197 (0xC5)
|
||||
uint32_t iqtimer_pkt_dw5; // offset: 198 (0xC6)
|
||||
uint32_t iqtimer_pkt_dw6; // offset: 199 (0xC7)
|
||||
uint32_t iqtimer_pkt_dw7; // offset: 200 (0xC8)
|
||||
uint32_t iqtimer_pkt_dw8; // offset: 201 (0xC9)
|
||||
uint32_t iqtimer_pkt_dw9; // offset: 202 (0xCA)
|
||||
uint32_t iqtimer_pkt_dw10; // offset: 203 (0xCB)
|
||||
uint32_t iqtimer_pkt_dw11; // offset: 204 (0xCC)
|
||||
uint32_t iqtimer_pkt_dw12; // offset: 205 (0xCD)
|
||||
uint32_t iqtimer_pkt_dw13; // offset: 206 (0xCE)
|
||||
uint32_t iqtimer_pkt_dw14; // offset: 207 (0xCF)
|
||||
uint32_t iqtimer_pkt_dw15; // offset: 208 (0xD0)
|
||||
uint32_t iqtimer_pkt_dw16; // offset: 209 (0xD1)
|
||||
uint32_t iqtimer_pkt_dw17; // offset: 210 (0xD2)
|
||||
uint32_t iqtimer_pkt_dw18; // offset: 211 (0xD3)
|
||||
uint32_t iqtimer_pkt_dw19; // offset: 212 (0xD4)
|
||||
uint32_t iqtimer_pkt_dw20; // offset: 213 (0xD5)
|
||||
uint32_t iqtimer_pkt_dw21; // offset: 214 (0xD6)
|
||||
uint32_t iqtimer_pkt_dw22; // offset: 215 (0xD7)
|
||||
uint32_t iqtimer_pkt_dw23; // offset: 216 (0xD8)
|
||||
uint32_t iqtimer_pkt_dw24; // offset: 217 (0xD9)
|
||||
uint32_t iqtimer_pkt_dw25; // offset: 218 (0xDA)
|
||||
uint32_t iqtimer_pkt_dw26; // offset: 219 (0xDB)
|
||||
uint32_t iqtimer_pkt_dw27; // offset: 220 (0xDC)
|
||||
uint32_t iqtimer_pkt_dw28; // offset: 221 (0xDD)
|
||||
uint32_t iqtimer_pkt_dw29; // offset: 222 (0xDE)
|
||||
uint32_t iqtimer_pkt_dw30; // offset: 223 (0xDF)
|
||||
uint32_t iqtimer_pkt_dw31; // offset: 224 (0xE0)
|
||||
uint32_t reserved_225; // offset: 225 (0xE1)
|
||||
uint32_t reserved_226; // offset: 226 (0xE2)
|
||||
uint32_t reserved_227; // offset: 227 (0xE3)
|
||||
uint32_t set_resources_header; // offset: 228 (0xE4)
|
||||
uint32_t set_resources_dw1; // offset: 229 (0xE5)
|
||||
uint32_t set_resources_dw2; // offset: 230 (0xE6)
|
||||
uint32_t set_resources_dw3; // offset: 231 (0xE7)
|
||||
uint32_t set_resources_dw4; // offset: 232 (0xE8)
|
||||
uint32_t set_resources_dw5; // offset: 233 (0xE9)
|
||||
uint32_t set_resources_dw6; // offset: 234 (0xEA)
|
||||
uint32_t set_resources_dw7; // offset: 235 (0xEB)
|
||||
uint32_t reserved_236; // offset: 236 (0xEC)
|
||||
uint32_t reserved_237; // offset: 237 (0xED)
|
||||
uint32_t reserved_238; // offset: 238 (0xEE)
|
||||
uint32_t reserved_239; // offset: 239 (0xEF)
|
||||
uint32_t queue_doorbell_id0; // offset: 240 (0xF0)
|
||||
uint32_t queue_doorbell_id1; // offset: 241 (0xF1)
|
||||
uint32_t queue_doorbell_id2; // offset: 242 (0xF2)
|
||||
uint32_t queue_doorbell_id3; // offset: 243 (0xF3)
|
||||
uint32_t queue_doorbell_id4; // offset: 244 (0xF4)
|
||||
uint32_t queue_doorbell_id5; // offset: 245 (0xF5)
|
||||
uint32_t queue_doorbell_id6; // offset: 246 (0xF6)
|
||||
uint32_t queue_doorbell_id7; // offset: 247 (0xF7)
|
||||
uint32_t queue_doorbell_id8; // offset: 248 (0xF8)
|
||||
uint32_t queue_doorbell_id9; // offset: 249 (0xF9)
|
||||
uint32_t queue_doorbell_id10; // offset: 250 (0xFA)
|
||||
uint32_t queue_doorbell_id11; // offset: 251 (0xFB)
|
||||
uint32_t queue_doorbell_id12; // offset: 252 (0xFC)
|
||||
uint32_t queue_doorbell_id13; // offset: 253 (0xFD)
|
||||
uint32_t queue_doorbell_id14; // offset: 254 (0xFE)
|
||||
uint32_t queue_doorbell_id15; // offset: 255 (0xFF)
|
||||
uint32_t Reserved_256; // offset: 256 (0x100)
|
||||
uint32_t Reserved_257; // offset: 257 (0x101)
|
||||
uint32_t Reserved_258; // offset: 258 (0x102)
|
||||
uint32_t Reserved_259; // offset: 259 (0x103)
|
||||
uint32_t Reserved_260; // offset: 260 (0x104)
|
||||
uint32_t Reserved_261; // offset: 261 (0x105)
|
||||
uint32_t Reserved_262; // offset: 262 (0x106)
|
||||
uint32_t Reserved_263; // offset: 263 (0x107)
|
||||
uint32_t Reserved_264; // offset: 264 (0x108)
|
||||
uint32_t Reserved_265; // offset: 265 (0x109)
|
||||
uint32_t Reserved_266; // offset: 266 (0x10A)
|
||||
uint32_t Reserved_267; // offset: 267 (0x10B)
|
||||
uint32_t Reserved_268; // offset: 268 (0x10C)
|
||||
uint32_t Reserved_269; // offset: 269 (0x10D)
|
||||
uint32_t Reserved_270; // offset: 270 (0x10E)
|
||||
uint32_t Reserved_271; // offset: 271 (0x10F)
|
||||
uint32_t Reserved_272; // offset: 272 (0x110)
|
||||
uint32_t Reserved_273; // offset: 273 (0x111)
|
||||
uint32_t Reserved_274; // offset: 274 (0x112)
|
||||
uint32_t Reserved_275; // offset: 275 (0x113)
|
||||
uint32_t Reserved_276; // offset: 276 (0x114)
|
||||
uint32_t Reserved_277; // offset: 277 (0x115)
|
||||
uint32_t Reserved_278; // offset: 278 (0x116)
|
||||
uint32_t Reserved_279; // offset: 279 (0x117)
|
||||
uint32_t Reserved_280; // offset: 280 (0x118)
|
||||
uint32_t Reserved_281; // offset: 281 (0x119)
|
||||
uint32_t Reserved_282; // offset: 282 (0x11A)
|
||||
uint32_t Reserved_283; // offset: 283 (0x11B)
|
||||
uint32_t Reserved_284; // offset: 284 (0x11C)
|
||||
uint32_t Reserved_285; // offset: 285 (0x11D)
|
||||
uint32_t Reserved_286; // offset: 286 (0x11E)
|
||||
uint32_t Reserved_287; // offset: 287 (0x11F)
|
||||
uint32_t Reserved_288; // offset: 288 (0x120)
|
||||
uint32_t Reserved_289; // offset: 289 (0x121)
|
||||
uint32_t Reserved_290; // offset: 290 (0x122)
|
||||
uint32_t Reserved_291; // offset: 291 (0x123)
|
||||
uint32_t Reserved_292; // offset: 292 (0x124)
|
||||
uint32_t Reserved_293; // offset: 293 (0x125)
|
||||
uint32_t Reserved_294; // offset: 294 (0x126)
|
||||
uint32_t Reserved_295; // offset: 295 (0x127)
|
||||
uint32_t Reserved_296; // offset: 296 (0x128)
|
||||
uint32_t Reserved_297; // offset: 297 (0x129)
|
||||
uint32_t Reserved_298; // offset: 298 (0x12A)
|
||||
uint32_t Reserved_299; // offset: 299 (0x12B)
|
||||
uint32_t Reserved_300; // offset: 300 (0x12C)
|
||||
uint32_t Reserved_301; // offset: 301 (0x12D)
|
||||
uint32_t Reserved_302; // offset: 302 (0x12E)
|
||||
uint32_t Reserved_303; // offset: 303 (0x12F)
|
||||
uint32_t Reserved_304; // offset: 304 (0x130)
|
||||
uint32_t Reserved_305; // offset: 305 (0x131)
|
||||
uint32_t Reserved_306; // offset: 306 (0x132)
|
||||
uint32_t Reserved_307; // offset: 307 (0x133)
|
||||
uint32_t Reserved_308; // offset: 308 (0x134)
|
||||
uint32_t Reserved_309; // offset: 309 (0x135)
|
||||
uint32_t Reserved_310; // offset: 310 (0x136)
|
||||
uint32_t Reserved_311; // offset: 311 (0x137)
|
||||
uint32_t Reserved_312; // offset: 312 (0x138)
|
||||
uint32_t Reserved_313; // offset: 313 (0x139)
|
||||
uint32_t Reserved_314; // offset: 314 (0x13A)
|
||||
uint32_t Reserved_315; // offset: 315 (0x13B)
|
||||
uint32_t Reserved_316; // offset: 316 (0x13C)
|
||||
uint32_t Reserved_317; // offset: 317 (0x13D)
|
||||
uint32_t Reserved_318; // offset: 318 (0x13E)
|
||||
uint32_t Reserved_319; // offset: 319 (0x13F)
|
||||
uint32_t Reserved_320; // offset: 320 (0x140)
|
||||
uint32_t Reserved_321; // offset: 321 (0x141)
|
||||
uint32_t Reserved_322; // offset: 322 (0x142)
|
||||
uint32_t Reserved_323; // offset: 323 (0x143)
|
||||
uint32_t Reserved_324; // offset: 324 (0x144)
|
||||
uint32_t Reserved_325; // offset: 325 (0x145)
|
||||
uint32_t Reserved_326; // offset: 326 (0x146)
|
||||
uint32_t Reserved_327; // offset: 327 (0x147)
|
||||
uint32_t Reserved_328; // offset: 328 (0x148)
|
||||
uint32_t Reserved_329; // offset: 329 (0x149)
|
||||
uint32_t Reserved_330; // offset: 330 (0x14A)
|
||||
uint32_t Reserved_331; // offset: 331 (0x14B)
|
||||
uint32_t Reserved_332; // offset: 332 (0x14C)
|
||||
uint32_t Reserved_333; // offset: 333 (0x14D)
|
||||
uint32_t Reserved_334; // offset: 334 (0x14E)
|
||||
uint32_t Reserved_335; // offset: 335 (0x14F)
|
||||
uint32_t Reserved_336; // offset: 336 (0x150)
|
||||
uint32_t Reserved_337; // offset: 337 (0x151)
|
||||
uint32_t Reserved_338; // offset: 338 (0x152)
|
||||
uint32_t Reserved_339; // offset: 339 (0x153)
|
||||
uint32_t Reserved_340; // offset: 340 (0x154)
|
||||
uint32_t Reserved_341; // offset: 341 (0x155)
|
||||
uint32_t Reserved_342; // offset: 342 (0x156)
|
||||
uint32_t Reserved_343; // offset: 343 (0x157)
|
||||
uint32_t Reserved_344; // offset: 344 (0x158)
|
||||
uint32_t Reserved_345; // offset: 345 (0x159)
|
||||
uint32_t Reserved_346; // offset: 346 (0x15A)
|
||||
uint32_t Reserved_347; // offset: 347 (0x15B)
|
||||
uint32_t Reserved_348; // offset: 348 (0x15C)
|
||||
uint32_t Reserved_349; // offset: 349 (0x15D)
|
||||
uint32_t Reserved_350; // offset: 350 (0x15E)
|
||||
uint32_t Reserved_351; // offset: 351 (0x15F)
|
||||
uint32_t Reserved_352; // offset: 352 (0x160)
|
||||
uint32_t Reserved_353; // offset: 353 (0x161)
|
||||
uint32_t Reserved_354; // offset: 354 (0x162)
|
||||
uint32_t Reserved_355; // offset: 355 (0x163)
|
||||
uint32_t Reserved_356; // offset: 356 (0x164)
|
||||
uint32_t Reserved_357; // offset: 357 (0x165)
|
||||
uint32_t Reserved_358; // offset: 358 (0x166)
|
||||
uint32_t Reserved_359; // offset: 359 (0x167)
|
||||
uint32_t Reserved_360; // offset: 360 (0x168)
|
||||
uint32_t Reserved_361; // offset: 361 (0x169)
|
||||
uint32_t Reserved_362; // offset: 362 (0x16A)
|
||||
uint32_t Reserved_363; // offset: 363 (0x16B)
|
||||
uint32_t Reserved_364; // offset: 364 (0x16C)
|
||||
uint32_t Reserved_365; // offset: 365 (0x16D)
|
||||
uint32_t Reserved_366; // offset: 366 (0x16E)
|
||||
uint32_t Reserved_367; // offset: 367 (0x16F)
|
||||
uint32_t Reserved_368; // offset: 368 (0x170)
|
||||
uint32_t Reserved_369; // offset: 369 (0x171)
|
||||
uint32_t Reserved_370; // offset: 370 (0x172)
|
||||
uint32_t Reserved_371; // offset: 371 (0x173)
|
||||
uint32_t Reserved_372; // offset: 372 (0x174)
|
||||
uint32_t Reserved_373; // offset: 373 (0x175)
|
||||
uint32_t Reserved_374; // offset: 374 (0x176)
|
||||
uint32_t Reserved_375; // offset: 375 (0x177)
|
||||
uint32_t Reserved_376; // offset: 376 (0x178)
|
||||
uint32_t Reserved_377; // offset: 377 (0x179)
|
||||
uint32_t Reserved_378; // offset: 378 (0x17A)
|
||||
uint32_t Reserved_379; // offset: 379 (0x17B)
|
||||
uint32_t Reserved_380; // offset: 380 (0x17C)
|
||||
uint32_t Reserved_381; // offset: 381 (0x17D)
|
||||
uint32_t Reserved_382; // offset: 382 (0x17E)
|
||||
uint32_t Reserved_383; // offset: 383 (0x17F)
|
||||
uint32_t Reserved_384; // offset: 384 (0x180)
|
||||
uint32_t Reserved_385; // offset: 385 (0x181)
|
||||
uint32_t Reserved_386; // offset: 386 (0x182)
|
||||
uint32_t Reserved_387; // offset: 387 (0x183)
|
||||
uint32_t Reserved_388; // offset: 388 (0x184)
|
||||
uint32_t Reserved_389; // offset: 389 (0x185)
|
||||
uint32_t Reserved_390; // offset: 390 (0x186)
|
||||
uint32_t Reserved_391; // offset: 391 (0x187)
|
||||
uint32_t Reserved_392; // offset: 392 (0x188)
|
||||
uint32_t Reserved_393; // offset: 393 (0x189)
|
||||
uint32_t Reserved_394; // offset: 394 (0x18A)
|
||||
uint32_t Reserved_395; // offset: 395 (0x18B)
|
||||
uint32_t Reserved_396; // offset: 396 (0x18C)
|
||||
uint32_t Reserved_397; // offset: 397 (0x18D)
|
||||
uint32_t Reserved_398; // offset: 398 (0x18E)
|
||||
uint32_t Reserved_399; // offset: 399 (0x18F)
|
||||
uint32_t Reserved_400; // offset: 400 (0x190)
|
||||
uint32_t Reserved_401; // offset: 401 (0x191)
|
||||
uint32_t Reserved_402; // offset: 402 (0x192)
|
||||
uint32_t Reserved_403; // offset: 403 (0x193)
|
||||
uint32_t Reserved_404; // offset: 404 (0x194)
|
||||
uint32_t Reserved_405; // offset: 405 (0x195)
|
||||
uint32_t Reserved_406; // offset: 406 (0x196)
|
||||
uint32_t Reserved_407; // offset: 407 (0x197)
|
||||
uint32_t Reserved_408; // offset: 408 (0x198)
|
||||
uint32_t Reserved_409; // offset: 409 (0x199)
|
||||
uint32_t Reserved_410; // offset: 410 (0x19A)
|
||||
uint32_t Reserved_411; // offset: 411 (0x19B)
|
||||
uint32_t Reserved_412; // offset: 412 (0x19C)
|
||||
uint32_t Reserved_413; // offset: 413 (0x19D)
|
||||
uint32_t Reserved_414; // offset: 414 (0x19E)
|
||||
uint32_t Reserved_415; // offset: 415 (0x19F)
|
||||
uint32_t Reserved_416; // offset: 416 (0x1A0)
|
||||
uint32_t Reserved_417; // offset: 417 (0x1A1)
|
||||
uint32_t Reserved_418; // offset: 418 (0x1A2)
|
||||
uint32_t Reserved_419; // offset: 419 (0x1A3)
|
||||
uint32_t Reserved_420; // offset: 420 (0x1A4)
|
||||
uint32_t Reserved_421; // offset: 421 (0x1A5)
|
||||
uint32_t Reserved_422; // offset: 422 (0x1A6)
|
||||
uint32_t Reserved_423; // offset: 423 (0x1A7)
|
||||
uint32_t Reserved_424; // offset: 424 (0x1A8)
|
||||
uint32_t Reserved_425; // offset: 425 (0x1A9)
|
||||
uint32_t Reserved_426; // offset: 426 (0x1AA)
|
||||
uint32_t Reserved_427; // offset: 427 (0x1AB)
|
||||
uint32_t Reserved_428; // offset: 428 (0x1AC)
|
||||
uint32_t Reserved_429; // offset: 429 (0x1AD)
|
||||
uint32_t Reserved_430; // offset: 430 (0x1AE)
|
||||
uint32_t Reserved_431; // offset: 431 (0x1AF)
|
||||
uint32_t Reserved_432; // offset: 432 (0x1B0)
|
||||
uint32_t Reserved_433; // offset: 433 (0x1B1)
|
||||
uint32_t Reserved_434; // offset: 434 (0x1B2)
|
||||
uint32_t Reserved_435; // offset: 435 (0x1B3)
|
||||
uint32_t Reserved_436; // offset: 436 (0x1B4)
|
||||
uint32_t Reserved_437; // offset: 437 (0x1B5)
|
||||
uint32_t Reserved_438; // offset: 438 (0x1B6)
|
||||
uint32_t Reserved_439; // offset: 439 (0x1B7)
|
||||
uint32_t Reserved_440; // offset: 440 (0x1B8)
|
||||
uint32_t Reserved_441; // offset: 441 (0x1B9)
|
||||
uint32_t Reserved_442; // offset: 442 (0x1BA)
|
||||
uint32_t Reserved_443; // offset: 443 (0x1BB)
|
||||
uint32_t Reserved_444; // offset: 444 (0x1BC)
|
||||
uint32_t Reserved_445; // offset: 445 (0x1BD)
|
||||
uint32_t Reserved_446; // offset: 446 (0x1BE)
|
||||
uint32_t Reserved_447; // offset: 447 (0x1BF)
|
||||
uint32_t Reserved_448; // offset: 448 (0x1C0)
|
||||
uint32_t Reserved_449; // offset: 449 (0x1C1)
|
||||
uint32_t Reserved_450; // offset: 450 (0x1C2)
|
||||
uint32_t Reserved_451; // offset: 451 (0x1C3)
|
||||
uint32_t Reserved_452; // offset: 452 (0x1C4)
|
||||
uint32_t Reserved_453; // offset: 453 (0x1C5)
|
||||
uint32_t Reserved_454; // offset: 454 (0x1C6)
|
||||
uint32_t Reserved_455; // offset: 455 (0x1C7)
|
||||
uint32_t Reserved_456; // offset: 456 (0x1C8)
|
||||
uint32_t Reserved_457; // offset: 457 (0x1C9)
|
||||
uint32_t Reserved_458; // offset: 458 (0x1CA)
|
||||
uint32_t Reserved_459; // offset: 459 (0x1CB)
|
||||
uint32_t Reserved_460; // offset: 460 (0x1CC)
|
||||
uint32_t Reserved_461; // offset: 461 (0x1CD)
|
||||
uint32_t Reserved_462; // offset: 462 (0x1CE)
|
||||
uint32_t Reserved_463; // offset: 463 (0x1CF)
|
||||
uint32_t Reserved_464; // offset: 464 (0x1D0)
|
||||
uint32_t Reserved_465; // offset: 465 (0x1D1)
|
||||
uint32_t Reserved_466; // offset: 466 (0x1D2)
|
||||
uint32_t Reserved_467; // offset: 467 (0x1D3)
|
||||
uint32_t Reserved_468; // offset: 468 (0x1D4)
|
||||
uint32_t Reserved_469; // offset: 469 (0x1D5)
|
||||
uint32_t Reserved_470; // offset: 470 (0x1D6)
|
||||
uint32_t Reserved_471; // offset: 471 (0x1D7)
|
||||
uint32_t Reserved_472; // offset: 472 (0x1D8)
|
||||
uint32_t Reserved_473; // offset: 473 (0x1D9)
|
||||
uint32_t Reserved_474; // offset: 474 (0x1DA)
|
||||
uint32_t Reserved_475; // offset: 475 (0x1DB)
|
||||
uint32_t Reserved_476; // offset: 476 (0x1DC)
|
||||
uint32_t Reserved_477; // offset: 477 (0x1DD)
|
||||
uint32_t Reserved_478; // offset: 478 (0x1DE)
|
||||
uint32_t Reserved_479; // offset: 479 (0x1DF)
|
||||
uint32_t Reserved_480; // offset: 480 (0x1E0)
|
||||
uint32_t Reserved_481; // offset: 481 (0x1E1)
|
||||
uint32_t Reserved_482; // offset: 482 (0x1E2)
|
||||
uint32_t Reserved_483; // offset: 483 (0x1E3)
|
||||
uint32_t Reserved_484; // offset: 484 (0x1E4)
|
||||
uint32_t Reserved_485; // offset: 485 (0x1E5)
|
||||
uint32_t Reserved_486; // offset: 486 (0x1E6)
|
||||
uint32_t Reserved_487; // offset: 487 (0x1E7)
|
||||
uint32_t Reserved_488; // offset: 488 (0x1E8)
|
||||
uint32_t Reserved_489; // offset: 489 (0x1E9)
|
||||
uint32_t Reserved_490; // offset: 490 (0x1EA)
|
||||
uint32_t Reserved_491; // offset: 491 (0x1EB)
|
||||
uint32_t Reserved_492; // offset: 492 (0x1EC)
|
||||
uint32_t Reserved_493; // offset: 493 (0x1ED)
|
||||
uint32_t Reserved_494; // offset: 494 (0x1EE)
|
||||
uint32_t Reserved_495; // offset: 495 (0x1EF)
|
||||
uint32_t Reserved_496; // offset: 496 (0x1F0)
|
||||
uint32_t Reserved_497; // offset: 497 (0x1F1)
|
||||
uint32_t Reserved_498; // offset: 498 (0x1F2)
|
||||
uint32_t Reserved_499; // offset: 499 (0x1F3)
|
||||
uint32_t Reserved_500; // offset: 500 (0x1F4)
|
||||
uint32_t Reserved_501; // offset: 501 (0x1F5)
|
||||
uint32_t Reserved_502; // offset: 502 (0x1F6)
|
||||
uint32_t Reserved_503; // offset: 503 (0x1F7)
|
||||
uint32_t Reserved_504; // offset: 504 (0x1F8)
|
||||
uint32_t Reserved_505; // offset: 505 (0x1F9)
|
||||
uint32_t Reserved_506; // offset: 506 (0x1FA)
|
||||
uint32_t Reserved_507; // offset: 507 (0x1FB)
|
||||
uint32_t Reserved_508; // offset: 508 (0x1FC)
|
||||
uint32_t Reserved_509; // offset: 509 (0x1FD)
|
||||
uint32_t Reserved_510; // offset: 510 (0x1FE)
|
||||
uint32_t Reserved_511; // offset: 511 (0x1FF)
|
||||
} STRMEC_COMPUTE_512DW_MQD, *PSTRMEC_COMPUTE_512DW_MQD;
|
||||
#endif
|
||||
|
||||
} // gfx9
|
||||
} // pm4_profile
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,461 +0,0 @@
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
//
|
||||
// Trade secret of Advanced Micro Devices, Inc.
|
||||
// Copyright 2014, Advanced Micro Devices, Inc., (unpublished)
|
||||
//
|
||||
// All rights reserved. This notice is intended as a precaution against
|
||||
// inadvertent publication and does not imply publication or any waiver
|
||||
// of confidentiality. The year included in the foregoing notice is the
|
||||
// year of creation of the work.
|
||||
//
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
#ifndef F32_MES_PM4_PACKETS_H
|
||||
#define F32_MES_PM4_PACKETS_H
|
||||
|
||||
namespace pm4_profile {
|
||||
namespace gfx9 {
|
||||
|
||||
#ifndef PM4_MES_HEADER_DEFINED
|
||||
#define PM4_MES_HEADER_DEFINED
|
||||
/// Header word of a type-3 PM4 packet.
///
/// The union exposes the same 32-bit word either as named bit-fields or as
/// the raw value `u32All`. The declared widths add up to exactly 32 bits
/// (8 + 8 + 14 + 2). Where each field lands inside `u32All` is decided by the
/// compiler's bit-field allocation rules, not by this declaration.
typedef union PM4_MES_TYPE_3_HEADER {
  struct {
    uint32_t reserved1 : 8;  ///< reserved
    uint32_t opcode : 8;     ///< IT opcode
    uint32_t count : 14;     ///< number of DWORDs - 1 in the information body.
    uint32_t type : 2;       ///< packet identifier. It should be 3 for type 3 packets
  };
  uint32_t u32All;  ///< the whole header as a single 32-bit value
} PM4_MES_TYPE_3_HEADER;
|
||||
#endif // PM4_MES_HEADER_DEFINED
|
||||
|
||||
//--------------------MES_SET_RESOURCES--------------------
|
||||
|
||||
#ifndef PM4_MES_SET_RESOURCES_DEFINED
|
||||
#define PM4_MES_SET_RESOURCES_DEFINED
|
||||
enum MES_SET_RESOURCES_queue_type_enum {
|
||||
queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
|
||||
queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
|
||||
queue_type__mes_set_resources__hsa_debug_interface_queue = 4
|
||||
};
|
||||
|
||||
|
||||
typedef struct PM4_MES_SET_RESOURCES {
|
||||
union {
|
||||
PM4_MES_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t vmid_mask : 16;
|
||||
uint32_t unmap_latency : 8;
|
||||
uint32_t reserved1 : 5;
|
||||
MES_SET_RESOURCES_queue_type_enum queue_type : 3;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
uint32_t queue_mask_lo;
|
||||
|
||||
uint32_t queue_mask_hi;
|
||||
|
||||
uint32_t gws_mask_lo;
|
||||
|
||||
uint32_t gws_mask_hi;
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t oac_mask : 16;
|
||||
uint32_t reserved2 : 16;
|
||||
} bitfields7;
|
||||
uint32_t ordinal7;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t gds_heap_base : 6;
|
||||
uint32_t reserved3 : 5;
|
||||
uint32_t gds_heap_size : 6;
|
||||
uint32_t reserved4 : 15;
|
||||
} bitfields8;
|
||||
uint32_t ordinal8;
|
||||
};
|
||||
|
||||
} PM4MES_SET_RESOURCES, *PPM4MES_SET_RESOURCES;
|
||||
#endif
|
||||
|
||||
//--------------------MES_RUN_LIST--------------------
|
||||
|
||||
#ifndef PM4_MES_RUN_LIST_DEFINED
|
||||
#define PM4_MES_RUN_LIST_DEFINED
|
||||
|
||||
typedef struct PM4_MES_RUN_LIST {
|
||||
union {
|
||||
PM4_MES_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t reserved1 : 2;
|
||||
uint32_t ib_base_lo : 30;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
uint32_t ib_base_hi;
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t ib_size : 20;
|
||||
uint32_t chain : 1;
|
||||
uint32_t offload_polling : 1;
|
||||
uint32_t reserved2 : 1;
|
||||
uint32_t valid : 1;
|
||||
uint32_t process_cnt : 4;
|
||||
uint32_t reserved3 : 4;
|
||||
} bitfields4;
|
||||
uint32_t ordinal4;
|
||||
};
|
||||
|
||||
} PM4MES_RUN_LIST, *PPM4MES_RUN_LIST;
|
||||
#endif
|
||||
|
||||
//--------------------MES_MAP_PROCESS--------------------
|
||||
|
||||
#ifndef PM4_MES_MAP_PROCESS_DEFINED
|
||||
#define PM4_MES_MAP_PROCESS_DEFINED
|
||||
|
||||
typedef struct PM4_MES_MAP_PROCESS {
|
||||
union {
|
||||
PM4_MES_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t pasid : 16;
|
||||
uint32_t reserved1 : 8;
|
||||
uint32_t diq_enable : 1;
|
||||
uint32_t process_quantum : 7;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
uint32_t vm_context_page_table_base_addr_lo32;
|
||||
|
||||
uint32_t vm_context_page_table_base_addr_hi32;
|
||||
|
||||
uint32_t sh_mem_bases;
|
||||
|
||||
uint32_t sh_mem_config;
|
||||
|
||||
uint32_t sq_shader_tba_lo;
|
||||
|
||||
uint32_t sq_shader_tba_hi;
|
||||
|
||||
uint32_t sq_shader_tma_lo;
|
||||
|
||||
uint32_t sq_shader_tma_hi;
|
||||
|
||||
uint32_t reserved2;
|
||||
|
||||
uint32_t gds_addr_lo;
|
||||
|
||||
uint32_t gds_addr_hi;
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t num_gws : 6;
|
||||
uint32_t reserved3 : 1;
|
||||
uint32_t sdma_enable : 1;
|
||||
uint32_t num_oac : 4;
|
||||
uint32_t reserved4 : 4;
|
||||
uint32_t gds_size : 6;
|
||||
uint32_t num_queues : 10;
|
||||
} bitfields14;
|
||||
uint32_t ordinal14;
|
||||
};
|
||||
|
||||
uint32_t completion_signal_lo32;
|
||||
|
||||
uint32_t completion_signal_hi32;
|
||||
|
||||
} PM4MES_MAP_PROCESS, *PPM4MES_MAP_PROCESS;
|
||||
#endif
|
||||
|
||||
//--------------------MES_MAP_PROCESS_VM--------------------
|
||||
|
||||
#ifndef PM4_MES_MAP_PROCESS_VM_DEFINED
|
||||
#define PM4_MES_MAP_PROCESS_VM_DEFINED
|
||||
|
||||
typedef struct PM4_MES_MAP_PROCESS_VM {
|
||||
union {
|
||||
PM4_MES_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
uint32_t reserved1;
|
||||
|
||||
uint32_t vm_context_cntl;
|
||||
|
||||
uint32_t reserved2;
|
||||
|
||||
uint32_t vm_context_page_table_end_addr_lo32;
|
||||
|
||||
uint32_t vm_context_page_table_end_addr_hi32;
|
||||
|
||||
uint32_t vm_context_page_table_start_addr_lo32;
|
||||
|
||||
uint32_t vm_context_page_table_start_addr_hi32;
|
||||
|
||||
uint32_t reserved3;
|
||||
|
||||
uint32_t reserved4;
|
||||
|
||||
uint32_t reserved5;
|
||||
|
||||
uint32_t reserved6;
|
||||
|
||||
uint32_t reserved7;
|
||||
|
||||
uint32_t reserved8;
|
||||
|
||||
uint32_t completion_signal_lo32;
|
||||
|
||||
uint32_t completion_signal_hi32;
|
||||
|
||||
} PM4MES_MAP_PROCESS_VM, *PPM4MES_MAP_PROCESS_VM;
|
||||
#endif
|
||||
|
||||
//--------------------MES_MAP_QUEUES--------------------
|
||||
|
||||
#ifndef PM4_MES_MAP_QUEUES_DEFINED
|
||||
#define PM4_MES_MAP_QUEUES_DEFINED
|
||||
enum MES_MAP_QUEUES_queue_sel_enum {
|
||||
queue_sel__mes_map_queues__map_to_specified_queue_slots = 0,
|
||||
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots = 1
|
||||
};
|
||||
|
||||
enum MES_MAP_QUEUES_queue_type_enum {
|
||||
queue_type__mes_map_queues__normal_compute = 0,
|
||||
queue_type__mes_map_queues__debug_interface_queue = 1,
|
||||
queue_type__mes_map_queues__normal_latency_static_queue = 2,
|
||||
queue_type__mes_map_queues__low_latency_static_queue = 3
|
||||
};
|
||||
|
||||
enum MES_MAP_QUEUES_alloc_format_enum {
|
||||
alloc_format__mes_map_queues__one_per_pipe = 0,
|
||||
alloc_format__mes_map_queues__all_on_one_pipe = 1
|
||||
};
|
||||
|
||||
enum MES_MAP_QUEUES_engine_sel_enum {
|
||||
engine_sel__mes_map_queues__compute = 0,
|
||||
engine_sel__mes_map_queues__sdma0 = 2,
|
||||
engine_sel__mes_map_queues__sdma1 = 3,
|
||||
engine_sel__mes_map_queues__gfx = 4
|
||||
};
|
||||
|
||||
|
||||
typedef struct PM4_MES_MAP_QUEUES {
|
||||
union {
|
||||
PM4_MES_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t reserved1 : 4;
|
||||
MES_MAP_QUEUES_queue_sel_enum queue_sel : 2;
|
||||
uint32_t reserved2 : 2;
|
||||
uint32_t vmid : 4;
|
||||
uint32_t reserved3 : 1;
|
||||
uint32_t queue : 8;
|
||||
MES_MAP_QUEUES_queue_type_enum queue_type : 3;
|
||||
MES_MAP_QUEUES_alloc_format_enum alloc_format : 2;
|
||||
MES_MAP_QUEUES_engine_sel_enum engine_sel : 3;
|
||||
uint32_t num_queues : 3;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t reserved4 : 1;
|
||||
uint32_t check_disable : 1;
|
||||
uint32_t doorbell_offset : 26;
|
||||
uint32_t reserved5 : 4;
|
||||
} bitfields3;
|
||||
uint32_t ordinal3;
|
||||
};
|
||||
|
||||
uint32_t mqd_addr_lo;
|
||||
|
||||
uint32_t mqd_addr_hi;
|
||||
|
||||
uint32_t wptr_addr_lo;
|
||||
|
||||
uint32_t wptr_addr_hi;
|
||||
|
||||
} PM4MES_MAP_QUEUES, *PPM4MES_MAP_QUEUES;
|
||||
#endif
|
||||
|
||||
//--------------------MES_QUERY_STATUS--------------------
|
||||
|
||||
#ifndef PM4_MES_QUERY_STATUS_DEFINED
|
||||
#define PM4_MES_QUERY_STATUS_DEFINED
|
||||
enum MES_QUERY_STATUS_interrupt_sel_enum {
|
||||
interrupt_sel__mes_query_status__completion_status = 0,
|
||||
interrupt_sel__mes_query_status__process_status = 1,
|
||||
interrupt_sel__mes_query_status__queue_status = 2
|
||||
};
|
||||
|
||||
enum MES_QUERY_STATUS_command_enum {
|
||||
command__mes_query_status__interrupt_only = 0,
|
||||
command__mes_query_status__fence_only_immediate = 1,
|
||||
command__mes_query_status__fence_only_after_write_ack = 2,
|
||||
command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
|
||||
};
|
||||
|
||||
enum MES_QUERY_STATUS_engine_sel_enum {
|
||||
engine_sel__mes_query_status__compute = 0,
|
||||
engine_sel__mes_query_status__gfx = 4
|
||||
};
|
||||
|
||||
|
||||
typedef struct PM4_MES_QUERY_STATUS {
|
||||
union {
|
||||
PM4_MES_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t context_id : 28;
|
||||
MES_QUERY_STATUS_interrupt_sel_enum interrupt_sel : 2;
|
||||
MES_QUERY_STATUS_command_enum command : 2;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t pasid : 16;
|
||||
uint32_t reserved1 : 16;
|
||||
} bitfields3a;
|
||||
struct {
|
||||
uint32_t reserved2 : 2;
|
||||
uint32_t doorbell_offset : 26;
|
||||
MES_QUERY_STATUS_engine_sel_enum engine_sel : 3;
|
||||
uint32_t reserved3 : 1;
|
||||
} bitfields3b;
|
||||
uint32_t ordinal3;
|
||||
};
|
||||
|
||||
uint32_t addr_lo;
|
||||
|
||||
uint32_t addr_hi;
|
||||
|
||||
uint32_t data_lo;
|
||||
|
||||
uint32_t data_hi;
|
||||
|
||||
} PM4MES_QUERY_STATUS, *PPM4MES_QUERY_STATUS;
|
||||
#endif
|
||||
|
||||
//--------------------MES_UNMAP_QUEUES--------------------
|
||||
|
||||
#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
|
||||
#define PM4_MES_UNMAP_QUEUES_DEFINED
|
||||
enum MES_UNMAP_QUEUES_action_enum {
|
||||
action__mes_unmap_queues__preempt_queues = 0,
|
||||
action__mes_unmap_queues__reset_queues = 1,
|
||||
action__mes_unmap_queues__disable_process_queues = 2,
|
||||
action__mes_unmap_queues__preempt_queues_no_unmap = 3
|
||||
};
|
||||
|
||||
enum MES_UNMAP_QUEUES_queue_sel_enum {
|
||||
queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
|
||||
queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
|
||||
queue_sel__mes_unmap_queues__unmap_all_queues = 2,
|
||||
queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3
|
||||
};
|
||||
|
||||
enum MES_UNMAP_QUEUES_engine_sel_enum {
|
||||
engine_sel__mes_unmap_queues__compute = 0,
|
||||
engine_sel__mes_unmap_queues__sdma0 = 2,
|
||||
engine_sel__mes_unmap_queues__sdma1 = 3,
|
||||
engine_sel__mes_unmap_queues__gfx = 4
|
||||
};
|
||||
|
||||
|
||||
typedef struct PM4_MES_UNMAP_QUEUES {
|
||||
union {
|
||||
PM4_MES_TYPE_3_HEADER header; /// header
|
||||
uint32_t ordinal1;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
MES_UNMAP_QUEUES_action_enum action : 2;
|
||||
uint32_t reserved1 : 2;
|
||||
MES_UNMAP_QUEUES_queue_sel_enum queue_sel : 2;
|
||||
uint32_t reserved2 : 20;
|
||||
MES_UNMAP_QUEUES_engine_sel_enum engine_sel : 3;
|
||||
uint32_t num_queues : 3;
|
||||
} bitfields2;
|
||||
uint32_t ordinal2;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t pasid : 16;
|
||||
uint32_t reserved3 : 16;
|
||||
} bitfields3a;
|
||||
struct {
|
||||
uint32_t reserved4 : 2;
|
||||
uint32_t doorbell_offset0 : 26;
|
||||
uint32_t reserved5 : 4;
|
||||
} bitfields3b;
|
||||
uint32_t ordinal3;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t reserved6 : 2;
|
||||
uint32_t doorbell_offset1 : 26;
|
||||
uint32_t reserved7 : 4;
|
||||
} bitfields4a;
|
||||
struct {
|
||||
uint32_t rb_wptr : 20;
|
||||
uint32_t reserved8 : 12;
|
||||
} bitfields4b;
|
||||
uint32_t ordinal4;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t reserved9 : 2;
|
||||
uint32_t doorbell_offset2 : 26;
|
||||
uint32_t reserved10 : 4;
|
||||
} bitfields5;
|
||||
uint32_t ordinal5;
|
||||
};
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t reserved11 : 2;
|
||||
uint32_t doorbell_offset3 : 26;
|
||||
uint32_t reserved12 : 4;
|
||||
} bitfields6;
|
||||
uint32_t ordinal6;
|
||||
};
|
||||
|
||||
} PM4MES_UNMAP_QUEUES, *PPM4MES_UNMAP_QUEUES;
|
||||
#endif
|
||||
|
||||
} // gfx9
|
||||
} // pm4_profile
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,149 +0,0 @@
|
||||
//////////////////////////////////////////////////////////////////////////////////
|
||||
// THIS FILE IS AUTO-GENERATED BY PITGEN (vA)
|
||||
// !!!! DO NOT EDIT BY HAND !!!!
|
||||
//////////////////////////////////////////////////////////////////////////////////
|
||||
// Project: 10xx or later
|
||||
// Description:
|
||||
//
|
||||
// PM4 PacketType3 IT_OpCode Definitions
|
||||
// Extracted From ME and PFP F32 Microcode Jump Tables:
|
||||
//
|
||||
//////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Trade secret of ATI Technologies, Inc.
|
||||
// Copyright 1999, ATI Technologies, Inc., (unpublished)
|
||||
//
|
||||
// All rights reserved. This notice is intended as a precaution against
|
||||
// inadvertent publication and does not imply publication or any waiver
|
||||
// of confidentiality. The year included in the foregoing notice is the
|
||||
// year of creation of the work.
|
||||
//////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef PM4_IT_OPCODES_H
|
||||
#define PM4_IT_OPCODES_H
|
||||
|
||||
namespace pm4_profile {
|
||||
namespace gfx9 {
|
||||
|
||||
// typedef enum IT_OpCodeType {
|
||||
/// PM4 type-3 packet opcodes (IT_* values).
///
/// Enumerators keep the order of the generated source, which is not sorted
/// by value: a second run starting at IT_EOP_BUFFER_END restarts at 0x18.
/// Three values are deliberately shared by two names each:
///   0x3F  IT_INDIRECT_BUFFER        / IT_COND_INDIRECT_BUFFER
///   0x8C  IT_DISPATCH_DRAW_PREAMBLE / IT_DISPATCH_DRAW_PREAMBLE_ACE
///   0x8D  IT_DISPATCH_DRAW          / IT_DISPATCH_DRAW_ACE
/// so a `switch` over this enum cannot list both names of a pair.
enum IT_OpCodeType {
  IT_NOP = 0x10,
  IT_SET_BASE = 0x11,
  IT_CLEAR_STATE = 0x12,
  IT_INDEX_BUFFER_SIZE = 0x13,
  IT_DISPATCH_DIRECT = 0x15,
  IT_DISPATCH_INDIRECT = 0x16,
  IT_INDIRECT_BUFFER_END = 0x17,
  IT_INDIRECT_BUFFER_CNST_END = 0x19,
  IT_ATOMIC_GDS = 0x1D,
  IT_ATOMIC_MEM = 0x1E,
  IT_OCCLUSION_QUERY = 0x1F,
  IT_SET_PREDICATION = 0x20,
  IT_REG_RMW = 0x21,
  IT_COND_EXEC = 0x22,
  IT_PRED_EXEC = 0x23,
  IT_DRAW_INDIRECT = 0x24,
  IT_DRAW_INDEX_INDIRECT = 0x25,
  IT_INDEX_BASE = 0x26,
  IT_DRAW_INDEX_2 = 0x27,
  IT_CONTEXT_CONTROL = 0x28,
  IT_INDEX_TYPE = 0x2A,
  IT_DRAW_INDIRECT_MULTI = 0x2C,
  IT_DRAW_INDEX_AUTO = 0x2D,
  IT_NUM_INSTANCES = 0x2F,
  IT_DRAW_INDEX_MULTI_AUTO = 0x30,
  IT_INDIRECT_BUFFER_CNST = 0x33,
  IT_STRMOUT_BUFFER_UPDATE = 0x34,
  IT_DRAW_INDEX_OFFSET_2 = 0x35,
  IT_WRITE_DATA = 0x37,
  IT_DRAW_INDEX_INDIRECT_MULTI = 0x38,
  IT_MEM_SEMAPHORE = 0x39,
  IT_DRAW_INDEX_MULTI_INST = 0x3A,
  IT_WAIT_REG_MEM = 0x3C,
  IT_INDIRECT_BUFFER = 0x3F,
  IT_COND_INDIRECT_BUFFER = 0x3F,  // same value as IT_INDIRECT_BUFFER
  IT_COPY_DATA = 0x40,
  IT_PFP_SYNC_ME = 0x42,
  IT_ME_INITIALIZE = 0x44,
  IT_COND_WRITE = 0x45,
  IT_EVENT_WRITE = 0x46,
  IT_RELEASE_MEM = 0x49,
  IT_PREAMBLE_CNTL = 0x4A,
  IT_DMA_DATA = 0x50,
  IT_CONTEXT_REG_RMW = 0x51,
  IT_GFX_CNTX_UPDATE = 0x52,
  IT_BLK_CNTX_UPDATE = 0x53,
  IT_INCR_UPDT_STATE = 0x55,
  IT_ACQUIRE_MEM = 0x58,
  IT_REWIND = 0x59,
  IT_GEN_PDEPTE = 0x5B,
  IT_PRIME_UTCL2 = 0x5D,
  IT_LOAD_UCONFIG_REG = 0x5E,
  IT_LOAD_SH_REG = 0x5F,
  IT_LOAD_CONFIG_REG = 0x60,
  IT_LOAD_CONTEXT_REG = 0x61,
  IT_SET_CONFIG_REG = 0x68,
  IT_SET_CONTEXT_REG = 0x69,
  IT_SET_CONTEXT_REG_INDEX = 0x6A,
  IT_SET_SH_REG_DI = 0x72,
  IT_SET_SH_REG = 0x76,
  IT_SET_SH_REG_OFFSET = 0x77,
  IT_SET_QUEUE_REG = 0x78,
  IT_SET_UCONFIG_REG = 0x79,
  IT_SET_UCONFIG_REG_INDEX = 0x7A,
  IT_FORWARD_HEADER = 0x7C,
  IT_SCRATCH_RAM_WRITE = 0x7D,
  IT_SCRATCH_RAM_READ = 0x7E,
  IT_LOAD_CONST_RAM = 0x80,
  IT_WRITE_CONST_RAM = 0x81,
  IT_DUMP_CONST_RAM = 0x83,
  IT_INCREMENT_CE_COUNTER = 0x84,
  IT_INCREMENT_DE_COUNTER = 0x85,
  IT_WAIT_ON_CE_COUNTER = 0x86,
  IT_WAIT_ON_DE_COUNTER_DIFF = 0x88,
  IT_SWITCH_BUFFER = 0x8B,
  IT_DISPATCH_DRAW_PREAMBLE = 0x8C,
  IT_DISPATCH_DRAW = 0x8D,
  IT_COND_PREEMPT = 0x8E,
  IT_DRAW_MULTI_PREAMBLE = 0x8F,
  IT_FRAME_CONTROL = 0x90,
  IT_INDEX_ATTRIBUTES_INDIRECT = 0x91,
  IT_WAIT_REG_MEM64 = 0x93,
  IT_GET_LOD_STATS = 0x94,
  IT_COPY_DATA_RB = 0x96,
  IT_DMA_DATA_FILL_MULTI = 0x9A,
  IT_SET_SH_REG_INDEX = 0x9B,
  // Second run of the generated list; values restart below 0x9B here.
  IT_EOP_BUFFER_END = 0x18,
  IT_INTR_BUFFER_END = 0x1A,
  IT_CP_DMA = 0x41,
  IT_SURFACE_SYNC = 0x43,
  IT_EVENT_WRITE_EOP = 0x47,
  IT_EVENT_WRITE_EOS = 0x48,
  IT_INTERRUPT = 0x5A,
  IT_INDIRECT_BUFFER_PASID = 0x5C,
  IT_LOAD_COMPUTE_STATE = 0x62,
  IT_SET_CONTEXT_REG_INDIRECT = 0x73,
  IT_DISPATCH_DRAW_PREAMBLE_ACE = 0x8C,  // same value as IT_DISPATCH_DRAW_PREAMBLE
  IT_DISPATCH_DRAW_ACE = 0x8D,           // same value as IT_DISPATCH_DRAW
  IT_HDP_FLUSH = 0x95,
  IT_SECURE_CONTROL = 0x97,
  IT_INVALIDATE_TLBS = 0x98,
  IT_AQL_PACKET = 0x99,
  IT_SET_RESOURCES = 0xA0,
  IT_MAP_PROCESS = 0xA1,
  IT_MAP_QUEUES = 0xA2,
  IT_UNMAP_QUEUES = 0xA3,
  IT_QUERY_STATUS = 0xA4,
  IT_RUN_LIST = 0xA5,
  IT_MAP_PROCESS_VM = 0xA6
};
|
||||
//} IT_OpCodeType;
|
||||
|
||||
// PM4 packet type codes. These are preprocessor macros, so they are NOT
// scoped by the surrounding namespace and are visible to every translation
// unit that includes this header.
#define PM4_TYPE_0 0
#define PM4_TYPE_2 2
#define PM4_TYPE_3 3
|
||||
|
||||
} // gfx9
|
||||
} // pm4_profile
|
||||
|
||||
#endif // PM4_IT_OPCODES_H
|
||||
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
***************************************************************************************************
|
||||
*
|
||||
* Trade secret of Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2010 Advanced Micro Devices, Inc. (unpublished)
|
||||
*
|
||||
* All rights reserved. This notice is intended as a precaution against inadvertent publication and
|
||||
* does not imply publication or any waiver of confidentiality. The year included in the foregoing
|
||||
* notice is the year of creation of the work.
|
||||
*
|
||||
***************************************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _GFX9_PM4DEFS_H_
|
||||
#define _GFX9_PM4DEFS_H_
|
||||
|
||||
/******************************************************************************
|
||||
*
|
||||
* gfx9_pm4defs.h
|
||||
*
|
||||
* GFX9 PM4 definitions, typedefs, and enumerations.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
// COPY_DATA selector values.
#define COPY_DATA_SEL_REG 0                   ///< Mem-mapped register
#define COPY_DATA_SEL_SRC_SYS_PERF_COUNTER 4  ///< Privileged memory performance counter

// COPY_DATA count selector values.
#define COPY_DATA_SEL_COUNT_1DW 0  ///< Copy 1 word (32 bits)
#define COPY_DATA_SEL_COUNT_2DW 1  ///< Copy 2 words (64 bits)
|
||||
|
||||
#endif // _GFX9_PM4DEFS_H_
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,141 +0,0 @@
|
||||
/*
|
||||
***********************************************************************************************************************
|
||||
*
|
||||
* Trade secret of Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2016, Advanced Micro Devices, Inc., (unpublished)
|
||||
*
|
||||
* All rights reserved. This notice is intended as a precaution against inadvertent publication and
|
||||
*does not imply
|
||||
* publication or any waiver of confidentiality. The year included in the foregoing notice is the
|
||||
*year of creation of
|
||||
* the work.
|
||||
*
|
||||
**********************************************************************************************************************/
|
||||
|
||||
#ifndef _GFX9_UTILS_H_
|
||||
#define _GFX9_UTILS_H_
|
||||
|
||||
namespace pm4_profile {
|
||||
namespace gfx9 {
|
||||
|
||||
/*
|
||||
* PM4 packet helper constants and macros.
|
||||
* Constructed from header file:
|
||||
* core/hw/gfxip/gfx9/chip/gfx9_f32_pfp_pm4_packets_gr.h
|
||||
*/
|
||||
|
||||
// Shift amounts for each field of a type-3 PM4 header:
#define PM4_PREDICATE_SHIFT 0
#define PM4_SHADERTYPE_SHIFT 1
#define PM4_TYPE_SHIFT 30
#define PM4_COUNT_SHIFT 16
#define PM4_OPCODE_SHIFT 8

/*
 * Constructs a PM4 type-3 header and packs it into a uint32_t.
 *
 *   _opc_   : packet opcode, placed at PM4_OPCODE_SHIFT (8 bits wide, since
 *             the count field starts at bit 16).
 *   _count_ : value from which 2 is subtracted before it is placed at
 *             PM4_COUNT_SHIFT (14 bits wide, since the type field starts at
 *             bit 30).
 *
 * All arithmetic is done on unsigned 32-bit values: shifting the signed
 * literal 3 left by 30 overflows int, and shifting a negative (_count_ - 2)
 * is not well defined either. Each field is masked to its width so that an
 * out-of-range argument cannot spill into a neighbouring field.
 */
#define PM4_TYPE3_HDR(_opc_, _count_)                                        \
  ((uint32_t)((3u << PM4_TYPE_SHIFT) |                                       \
              ((((uint32_t)((_count_) - 2)) & 0x3FFFu) << PM4_COUNT_SHIFT) | \
              ((((uint32_t)(_opc_)) & 0xFFu) << PM4_OPCODE_SHIFT)))

// Packet shader types:
#define PM4_SHADER_GRAPHICS 0
#define PM4_SHADER_COMPUTE 1
|
||||
|
||||
// Indices into VGT event type table
#define EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP 0
#define EVENT_WRITE_INDEX_ZPASS_DONE 1
#define EVENT_WRITE_INDEX_SAMPLE_PIPELINESTAT 2
#define EVENT_WRITE_INDEX_SAMPLE_STREAMOUTSTATS 3
#define EVENT_WRITE_INDEX_VS_PS_PARTIAL_FLUSH 4
#define EVENT_WRITE_INDEX_ANY_EOP_TIMESTAMP 5
#define EVENT_WRITE_INDEX_ANY_EOS_TIMESTAMP 6
#define EVENT_WRITE_EOS_INDEX_CSDONE_PSDONE 6
#define EVENT_WRITE_INDEX_CACHE_FLUSH_EVENT 7
#define EVENT_WRITE_INDEX_INVALID 0xffffffff

// Maps an event type id (the array position, shown in hex in each comment)
// to the EVENT_WRITE index value defined above. Reserved ids map to 0.
static const uint8_t EventTypeToIndexTable[] = {
    0,                                        // 0x00 Reserved_0x00
    EVENT_WRITE_INDEX_SAMPLE_STREAMOUTSTATS,  // 0x01 SAMPLE_STREAMOUTSTATS1
    EVENT_WRITE_INDEX_SAMPLE_STREAMOUTSTATS,  // 0x02 SAMPLE_STREAMOUTSTATS2
    EVENT_WRITE_INDEX_SAMPLE_STREAMOUTSTATS,  // 0x03 SAMPLE_STREAMOUTSTATS3
    EVENT_WRITE_INDEX_ANY_EOP_TIMESTAMP,      // 0x04 CACHE_FLUSH_TS
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x05 CONTEXT_DONE
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x06 CACHE_FLUSH
    EVENT_WRITE_INDEX_VS_PS_PARTIAL_FLUSH,    // 0x07 CS_PARTIAL_FLUSH
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x08 VGT_STREAMOUT_SYNC
    0,                                        // 0x09 Reserved_0x09
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x0a VGT_STREAMOUT_RESET
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x0b END_OF_PIPE_INCR_DE
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x0c END_OF_PIPE_IB_END
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x0d RST_PIX_CNT
    0,                                        // 0x0e Reserved_0x0E
    EVENT_WRITE_INDEX_VS_PS_PARTIAL_FLUSH,    // 0x0f VS_PARTIAL_FLUSH
    EVENT_WRITE_INDEX_VS_PS_PARTIAL_FLUSH,    // 0x10 PS_PARTIAL_FLUSH
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x11 FLUSH_HS_OUTPUT
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x12 FLUSH_LS_OUTPUT
    0,                                        // 0x13 Reserved_0x13
    EVENT_WRITE_INDEX_ANY_EOP_TIMESTAMP,      // 0x14 CACHE_FLUSH_AND_INV_TS_EVENT
    EVENT_WRITE_INDEX_ZPASS_DONE,             // 0x15 ZPASS_DONE
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x16 CACHE_FLUSH_AND_INV_EVENT
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x17 PERFCOUNTER_START
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x18 PERFCOUNTER_STOP
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x19 PIPELINESTAT_START
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x1a PIPELINESTAT_STOP
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x1b PERFCOUNTER_SAMPLE
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x1c FLUSH_ES_OUTPUT
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x1d FLUSH_GS_OUTPUT
    EVENT_WRITE_INDEX_SAMPLE_PIPELINESTAT,    // 0x1e SAMPLE_PIPELINESTAT
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x1f SO_VGTSTREAMOUT_FLUSH
    EVENT_WRITE_INDEX_SAMPLE_STREAMOUTSTATS,  // 0x20 SAMPLE_STREAMOUTSTATS
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x21 RESET_VTX_CNT
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x22 BLOCK_CONTEXT_DONE
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x23 CS_CONTEXT_DONE
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x24 VGT_FLUSH
    0,                                        // 0x25 Reserved_0x25
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x26 SQ_NON_EVENT
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x27 SC_SEND_DB_VPZ
    EVENT_WRITE_INDEX_ANY_EOP_TIMESTAMP,      // 0x28 BOTTOM_OF_PIPE_TS
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x29 FLUSH_SX_TS
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x2a DB_CACHE_FLUSH_AND_INV
    EVENT_WRITE_INDEX_ANY_EOP_TIMESTAMP,      // 0x2b FLUSH_AND_INV_DB_DATA_TS
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x2c FLUSH_AND_INV_DB_META
    EVENT_WRITE_INDEX_ANY_EOP_TIMESTAMP,      // 0x2d FLUSH_AND_INV_CB_DATA_TS
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x2e FLUSH_AND_INV_CB_META
    EVENT_WRITE_EOS_INDEX_CSDONE_PSDONE,      // 0x2f CS_DONE
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x30 PS_DONE
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x31 FLUSH_AND_INV_CB_PIXEL_DATA
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x32 SX_CB_RAT_ACK_REQUEST
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x33 THREAD_TRACE_START
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x34 THREAD_TRACE_STOP
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x35 THREAD_TRACE_MARKER
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x36 THREAD_TRACE_FLUSH
    EVENT_WRITE_INDEX_ANY_NON_TIMESTAMP,      // 0x37 THREAD_TRACE_FINISH
};
|
||||
|
||||
/// @brief Encoded element-size selector for a buffer.
///
/// The numeric suffix of each enumerator names the element size it selects;
/// the enumerator value is the encoding.
enum BufElementSize {
  kBufElementSize2 = 0,
  kBufElementSize4 = 1,
  kBufElementSize8 = 2,
  kBufElementSize16 = 3
};
|
||||
|
||||
/// @brief Encoded index-stride selector for a buffer.
///
/// The numeric suffix of each enumerator names the stride it selects; the
/// enumerator value is the encoding.
enum BufIndexStride {
  kBufIndexStride8 = 0,
  kBufIndexStride16 = 1,
  kBufIndexStride32 = 2,
  kBufIndexStride64 = 3
};
|
||||
|
||||
} // gfx9
|
||||
} // pm4_profile
|
||||
|
||||
#endif // _GFX9_UTILS_H_
|
||||
@@ -1,70 +0,0 @@
|
||||
#
# Minimum version of cmake required
#
cmake_minimum_required ( VERSION 3.5.0 )

#
# Verbose makefile output is the default. The entry is cached without FORCE
# so that a value supplied by the user (-DCMAKE_VERBOSE_MAKEFILE=OFF) is
# honoured instead of being overwritten on every configure.
#
set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" )

#
# Set name for the project
# @note: Must come before adding any sub-directories
#
set ( TARGET_NAME "aqlprofile" )
project ( ${TARGET_NAME} )

#
# PROJ_DIR / ROOT_DIR may already be defined by a script that include()s this
# file; fall back to this directory and its parent otherwise.
#
if ( NOT DEFINED PROJ_DIR )
  set ( PROJ_DIR "${CMAKE_CURRENT_SOURCE_DIR}" )
  set ( ROOT_DIR "${PROJ_DIR}/.." )
endif ()

set ( HSA_RUNTIME_DIR "${PROJ_DIR}/../../hsa-runtime" )
set ( API_DIR "${HSA_RUNTIME_DIR}/inc" )
set ( CORE_UTIL_DIR "${HSA_RUNTIME_DIR}/core/util" )

# Directory-scoped on purpose: the targets are created in the sub-directories
# added below and inherit this include path.
include_directories ( "${ROOT_DIR}" )

#
# Validate required build environment is setup correctly
#
include ( "${ROOT_DIR}/cmake_modules/validateBldEnv.cmake" )

#
# Setup tool chain flags - preprocessor, compiler and linker
#
include ( "${ROOT_DIR}/cmake_modules/exportToolFlags.cmake" )

#
# Set Name for Cmdwriter library and build it as a
# static library to be linked with others
#
set ( CMDWRITER_LIB "commandwriter${ONLY64STR}" )
add_subdirectory ( "${PROJ_DIR}/commandwriter" "${PROJECT_BINARY_DIR}/commandwriter" )

#
# Set Name for ThreadTrace library and build it as a
# static library to be linked with others
#
set ( SQTT_LIB "sqtt${ONLY64STR}" )
add_subdirectory ( "${PROJ_DIR}/threadtrace" "${PROJECT_BINARY_DIR}/threadtrace" )

#
# Set Name for Profiler library and build it as a
# static library to be linked with others
#
set ( PMC_LIB "pmc${ONLY64STR}" )
add_subdirectory ( "${PROJ_DIR}/perfcounter" "${PROJECT_BINARY_DIR}/perfcounter" )

#
# Build the library and link it with other static
# libraries that have been built in this regard
#
set ( TARGET_LIB "${TARGET_NAME}${ONLY64STR}" )
add_subdirectory ( "${PROJ_DIR}/core" "${PROJECT_BINARY_DIR}/core" )

#
# Creating the library link
#
# 'cmake -E create_symlink' replaces an existing link, so re-running the
# configure step no longer fails with "File exists" the way 'ln -s' did, and
# no hard-coded /bin/ln or sh is needed. The working directory is pinned to
# the project binary directory because the relative link target
# 'core/lib<name>.so' refers to the 'core' binary sub-directory added above.
#
execute_process (
  COMMAND "${CMAKE_COMMAND}" -E create_symlink
          "core/lib${TARGET_LIB}.so"
          "libhsa-amd-${TARGET_LIB}.so.1"
  WORKING_DIRECTORY "${PROJECT_BINARY_DIR}"
  RESULT_VARIABLE AQLPROFILE_SYMLINK_RESULT )
if ( NOT AQLPROFILE_SYMLINK_RESULT EQUAL 0 )
  # Kept non-fatal: the original command's exit status was never checked.
  message ( WARNING "Could not create link libhsa-amd-${TARGET_LIB}.so.1: ${AQLPROFILE_SYMLINK_RESULT}" )
endif ()
|
||||
@@ -1,10 +0,0 @@
|
||||
#
# Source files for Rocr Cmdwriter
#
set ( CmdWriterSrcs
      gfx8_cmdwriter.cpp
      gfx9_cmdwriter.cpp )

#
# Build Cmdwriter as a Static Library object
#
add_library ( ${CMDWRITER_LIB} STATIC ${CmdWriterSrcs} )
|
||||
@@ -1,498 +0,0 @@
|
||||
// cmdwriter.h
|
||||
// Header file for CommandWriter and CmdBuf interfaces
|
||||
|
||||
#ifndef _CMDWRITER_H_
|
||||
#define _CMDWRITER_H_
|
||||
|
||||
#include <vector>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
/// @brief User defined options for flushing cache.
///
/// Each flag is an independent boolean selector; all of them are false after
/// default construction.
typedef struct FlushCacheOptions_ {
  bool l1, l2;
  bool icache, kcache;
  bool l1_vol, l2_vol, kcache_vol;

  /// Clears every option.
  FlushCacheOptions_()
      : l1(false),
        l2(false),
        icache(false),
        kcache(false),
        l1_vol(false),
        l2_vol(false),
        kcache_vol(false) {}
} FlushCacheOptions;
|
||||
|
||||
/// @brief Abstract byte-stream container for Gpu commands.
///
/// Implementations accumulate Gpu commands that callers hand in as raw
/// bytes and report how many bytes have been collected so far.
///
/// @note: The Api does not require implementations to be thread safe.
/// Users are therefore required to serialize their accesses.
class CmdBuf {
 public:
  /// Virtual destructor so implementations can be destroyed through CmdBuf*.
  virtual ~CmdBuf() {}

  /// @brief Discards every command collected so far, i.e. the number of
  /// bytes in the command stream is reset. Provided so that a command
  /// buffer object can be reused.
  ///
  /// @return bool true if successful, false otherwise.
  virtual bool Reset() = 0;

  /// @brief Appends the given command bytes to the buffer. The contents
  /// are not verified.
  ///
  /// @param cmd Buffer containing one or more instances of Gpu commands
  ///
  /// @param size Size of the Gpu commands in bytes
  ///
  /// @return void
  virtual void AppendCommand(const void* cmd, uint32_t size) = 0;

  /// @brief Returns the total size (in bytes) of the accumulated commands.
  ///
  /// @return size_t Size of Gpu commands in bytes
  virtual size_t Size() const = 0;

 private:
  /// Indexes the command buffer by dwords. Allows accessing constants
  /// in an assembled command buffer. Reachable only through the friend
  /// class CommandWriter.
  virtual uint32_t& operator[](size_t index) = 0;

  friend class CommandWriter;
};
|
||||
|
||||
/// @brief Implements the interface CmdBuf and thus can be used to
|
||||
/// translate various Gpu commands as byte stream.
|
||||
///
|
||||
/// @note: The Api does not require implementations to be thread safe.
|
||||
/// Users are therefore required to be access in a serialized manner.
|
||||
class DefaultCmdBuf : public CmdBuf {
|
||||
public:
|
||||
/// @brief Append the command into the underlying buffer
|
||||
///
|
||||
/// @param cmd Buffer containing one or more instances of Gpu commands
|
||||
///
|
||||
/// @param size Size of Gpu command(s) in bytes
|
||||
///
|
||||
/// @retur void
|
||||
virtual void AppendCommand(const void* cmd, uint32_t size) {
|
||||
memcpy(ReserveCmdbufSpace(size), cmd, size);
|
||||
}
|
||||
|
||||
/// @brief Resets the Gpu command buffer
|
||||
bool Reset() {
|
||||
cmdbuf_.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Size of Gpu commands in bytes in the underlying buffer
|
||||
size_t Size() const { return cmdbuf_.size() * sizeof(StorageType); }
|
||||
|
||||
/// Address of the start of accumulated commands.
|
||||
const void* Base() const { return &cmdbuf_[0]; }
|
||||
|
||||
private:
|
||||
/// @brief Returns reference to the value of Gpu command buffer
|
||||
/// at specified index
|
||||
///
|
||||
/// @param index Specifies the buffer index whose value is needed
|
||||
///
|
||||
/// @return uint32_t & Reference of the value being returned
|
||||
uint32_t& operator[](size_t index) { return cmdbuf_[index]; }
|
||||
|
||||
/// @brief Increase Gpu command buffer by specified size
|
||||
///
|
||||
/// @param size Size in bytes by which command buffer should
|
||||
/// be resized.
|
||||
///
|
||||
/// @return void * Pointer into the buffer where the next
|
||||
/// command can be written
|
||||
void* ReserveCmdbufSpace(std::size_t size) {
|
||||
const size_t len = cmdbuf_.size();
|
||||
cmdbuf_.resize(len + size / sizeof(StorageType));
|
||||
return &cmdbuf_[len];
|
||||
}
|
||||
|
||||
/// @brief Defines Gpu command buffer as a vector of StorageType
|
||||
typedef uint32_t StorageType;
|
||||
std::vector<StorageType> cmdbuf_;
|
||||
};
|
||||
|
||||
/// @brief Specifies the public interface of CommandWriter for use by
|
||||
/// clients to build Gpu command streams.
|
||||
class CommandWriter {
|
||||
public:
|
||||
/// @brief These enums specify the operation to perform in the packet
|
||||
/// generated by BuildAtomicPacket. The commenting for each enum uses
|
||||
/// the arguments to the function BuildAtomicPacket to express the
|
||||
/// resulting operation.
|
||||
enum AtomicType {
|
||||
|
||||
/// *destination = *destination + 1;
|
||||
kAtomicTypeIncrement,
|
||||
|
||||
/// *destination = *destination - 1;
|
||||
kAtomicTypeDecrement,
|
||||
|
||||
/// if (*destination == compare) *destination = value;
|
||||
kAtomicTypeCompareAndSwap,
|
||||
|
||||
/// while (*destination != compare);
|
||||
/// *destination = value;
|
||||
kAtomicTypeBlockingCompareAndSwap,
|
||||
|
||||
/// *destination = *destination + value;
|
||||
kAtomicAdd,
|
||||
|
||||
/// *destination = *destination - value;
|
||||
kAtomicSubtract,
|
||||
|
||||
/// *destination = value;
|
||||
kAtomicSwap
|
||||
};
|
||||
|
||||
/// @brief These enums specify the VGT EVENT TYPE to issue and wait for.
|
||||
/// Command Processor (CP) uses these events to communicate with SPI to
|
||||
/// learn about outstanding waves and determine kernel completion.
|
||||
enum VgtEventType {
|
||||
|
||||
/// Enable Performance Counters
|
||||
kPerfCntrsStart,
|
||||
|
||||
/// Disable Performance Counters
|
||||
kPerfCntrsStop,
|
||||
|
||||
/// Read Performance Counters
|
||||
kPerfCntrsSample,
|
||||
|
||||
/// Enable a Thread Trace session
|
||||
kThrdTraceStart,
|
||||
|
||||
/// Disable a Thread Trace session
|
||||
kThrdTraceStop,
|
||||
|
||||
/// Enable flushing of thread trace buffers
|
||||
kThrdTraceFlush,
|
||||
|
||||
/// Enables resetting of BASE register to its last value
|
||||
/// including flushing of thread trace buffers. This could
|
||||
/// be used to toggle between two buffers so as to allow
|
||||
/// collection of large token data
|
||||
kThrdTraceFinish
|
||||
};
|
||||
|
||||
/// @brief Returns the Dword that encodes a No-Op for the CP
|
||||
///
|
||||
/// @return uint32_t Dword that can be used to populate a Pm4
|
||||
/// command queue.
|
||||
///
|
||||
virtual uint32_t GetNoOpCmd() = 0;
|
||||
|
||||
/// @brief Build an instance of Barrier command and copy it into
|
||||
/// the input commmand buffer
|
||||
///
|
||||
/// @param cmdbuf Pointer to command buffer which is updated with
|
||||
/// an instance of Barrier command.
|
||||
///
|
||||
/// @return void
|
||||
virtual void BuildBarrierCommand(CmdBuf* cmdbuf) = 0;
|
||||
|
||||
/// @brief Builds the Gpu command to reference indirectly a stream
|
||||
/// of other Gpu commands. The launch command is then copied into
|
||||
/// the command buffer parameter.
|
||||
///
|
||||
/// @param cmdBuf command buffer to be appended with launch command
|
||||
///
|
||||
/// @param cmd_addr Address of command buffer carrying command stream
|
||||
///
|
||||
/// @param cmd_size Size of dispatch command stream in bytes
|
||||
///
|
||||
/// @return void
|
||||
virtual void BuildIndirectBufferCmd(CmdBuf* cmdbuf, const void* cmd_addr,
|
||||
std::size_t cmd_size) = 0;
|
||||
|
||||
/// @brief Build a Gpu command that triggers an event whose type
|
||||
/// is specified by input parameter. It then copies it into the input
|
||||
/// command buffer
|
||||
///
|
||||
/// @param cmdbuf Pointer to command buffer to be appended
|
||||
///
|
||||
/// @param event Id of Event to be triggered by Gpu
|
||||
///
|
||||
/// @return void
|
||||
virtual void BuildWriteEventPacket(CmdBuf* cmdbuf, uint32_t event) = 0;
|
||||
|
||||
/// @bried Builds a Gpu command to wait until condition is realized
|
||||
///
|
||||
/// @param cmdbuf command buffer to be appended with launch command
|
||||
///
|
||||
/// @param mem_space if the address is in memory or is a register offset
|
||||
///
|
||||
/// @param wait_addr address to wait on
|
||||
///
|
||||
/// @param func_eq true means equal, false means not-equal
|
||||
///
|
||||
/// @param mask_val Mask to apply on value from addr in comparison
|
||||
///
|
||||
/// @param wait_val value to apply for the func given above
|
||||
virtual void BuildWaitRegMemCommand(CmdBuf* cmdbuf, bool mem_space, uint64_t wait_addr,
|
||||
bool func_eq, uint32_t mask_val, uint32_t wait_val) = 0;
|
||||
|
||||
virtual void BuildUpdateHostAddress(CmdBuf* cmdbuf, uint64_t* addr, int64_t value) = 0;
|
||||
|
||||
/// @brief Build CP command to program a Gpu register
|
||||
///
|
||||
/// @param cmdbuf Pointer to command buffer to be appended
|
||||
/// @param addr Register to be programmed
|
||||
/// @param value Value to write into register
|
||||
///
|
||||
/// @return void
|
||||
virtual void BuildWriteUConfigRegPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value) = 0;
|
||||
|
||||
/// @brief Build and copy WriteShReg command
|
||||
///
|
||||
/// @param cmdbuf Pointer to command buffer to be appended
|
||||
///
|
||||
/// @param addr Offset of the register
|
||||
///
|
||||
/// @param value Value to write into register
|
||||
///
|
||||
/// @return void
|
||||
virtual void BuildWriteShRegPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value) = 0;
|
||||
|
||||
/// @brief Builds a Gpu command to flush Gpu caches and write a
|
||||
/// user defined value at a configurable location that is Gpu
|
||||
/// accessible.
|
||||
///
|
||||
/// @param cmdBuf Command buffer to be appended with bottom of pipe
|
||||
/// notification command
|
||||
///
|
||||
/// @param write_addr Address into which Gpu should write
|
||||
///
|
||||
/// @param write_val Value to write into user provided address
|
||||
///
|
||||
/// @param interrupt True if Gpu should raise an interrupt upon writing
|
||||
/// the user value
|
||||
///
|
||||
/// @return void
|
||||
virtual void BuildBOPNotifyCmd(CmdBuf* cmdbuf, const void* write_addr, uint32_t write_val,
|
||||
bool intrpt) = 0;
|
||||
|
||||
|
||||
/// @brief Build a Gpu command that copies data from a specified
|
||||
/// source to destination
|
||||
///
|
||||
/// @param cmdbuf Pointer to command buffer to be appended
|
||||
///
|
||||
/// @param reg_to_mem flag to indicate if values are being read from a
|
||||
/// Register or a memory location
|
||||
///
|
||||
/// @param src_addr_lo Low 32-bit Source address of the data to read from
|
||||
///
|
||||
/// @param src_addr_hi High 32-bit Source address of the data to read from
|
||||
///
|
||||
/// @param dst_addr Destination address for the data to be written to
|
||||
///
|
||||
/// @param size Size of the data to be written
|
||||
///
|
||||
/// @param wait True if Gpu command should confirm the write operation
|
||||
/// operation has completed successfully
|
||||
///
|
||||
/// @return void
|
||||
///
|
||||
/// @NOTE Change interface to use void* for Src and void* for Dest
|
||||
virtual void BuildCopyDataPacket(CmdBuf* cmdbuf, uint32_t src_sel, uint32_t src_addr_lo,
|
||||
uint32_t src_addr_hi, uint32_t* dst_addr, uint32_t size,
|
||||
bool wait) = 0;
|
||||
|
||||
/// @brief Build and copy a WaitIdle Gpu command into command buffer
|
||||
///
|
||||
/// @param cmdbuf Pointer to command buffer to be appended
|
||||
///
|
||||
/// @return void
|
||||
virtual void BuildWriteWaitIdlePacket(CmdBuf* cmdbuf) = 0;
|
||||
|
||||
// Will issue a VGT event including a cache flush later on
|
||||
virtual void BuildVgtEventPacket(CmdBuf* cmdbuf, uint32_t vgtEvent) = 0;
|
||||
|
||||
/// @brief Build and copy a WriteRegister Gpu command into command buffer
|
||||
///
|
||||
/// @param cmdbuf Pointer to command buffer to be appended
|
||||
///
|
||||
/// @param addr Register into which to write
|
||||
///
|
||||
/// @param value Value to write into register
|
||||
///
|
||||
/// @return void
|
||||
virtual void BuildWriteRegisterPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value) = 0;
|
||||
|
||||
/// @brief Build and copy a Gpu command to query the status of a
|
||||
/// WriteEvent into command buffer
|
||||
///
|
||||
/// @param cmdbuf Pointer to command buffer to be appended
|
||||
///
|
||||
/// @param event Id of Event whose status is to be queried
|
||||
///
|
||||
/// @param addr Address to update the status of WriteEvent operation
|
||||
///
|
||||
/// @return void
|
||||
virtual void BuildWriteEventQueryPacket(CmdBuf* cmdBuf, uint32_t event, uint32_t* addr) = 0;
|
||||
|
||||
/// @brief Builds and copies a Gpu comamnd to peform user specified
|
||||
/// operation atomically. The various atomic operations on integers
|
||||
/// that are supported include: increment, decrement, add, subtract,
|
||||
/// compare-and-swap and swap. The operation to perform is specified
|
||||
/// by the enum AtomicType.
|
||||
///
|
||||
/// @param cmdbuf Pointer to command buffer to be appended
|
||||
///
|
||||
/// @param atomic_op Id of the atomic operation to perform
|
||||
///
|
||||
/// @param addr Pointer to the memory block where atomic operation
|
||||
/// would be performed
|
||||
///
|
||||
/// @param value New value to write if atomic operation can be performed
|
||||
///
|
||||
/// @param compare Value to compare if atomic operation is a compare-and-swap
|
||||
///
|
||||
/// @return void
|
||||
virtual void BuildAtomicPacket(CmdBuf* cmdbuf, AtomicType atomic_op, volatile uint32_t* addr,
|
||||
uint32_t value = 0, uint32_t compare = 0) = 0;
|
||||
|
||||
/// @brief Builds and copies a Gpu comamnd to peform user specified
|
||||
/// operation atomically. The various atomic operations on integers
|
||||
/// that are supported include: increment, decrement, add, subtract,
|
||||
/// compare-and-swap and swap. The operation to perform is specified
|
||||
/// by the enum AtomicType.
|
||||
///
|
||||
/// @param cmdbuf Pointer to command buffer to be appended
|
||||
///
|
||||
/// @param atomic_op Id of the atomic operation to perform
|
||||
///
|
||||
/// @param addr Pointer to the memory block where atomic operation
|
||||
/// would be performed
|
||||
///
|
||||
/// @param value New value to write if atomic operation can be performed
|
||||
///
|
||||
/// @param compare Value to compare if atomic operation is a compare-and-swap
|
||||
///
|
||||
/// @return void
|
||||
virtual void BuildAtomicPacket64(CmdBuf* cmdbuf, AtomicType atomic_op, volatile uint64_t* addr,
|
||||
uint64_t value = 0, uint64_t compare = 0) = 0;
|
||||
|
||||
/// @brief Returns the size of an atomic packet
|
||||
///
|
||||
/// @return size_t Size of atomic packet
|
||||
virtual size_t SizeOfAtomicPacket() const = 0;
|
||||
|
||||
/// @brief Build and copy a Gpu command that will tell command processor
|
||||
/// to conditionally execute or skip the next sequence of packets.
|
||||
///
|
||||
/// @param cmdbuf Pointer to command buffer to be appended
|
||||
///
|
||||
/// @param signal Pointer to an integer that tells the command processor
|
||||
/// whether to skip or execute the next block of packets. If it is set
|
||||
/// to 0 the following packets will be skipped, else it will execute the
|
||||
/// following packets
|
||||
///
|
||||
/// @param count The number of dwords in the following packet stream
|
||||
/// that will be conditionally executed
|
||||
///
|
||||
/// @return void
|
||||
virtual void BuildConditionalExecute(CmdBuf* cmdbuf, uint32_t* signal, uint16_t count) = 0;
|
||||
|
||||
/// @brief Builds a CP command to write user specified value
|
||||
/// at a user specified address. The command is then copied
|
||||
/// into the command buffer for submission to a device queue.
|
||||
///
|
||||
/// @param cmdbuf Pointer to command buffer to be appended
|
||||
///
|
||||
/// @param write_addr Address into which CP will write the user
|
||||
/// specified value
|
||||
///
|
||||
/// @param write_value Value to write into the user specified address
|
||||
///
|
||||
/// @return void
|
||||
virtual void BuildWriteDataCommand(CmdBuf* cmdbuf, uint32_t* write_addr,
|
||||
uint32_t write_value) = 0;
|
||||
|
||||
/// @brief Builds a CP command to write user specified value
|
||||
/// at a user specified address. The command is then copied
|
||||
/// into the command buffer for submission to a device queue.
|
||||
///
|
||||
/// @param cmdbuf Pointer to command buffer to be appended
|
||||
///
|
||||
/// @param write_addr Address into which CP will write the user
|
||||
/// specified value
|
||||
///
|
||||
/// @param write_value Value to write into the user specified address
|
||||
///
|
||||
/// @return void
|
||||
virtual void BuildWriteData64Command(CmdBuf* cmdbuf, uint64_t* write_addr,
|
||||
uint64_t write_value) = 0;
|
||||
|
||||
/// Writes into input buffer Gpu commands to flush its cache. It is
|
||||
/// necessary that the buffer provided for flush commands is large
|
||||
/// enough to accommodate the full set of commands. It should be at
|
||||
/// least 512 bytes.
|
||||
///
|
||||
/// @param tsCmdBuf Buffer to write commands to.
|
||||
/// @param writeAddr Registered address into which GPU should write
|
||||
/// a user provided value upon executing the flush commands.
|
||||
/// @param writeVal User provided value written by GPU at user provided
|
||||
/// address, upon executing the flush commands.
|
||||
///
|
||||
/// @return void
|
||||
virtual void BuildFlushCacheCmd(CmdBuf* cmdbuf, FlushCacheOptions* options, uint32_t* writeAddr,
|
||||
uint32_t writeVal) = 0;
|
||||
|
||||
/// @brief Builds Gpu command to copy data from source to destination
|
||||
/// buffer using DMA engine.
|
||||
///
|
||||
/// @param cmdbuf Buffer updated with Gpu copy command
|
||||
/// @param srcAddr Address of source buffer address
|
||||
/// @param dstAddr Address of destination buffer address
|
||||
/// @param copySize Size of data to copy in bytes
|
||||
/// @param waitForCompletion if command should wait for copying to complete
|
||||
virtual void BuildDmaDataPacket(CmdBuf* cmdbuf, uint32_t* srcAddrLo, uint32_t* dstAddr,
|
||||
uint32_t copySize, bool waitForCompletion) = 0;
|
||||
|
||||
/// @brief Release resources used by CommandWriter
|
||||
virtual ~CommandWriter(){};
|
||||
|
||||
protected:
|
||||
/// @brief Return the reference to a value in the command buffer
|
||||
uint32_t& IndexBuffer(CmdBuf* cmdbuf, uint32_t index) { return (*cmdbuf)[index]; }
|
||||
};
|
||||
|
||||
/// @brief Extracts bits [31:0] of a 64-bit value
///
/// @param u Value to split
///
/// @return uint32_t The low half of u
inline uint32_t Low32(uint64_t u) {
  const uint64_t low_bits = u & 0xFFFFFFFFUL;
  return static_cast<uint32_t>(low_bits);
}
|
||||
|
||||
/// @brief Extracts bits [63:32] of a 64-bit value
///
/// @param u Value to split
///
/// @return uint32_t The high half of u
inline uint32_t High32(uint64_t u) {
  const uint64_t shifted = u >> 32;
  return static_cast<uint32_t>(shifted);
}
|
||||
|
||||
/// @brief Returns the lower 32-bits of an address
///
/// @param p Address to split
///
/// @return uint32_t The integer value of p truncated to 32 bits
inline uint32_t PtrLow32(const void* p) {
  const uintptr_t address_bits = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(address_bits);
}
|
||||
|
||||
/// @brief Returns the upper 32-bits of an address
///
/// When HSA_LARGE_MODEL is defined the integer value of the pointer is
/// shifted right by 32 bits and truncated to uint32_t; otherwise the
/// result is always 0.
///
/// @param p Address to split
///
/// @return uint32_t Upper half of p under HSA_LARGE_MODEL, else 0
|
||||
inline uint32_t PtrHigh32(const void* p) {
|
||||
// Value returned when HSA_LARGE_MODEL is not defined.
uint32_t hi_32 = 0;
|
||||
#ifdef HSA_LARGE_MODEL
|
||||
hi_32 = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(p) >> 32);
|
||||
// Compile-time check that the macro agrees with the actual pointer width.
static_assert(sizeof(void*) == 8, "HSA_LARGE_MODEL is not set properly here!");
|
||||
#else
|
||||
// Without the macro, pointers are required to be 4 bytes wide.
static_assert(sizeof(void*) == 4, "HSA_LARGE_MODEL is not set properly here!");
|
||||
#endif
|
||||
return hi_32;
|
||||
}
|
||||
|
||||
} // pm4_profile
|
||||
|
||||
#endif // _CMDWRITER_H_
|
||||
@@ -1,161 +0,0 @@
|
||||
#ifndef _GFX8_CMDS_H_
|
||||
#define _GFX8_CMDS_H_
|
||||
|
||||
#include "gfxip/gfx8/si_ci_vi_merged_enum.h"
|
||||
#include "gfxip/gfx8/si_ci_vi_merged_mask.h"
|
||||
#include "gfxip/gfx8/si_ci_vi_merged_offset.h"
|
||||
#include "gfxip/gfx8/si_ci_vi_merged_registers.h"
|
||||
#include "gfxip/gfx8/si_ci_vi_merged_typedef.h"
|
||||
#include "gfxip/gfx8/si_ci_vi_merged_pm4_it_opcodes.h"
|
||||
#include "gfxip/gfx8/si_pm4defs.h"
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
namespace gfx8 {
|
||||
|
||||
// Desc: Defines the Gpu command to dispatch a kernel. It embeds
|
||||
// various Gpu hardware specific data structures for initialization
|
||||
// and configuration before a dispatch begins to run
|
||||
struct DispatchTemplate {
|
||||
// Desc: Structure used to initialize the group dimensions
|
||||
// of a kernel dispatch and if performance counters are enabled
|
||||
struct DispatchDimensionRegs {
|
||||
PM4CMDSETDATA cmd_set_data;
|
||||
regCOMPUTE_START_X compute_start_x;
|
||||
regCOMPUTE_START_Y compute_start_y;
|
||||
regCOMPUTE_START_Z compute_start_z;
|
||||
regCOMPUTE_NUM_THREAD_X compute_num_thread_x;
|
||||
regCOMPUTE_NUM_THREAD_Y compute_num_thread_y;
|
||||
regCOMPUTE_NUM_THREAD_Z compute_num_thread_z;
|
||||
regCOMPUTE_PIPELINESTAT_ENABLE__CI__VI compute_pipelinestat_enable;
|
||||
} dimension_regs;
|
||||
|
||||
// Desc: Structure used to initialize kernel Isa, trap
|
||||
// handler, trap handler buffer, number of SGPR and VGPR
|
||||
// registers needed, amount of Group memory and LDS needed,
|
||||
// Rounding mode for Floating point numbers, etc.
|
||||
struct DispatchProgramRegs {
|
||||
PM4CMDSETDATA cmd_set_data;
|
||||
regCOMPUTE_PGM_LO compute_pgm_lo;
|
||||
regCOMPUTE_PGM_HI compute_pgm_hi;
|
||||
regCOMPUTE_TBA_LO compute_tba_lo;
|
||||
regCOMPUTE_TBA_HI compute_tba_hi;
|
||||
regCOMPUTE_TMA_LO compute_tma_lo;
|
||||
regCOMPUTE_TMA_HI compute_tma_hi;
|
||||
regCOMPUTE_PGM_RSRC1 compute_pgm_rsrc1;
|
||||
regCOMPUTE_PGM_RSRC2 compute_pgm_rsrc2;
|
||||
} program_regs;
|
||||
|
||||
// Desc: Structure used to initialize parameters related to
|
||||
// thread management i.e. number of waves to issue and number
|
||||
// of Compute Units to use
|
||||
struct DispatchResourceRegs {
|
||||
PM4CMDSETDATA cmd_set_data;
|
||||
regCOMPUTE_RESOURCE_LIMITS compute_resource_limits;
|
||||
regCOMPUTE_STATIC_THREAD_MGMT_SE0 compute_static_thread_mgmt_se0;
|
||||
regCOMPUTE_STATIC_THREAD_MGMT_SE1 compute_static_thread_mgmt_se1;
|
||||
regCOMPUTE_TMPRING_SIZE compute_tmpring_size;
|
||||
regCOMPUTE_STATIC_THREAD_MGMT_SE2__CI__VI compute_static_thread_mgmt_se2;
|
||||
regCOMPUTE_STATIC_THREAD_MGMT_SE3__CI__VI compute_static_thread_mgmt_se3;
|
||||
regCOMPUTE_RESTART_X__CI__VI compute_restart_x;
|
||||
regCOMPUTE_RESTART_Y__CI__VI compute_restart_y;
|
||||
regCOMPUTE_RESTART_Z__CI__VI compute_restart_z;
|
||||
regCOMPUTE_THREAD_TRACE_ENABLE__CI__VI compute_thread_trace_enable;
|
||||
} resource_regs;
|
||||
|
||||
// Desc: Structure used to pass handles of the Aql dispatch
|
||||
// packet, Aql queue, Kernel argument address block, Scratch
|
||||
// buffer
|
||||
struct DispatchComputeUserDataRegs {
|
||||
PM4CMDSETDATA cmd_set_data;
|
||||
uint32_t compute_user_data[16];
|
||||
} compute_user_data_regs;
|
||||
|
||||
// Desc: Structure used to configure Cache flush policy
|
||||
// and dimensions of total work size
|
||||
PM4CMDDISPATCHDIRECT dispatch_direct;
|
||||
};
|
||||
|
||||
// Desc: Structure used to issue a Gpu Barrier command
|
||||
struct BarrierTemplate {
|
||||
PM4CMDEVENTWRITE event_write;
|
||||
};
|
||||
|
||||
// Desc: Structure used to configure the flushing
|
||||
// of various caches - instruction, constants, L1
|
||||
// and L2
|
||||
struct AcquireMemTemplate {
|
||||
PM4CMDACQUIREMEM acquire_mem;
|
||||
};
|
||||
|
||||
// Desc: Structure used to reference another Gpu command
|
||||
// indirectly. Generally used to reference a list of Gpu
|
||||
// commands (dispatch cmds) indirectly
|
||||
struct LaunchTemplate {
|
||||
PM4CMDINDIRECTBUFFER indirect_buffer;
|
||||
};
|
||||
|
||||
// Desc: Structure used to determine the end of
|
||||
// a kernel including cache flushes and writing to
|
||||
// a user configurable memory location
|
||||
struct EndofKernelNotifyTemplate {
|
||||
PM4CMDRELEASEMEM release_mem;
|
||||
};
|
||||
|
||||
// Desc: Strucuture used to perform various atomic
|
||||
// operations - add, subtract, increment, etc
|
||||
struct AtomicTemplate {
|
||||
PM4CMDATOMIC atomic;
|
||||
};
|
||||
|
||||
// Desc: Structure used to conditionalize the execution
|
||||
// of a Gpu command stream
|
||||
struct ConditionalExecuteTemplate {
|
||||
PM4CMDCONDEXEC_CI conditional;
|
||||
};
|
||||
|
||||
// Desc: PM4 command to write a 32-bit value into a memory
|
||||
// location accessible to Gpu
|
||||
struct WriteDataTemplate {
|
||||
PM4CMDWRITEDATA write_data;
|
||||
uint32_t write_data_value;
|
||||
};
|
||||
|
||||
// Desc: PM4 command to write a 64-bit value into a memory
|
||||
// location accessible to Gpu
|
||||
struct WriteData64Template {
|
||||
PM4CMDWRITEDATA write_data;
|
||||
uint64_t write_data_value;
|
||||
};
|
||||
|
||||
// Desc: PM4 command to wait for a certain event before proceeding
|
||||
// to process another command on the queue
|
||||
struct WaitRegMemTemplate {
|
||||
PM4CMDWAITREGMEM wait_reg_mem;
|
||||
};
|
||||
|
||||
// Desc: Initializer for commands that set shader registers
|
||||
template <class T> void GenerateSetShRegHeader(T* pm4, uint32_t reg_addr) {
|
||||
pm4->cmd_set_data.header.u32All =
|
||||
PM4_TYPE_3_HDR(IT_SET_SH_REG, sizeof(T) / sizeof(uint32_t), ShaderCompute, 0);
|
||||
pm4->cmd_set_data.regOffset = reg_addr - PERSISTENT_SPACE_START;
|
||||
}
|
||||
|
||||
// Desc: Initializer for various Gpu command headers
|
||||
template <class T> void GenerateCmdHeader(T* pm4, IT_OpCodeType op_code) {
|
||||
pm4->header.u32All = PM4_TYPE_3_HDR(op_code, sizeof(T) / sizeof(uint32_t), ShaderCompute, 0);
|
||||
}
|
||||
|
||||
// Desc: Initializer for commands that set configuration registers
|
||||
template <class T> void GenerateSetConfigRegHeader(T* pm4, uint32_t reg_addr) {
|
||||
pm4->cmd_set_data.header.u32All =
|
||||
PM4_TYPE_3_HDR(IT_SET_CONFIG_REG, sizeof(T) / sizeof(uint32_t), ShaderCompute, 0);
|
||||
pm4->cmd_set_data.regOffset = reg_addr - CONFIG_SPACE_START;
|
||||
}
|
||||
|
||||
|
||||
} // gfx8
|
||||
|
||||
} // pm4_profile
|
||||
|
||||
#endif // _GFX8_CMDS_H_
|
||||
@@ -1,765 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "gfx8_cmdwriter.h"
|
||||
#include "gfxip/gfx8/gfx8_utils.h"
|
||||
|
||||
// Values for the destination select (dstSel) field of a RELEASE_MEM packet
#define RELEASE_MEM_DST_SEL_MEMORY_CONTROLLER 0
#define RELEASE_MEM_DST_SEL_TC_L2             1

// Values for the cache policy (cachePolicy) field of a RELEASE_MEM packet
#define RELEASE_MEM_CACHE_POLICY_LRU    0
#define RELEASE_MEM_CACHE_POLICY_STREAM 1
#define RELEASE_MEM_CACHE_POLICY_BYPASS 2
|
||||
|
||||
// Debug helper: dumps a PM4 packet to std::clog as a labelled row of
// 32-bit hex dwords. Compiled to a no-op when NDEBUG is defined.
//
//   command - packet object to dump; it is viewed as sizeof(command) / 4 dwords
//   name    - label printed in front of the dump (callers pass __FUNCTION__)
//
// The formatting state of std::clog (flags and fill character) is saved
// before and restored after the dump so that other users of the stream
// are not left in hex / left-justified mode.
template <class T> static void PrintPm4Packet(const T& command, const char* name) {
#if !defined(NDEBUG)
  const uint32_t* dwords = reinterpret_cast<const uint32_t*>(&command);
  const uint32_t dword_count = sizeof(command) / sizeof(uint32_t);

  std::ostringstream label;
  label << "'" << name << "' size(" << std::dec << dword_count << ")";

  const std::ios_base::fmtflags saved_flags = std::clog.flags();
  const char saved_fill = std::clog.fill();

  // The label column is left-justified and padded with blanks
  std::clog << std::setfill(' ') << std::setw(40) << std::left << label.str() << ":";

  // Each dword is right-justified so the zero fill ends up in front of the
  // digits; with the sticky std::left still active the zeros would be
  // appended instead (1 would print as "10000000")
  for (uint32_t idx = 0; idx < dword_count; idx++) {
    std::clog << " " << std::hex << std::right << std::setw(8) << std::setfill('0')
              << dwords[idx];
  }
  std::clog << std::endl;

  std::clog.flags(saved_flags);
  std::clog.fill(saved_fill);
#else
  (void)command;
  (void)name;
#endif
}
|
||||
|
||||
// Dumps 'command' through PrintPm4Packet (labelled with the calling
// function's name) and then appends it to 'cmdbuf'. The two statements are
// wrapped in do { } while (0) so the macro expands to a single statement
// and stays correct under an unbraced if / else. Invoke it with a trailing
// semicolon.
#define APPEND_COMMAND_WRAPPER(cmdbuf, command) \
  do {                                          \
    PrintPm4Packet(command, __FUNCTION__);      \
    AppendCommand(cmdbuf, command);             \
  } while (0)
|
||||
|
||||
namespace pm4_profile {
|
||||
namespace gfx8 {
|
||||
|
||||
// Copies the raw bytes of 'command' to the end of the command buffer
template <class T> void Gfx8CmdWriter::AppendCommand(CmdBuf* cmdbuf, const T& command) {
  const T* packet = &command;
  cmdbuf->AppendCommand(packet, sizeof(T));
}
|
||||
|
||||
void Gfx8CmdWriter::InitializeAtomicTemplate() {
|
||||
memset(&atomic_template_.atomic, 0, sizeof(atomic_template_));
|
||||
GenerateCmdHeader(&atomic_template_.atomic, IT_ATOMIC_MEM__CI);
|
||||
|
||||
if (atc_support_) {
|
||||
const uint32_t kAtcShift = 24;
|
||||
atomic_template_.atomic.ordinal2 |= 1 << kAtcShift;
|
||||
}
|
||||
}
|
||||
|
||||
void Gfx8CmdWriter::InitializeConditionalTemplate() {
|
||||
memset(&conditional_template_.conditional, 0, sizeof(conditional_template_));
|
||||
gfx8::GenerateCmdHeader(&conditional_template_.conditional, IT_COND_EXEC);
|
||||
|
||||
if (atc_support_) {
|
||||
const uint32_t kAtcShift = 24;
|
||||
conditional_template_.conditional.ordinal4 |= 1 << kAtcShift;
|
||||
}
|
||||
}
|
||||
|
||||
void Gfx8CmdWriter::InitializeLaunchTemplate() {
|
||||
memset(&launch_template_, 0, sizeof(launch_template_));
|
||||
|
||||
GenerateCmdHeader(&launch_template_.indirect_buffer, IT_INDIRECT_BUFFER);
|
||||
launch_template_.indirect_buffer.CI.valid = true;
|
||||
}
|
||||
|
||||
void Gfx8CmdWriter::InitializeWriteDataTemplate() {
|
||||
// Set the header of write data command
|
||||
memset(&write_data_template_, 0, sizeof(write_data_template_));
|
||||
|
||||
// Initialize the header of command packet
|
||||
PM4CMDWRITEDATA* command = &(write_data_template_.write_data);
|
||||
uint32_t cmd_size = sizeof(write_data_template_) / sizeof(uint32_t);
|
||||
command->ordinal1 = PM4_TYPE_3_HDR(IT_WRITE_DATA, cmd_size, ShaderCompute, 0);
|
||||
|
||||
// Set the ATC bit of command template - specifies if the address
|
||||
// belongs to system memory
|
||||
write_data_template_.write_data.atc__CI = (atc_support_) ? 1 : 0;
|
||||
|
||||
// Set the bit to confirm the write operation and cache policy
|
||||
write_data_template_.write_data.wrConfirm = 1;
|
||||
write_data_template_.write_data.cachePolicy__CI = WRITE_DATA_CACHE_POLICY_BYPASS;
|
||||
|
||||
// Specify the module that will execute the write data command
|
||||
write_data_template_.write_data.engineSel = WRITE_DATA_ENGINE_ME;
|
||||
|
||||
// Specify the class to which the write destination belongs
|
||||
write_data_template_.write_data.dstSel = WRITE_DATA_DST_SEL_MEMORY_ASYNC;
|
||||
}
|
||||
|
||||
void Gfx8CmdWriter::InitializeWriteData64Template() {
|
||||
// Set the header of write data command
|
||||
memset(&write_data64_template_, 0, sizeof(write_data64_template_));
|
||||
|
||||
// Initialize the header of command packet
|
||||
PM4CMDWRITEDATA* command = &(write_data64_template_.write_data);
|
||||
uint32_t cmd_size = sizeof(write_data64_template_) / sizeof(uint32_t);
|
||||
command->ordinal1 = PM4_TYPE_3_HDR(IT_WRITE_DATA, cmd_size, ShaderCompute, 0);
|
||||
|
||||
// Set the ATC bit of command template - specifies if the address
|
||||
// belongs to system memory
|
||||
write_data64_template_.write_data.atc__CI = (atc_support_) ? 1 : 0;
|
||||
|
||||
// Set the bit to confirm the write operation and cache policy
|
||||
write_data64_template_.write_data.wrConfirm = 1;
|
||||
write_data64_template_.write_data.cachePolicy__CI = WRITE_DATA_CACHE_POLICY_BYPASS;
|
||||
|
||||
// Specify the module that will execute the write data command
|
||||
write_data64_template_.write_data.engineSel = WRITE_DATA_ENGINE_ME;
|
||||
|
||||
// Specify the class to which the write destination belongs
|
||||
// write_data64_template_.write_data.dstSel = WRITE_DATA_DST_SEL_TCL2;
|
||||
// TODO: For Hawaii bring up only.
|
||||
write_data64_template_.write_data.dstSel = WRITE_DATA_DST_SEL_MEMORY_ASYNC;
|
||||
}
|
||||
|
||||
void Gfx8CmdWriter::InitializeBarrierTemplate() {
|
||||
memset(&pending_dispatch_template_, 0, sizeof(pending_dispatch_template_));
|
||||
|
||||
gfx8::GenerateCmdHeader(&pending_dispatch_template_.event_write, IT_EVENT_WRITE);
|
||||
pending_dispatch_template_.event_write.eventType = CS_PARTIAL_FLUSH;
|
||||
pending_dispatch_template_.event_write.eventIndex = EventTypeToIndexTable[CS_PARTIAL_FLUSH];
|
||||
}
|
||||
|
||||
void Gfx8CmdWriter::InitializeAcquireMemTemplate() {
|
||||
memset(&invalidate_cache_template_, 0, sizeof(invalidate_cache_template_));
|
||||
|
||||
gfx8::GenerateCmdHeader(&invalidate_cache_template_.acquire_mem, IT_ACQUIRE_MEM__CI__VI);
|
||||
invalidate_cache_template_.acquire_mem.cpCoherBase.u32All = 0x00;
|
||||
invalidate_cache_template_.acquire_mem.cpCoherBaseHi.u32All = 0x00;
|
||||
invalidate_cache_template_.acquire_mem.cpCoherSize.u32All = 0xFFFFFFFF;
|
||||
invalidate_cache_template_.acquire_mem.cpCoherSizeHi.u32All = 0xFF;
|
||||
invalidate_cache_template_.acquire_mem.pollInterval = 0;
|
||||
}
|
||||
|
||||
void Gfx8CmdWriter::InitializeWaitRegMemTemplate() {
|
||||
memset(&wait_reg_mem_template_, 0, sizeof(wait_reg_mem_template_));
|
||||
|
||||
gfx8::GenerateCmdHeader(&wait_reg_mem_template_.wait_reg_mem, IT_WAIT_REG_MEM);
|
||||
wait_reg_mem_template_.wait_reg_mem.atc__CI = (atc_support_) ? 1 : 0;
|
||||
wait_reg_mem_template_.wait_reg_mem.cachePolicy__CI = 2; // bypass
|
||||
wait_reg_mem_template_.wait_reg_mem.pollInterval = 0;
|
||||
wait_reg_mem_template_.wait_reg_mem.engine = WAIT_REG_MEM_ENGINE_ME;
|
||||
}
|
||||
|
||||
// Records the ATC / PCIe-atomic capabilities and then builds every
// reusable command template. The capability flags are stored first
// because several template initializers read atc_support_.
Gfx8CmdWriter::Gfx8CmdWriter(bool atc_support, bool pcie_atomic_support) {
  atc_support_ = atc_support;
  pcie_atomic_support_ = pcie_atomic_support;

  // Command templates
  InitializeLaunchTemplate();
  InitializeAtomicTemplate();
  InitializeConditionalTemplate();
  InitializeWriteDataTemplate();
  InitializeWriteData64Template();
  InitializeBarrierTemplate();
  InitializeAcquireMemTemplate();
  InitializeWaitRegMemTemplate();
}
|
||||
|
||||
// Appends a WAIT_REG_MEM packet built from the prepared template.
//   mem_space - true when wait_addr is a memory address, false for a register
//   wait_addr - address / register to poll
//   func_eq   - true to select the "equal" compare function, false for "not equal"
//   mask_val  - mask applied on the value at the address / register
//   wait_val  - reference value used by the compare function
void Gfx8CmdWriter::BuildWaitRegMemCommand(CmdBuf* cmdbuf, bool mem_space, uint64_t wait_addr,
                                           bool func_eq, uint32_t mask_val, uint32_t wait_val) {
  gfx8::WaitRegMemTemplate packet = wait_reg_mem_template_;
  PM4CMDWAITREGMEM& wait = packet.wait_reg_mem;

  // Space the address belongs to, and the compare function requested
  wait.memSpace = mem_space ? WAIT_REG_MEM_SPACE_MEMORY : WAIT_REG_MEM_SPACE_REGISTER;
  wait.function = func_eq ? WAIT_REG_MEM_FUNC_EQUAL : WAIT_REG_MEM_FUNC_NOT_EQUAL;

  // Mask and reference value
  wait.mask = mask_val;
  wait.reference = wait_val;

  // The upper 32 address bits are only written for memory addresses
  if (mem_space) {
    assert(!(wait_addr & 0x3) && "WaitRegMem address must be 4 byte aligned");
    wait.pollAddressHi = High32(wait_addr);
  }
  wait.pollAddressLo = Low32(wait_addr);

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
// Appends a command that stores 'value' at 'addr': an atomic swap when
// PCIe atomics are supported, otherwise a 64-bit write data command
void Gfx8CmdWriter::BuildUpdateHostAddress(CmdBuf* cmdbuf, uint64_t* addr, int64_t value) {
  if (!pcie_atomic_support_) {
    BuildWriteData64Command(cmdbuf, addr, value);
    return;
  }

  volatile uint64_t* target = addr;
  BuildAtomicPacket64(cmdbuf, CommandWriter::AtomicType::kAtomicSwap, target, value);
}
|
||||
|
||||
// Appends an INDIRECT_BUFFER packet that points at the command stream
// at 'cmd_addr'; 'cmd_size' is given in bytes and encoded in dwords
void Gfx8CmdWriter::BuildIndirectBufferCmd(CmdBuf* cmdbuf, const void* cmd_addr,
                                           std::size_t cmd_size) {
  gfx8::LaunchTemplate packet = launch_template_;
  PM4CMDINDIRECTBUFFER& ib = packet.indirect_buffer;

  ib.ibBaseLo = PtrLow32(cmd_addr);
  ib.ibBaseHi = PtrHigh32(cmd_addr);
  ib.CI.ibSize = cmd_size / sizeof(uint32_t);

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
// Appends a RELEASE_MEM packet triggered by the BOTTOM_OF_PIPE_TS event.
// The packet requests an L2 write-back and invalidate, then writes the
// 32-bit 'write_val' to 'write_addr'; 'interrupt' selects whether an
// interrupt is sent once the write is confirmed.
void Gfx8CmdWriter::BuildBOPNotifyCmd(CmdBuf* cmdbuf, const void* write_addr, uint32_t write_val,
                                      bool interrupt) {
  // Zeroed command with its header filled in
  gfx8::EndofKernelNotifyTemplate notify;
  memset(&notify, 0, sizeof(notify));
  PM4CMDRELEASEMEM& rel = notify.release_mem;
  gfx8::GenerateCmdHeader(&rel, IT_RELEASE_MEM__CI__VI);

  // Event the CP waits for (notified by SPI)
  rel.eventType = BOTTOM_OF_PIPE_TS;
  rel.eventIndex = EventTypeToIndexTable[BOTTOM_OF_PIPE_TS];

  // Cache operations that complete before the write commences
  rel.atc = atc_support_;
  rel.l2Invlidate = true;
  rel.l2WriteBack = true;

  // Destination is memory, with the write bypassing the cache
  rel.cachePolicy = RELEASE_MEM_CACHE_POLICY_BYPASS;
  rel.dstSel = RELEASE_MEM_DST_SEL_MEMORY_CONTROLLER;

  // User specified value written to the user specified address
  const uint64_t dst = reinterpret_cast<uint64_t>(write_addr);
  rel.ordinal4 = Low32(dst);
  rel.addrHi = High32(dst);
  rel.dataLo = Low32(write_val);
  rel.dataHi = High32(write_val);
  rel.dataSel = EVENTWRITEEOP_DATA_SEL_SEND_DATA32;

  // Host either polls or waits for an interrupt
  rel.intSel = interrupt ? EVENTWRITEEOP_INT_SEL_SEND_INT_ON_CONFIRM : EVENTWRITEEOP_INT_SEL_NONE;

  APPEND_COMMAND_WRAPPER(cmdbuf, notify);
}
|
||||
|
||||
|
||||
// Appends an ACQUIRE_MEM packet with the TC action and TC write-back
// action bits enabled
void Gfx8CmdWriter::BuildBarrierFenceCommands(CmdBuf* cmdbuf) {
  gfx8::AcquireMemTemplate fence = invalidate_cache_template_;

  // wbINVL2 by default writes-back and invalidates both L1 and L2
  fence.acquire_mem.coherCntl =
      CP_COHER_CNTL__TC_ACTION_ENA_MASK | CP_COHER_CNTL__TC_WB_ACTION_ENA_MASK__CI__VI;

  APPEND_COMMAND_WRAPPER(cmdbuf, fence);
}
|
||||
|
||||
// PM4 type-3 packet header encoding used by the profiler packets
#define PM4_PACKET3             (0xC0000000)
#define PM4_PACKET3_CMD_SHIFT   8
#define PM4_PACKET3_COUNT_SHIFT 16

// Composes a type-3 header from an opcode and a count; the count field
// stores (count - 1)
#define PACKET3(cmd, count)                                                           \
  (((cmd) << PM4_PACKET3_CMD_SHIFT) | (((count)-1) << PM4_PACKET3_COUNT_SHIFT) | \
   PM4_PACKET3)
|
||||
|
||||
// Raw storage for a three-dword PM4 packet
typedef struct WriteRegPacket_ {
  uint32_t item[3];
} WriteRegPacket;

// Raw storage for a seven-dword PM4 packet
typedef struct WriteEventPacket_ {
  uint32_t item[7];
} WriteEventPacket;
|
||||
|
||||
// Appends an EVENT_WRITE packet for one of the perf counter events
// (start / stop / sample). Any other event id trips an assertion.
void Gfx8CmdWriter::BuildWriteEventPacket(CmdBuf* cmdbuf, uint32_t event) {
  PM4CMDEVENTWRITE packet;
  packet.ordinal1 = PACKET3(IT_EVENT_WRITE, 1);
  packet.ordinal2 = 0;

  // Translate the profiler event id into the hardware event type
  VGT_EVENT_TYPE vgt_event = Reserved_0x00;
  if (event == kPerfCntrsStart) {
    vgt_event = PERFCOUNTER_START;
  } else if (event == kPerfCntrsStop) {
    vgt_event = PERFCOUNTER_STOP;
  } else if (event == kPerfCntrsSample) {
    vgt_event = PERFCOUNTER_SAMPLE;
  } else {
    assert(false && "Illegal VGT Event Id");
  }

  packet.eventType = vgt_event;
  packet.eventIndex = EventTypeToIndexTable[vgt_event];

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
// Appends a SET_UCONFIG_REG packet (ShaderGraphics header) that writes
// 'value' to the register at 'addr'
void Gfx8CmdWriter::BuildWriteUnshadowRegPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value) {
  const uint32_t header = PM4_TYPE_3_HDR(IT_SET_UCONFIG_REG__CI__VI,
                                         1 + PM4_CMD_SET_CONFIG_REG_DWORDS, ShaderGraphics, 0);
  const uint32_t reg_offset = addr - UCONFIG_SPACE_START__CI__VI;

  WriteRegPacket packet;
  packet.item[0] = header;
  packet.item[1] = reg_offset;
  packet.item[2] = value;

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
// Appends a SET_UCONFIG_REG packet (ShaderCompute header) that writes
// 'value' to the register at 'addr'
void Gfx8CmdWriter::BuildWriteUConfigRegPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value) {
  const uint32_t header = PM4_TYPE_3_HDR(IT_SET_UCONFIG_REG__CI__VI,
                                         1 + PM4_CMD_SET_CONFIG_REG_DWORDS, ShaderCompute, 0);
  const uint32_t reg_offset = addr - UCONFIG_SPACE_START__CI__VI;

  WriteRegPacket packet;
  packet.item[0] = header;
  packet.item[1] = reg_offset;
  packet.item[2] = value;

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
// Appends a SET_SH_REG packet that writes 'value' to the shader
// register at 'addr'
void Gfx8CmdWriter::BuildWriteShRegPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value) {
  const uint32_t header =
      PM4_TYPE_3_HDR(IT_SET_SH_REG, 1 + PM4_CMD_SET_SH_REG_DWORDS, ShaderCompute, 0);
  const uint32_t reg_offset = addr - PERSISTENT_SPACE_START;

  WriteRegPacket packet;
  packet.item[0] = header;
  packet.item[1] = reg_offset;
  packet.item[2] = value;

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
// Appends a COPY_DATA packet that copies from the source described by
// (src_sel, src_addr_lo, src_addr_hi) to the memory at 'dst_addr'.
//   size - stored in the packet's countSel field
//   wait - stored in the packet's wrConfirm field
void Gfx8CmdWriter::BuildCopyDataPacket(CmdBuf* cmdbuf, uint32_t src_sel, uint32_t src_addr_lo,
                                        uint32_t src_addr_hi, uint32_t* dst_addr, uint32_t size,
                                        bool wait) {
  PM4CMDCOPYDATA copy;
  memset(&copy, 0, sizeof(PM4CMDCOPYDATA));
  copy.header.u32All = PACKET3(IT_COPY_DATA, 5);

  // Source attributes
  copy.srcAtc__CI = atc_support_;
  copy.srcCachePolicy__CI = COPY_DATA_SRC_CACHE_POLICY_BYPASS;
  copy.srcSel = src_sel;

  // Destination attributes
  copy.dstAtc__CI = atc_support_;
  copy.dstSel = COPY_DATA_SEL_DST_ASYNC_MEMORY;
  copy.dstCachePolicy__CI = COPY_DATA_DST_CACHE_POLICY_BYPASS;

  // Source and destination addresses
  const uint32_t dst_lo = PtrLow32(dst_addr);
  const uint32_t dst_hi = PtrHigh32(dst_addr);
  copy.srcAddressLo = src_addr_lo;
  copy.srcAddressHi = src_addr_hi;
  copy.dstAddressLo = dst_lo;
  copy.dstAddressHi = dst_hi;

  copy.countSel = size;
  copy.wrConfirm = wait;
  copy.engineSel = COPY_DATA_ENGINE_ME;

  APPEND_COMMAND_WRAPPER(cmdbuf, copy);
}
|
||||
|
||||
// Appends a hand-encoded seven-dword ACQUIRE_MEM packet
void Gfx8CmdWriter::BuildCacheFlushPacket(CmdBuf* cmdbuf) {
  WriteEventPacket packet = {{
      PACKET3(IT_ACQUIRE_MEM__CI__VI, 6),  // item[0]: packet header
      0x28C00000,                          // item[1]
      0xFFFFFFFF,                          // item[2]
      0,                                   // item[3]
      0,                                   // item[4]
      0,                                   // item[5]
      0x00000004,                          // item[6]
  }};

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
// Appends the barrier command followed by the cache flush packet
void Gfx8CmdWriter::BuildWriteWaitIdlePacket(CmdBuf* cmdbuf) {
  BuildBarrierCommand(cmdbuf);
  BuildCacheFlushPacket(cmdbuf);
}
|
||||
|
||||
// Will issue a VGT event including a cache flush later on
|
||||
void Gfx8CmdWriter::BuildVgtEventPacket(CmdBuf* cmdbuf, uint32_t vgtEvent) {
|
||||
PM4CMDEVENTWRITE cp_event_initiator;
|
||||
|
||||
cp_event_initiator.ordinal1 = PACKET3(IT_EVENT_WRITE, 1);
|
||||
cp_event_initiator.ordinal2 = 0;
|
||||
|
||||
VGT_EVENT_TYPE eventType = Reserved_0x00;
|
||||
switch (vgtEvent) {
|
||||
case kPerfCntrsStart:
|
||||
eventType = PERFCOUNTER_START;
|
||||
break;
|
||||
case kPerfCntrsStop:
|
||||
eventType = PERFCOUNTER_STOP;
|
||||
break;
|
||||
case kPerfCntrsSample:
|
||||
eventType = PERFCOUNTER_SAMPLE;
|
||||
break;
|
||||
case kThrdTraceStart:
|
||||
eventType = THREAD_TRACE_START;
|
||||
break;
|
||||
case kThrdTraceStop:
|
||||
eventType = THREAD_TRACE_STOP;
|
||||
break;
|
||||
case kThrdTraceFlush:
|
||||
eventType = THREAD_TRACE_FLUSH;
|
||||
break;
|
||||
case kThrdTraceFinish:
|
||||
eventType = THREAD_TRACE_FINISH;
|
||||
break;
|
||||
default:
|
||||
assert(false && "Illegal VGT Event Id");
|
||||
}
|
||||
|
||||
cp_event_initiator.eventType = eventType;
|
||||
cp_event_initiator.eventIndex = EventTypeToIndexTable[eventType];
|
||||
|
||||
APPEND_COMMAND_WRAPPER(cmdbuf, cp_event_initiator);
|
||||
|
||||
// Check If I should be issuing a cache flush operation as well
|
||||
// test and remove it
|
||||
BuildCacheFlushPacket(cmdbuf);
|
||||
return;
|
||||
}
|
||||
|
||||
// Appends a SET_CONFIG_REG packet (ShaderGraphics header) that writes
// 'value' to the configuration register at 'addr'
void Gfx8CmdWriter::BuildWriteRegisterPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value) {
  const uint32_t header =
      PM4_TYPE_3_HDR(IT_SET_CONFIG_REG, 1 + PM4_CMD_SET_CONFIG_REG_DWORDS, ShaderGraphics, 0);
  const uint32_t reg_offset = addr - CONFIG_SPACE_START;

  WriteRegPacket packet;
  packet.item[0] = header;
  packet.item[1] = reg_offset;
  packet.item[2] = value;

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
// Appends an EVENT_WRITE query packet whose result address is 'addr'.
//   event - profiler event id; no ids are supported yet, so every value
//           currently reaches the default branch and trips the assertion
//   addr  - result address; must be 8 byte aligned
void Gfx8CmdWriter::BuildWriteEventQueryPacket(CmdBuf* cmdbuf, uint32_t event, uint32_t* addr) {
  PM4CMDEVENTWRITEQUERY cp_event_initiator;
  cp_event_initiator.ordinal1 = PACKET3(IT_EVENT_WRITE, 3);
  cp_event_initiator.ordinal2 = 0;

  // Add a case here for every event that should be supported
  VGT_EVENT_TYPE eventType = Reserved_0x00;
  switch (event) {
    default:
      assert(false && "Illegal VGT Event Id");
  }

  cp_event_initiator.eventType = eventType;
  cp_event_initiator.eventIndex = EventTypeToIndexTable[eventType];

  // Set the address. A single direct assertion replaces the former
  // "cond ? assert(false) : assert(true)" construct.
  uint32_t addrLo = PtrLow32(addr);
  uint32_t addrHi = PtrHigh32(addr);
  assert(!(addrLo & 0x7) && "event query address must be 8 byte aligned");

  cp_event_initiator.ordinal3 = 0;
  cp_event_initiator.ordinal4 = 0;
  cp_event_initiator.addressLo = addrLo;
  cp_event_initiator.addressHi = addrHi;

  APPEND_COMMAND_WRAPPER(cmdbuf, cp_event_initiator);
}
|
||||
|
||||
// Appends the prepared barrier (CS_PARTIAL_FLUSH event write) template
void Gfx8CmdWriter::BuildBarrierCommand(CmdBuf* cmdBuf) {
  APPEND_COMMAND_WRAPPER(cmdBuf, pending_dispatch_template_);
}
|
||||
|
||||
// Copies 'count' dwords from 'src_addr' to 'dst_addr'
void Gfx8CmdWriter::WriteUserData(uint32_t* dst_addr, uint32_t count, const void* src_addr) {
  const size_t byte_count = count * sizeof(uint32_t);
  memcpy(dst_addr, src_addr, byte_count);
}
|
||||
|
||||
|
||||
// Appends a 32-bit atomic packet operating on 'addr'.
//   atomic_op - operation to encode; values not handled below leave the
//               template's operation fields untouched
//   value     - source operand (ignored for increment / decrement, which use 1)
//   compare   - compare operand, used by the compare-and-swap variants only
void Gfx8CmdWriter::BuildAtomicPacket(CmdBuf* cmdbuf, AtomicType atomic_op, volatile uint32_t* addr,
                                      uint32_t value, uint32_t compare) {
  gfx8::AtomicTemplate packet = atomic_template_;
  PM4CMDATOMIC& op = packet.atomic;

  // The destination address must be aligned
  void* target = const_cast<uint32_t*>(addr);
  const uint32_t address_low = PtrLow32(target);
  const uint32_t address_high = PtrHigh32(target);
  assert(!(address_low & 0x7) && "destination address must be 8 byte aligned");

  op.addressLo = address_low;
  op.addressHi = address_high;

  switch (atomic_op) {
    case CommandWriter::kAtomicTypeIncrement:
      op.atomOp = TC_OP_ATOMIC_ADD_RTN_32;
      op.srcDataLo = 1;
      break;

    case CommandWriter::kAtomicTypeDecrement:
      op.atomOp = TC_OP_ATOMIC_SUB_RTN_32;
      op.srcDataLo = 1;
      break;

    case CommandWriter::kAtomicTypeCompareAndSwap:
      op.atomOp = TC_OP_ATOMIC_CMPSWAP_RTN_32;
      op.srcDataLo = value;
      op.cmpDataLo = compare;
      break;

    case CommandWriter::kAtomicTypeBlockingCompareAndSwap:
      // Same as compare-and-swap, plus the command / loop interval fields
      op.atomOp = TC_OP_ATOMIC_CMPSWAP_RTN_32;
      op.srcDataLo = value;
      op.cmpDataLo = compare;
      op.command = 1;
      op.loopInterval = 128;
      break;

    case CommandWriter::kAtomicAdd:
      op.atomOp = TC_OP_ATOMIC_ADD_RTN_32;
      op.srcDataLo = value;
      break;

    case CommandWriter::kAtomicSubtract:
      op.atomOp = TC_OP_ATOMIC_SUB_RTN_32;
      op.srcDataLo = value;
      break;

    case CommandWriter::kAtomicSwap:
      op.atomOp = TC_OP_ATOMIC_SWAP_RTN_32;
      op.srcDataLo = value;
      break;
  }

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
// Appends a 64-bit atomic packet operating on 'addr'.
//   atomic_op - operation to encode; values not handled below leave the
//               template's operation fields untouched
//   value     - source operand (ignored for increment / decrement, which use 1)
//   compare   - compare operand, used by the compare-and-swap variants only
void Gfx8CmdWriter::BuildAtomicPacket64(CmdBuf* cmdbuf, AtomicType atomic_op,
                                        volatile uint64_t* addr, uint64_t value, uint64_t compare) {
  AtomicTemplate packet = atomic_template_;
  PM4CMDATOMIC& op = packet.atomic;

  // The destination address must be aligned
  void* target = const_cast<uint64_t*>(addr);
  const uint32_t address_low = PtrLow32(target);
  const uint32_t address_high = PtrHigh32(target);
  assert(!(address_low & 0x7) && "destination address must be 8 byte aligned");

  op.addressLo = address_low;
  op.addressHi = address_high;

  op.atc = (atc_support_) ? 1 : 0;
  op.cachePolicy = 2;

  switch (atomic_op) {
    case CommandWriter::kAtomicTypeIncrement:
      op.atomOp = TC_OP_ATOMIC_ADD_RTN_64;
      op.srcDataLo = 1;
      break;

    case CommandWriter::kAtomicTypeDecrement:
      op.atomOp = TC_OP_ATOMIC_SUB_RTN_64;
      op.srcDataLo = 1;
      break;

    case CommandWriter::kAtomicTypeCompareAndSwap:
      op.atomOp = TC_OP_ATOMIC_CMPSWAP_RTN_64;
      op.srcDataLo = Low32(value);
      op.srcDataHi = High32(value);
      op.cmpDataLo = Low32(compare);
      op.cmpDataHi = High32(compare);
      break;

    case CommandWriter::kAtomicTypeBlockingCompareAndSwap:
      // Same as compare-and-swap, plus the command / loop interval fields
      op.atomOp = TC_OP_ATOMIC_CMPSWAP_RTN_64;
      op.srcDataLo = Low32(value);
      op.srcDataHi = High32(value);
      op.cmpDataLo = Low32(compare);
      op.cmpDataHi = High32(compare);
      op.command = 1;
      op.loopInterval = 128;
      break;

    case CommandWriter::kAtomicAdd:
      op.atomOp = TC_OP_ATOMIC_ADD_RTN_64;
      op.srcDataLo = Low32(value);
      op.srcDataHi = High32(value);
      break;

    case CommandWriter::kAtomicSubtract:
      op.atomOp = TC_OP_ATOMIC_SUB_RTN_64;
      op.srcDataLo = Low32(value);
      op.srcDataHi = High32(value);
      break;

    case CommandWriter::kAtomicSwap:
      op.atomOp = TC_OP_ATOMIC_SWAP_RTN_64;
      op.srcDataLo = Low32(value);
      op.srcDataHi = High32(value);
      break;
  }

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
size_t Gfx8CmdWriter::SizeOfAtomicPacket() const {
|
||||
return sizeof(AtomicTemplate) / sizeof(uint32_t);
|
||||
}
|
||||
|
||||
// Appends a COND_EXEC packet whose boolean lives at 'signal'; 'count'
// is stored in the packet's execCount field
void Gfx8CmdWriter::BuildConditionalExecute(CmdBuf* cmdbuf, uint32_t* signal, uint16_t count) {
  ConditionalExecuteTemplate packet = conditional_template_;

  const uint32_t address_low = PtrLow32(signal);
  const uint32_t address_high = PtrHigh32(signal);
  assert(!(address_low & 0x7) && "destination address must be 8 byte aligned");

  PM4CMDCONDEXEC_CI& cond = packet.conditional;
  cond.boolAddrLo = address_low;
  cond.boolAddrHi = address_high;
  cond.execCount = count;

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
// Appends a WRITE_DATA packet that stores the 32-bit 'write_value'
// at 'write_addr'
void Gfx8CmdWriter::BuildWriteDataCommand(CmdBuf* cmdbuf, uint32_t* write_addr,
                                          uint32_t write_value) {
  // Start from the prepared template
  gfx8::WriteDataTemplate packet = write_data_template_;

  // Destination address, then the value to write
  packet.write_data.dstAddrLo = PtrLow32(write_addr);
  packet.write_data.dstAddrHi = PtrHigh32(write_addr);
  packet.write_data_value = write_value;

  // Hand the finished packet to the output Command Buffer
  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
// Appends a WRITE_DATA packet that stores the 64-bit 'write_value'
// at 'write_addr'
void Gfx8CmdWriter::BuildWriteData64Command(CmdBuf* cmdbuf, uint64_t* write_addr,
                                            uint64_t write_value) {
  // Start from the prepared template
  gfx8::WriteData64Template packet = write_data64_template_;

  // Destination address, then the value to write
  packet.write_data.dstAddrLo = PtrLow32(write_addr);
  packet.write_data.dstAddrHi = PtrHigh32(write_addr);
  packet.write_data_value = write_value;

  // Hand the finished packet to the output Command Buffer
  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
// Appends an ACQUIRE_MEM packet that flushes the caches selected in
// 'options'.
//   options   - selects the caches to act on (l1, l2, icache, kcache);
//               must not be NULL
//   writeAddr - NOT used on CI; kept so the interface matches the one on SI
//   writeVal  - not used by this implementation
void Gfx8CmdWriter::BuildFlushCacheCmd(CmdBuf* cmdbuf, FlushCacheOptions* options,
                                       uint32_t* writeAddr, uint32_t writeVal) {
  // The write back address is accepted but never consumed, whether it is
  // NULL or not. The former "check" on it could not fail and was removed.
  (void)writeAddr;
  (void)writeVal;

  // options is dereferenced unconditionally below
  assert(options != NULL && "flush cache options must be provided");

  PM4CMDACQUIREMEM flushCmd;
  memset(&flushCmd, 0, sizeof(flushCmd));

  // Initialize the command header
  gfx8::GenerateCmdHeader(&flushCmd, IT_ACQUIRE_MEM__CI__VI);

  // Base address of the memory being synchronized.
  // The starting address is indicated as follows: bits [0-48].
  flushCmd.cpCoherBase.u32All = 0;
  flushCmd.cpCoherBaseHi.u32All = 0;

  // Size of the memory being synchronized, indicated as follows:
  //    COHER_SIZE_256B_MASK = 0xffffffffL
  //    COHER_SIZE_HI_256B_MASK__CI__VI = 0x000000ffL
  flushCmd.cpCoherSize.u32All = CP_COHER_SIZE__COHER_SIZE_256B_MASK;
  flushCmd.cpCoherSizeHi.u32All = CP_COHER_SIZE_HI__COHER_SIZE_HI_256B_MASK__CI__VI;

  // Periodicity of polling - interval to wait from the time an
  // unsuccessful polling result is returned until a new poll is issued
  flushCmd.pollInterval = 0x04;

  // Program the Coherence Control Register from the requested options
  uint32_t coher_cntl = 0;
  if (options->l1) {
    coher_cntl |= CP_COHER_CNTL__TCL1_ACTION_ENA_MASK;
  }
  if (options->l2) {
    coher_cntl |= CP_COHER_CNTL__TC_ACTION_ENA_MASK | CP_COHER_CNTL__TC_WB_ACTION_ENA_MASK__CI__VI;
  }
  if (options->icache) {
    coher_cntl |= CP_COHER_CNTL__SH_ICACHE_ACTION_ENA_MASK;
  }
  if (options->kcache) {
    coher_cntl |= CP_COHER_CNTL__SH_KCACHE_ACTION_ENA_MASK;
  }
  flushCmd.coherCntl = coher_cntl;

  // Copy AcquireMem command buffer stream
  APPEND_COMMAND_WRAPPER(cmdbuf, flushCmd);
}
|
||||
|
||||
/// @brief Appends a DMA_DATA packet that copies @p copySize bytes from
/// @p srcAddr to @p dstAddr.
///
/// @param cmdbuf Command buffer the packet is appended to
/// @param srcAddr Address of the source buffer
/// @param dstAddr Address of the destination buffer
/// @param copySize Number of bytes to copy; asserted to be below 0x1FFFFF
/// @param waitForConfirm Stored in the packet's rawWait field
///
/// @return void
void Gfx8CmdWriter::BuildDmaDataPacket(CmdBuf* cmdbuf, uint32_t* srcAddr, uint32_t* dstAddr,
                                       uint32_t copySize, bool waitForConfirm) {
  // The byte count field is documented here as 21 bits wide
  // NOTE(review): the check rejects 0x1FFFFF (2 MB - 1) itself, although the
  // original comment names that value as the limit - confirm intent
  assert(copySize < 0x1FFFFF);

  PM4CMDDMADATA dma;
  memset(&dma, 0, sizeof(PM4CMDDMADATA));
  dma.header.u32All =
      (PM4_TYPE_3_HDR(IT_DMA_DATA__CI__VI, PM4_CMD_DMA_DATA_DWORDS, ShaderCompute, 0));

  // Id of Micro Engine
  dma.engine = 0;

  // Source buffer attributes: location, ATC property, cache policy and
  // volatile. A cache policy of 1 means "stream"
  dma.srcSel = 0;
  dma.srcATC = atc_support_;
  dma.srcCachePolicy = 1;
  dma.srcVolatile = 0;

  // Destination buffer attributes, same meaning as for the source
  dma.dstSel = 0;
  dma.dstATC = atc_support_;
  dma.dstCachePolicy = 1;
  dma.dstVolatile = 0;

  // Source and destination addresses, each split into 32-bit halves
  dma.srcAddrLoOrData = PtrLow32(srcAddr);
  dma.srcAddrHi = PtrHigh32(srcAddr);
  dma.dstAddrLo = PtrLow32(dstAddr);
  dma.dstAddrHi = PtrHigh32(dstAddr);

  // Number of bytes to copy
  dma.command.byteCount = copySize;

  // Make this DMA wait when its source is the destination of a previous DMA
  dma.command.rawWait = waitForConfirm;

  APPEND_COMMAND_WRAPPER(cmdbuf, dma);
}
|
||||
|
||||
} // gfx8
|
||||
} // pm4_profile
|
||||
@@ -1,201 +0,0 @@
|
||||
#ifndef _GFX8_CMDWRITER_H_
|
||||
#define _GFX8_CMDWRITER_H_
|
||||
|
||||
#include "cmdwriter.h"
|
||||
#include "gfx8_cmds.h"
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
namespace gfx8 {
|
||||
|
||||
/// @brief class Gfx8CmdWriter implements the virtual class CommandWriter
|
||||
/// for Sea Islands (CI) and VI chipset
|
||||
class Gfx8CmdWriter : public CommandWriter {
|
||||
public:
|
||||
Gfx8CmdWriter(bool atc_support, bool pcie_atomic_support);
|
||||
|
||||
/// @brief Dword specifying NOOP command for SI/CI/VI chipsets. The macro
|
||||
/// populates the NOOP command which is 32-bits wide. The second parameter,
|
||||
/// the COUNT field of NOOP command, specifies the number of Dwords to skip.
|
||||
/// To skip ZERO Dwords the value should be set to 0x3FFF. Since the macro
|
||||
/// decrements the second parameter by TWO, an artifact of its definition,
|
||||
/// the value is incremented by TWO to 0x4001 (0x3FFF + 2).
|
||||
///
|
||||
inline uint32_t GetNoOpCmd() {
|
||||
static const uint32_t nopCmd = PM4_TYPE_3_HDR(IT_NOP, 0x4001, ShaderCompute, 0);
|
||||
return nopCmd;
|
||||
}
|
||||
|
||||
void BuildBarrierCommand(CmdBuf* cmdBuf);
|
||||
|
||||
void BuildIndirectBufferCmd(CmdBuf* cmdbuf, const void* cmd_addr, std::size_t cmd_size);
|
||||
|
||||
void BuildBOPNotifyCmd(CmdBuf* cmdbuf, const void* write_addr, uint32_t write_val,
|
||||
bool interrupt);
|
||||
|
||||
void BuildBarrierFenceCommands(CmdBuf* cmdbuf);
|
||||
|
||||
void BuildWriteEventPacket(CmdBuf* cmdbuf, uint32_t event);
|
||||
|
||||
void BuildWaitRegMemCommand(CmdBuf* cmdbuf, bool mem_space, uint64_t wait_addr, bool func_eq,
|
||||
uint32_t mask_val, uint32_t wait_val);
|
||||
|
||||
void BuildWriteUnshadowRegPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value);
|
||||
|
||||
/// @brief Build CP command to program a Gpu register
|
||||
///
|
||||
/// @param cmdbuf Pointer to command buffer to be appended
|
||||
/// @param addr Register to be programmed
|
||||
/// @param value Value to write into register
|
||||
///
|
||||
/// @return void
|
||||
void BuildWriteUConfigRegPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value);
|
||||
|
||||
void BuildWriteShRegPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value);
|
||||
|
||||
void BuildCopyDataPacket(CmdBuf* cmdbuf, uint32_t src_sel, uint32_t src_addr_lo,
|
||||
uint32_t src_addr_hi, uint32_t* dst_addr, uint32_t size, bool wait);
|
||||
|
||||
void BuildWriteWaitIdlePacket(CmdBuf* cmdbuf);
|
||||
|
||||
// Will issue a VGT event including a cache flush later on
|
||||
void BuildVgtEventPacket(CmdBuf* cmdbuf, uint32_t vgtEvent);
|
||||
|
||||
void BuildWriteRegisterPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value);
|
||||
|
||||
void BuildWriteEventQueryPacket(CmdBuf* cmdbuf, uint32_t event, uint32_t* addr);
|
||||
|
||||
void BuildAtomicPacket(CmdBuf* cmdbuf, AtomicType atomic_op, volatile uint32_t* addr,
|
||||
uint32_t value, uint32_t compare);
|
||||
|
||||
void BuildAtomicPacket64(CmdBuf* cmdbuf, AtomicType atomic_op, volatile uint64_t* addr,
|
||||
uint64_t value = 0, uint64_t compare = 0);
|
||||
|
||||
size_t SizeOfAtomicPacket() const;
|
||||
|
||||
void BuildConditionalExecute(CmdBuf* cmdbuf, uint32_t* signal, uint16_t count);
|
||||
|
||||
void BuildWriteDataCommand(CmdBuf* cmdbuf, uint32_t* write_addr, uint32_t write_value);
|
||||
|
||||
void BuildWriteData64Command(CmdBuf* cmdbuf, uint64_t* write_addr, uint64_t write_value);
|
||||
|
||||
void BuildCacheFlushPacket(CmdBuf* cmdbuf);
|
||||
|
||||
/// Writes into input buffer Gpu commands to flush its cache. It is
|
||||
/// necessary that the buffer provided for flush commands is large
|
||||
/// enough to accommodate the full set of commands. It should be at
|
||||
/// least 512 bytes.
|
||||
///
|
||||
/// @param tsCmdBuf Buffer to write commands to.
|
||||
/// @param writeAddr Registered address into which GPU should write
|
||||
/// a user provided value upon executing the flush commands.
|
||||
/// @param writeVal User provided value written by GPU at user provided
|
||||
/// address, upon executing the flush commands.
|
||||
///
|
||||
/// @return void
|
||||
void BuildFlushCacheCmd(CmdBuf* cmdBuf, FlushCacheOptions* options, uint32_t* writeAddr,
|
||||
uint32_t writeVal);
|
||||
|
||||
/// Builds Gpu command to copy data from source to destination buffer
|
||||
/// using DMA engine.
|
||||
///
|
||||
/// @param cmdbuf Buffer updated with Gpu copy command
|
||||
/// @param srcAddr Address of source buffer address
|
||||
/// @param dstAddr Address of destination buffer address
|
||||
/// @param copySize Size of data to copy in bytes
|
||||
/// @param waitForCompletion if command should wait for copying to complete
|
||||
void BuildDmaDataPacket(CmdBuf* cmdBuf, uint32_t* srcAddr, uint32_t* dstAddr, uint32_t copySize,
|
||||
bool waitForCompletion);
|
||||
|
||||
protected:
|
||||
/// @brief Copies data from source buffer to destination buffer
|
||||
///
|
||||
/// @param dst_addr Address of destination buffer data
|
||||
///
|
||||
/// @count Size of data to copy in 32-bit words
|
||||
///
|
||||
/// @param src_addr Address of buffer containing source data
|
||||
///
|
||||
/// @return void
|
||||
virtual void WriteUserData(uint32_t* dst_addr, uint32_t count, const void* src_addr);
|
||||
|
||||
/// @brief Append an instance of Gpu command into input command buffer stream.
|
||||
///
|
||||
/// @param cmdbuf CommandWriter object appended with anohter Gpu command
|
||||
///
|
||||
/// @param cmd Gpu command to be appended into command buffer
|
||||
///
|
||||
/// @return void
|
||||
template <class T> void AppendCommand(CmdBuf* cmdbuf, const T& cmd);
|
||||
|
||||
private:
|
||||
/// @brief Initializes a Gpu command which can be used to
|
||||
/// reference a Gpu command stream indirectly
|
||||
void InitializeLaunchTemplate();
|
||||
|
||||
/// @brief Initializes a Gpu command to perform atomic operations
|
||||
////
|
||||
void InitializeAtomicTemplate();
|
||||
|
||||
/// @brief Initializes a Gpu command to allow conditional execution
|
||||
/// of a Gpu command stream
|
||||
void InitializeConditionalTemplate();
|
||||
|
||||
/// @brief Initializes a Gpu command to let command processor
|
||||
/// wait for some update before letting other commands to be
|
||||
/// processed
|
||||
void InitializeWaitRegMemTemplate();
|
||||
|
||||
/// @brief Initializes the template for Barrier command.
|
||||
/// Applications can use Barrier command to ensure their
|
||||
/// command is executed only after all other commands have
|
||||
/// completed their execution.
|
||||
void InitializeBarrierTemplate();
|
||||
|
||||
void BuildUpdateHostAddress(CmdBuf* cmdbuf, uint64_t* addr, int64_t value);
|
||||
|
||||
/// @brief Initializes Acquire Memory command template. Users
|
||||
/// can submit this command to invalidate Gpu caches - L1 and
|
||||
/// or L2.
|
||||
void InitializeAcquireMemTemplate();
|
||||
|
||||
/// @brief Initializes an instance of Write Data command
|
||||
/// for use by an application
|
||||
void InitializeWriteDataTemplate();
|
||||
void InitializeWriteData64Template();
|
||||
|
||||
/// @brief Instance of Gpu command to reference dispatch commands
|
||||
LaunchTemplate launch_template_;
|
||||
|
||||
/// @brief Instance of Gpu command to use in performing atomic operations
|
||||
AtomicTemplate atomic_template_;
|
||||
|
||||
/// @brief Instance of Gpu command to use in conditional execution
|
||||
/// of a command stream
|
||||
ConditionalExecuteTemplate conditional_template_;
|
||||
|
||||
/// @brief Instance of Pm4 command WRITE_DATA
|
||||
WriteDataTemplate write_data_template_;
|
||||
WriteData64Template write_data64_template_;
|
||||
|
||||
/// @brief Instance of Pm4 command EVENT_WRITE
|
||||
BarrierTemplate pending_dispatch_template_;
|
||||
|
||||
/// @brief Instance of Pm4 command ACQUIRE_MEM
|
||||
AcquireMemTemplate invalidate_cache_template_;
|
||||
|
||||
/// @brief Instance of Pm4 command WAIT_REG_MEM
|
||||
WaitRegMemTemplate wait_reg_mem_template_;
|
||||
|
||||
/// @brief ATC support.
|
||||
bool atc_support_;
|
||||
|
||||
/// @brief PCIe atomic support.
|
||||
bool pcie_atomic_support_;
|
||||
};
|
||||
|
||||
} // gfx8
|
||||
|
||||
} // pm4_profile
|
||||
|
||||
#endif // _GFX8_CMDWRITER_H_
|
||||
@@ -1,90 +0,0 @@
|
||||
#ifndef _GFX9_CMDS_H_
|
||||
#define _GFX9_CMDS_H_
|
||||
|
||||
#include "gfxip/gfx9/gfx9_utils.h"
|
||||
#include "gfxip/gfx9/gfx9_enum.h"
|
||||
#include "gfxip/gfx9/gfx9_mask.h"
|
||||
#include "gfxip/gfx9/gfx9_offset.h"
|
||||
#include "gfxip/gfx9/gfx9_typedef.h"
|
||||
#include "gfxip/gfx9/gfx9_registers.h"
|
||||
#include "gfxip/gfx9/gfx9_pm4_it_opcodes.h"
|
||||
#include "gfxip/gfx9/f32_mec_pm4_packets_vg10.h"
|
||||
#include "gfxip/gfx9/f32_pfp_pm4_packets_vg10.h"
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
namespace gfx9 {
|
||||
|
||||
/// @brief Initializer for commands that set shader registers
|
||||
template <class T> void GenerateSetShRegHeader(T* pm4, uint32_t reg_addr) {
|
||||
pm4->cmd_set_data.header.u32All = PM4_TYPE3_HDR(IT_SET_SH_REG, sizeof(T) / sizeof(uint32_t));
|
||||
pm4->cmd_set_data.bitfields2.reg_offset = reg_addr - PERSISTENT_SPACE_START;
|
||||
}
|
||||
|
||||
// @brief Initializer for various Gpu command headers
|
||||
template <class T> void GenerateCmdHeader(T* pm4, IT_OpCodeType op_code) {
|
||||
pm4->header.u32All = PM4_TYPE3_HDR(op_code, sizeof(T) / sizeof(uint32_t));
|
||||
}
|
||||
|
||||
// @brief Initializer for commands that set configuration registers
|
||||
template <class T> void GenerateSetConfigRegHeader(T* pm4, uint32_t reg_addr) {
|
||||
pm4->cmd_set_data.header.u32All = PM4_TYPE3_HDR(IT_SET_CONFIG_REG, sizeof(T) / sizeof(uint32_t));
|
||||
pm4->cmd_set_data.bitfields2.reg_offset = reg_addr - CONFIG_SPACE_START;
|
||||
}
|
||||
|
||||
/// @brief Structure used to issue a Gpu Barrier command
|
||||
struct BarrierTemplate {
|
||||
PM4MEC_EVENT_WRITE event_write;
|
||||
};
|
||||
|
||||
/// @brief Structure used to configure the flushing of
|
||||
/// various caches - instruction, constants, L1 and L2
|
||||
struct AcquireMemTemplate {
|
||||
PM4MEC_ACQUIRE_MEM acquire_mem;
|
||||
};
|
||||
|
||||
/// @brief Structure used to reference another Gpu command
|
||||
/// indirectly. Generally used to reference a list of Gpu
|
||||
/// commands (dispatch cmds) indirectly
|
||||
struct LaunchTemplate {
|
||||
PM4MEC_INDIRECT_BUFFER indirect_buffer;
|
||||
};
|
||||
|
||||
/// @brief Structure used to determine the end of
|
||||
/// a kernel including cache flushes and writing to
|
||||
/// a user configurable memory location
|
||||
struct EndofKernelNotifyTemplate {
|
||||
PM4MEC_RELEASE_MEM release_mem;
|
||||
};
|
||||
|
||||
// Desc: Strucuture used to perform various atomic
|
||||
// operations - add, subtract, increment, etc
|
||||
struct AtomicTemplate {
|
||||
PM4MEC_ATOMIC_MEM atomic;
|
||||
};
|
||||
|
||||
/// @brief PM4 command to write a 32-bit value into a memory
|
||||
/// location accessible to Gpu
|
||||
struct WriteDataTemplate {
|
||||
PM4MEC_WRITE_DATA write_data;
|
||||
uint32_t write_data_value;
|
||||
};
|
||||
|
||||
/// @brief PM4 command to write a 64-bit value into a memory
|
||||
/// location accessible to Gpu
|
||||
struct WriteData64Template {
|
||||
PM4MEC_WRITE_DATA write_data;
|
||||
uint64_t write_data_value;
|
||||
};
|
||||
|
||||
/// @brief PM4 command to wait for a certain event before proceeding
|
||||
/// to process another command on the queue
|
||||
struct WaitRegMemTemplate {
|
||||
PM4MEC_WAIT_REG_MEM wait_reg_mem;
|
||||
};
|
||||
|
||||
} // gfx9
|
||||
|
||||
} // pm4_profile
|
||||
|
||||
#endif // _GFX9_CMDS_H_
|
||||
@@ -1,743 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "gfx9_cmdwriter.h"
|
||||
|
||||
/// @brief Debug aid that dumps a PM4 packet to std::clog as a labelled row of
/// zero-padded 32-bit hex words. Does nothing when NDEBUG is defined.
///
/// @param command Packet to dump; read as sizeof(T) / sizeof(uint32_t) dwords
/// @param name Label printed in front of the dump
template <class T> static void PrintPm4Packet(const T& command, const char* name) {
#if !defined(NDEBUG)
  const uint32_t dword_count = sizeof(command) / sizeof(uint32_t);

  std::ostringstream label;
  label << "'" << name << "' size(" << std::dec << dword_count << ")";

  // std::clog is shared with the rest of the process: remember its formatting
  // state so the manipulators used below do not leak out of this function
  const std::ios_base::fmtflags saved_flags = std::clog.flags();
  const char saved_fill = std::clog.fill();

  std::clog << std::setw(40) << std::left << label.str() << ":";

  // The label's left alignment is sticky; switch to right alignment so the
  // '0' fill is placed in front of the digits (0x1 -> 00000001)
  std::clog << std::hex << std::right << std::setfill('0');

  // Read the packet bytewise instead of through a uint32_t* cast
  const unsigned char* raw = reinterpret_cast<const unsigned char*>(&command);
  for (uint32_t idx = 0; idx < dword_count; idx++) {
    uint32_t dword;
    memcpy(&dword, raw + idx * sizeof(uint32_t), sizeof(dword));
    std::clog << " " << std::setw(8) << dword;
  }
  std::clog << std::endl;

  std::clog.fill(saved_fill);
  std::clog.flags(saved_flags);
#else
  (void)command;
  (void)name;
#endif
}
|
||||
|
||||
// Prints the packet (debug builds only) and then appends it to the command
// buffer. Wrapped in do { } while (0) so the two statements always act as a
// single statement, e.g. under an unbraced if/else. Invoke it with a
// trailing semicolon.
#define APPEND_COMMAND_WRAPPER(cmdbuf, command)                                                    \
  do {                                                                                             \
    PrintPm4Packet(command, __FUNCTION__);                                                         \
    AppendCommand(cmdbuf, command);                                                                \
  } while (0)
|
||||
|
||||
namespace pm4_profile {
|
||||
namespace gfx9 {
|
||||
|
||||
/// @brief Appends the raw bytes of one Gpu command to the command buffer.
///
/// @param cmdbuf Command buffer that receives the command
/// @param command Gpu command to append; sizeof(T) bytes are copied
template <class T> void Gfx9CmdWriter::AppendCommand(CmdBuf* cmdbuf, const T& command) {
  const T* packet = &command;
  cmdbuf->AppendCommand(packet, sizeof(T));
}
|
||||
|
||||
void Gfx9CmdWriter::InitializeLaunchTemplate() {
|
||||
memset(&launch_template_, 0, sizeof(launch_template_));
|
||||
GenerateCmdHeader(&launch_template_.indirect_buffer, IT_INDIRECT_BUFFER);
|
||||
}
|
||||
|
||||
/// @brief Zeroes the atomic template, stamps its ATOMIC_MEM header and
/// selects the "stream" cache policy.
void Gfx9CmdWriter::InitializeAtomicTemplate() {
  PM4MEC_ATOMIC_MEM* atomicCmd = &atomic_template_.atomic;

  // The clear starts at the packet member and spans the size of the whole
  // template, exactly as before
  memset(atomicCmd, 0, sizeof(atomic_template_));
  GenerateCmdHeader(atomicCmd, IT_ATOMIC_MEM);

  // Cache policy used by the atomic operation
  atomicCmd->bitfields2.cache_policy = cache_policy__mec_atomic_mem__stream;
}
|
||||
|
||||
void Gfx9CmdWriter::InitializeBarrierTemplate() {
|
||||
memset(&pending_dispatch_template_, 0, sizeof(pending_dispatch_template_));
|
||||
GenerateCmdHeader(&pending_dispatch_template_.event_write, IT_EVENT_WRITE);
|
||||
|
||||
MEC_EVENT_WRITE_event_index_enum index;
|
||||
index = event_index__mec_event_write__cs_partial_flush;
|
||||
pending_dispatch_template_.event_write.bitfields2.event_index = index;
|
||||
pending_dispatch_template_.event_write.bitfields2.event_type = CS_PARTIAL_FLUSH;
|
||||
}
|
||||
|
||||
void Gfx9CmdWriter::InitializeAcquireMemTemplate() {
|
||||
memset(&invalidate_cache_template_, 0, sizeof(invalidate_cache_template_));
|
||||
GenerateCmdHeader(&invalidate_cache_template_.acquire_mem, IT_ACQUIRE_MEM);
|
||||
|
||||
// Specify the CP module which will process this packet
|
||||
PM4MEC_ACQUIRE_MEM* acquire_mem = &invalidate_cache_template_.acquire_mem;
|
||||
|
||||
// Specify the size of memory to invalidate. Size is
|
||||
// specified in terms of 256 byte chunks. A coher_size
|
||||
// of 0xFFFFFFFF actually specified 0xFFFFFFFF00 (40 bits)
|
||||
// of memory. The field coher_size_hi specifies memory from
|
||||
// bits 40-64 for a total of 256 TB.
|
||||
acquire_mem->coher_size = 0xFFFFFFFF;
|
||||
acquire_mem->bitfields4.coher_size_hi = 0xFFFFFF;
|
||||
|
||||
// Specify the address of memory to invalidate. The
|
||||
// address must be 256 byte aligned.
|
||||
acquire_mem->coher_base_lo = 0x00;
|
||||
acquire_mem->bitfields6.coher_base_hi = 0x00;
|
||||
|
||||
// Specify the poll interval for determing if operation is complete
|
||||
acquire_mem->bitfields7.poll_interval = 0x04;
|
||||
}
|
||||
|
||||
void Gfx9CmdWriter::InitializeWaitRegMemTemplate() {
|
||||
memset(&wait_reg_mem_template_, 0, sizeof(wait_reg_mem_template_));
|
||||
GenerateCmdHeader(&wait_reg_mem_template_.wait_reg_mem, IT_WAIT_REG_MEM);
|
||||
|
||||
PM4MEC_WAIT_REG_MEM* wait_reg_mem = &wait_reg_mem_template_.wait_reg_mem;
|
||||
|
||||
wait_reg_mem->bitfields7.poll_interval = 0x04;
|
||||
wait_reg_mem->bitfields2.operation = operation__mec_wait_reg_mem__wait_reg_mem;
|
||||
}
|
||||
|
||||
/// @brief Zeroes a WRITE_DATA packet and fills in its header and fixed fields.
///
/// @param write_data Packet to initialize
/// @param bit32 true when the payload is one 32-bit dword, false when it is
/// two dwords
void Gfx9CmdWriter::InitializeWriteDataTemplate(PM4MEC_WRITE_DATA* write_data, bool bit32) {
  memset(write_data, 0, sizeof(PM4MEC_WRITE_DATA));

  // The packet size in the header covers the packet itself plus its payload
  const uint32_t payload_dwords = bit32 ? 1 : 2;
  const uint32_t packet_dwords = payload_dwords + (sizeof(PM4MEC_WRITE_DATA) / sizeof(uint32_t));
  write_data->ordinal1 = PM4_TYPE3_HDR(IT_WRITE_DATA, packet_dwords);

  // Destination class of the write
  write_data->bitfields2.dst_sel = dst_sel__mec_write_data__memory;

  // Increment the address when more than one dword is written
  write_data->bitfields2.addr_incr = addr_incr__mec_write_data__increment_address;

  // Wait for write confirmation, and use the "stream" cache policy
  write_data->bitfields2.wr_confirm = wr_confirm__mec_write_data__wait_for_write_confirmation;
  write_data->bitfields2.cache_policy = cache_policy__mec_write_data__stream;
}
|
||||
|
||||
/// @brief Initializes the WRITE_DATA template that carries a 32-bit payload.
void Gfx9CmdWriter::InitializeWriteDataTemplate() {
  const bool payload_is_32bit = true;
  InitializeWriteDataTemplate(&write_data_template_.write_data, payload_is_32bit);
}
|
||||
|
||||
/// @brief Initializes the WRITE_DATA template that carries a 64-bit payload.
void Gfx9CmdWriter::InitializeWriteData64Template() {
  const bool payload_is_32bit = false;
  InitializeWriteDataTemplate(&write_data64_template_.write_data, payload_is_32bit);
}
|
||||
|
||||
/// @brief Placeholder: currently performs no work. The initialization that
/// used to live here is disabled and kept below for reference only.
void Gfx9CmdWriter::InitializeConditionalTemplate() {
  // Disabled implementation:
  //
  //   memset(&conditional_template_.conditional, 0, sizeof(conditional_template_));
  //   GenerateCmdHeader(&conditional_template_.conditional, IT_COND_EXEC);
  //
  //   if (atc_support_) {
  //     const uint32_t kAtcShift = 24;
  //     conditional_template_.conditional.ordinal4 |= 1 << kAtcShift;
  //   }
}
|
||||
|
||||
/// @brief Zeroes the end-of-kernel notify template and configures it as a
/// RELEASE_MEM packet for the bottom-of-pipe timestamp event, with no
/// interrupt and no data selected by default.
void Gfx9CmdWriter::InitializeEndOfKernelNotifyTemplate() {
  memset(&notify_template_, 0, sizeof(notify_template_));
  GenerateCmdHeader(&notify_template_.release_mem, IT_RELEASE_MEM);

  // Set the event type to be bottom of pipe and cache policy
  PM4MEC_RELEASE_MEM* rel_mem = &notify_template_.release_mem;
  rel_mem->bitfields2.event_type = BOTTOM_OF_PIPE_TS;
  rel_mem->bitfields2.cache_policy = cache_policy__mec_release_mem__stream;
  rel_mem->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;

  // Specify the attributes of source and destinations of data
  rel_mem->bitfields3.int_sel = int_sel__mec_release_mem__none;
  rel_mem->bitfields3.data_sel = data_sel__mec_release_mem__none;
  rel_mem->bitfields3.dst_sel = dst_sel__mec_release_mem__memory_controller;
}
|
||||
|
||||
/// @brief Records the capability flags and prepares every command template.
///
/// @param atc_support Stored as the writer's ATC support flag
/// @param pcie_atomic_support Stored as the writer's PCIe atomic support flag
Gfx9CmdWriter::Gfx9CmdWriter(bool atc_support, bool pcie_atomic_support) {
  // Capability flags are stored before any template is initialized
  atc_support_ = atc_support;
  pcie_atomic_support_ = pcie_atomic_support;

  // Command templates, in their original initialization order
  InitializeLaunchTemplate();
  InitializeAtomicTemplate();
  InitializeBarrierTemplate();
  InitializeAcquireMemTemplate();
  InitializeWaitRegMemTemplate();
  InitializeWriteDataTemplate();
  InitializeWriteData64Template();
  InitializeConditionalTemplate();
  InitializeEndOfKernelNotifyTemplate();
}
|
||||
|
||||
/// @brief Appends an INDIRECT_BUFFER packet referencing a command stream.
///
/// @param cmdbuf Command buffer the packet is appended to
/// @param cmd_addr Address of the referenced command stream; asserted to be
/// 4 byte aligned
/// @param cmd_size Size of the referenced command stream in bytes
void Gfx9CmdWriter::BuildIndirectBufferCmd(CmdBuf* cmdbuf, const void* cmd_addr,
                                           std::size_t cmd_size) {
  // The low two address bits are shifted out below, so they must be zero
  const uint64_t address = uintptr_t(cmd_addr);
  assert((address & 0x3) == 0 && "IndirectBuffer address must be 4 byte aligned");

  LaunchTemplate packet = launch_template_;
  PM4MEC_INDIRECT_BUFFER& ib = packet.indirect_buffer;

  // Address of the indirect buffer holding the command stream
  ib.bitfields2.ib_base_lo = (PtrLow32(cmd_addr) >> 2);
  ib.ib_base_hi = PtrHigh32(cmd_addr);

  // Size in dwords, validity, privilege and cache policy to apply while the
  // commands of the indirect buffer execute
  ib.bitfields4.ib_size = cmd_size / sizeof(uint32_t);
  ib.bitfields4.valid = 1;
  ib.bitfields4.priv = 0;
  ib.bitfields4.cache_policy = cache_policy__mec_indirect_buffer__stream;

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
/// @brief Appends an ATOMIC_MEM packet performing a 32-bit atomic operation.
///
/// @param cmdbuf Command buffer the packet is appended to
/// @param atomic_op Operation to perform; unknown values trip an assert
/// @param addr Destination address; asserted to be 8 byte aligned
/// @param value Source operand (asserted to be 1 for increment/decrement)
/// @param compare Comparison operand, used by the compare-and-swap variants
void Gfx9CmdWriter::BuildAtomicPacket(CmdBuf* cmdbuf, AtomicType atomic_op, volatile uint32_t* addr,
                                      uint32_t value, uint32_t compare) {
  AtomicTemplate packet = atomic_template_;
  PM4MEC_ATOMIC_MEM& atomic = packet.atomic;

  // Destination address; checked for alignment before it is encoded
  const uint32_t addr_lo = PtrLow32((void*)addr);
  const uint32_t addr_hi = PtrHigh32((void*)addr);
  assert((addr_lo & 0x7) == 0 && "destination address must be 8 byte aligned");
  atomic.addr_lo = addr_lo;
  atomic.addr_hi = addr_hi;

  switch (atomic_op) {
    case CommandWriter::kAtomicTypeIncrement:
      assert(value == 0x01 && "Atomic Increment value should be 1");
      // Increment is encoded as an add: falls through on purpose
    case CommandWriter::kAtomicAdd:
      atomic.bitfields2.atomic = TC_OP_ATOMIC_ADD_RTN_32;
      atomic.src_data_lo = value;
      break;

    case CommandWriter::kAtomicTypeDecrement:
      assert(value == 0x01 && "Atomic Decrement value should be 1");
      // Decrement is encoded as a subtract: falls through on purpose
    case CommandWriter::kAtomicSubtract:
      atomic.bitfields2.atomic = TC_OP_ATOMIC_SUB_RTN_32;
      atomic.src_data_lo = value;
      break;

    case CommandWriter::kAtomicTypeBlockingCompareAndSwap:
      atomic.bitfields2.command = command__mec_atomic_mem__loop_until_compare_satisfied;
      atomic.bitfields9.loop_interval = 128;
      // Blocking variant adds looping to the plain CAS: falls through on purpose
    case CommandWriter::kAtomicTypeCompareAndSwap:
      atomic.bitfields2.atomic = TC_OP_ATOMIC_CMPSWAP_RTN_32;
      atomic.src_data_lo = value;
      atomic.cmp_data_lo = compare;
      break;

    case CommandWriter::kAtomicSwap:
      atomic.bitfields2.atomic = TC_OP_ATOMIC_SWAP_RTN_32;
      atomic.src_data_lo = value;
      break;

    default:
      assert((false) && "Atomic operation id is invalid");
  }

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
/// @brief Appends an ATOMIC_MEM packet performing a 64-bit atomic operation.
///
/// @param cmdbuf Command buffer the packet is appended to
/// @param atomic_op Operation to perform; unknown values trip an assert
/// @param addr Destination address; asserted to be 8 byte aligned
/// @param value Source operand (asserted to be 1 for increment/decrement)
/// @param compare Comparison operand, used by the compare-and-swap variants
void Gfx9CmdWriter::BuildAtomicPacket64(CmdBuf* cmdbuf, AtomicType atomic_op,
                                        volatile uint64_t* addr, uint64_t value, uint64_t compare) {
  AtomicTemplate packet = atomic_template_;
  PM4MEC_ATOMIC_MEM& atomic = packet.atomic;

  // Destination address; checked for alignment before it is encoded
  const uint32_t addr_lo = PtrLow32((void*)addr);
  const uint32_t addr_hi = PtrHigh32((void*)addr);
  assert((addr_lo & 0x7) == 0 && "destination address must be 8 byte aligned");
  atomic.addr_lo = addr_lo;
  atomic.addr_hi = addr_hi;

  switch (atomic_op) {
    case CommandWriter::kAtomicTypeIncrement:
      assert(value == 0x01 && "Atomic Increment value should be 1");
      // Increment is encoded as an add: falls through on purpose
    case CommandWriter::kAtomicAdd:
      atomic.bitfields2.atomic = TC_OP_ATOMIC_ADD_RTN_64;
      atomic.src_data_lo = Low32(value);
      atomic.src_data_hi = High32(value);
      break;

    case CommandWriter::kAtomicTypeDecrement:
      assert(value == 0x01 && "Atomic Decrement value should be 1");
      // Decrement is encoded as a subtract: falls through on purpose
    case CommandWriter::kAtomicSubtract:
      atomic.bitfields2.atomic = TC_OP_ATOMIC_SUB_RTN_64;
      atomic.src_data_lo = Low32(value);
      atomic.src_data_hi = High32(value);
      break;

    case CommandWriter::kAtomicTypeBlockingCompareAndSwap:
      atomic.bitfields2.command = command__mec_atomic_mem__loop_until_compare_satisfied;
      atomic.bitfields9.loop_interval = 128;
      // Blocking variant adds looping to the plain CAS: falls through on purpose
    case CommandWriter::kAtomicTypeCompareAndSwap:
      atomic.bitfields2.atomic = TC_OP_ATOMIC_CMPSWAP_RTN_64;
      atomic.src_data_lo = Low32(value);
      atomic.src_data_hi = High32(value);
      atomic.cmp_data_lo = Low32(compare);
      atomic.cmp_data_hi = High32(compare);
      break;

    case CommandWriter::kAtomicSwap:
      atomic.bitfields2.atomic = TC_OP_ATOMIC_SWAP_RTN_64;
      atomic.src_data_lo = Low32(value);
      atomic.src_data_hi = High32(value);
      break;

    default:
      assert((false) && "Atomic operation id is invalid");
  }

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
/// @brief Appends the barrier (EVENT_WRITE) template unchanged.
///
/// @param cmdBuf Command buffer the packet is appended to
void Gfx9CmdWriter::BuildBarrierCommand(CmdBuf* cmdBuf) {
  const BarrierTemplate& barrier = pending_dispatch_template_;
  APPEND_COMMAND_WRAPPER(cmdBuf, barrier);
}
|
||||
|
||||
/// @brief Appends a WRITE_DATA packet carrying a 32-bit payload.
///
/// @param cmdbuf Command buffer the packet is appended to
/// @param write_addr Destination address; asserted to be 4 byte aligned
/// @param write_value 32-bit value to be written
void Gfx9CmdWriter::BuildWriteDataCommand(CmdBuf* cmdbuf, uint32_t* write_addr,
                                          uint32_t write_value) {
  // The low two address bits are shifted out below, so they must be zero
  const uint64_t address = uintptr_t(write_addr);
  assert((address & 0x3) == 0 && "WriteData address must be 4 byte aligned");

  // Private copy of the initialized packet and its payload slot
  WriteDataTemplate packet = write_data_template_;

  // Payload
  packet.write_data_value = write_value;

  // Destination address
  packet.write_data.bitfields3c.dst_mem_addr_lo = (PtrLow32(write_addr) >> 2);
  packet.write_data.dst_mem_addr_hi = PtrHigh32(write_addr);

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
/// @brief Appends a WRITE_DATA packet carrying a 64-bit payload.
///
/// @param cmdbuf Command buffer the packet is appended to
/// @param write_addr Destination address; asserted to be 4 byte aligned
/// @param write_value 64-bit value to be written
void Gfx9CmdWriter::BuildWriteData64Command(CmdBuf* cmdbuf, uint64_t* write_addr,
                                            uint64_t write_value) {
  // The low two address bits are shifted out below, so they must be zero
  const uint64_t address = uintptr_t(write_addr);
  assert((address & 0x3) == 0 && "WriteData address must be 4 byte aligned");

  // Private copy of the initialized packet and its payload slot
  WriteData64Template packet = write_data64_template_;

  // Destination address
  packet.write_data.dst_mem_addr_hi = PtrHigh32(write_addr);
  packet.write_data.bitfields3c.dst_mem_addr_lo = (PtrLow32(write_addr) >> 2);

  // Payload
  packet.write_data_value = write_value;

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
/// @brief Appends a WAIT_REG_MEM packet that polls a memory location or a
/// register until a masked comparison against a reference value holds.
///
/// @param cmdbuf Command buffer the packet is appended to
/// @param mem_space true to poll memory space, false to poll register space
/// @param wait_addr Address or register to poll; asserted to be 4 byte
/// aligned when @p mem_space is true
/// @param func_eq true to wait for "equal", false for "not equal"
/// @param mask_val Mask applied to the polled value
/// @param wait_val Reference value used by the comparison
void Gfx9CmdWriter::BuildWaitRegMemCommand(CmdBuf* cmdbuf, bool mem_space, uint64_t wait_addr,
                                           bool func_eq, uint32_t mask_val, uint32_t wait_val) {
  WaitRegMemTemplate packet = wait_reg_mem_template_;
  PM4MEC_WAIT_REG_MEM& wait = packet.wait_reg_mem;

  // Space the polled location belongs to
  wait.bitfields2.mem_space = mem_space ? mem_space__mec_wait_reg_mem__memory_space
                                        : mem_space__mec_wait_reg_mem__register_space;

  // Comparison requested by the caller
  wait.bitfields2.function = func_eq ? function__mec_wait_reg_mem__equal_to_the_reference_value
                                     : function__mec_wait_reg_mem__not_equal_reference_value;

  // Reference value and the mask applied to the polled value
  wait.reference = wait_val;
  wait.mask = mask_val;

  // A polled memory address must be dword (4 byte) aligned
  if (mem_space) {
    assert((wait_addr & 0x3) == 0 && "WaitRegMem address must be 4 byte aligned");
  }

  // The low half is always encoded; the high half only for memory space
  wait.bitfields3a.mem_poll_addr_lo = (Low32(wait_addr) >> 2);
  if (mem_space) {
    wait.mem_poll_addr_hi = High32(wait_addr);
  }

  APPEND_COMMAND_WRAPPER(cmdbuf, packet);
}
|
||||
|
||||
/// @brief Not implemented: asserts unconditionally and appends nothing.
/// The disabled implementation is kept below for reference only.
///
/// @param cmdbuf Unused
/// @param signal Unused
/// @param count Unused
void Gfx9CmdWriter::BuildConditionalExecute(CmdBuf* cmdbuf, uint32_t* signal, uint16_t count) {
  assert(false && "BuildConditionalExecute method is not implemented");

  // Disabled implementation:
  //
  //   ConditionalExecuteTemplate conditional = conditional_template_;
  //
  //   uint32_t address_low = PtrLow32(signal);
  //   uint32_t address_high = PtrHigh32(signal);
  //   assert(!(address_low & 0x7) && "destination address must be 8 byte aligned");
  //
  //   conditional.conditional.boolAddrLo = address_low;
  //   conditional.conditional.boolAddrHi = address_high;
  //   conditional.conditional.execCount = count;
  //
  //   APPEND_COMMAND_WRAPPER(cmdbuf, conditional);
}
|
||||
|
||||
/// @brief Appends a packet that stores a 64-bit value at a host address,
/// using an atomic swap when PCIe atomics are supported and a plain
/// WRITE_DATA otherwise.
///
/// @param cmdbuf Command buffer the packet is appended to
/// @param addr Destination address
/// @param value Value to store
void Gfx9CmdWriter::BuildUpdateHostAddress(CmdBuf* cmdbuf, uint64_t* addr, int64_t value) {
  if (!pcie_atomic_support_) {
    // No atomics available: fall back to a regular 64-bit write
    BuildWriteData64Command(cmdbuf, addr, value);
    return;
  }

  BuildAtomicPacket64(cmdbuf, CommandWriter::AtomicType::kAtomicSwap, (volatile uint64_t*)addr,
                      value);
}
|
||||
|
||||
/// @brief Appends a RELEASE_MEM based notification command.
///
/// Starts from a copy of notify_template_, enables the TC and TC write-back
/// actions, and programs the packet to write a 32-bit value to the given
/// address, optionally selecting an interrupt after the write is confirmed.
///
/// @param cmdbuf Command buffer to be appended
/// @param write_addr Address the value is written to; the low part is encoded
/// shifted right by 3
/// @param write_value 32-bit value to write
/// @param interrupt Request an interrupt after write confirmation
void Gfx9CmdWriter::BuildBOPNotifyCmd(CmdBuf* cmdbuf, const void* write_addr, uint32_t write_value,
                                      bool interrupt) {
  // Work on a private copy of the template, header included
  EndofKernelNotifyTemplate notify = notify_template_;
  PM4MEC_RELEASE_MEM& release = notify.release_mem;

  // Cache operations CP performs before the write operation commences
  release.bitfields2.tc_action_ena = true;
  release.bitfields2.tc_wb_action_ena = true;

  // User specified destination address
  const uint64_t target = uint64_t(write_addr);
  release.address_hi = High32(target);
  release.bitfields4b.address_lo_64b = (Low32(target >> 3));

  // User specified 32-bit payload
  release.bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
  release.data_lo = write_value;

  // Interrupt only on request
  if (interrupt) {
    release.bitfields3.int_sel = int_sel__mec_release_mem__send_interrupt_after_write_confirm;
  }

  // Serialize the command as stream of Dwords
  APPEND_COMMAND_WRAPPER(cmdbuf, notify);
}
|
||||
|
||||
/// @brief Appends an ACQUIRE_MEM command built from invalidate_cache_template_
/// with the TC action and TC write-back action bits set in coher_cntl.
///
/// @param cmdbuf Command buffer to be appended
void Gfx9CmdWriter::BuildBarrierFenceCommands(CmdBuf* cmdbuf) {
  // TODO: the early return on using_cc_memory_policy_ is temporarily removed
  // because some OpenCL tests (test_buffers, test_relationals) are failing.
  AcquireMemTemplate fence = invalidate_cache_template_;

  // wbINVL2 by default writes-back and invalidates both L1 and L2
  fence.acquire_mem.bitfields2.coher_cntl =
      CP_COHER_CNTL__TC_ACTION_ENA_MASK | CP_COHER_CNTL__TC_WB_ACTION_ENA_MASK;

  APPEND_COMMAND_WRAPPER(cmdbuf, fence);
}
|
||||
|
||||
/*
|
||||
// PM4 packet for profilers
|
||||
#define PM4_PACKET3 (0xC0000000)
|
||||
#define PM4_PACKET3_CMD_SHIFT 8
|
||||
#define PM4_PACKET3_COUNT_SHIFT 16
|
||||
|
||||
#define PACKET3(cmd, count) \
|
||||
(PM4_PACKET3 | (((count)-1) << PM4_PACKET3_COUNT_SHIFT) | \
|
||||
((cmd) << PM4_PACKET3_CMD_SHIFT))
|
||||
*/
|
||||
|
||||
// Three Dword PM4 packet used by the register write builders:
// item[0] - packet header, item[1] - register offset, item[2] - value
typedef struct WriteRegPacket_ {
  uint32_t item[3];
} WriteRegPacket;
|
||||
|
||||
/// @brief Appends an EVENT_WRITE command for a perfcounter event.
///
/// Accepts kPerfCntrsStart, kPerfCntrsStop and kPerfCntrsSample. Any other id
/// trips an assertion; with assertions compiled out the packet is still
/// appended with the event type left at Reserved_0x00.
///
/// @param cmdbuf Command buffer to be appended
/// @param event Event id to translate into a VGT event type
void Gfx9CmdWriter::BuildWriteEventPacket(CmdBuf* cmdbuf, uint32_t event) {
  // Translate the event id first
  VGT_EVENT_TYPE vgt_type = Reserved_0x00;
  switch (event) {
    case kPerfCntrsStart: {
      vgt_type = PERFCOUNTER_START;
      break;
    }
    case kPerfCntrsStop: {
      vgt_type = PERFCOUNTER_STOP;
      break;
    }
    case kPerfCntrsSample: {
      vgt_type = PERFCOUNTER_SAMPLE;
      break;
    }
    default:
      assert(false && "Illegal VGT Event Id");
  }

  // Zeroed packet with header in the first Dword
  PM4MEC_EVENT_WRITE event_write;
  memset(&event_write, 0, sizeof(PM4MEC_EVENT_WRITE));
  event_write.ordinal1 =
      PM4_TYPE3_HDR(IT_EVENT_WRITE, (sizeof(PM4MEC_EVENT_WRITE) / sizeof(uint32_t)));
  event_write.ordinal2 = 0;

  const MEC_EVENT_WRITE_event_index_enum event_index = event_index__mec_event_write__other;
  event_write.bitfields2.event_index = event_index;
  event_write.bitfields2.event_type = vgt_type;

  // Append the built command into output Command Buffer
  APPEND_COMMAND_WRAPPER(cmdbuf, event_write);
}
|
||||
|
||||
/// @brief Appends a SET_UCONFIG_REG command writing a value into a register.
///
/// The packet is identical to the one produced by BuildWriteUConfigRegPacket,
/// so the work is delegated instead of being duplicated. The call is qualified
/// to keep the behavior independent of any override in a derived class.
///
/// @param cmdbuf Command buffer to be appended
/// @param addr Register to be programmed
/// @param value Value to write into register
void Gfx9CmdWriter::BuildWriteUnshadowRegPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value) {
  Gfx9CmdWriter::BuildWriteUConfigRegPacket(cmdbuf, addr, value);
}
|
||||
|
||||
/// @brief Appends a SET_UCONFIG_REG command writing a value into a register.
///
/// @param cmdbuf Command buffer to be appended
/// @param addr Register to be programmed; encoded relative to UCONFIG_SPACE_START
/// @param value Value to write into register
void Gfx9CmdWriter::BuildWriteUConfigRegPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value) {
  WriteRegPacket reg_write;

  // Payload first: register offset and value
  reg_write.item[2] = value;
  reg_write.item[1] = (addr - UCONFIG_SPACE_START);

  // Header Dword
  reg_write.item[0] =
      PM4_TYPE3_HDR(IT_SET_UCONFIG_REG, (1 + sizeof(PM4MEC_SET_CONFIG_REG) / sizeof(uint32_t)));

  APPEND_COMMAND_WRAPPER(cmdbuf, reg_write);
}
|
||||
|
||||
/// @brief Appends a SET_SH_REG command writing a value into a register.
///
/// @param cmdbuf Command buffer to be appended
/// @param addr Register to be programmed; encoded relative to PERSISTENT_SPACE_START
/// @param value Value to write into register
void Gfx9CmdWriter::BuildWriteShRegPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value) {
  WriteRegPacket reg_write;

  // Payload first: register offset and value
  reg_write.item[2] = value;
  reg_write.item[1] = (addr - PERSISTENT_SPACE_START);

  // Header Dword
  reg_write.item[0] =
      PM4_TYPE3_HDR(IT_SET_SH_REG, (1 + sizeof(PM4MEC_SET_CONFIG_REG) / sizeof(uint32_t)));

  APPEND_COMMAND_WRAPPER(cmdbuf, reg_write);
}
|
||||
|
||||
/// @brief Appends a COPY_DATA command copying a register or perfcounter value
/// into memory.
///
/// @param cmdbuf Command buffer to be appended
/// @param src_sel Source selector: 0 - memory mapped register, 4 - perfcounters;
/// any other value trips an assertion and the selector stays at "memory"
/// @param src_addr_lo Source register offset
/// @param src_addr_hi Not used by this implementation
/// @param dst_addr Destination memory address
/// @param size 0 selects a 32-bit copy, any other value a 64-bit copy
/// @param wait Value stored into the wr_confirm field
void Gfx9CmdWriter::BuildCopyDataPacket(CmdBuf* cmdbuf, uint32_t src_sel, uint32_t src_addr_lo,
                                        uint32_t src_addr_hi, uint32_t* dst_addr, uint32_t size,
                                        bool wait) {
  // Resolve the source selector
  MEC_COPY_DATA_src_sel_enum source = src_sel__mec_copy_data__memory;
  if (src_sel == 0) {
    source = src_sel__mec_copy_data__mem_mapped_register;
  } else if (src_sel == 4) {
    source = src_sel__mec_copy_data__perfcounters;
  } else {
    assert(false && "CopyData Illegal value for source of data");
  }

  const bool copy_32_bits = (size == 0);

  // Zeroed packet with header in the first Dword
  PM4MEC_COPY_DATA copy;
  memset(&copy, 0, sizeof(PM4MEC_COPY_DATA));
  copy.ordinal1 = PM4_TYPE3_HDR(IT_COPY_DATA, (sizeof(PM4MEC_COPY_DATA) / sizeof(uint32_t)));

  // Source attributes
  copy.bitfields2.src_sel = source;
  copy.bitfields2.src_cache_policy = src_cache_policy__mec_copy_data__stream;

  // Destination attributes
  copy.bitfields2.dst_sel = dst_sel__mec_copy_data__memory;
  copy.bitfields2.dst_cache_policy = dst_cache_policy__mec_copy_data__stream;

  copy.bitfields2.wr_confirm = (MEC_COPY_DATA_wr_confirm_enum)wait;
  copy.bitfields2.count_sel = copy_32_bits ? count_sel__mec_copy_data__32_bits_of_data
                                           : count_sel__mec_copy_data__64_bits_of_data;

  // Specify the source register offset
  copy.bitfields3a.src_reg_offset = src_addr_lo;

  // Specify the destination memory address; the low part is stored in Dword
  // units for a 32-bit copy and in 8 byte units for a 64-bit copy
  copy.dst_addr_hi = PtrHigh32(dst_addr);
  if (copy_32_bits) {
    copy.bitfields5b.dst_32b_addr_lo = (PtrLow32(dst_addr) >> 2);
  } else {
    copy.bitfields5c.dst_64b_addr_lo = (PtrLow32(dst_addr) >> 3);
  }

  // Append the built command into output Command Buffer
  APPEND_COMMAND_WRAPPER(cmdbuf, copy);
}
|
||||
|
||||
/// @brief Appends an ACQUIRE_MEM command with the TC, TCL1, TC write-back,
/// shader I-cache and shader K-cache action bits set in coher_cntl.
///
/// @param cmdbuf Command buffer to be appended
void Gfx9CmdWriter::BuildCacheFlushPacket(CmdBuf* cmdbuf) {
  // Start from the pre-initialized ACQUIRE_MEM command, header included
  PM4MEC_ACQUIRE_MEM acquire = invalidate_cache_template_.acquire_mem;

  // Program Coherence Control Register. Initialize L2 Cache flush
  // for Non-Coherent memory blocks
  const uint32_t actions = 0 | CP_COHER_CNTL__TC_ACTION_ENA_MASK |
      CP_COHER_CNTL__TCL1_ACTION_ENA_MASK | CP_COHER_CNTL__TC_WB_ACTION_ENA_MASK |
      CP_COHER_CNTL__SH_ICACHE_ACTION_ENA_MASK | CP_COHER_CNTL__SH_KCACHE_ACTION_ENA_MASK;
  acquire.bitfields2.coher_cntl = actions;

  // Copy AcquireMem command buffer stream
  APPEND_COMMAND_WRAPPER(cmdbuf, acquire);
}
|
||||
|
||||
/// @brief Appends a barrier command followed by a cache flush command.
///
/// @param cmdbuf Command buffer to be appended
void Gfx9CmdWriter::BuildWriteWaitIdlePacket(CmdBuf* cmdbuf) {
  // Order matters: barrier first, cache flush second
  BuildBarrierCommand(cmdbuf);
  BuildCacheFlushPacket(cmdbuf);
}
|
||||
|
||||
// Will issue a VGT event including a cache flush later on
|
||||
void Gfx9CmdWriter::BuildVgtEventPacket(CmdBuf* cmdbuf, uint32_t vgtEvent) {
|
||||
PM4MEC_EVENT_WRITE cp_event_initiator;
|
||||
memset(&cp_event_initiator, 0, sizeof(PM4MEC_EVENT_WRITE));
|
||||
cp_event_initiator.ordinal1 =
|
||||
PM4_TYPE3_HDR(IT_EVENT_WRITE, (sizeof(PM4MEC_EVENT_WRITE) / sizeof(uint32_t)));
|
||||
cp_event_initiator.ordinal2 = 0;
|
||||
|
||||
VGT_EVENT_TYPE eventType = Reserved_0x00;
|
||||
switch (vgtEvent) {
|
||||
case kPerfCntrsStart:
|
||||
eventType = PERFCOUNTER_START;
|
||||
break;
|
||||
case kPerfCntrsStop:
|
||||
eventType = PERFCOUNTER_STOP;
|
||||
break;
|
||||
case kPerfCntrsSample:
|
||||
eventType = PERFCOUNTER_SAMPLE;
|
||||
break;
|
||||
case kThrdTraceStart:
|
||||
eventType = THREAD_TRACE_START;
|
||||
break;
|
||||
case kThrdTraceStop:
|
||||
eventType = THREAD_TRACE_STOP;
|
||||
break;
|
||||
case kThrdTraceFlush:
|
||||
eventType = THREAD_TRACE_FLUSH;
|
||||
break;
|
||||
case kThrdTraceFinish:
|
||||
eventType = THREAD_TRACE_FINISH;
|
||||
break;
|
||||
default:
|
||||
assert(false && "Illegal VGT Event Id");
|
||||
}
|
||||
|
||||
MEC_EVENT_WRITE_event_index_enum index;
|
||||
index = event_index__mec_event_write__other;
|
||||
cp_event_initiator.bitfields2.event_index = index;
|
||||
cp_event_initiator.bitfields2.event_type = eventType;
|
||||
|
||||
// Append the built command into output Command Buffer
|
||||
APPEND_COMMAND_WRAPPER(cmdbuf, cp_event_initiator);
|
||||
|
||||
// Check If I should be issuing a cache flush operation as well
|
||||
// test and remove it
|
||||
BuildCacheFlushPacket(cmdbuf);
|
||||
}
|
||||
|
||||
/// @brief Intentionally empty: nothing is appended to the command buffer.
///
/// @param cmdbuf Command buffer (unused)
/// @param addr Register address (unused)
/// @param value Register value (unused)
void Gfx9CmdWriter::BuildWriteRegisterPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value) {
  // A disabled draft of this method built a WriteRegPacket with a
  // SET_CONFIG_REG header, 'addr - CONFIG_SPACE_START' and 'value', and
  // appended it to the command stream.
}
|
||||
|
||||
/// @brief Appends an EVENT_WRITE query command which carries a memory address.
///
/// No event id is supported yet: the switch below contains only the default
/// branch, so every call trips the "Illegal VGT Event Id" assertion when
/// assertions are enabled. With assertions compiled out the packet is appended
/// with the event type left at Reserved_0x00.
///
/// @param cmdbuf Command buffer to be appended
/// @param event Event id to translate into a VGT event type
/// @param addr Memory address stored in the packet; must be 8 byte aligned
void Gfx9CmdWriter::BuildWriteEventQueryPacket(CmdBuf* cmdbuf, uint32_t event, uint32_t* addr) {
  PM4MEC_EVENT_WRITE_QUERY cp_event_initiator;
  memset(&cp_event_initiator, 0, sizeof(PM4MEC_EVENT_WRITE_QUERY));
  cp_event_initiator.ordinal1 =
      PM4_TYPE3_HDR(IT_EVENT_WRITE, (sizeof(PM4MEC_EVENT_WRITE_QUERY) / sizeof(uint32_t)));
  cp_event_initiator.ordinal2 = 0;

  // Update switch statements you want to support
  VGT_EVENT_TYPE eventType = Reserved_0x00;
  switch (event) {
    default:
      assert(false && "Illegal VGT Event Id");
  }

  // The event index is looked up from the event type
  MEC_EVENT_WRITE_event_index_enum index;
  cp_event_initiator.bitfields2.event_type = eventType;
  index = (MEC_EVENT_WRITE_event_index_enum)EventTypeToIndexTable[eventType];
  cp_event_initiator.bitfields2.event_index = index;

  // Set the address. The low part is stored shifted right by 3, so the three
  // low bits must be zero.
  uint32_t addrLo = PtrLow32(addr);
  uint32_t addrHi = PtrHigh32(addr);
  assert(!(addrLo & 0x7) && "EventWriteQuery address must be 8 byte aligned");

  cp_event_initiator.address_hi = addrHi;
  cp_event_initiator.bitfields3.address_lo = (addrLo >> 3);

  // Append the built command into output Command Buffer
  APPEND_COMMAND_WRAPPER(cmdbuf, cp_event_initiator);
}
|
||||
|
||||
size_t Gfx9CmdWriter::SizeOfAtomicPacket() const {
|
||||
return sizeof(AtomicTemplate) / sizeof(uint32_t);
|
||||
}
|
||||
|
||||
/// @brief Appends an ACQUIRE_MEM command flushing the caches selected by the
/// options.
///
/// @param cmdbuf Command buffer to be appended
/// @param options Selects the caches to act on: l1, l2, icache, kcache.
/// Must not be NULL
/// @param writeAddr Not used by this implementation; kept so that the
/// interface matches the other chip families. NULL is accepted
/// @param writeVal Not used by this implementation
void Gfx9CmdWriter::BuildFlushCacheCmd(CmdBuf* cmdbuf, FlushCacheOptions* options,
                                       uint32_t* writeAddr, uint32_t writeVal) {
  // The write back address and value are deliberately ignored, so there is
  // nothing to validate about them
  (void)writeAddr;
  (void)writeVal;

  // The options are dereferenced unconditionally below
  assert(options != NULL && "BuildFlushCacheCmd requires flush options");

  PM4MEC_ACQUIRE_MEM cache_flush = invalidate_cache_template_.acquire_mem;

  // Program Coherence Control Register. Initialize L2 Cache flush
  // for Non-Coherent memory blocks
  uint32_t coher_cntl = 0;
  if (options->l1) {
    coher_cntl |= CP_COHER_CNTL__TCL1_ACTION_ENA_MASK;
  }
  if (options->l2) {
    coher_cntl |= (CP_COHER_CNTL__TC_ACTION_ENA_MASK | CP_COHER_CNTL__TC_WB_ACTION_ENA_MASK);
  }
  if (options->icache) {
    coher_cntl |= CP_COHER_CNTL__SH_ICACHE_ACTION_ENA_MASK;
  }
  if (options->kcache) {
    coher_cntl |= CP_COHER_CNTL__SH_KCACHE_ACTION_ENA_MASK;
  }
  cache_flush.bitfields2.coher_cntl = coher_cntl;

  // Append the built command into output Command Buffer
  APPEND_COMMAND_WRAPPER(cmdbuf, cache_flush);
}
|
||||
|
||||
/// @brief Appends a DMA_DATA command copying bytes from a source buffer to a
/// destination buffer.
///
/// @param cmdbuf Command buffer to be appended
/// @param srcAddr Address of the source buffer
/// @param dstAddr Address of the destination buffer
/// @param copySize Number of bytes to copy; asserted to be below 0x1FFFFF
/// @param waitForConfirm Value stored into the raw_wait field
void Gfx9CmdWriter::BuildDmaDataPacket(CmdBuf* cmdbuf, uint32_t* srcAddr, uint32_t* dstAddr,
                                       uint32_t copySize, bool waitForConfirm) {
  // Number of bytes to copy. The original note states the command restricts
  // the size to (64 MB - 1) - 26 Bits.
  // NOTE(review): the bound checked here is 0x1FFFFF (21 bits), which does not
  // agree with that note - confirm against the byte_count field width.
  assert(copySize < 0x1FFFFF);

  // Zeroed packet with header in the first Dword
  PM4MEC_DMA_DATA dma;
  memset(&dma, 0, sizeof(PM4MEC_DMA_DATA));
  dma.header.u32All = PM4_TYPE3_HDR(IT_DMA_DATA, (sizeof(PM4MEC_DMA_DATA) / sizeof(uint32_t)));

  // Source buffer: location, cache policy and address
  dma.bitfields2.src_sel = src_sel__mec_dma_data__src_addr_using_sas;
  dma.bitfields2.src_cache_policy = src_cache_policy__mec_dma_data__stream;
  dma.src_addr_lo_or_data = PtrLow32(srcAddr);
  dma.src_addr_hi = PtrHigh32(srcAddr);

  // Destination buffer: location, cache policy and address
  dma.bitfields2.dst_sel = dst_sel__mec_dma_data__dst_addr_using_das;
  dma.bitfields2.dst_cache_policy = dst_cache_policy__mec_dma_data__stream;
  dma.dst_addr_lo = PtrLow32(dstAddr);
  dma.dst_addr_hi = PtrHigh32(dstAddr);

  dma.bitfields7.byte_count = copySize;

  // Indicate that DMA Cmd should wait if its source
  // is the destination of a previous DMA Cmd
  dma.bitfields7.raw_wait = waitForConfirm;

  APPEND_COMMAND_WRAPPER(cmdbuf, dma);
}
|
||||
|
||||
|
||||
} // gfx9 namespace
|
||||
|
||||
} // pm4_profile
|
||||
@@ -1,199 +0,0 @@
|
||||
#ifndef _GFX9_CMDWRITER_H_
|
||||
#define _GFX9_CMDWRITER_H_
|
||||
|
||||
#include "cmdwriter.h"
|
||||
#include "gfx9_cmds.h"
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
namespace gfx9 {
|
||||
|
||||
|
||||
/// @brief class Gfx9CmdWriter implements the virtual class CommandWriter
|
||||
/// for GFX9 chipsets
|
||||
class Gfx9CmdWriter : public CommandWriter {
|
||||
public:
|
||||
Gfx9CmdWriter(bool atc_support, bool pcie_atomic_support);
|
||||
|
||||
/// @brief Dword specifying NOOP command for GFX9 chipsets. The macro
|
||||
/// populates the NOOP command which is 32-bits wide. The second parameter,
|
||||
/// the COUNT field of NOOP command, specifies the number of Dwords to skip.
|
||||
/// To skip ZERO Dwords the value should be set to 0x3FFF. Since the macro
|
||||
/// decrements the second parameter by TWO, an artifact of its definition,
|
||||
/// the value is incremented by TWO to 0x4001 (0x3FFF + 2).
|
||||
///
|
||||
inline uint32_t GetNoOpCmd() {
|
||||
static const uint32_t nopCmd = PM4_TYPE3_HDR(IT_NOP, 0x4001);
|
||||
return nopCmd;
|
||||
}
|
||||
|
||||
void BuildBarrierCommand(CmdBuf* cmdBuf);
|
||||
|
||||
void BuildIndirectBufferCmd(CmdBuf* cmdbuf, const void* cmd_addr, std::size_t cmd_size);
|
||||
|
||||
void BuildBOPNotifyCmd(CmdBuf* cmdbuf, const void* write_addr, uint32_t write_val,
|
||||
bool interrupt);
|
||||
|
||||
void BuildBarrierFenceCommands(CmdBuf* cmdbuf);
|
||||
|
||||
void BuildWriteEventPacket(CmdBuf* cmdbuf, uint32_t event);
|
||||
|
||||
void BuildWaitRegMemCommand(CmdBuf* cmdbuf, bool mem_space, uint64_t wait_addr, bool func_eq,
|
||||
uint32_t mask_val, uint32_t wait_val);
|
||||
|
||||
void BuildWriteUnshadowRegPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value);
|
||||
|
||||
/// @brief Build CP command to program a Gpu register
|
||||
///
|
||||
/// @param cmdbuf Pointer to command buffer to be appended
|
||||
/// @param addr Register to be programmed
|
||||
/// @param value Value to write into register
|
||||
///
|
||||
/// @return void
|
||||
void BuildWriteUConfigRegPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value);
|
||||
|
||||
void BuildWriteShRegPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value);
|
||||
|
||||
void BuildCopyDataPacket(CmdBuf* cmdbuf, uint32_t src_sel, uint32_t src_addr_lo,
|
||||
uint32_t src_addr_hi, uint32_t* dst_addr, uint32_t size, bool wait);
|
||||
|
||||
void BuildWriteWaitIdlePacket(CmdBuf* cmdbuf);
|
||||
|
||||
// Will issue a VGT event including a cache flush later on
|
||||
void BuildVgtEventPacket(CmdBuf* cmdbuf, uint32_t vgtEvent);
|
||||
|
||||
void BuildWriteRegisterPacket(CmdBuf* cmdbuf, uint32_t addr, uint32_t value);
|
||||
|
||||
void BuildWriteEventQueryPacket(CmdBuf* cmdbuf, uint32_t event, uint32_t* addr);
|
||||
|
||||
void BuildAtomicPacket(CmdBuf* cmdbuf, AtomicType atomic_op, volatile uint32_t* addr,
|
||||
uint32_t value, uint32_t compare);
|
||||
|
||||
void BuildAtomicPacket64(CmdBuf* cmdbuf, AtomicType atomic_op, volatile uint64_t* addr,
|
||||
uint64_t value = 0, uint64_t compare = 0);
|
||||
|
||||
size_t SizeOfAtomicPacket() const;
|
||||
|
||||
void BuildConditionalExecute(CmdBuf* cmdbuf, uint32_t* signal, uint16_t count);
|
||||
|
||||
void BuildWriteDataCommand(CmdBuf* cmdbuf, uint32_t* write_addr, uint32_t write_value);
|
||||
|
||||
void BuildWriteData64Command(CmdBuf* cmdbuf, uint64_t* write_addr, uint64_t write_value);
|
||||
|
||||
void BuildCacheFlushPacket(CmdBuf* cmdbuf);
|
||||
|
||||
/// Writes into input buffer Gpu commands to flush its cache. It is
|
||||
/// necessary that the buffer provided for flush commands is large
|
||||
/// enough to accommodate the full set of commands. It should be at
|
||||
/// least 512 bytes.
|
||||
///
|
||||
/// @param tsCmdBuf Buffer to write commands to.
|
||||
/// @param writeAddr Registered address into which GPU should write
|
||||
/// a user provided value upon executing the flush commands.
|
||||
/// @param writeVal User provided value written by GPU at user provided
|
||||
/// address, upon executing the flush commands.
|
||||
///
|
||||
/// @return void
|
||||
void BuildFlushCacheCmd(CmdBuf* cmdBuf, FlushCacheOptions* options, uint32_t* writeAddr,
|
||||
uint32_t writeVal);
|
||||
|
||||
/// Builds Gpu command to copy data from source to destination buffer
|
||||
/// using DMA engine.
|
||||
///
|
||||
/// @param cmdbuf Buffer updated with Gpu copy command
|
||||
/// @param srcAddr Address of source buffer address
|
||||
/// @param dstAddr Address of destination buffer address
|
||||
/// @param copySize Size of data to copy in bytes
|
||||
/// @param waitForCompletion if command should wait for copying to complete
|
||||
void BuildDmaDataPacket(CmdBuf* cmdBuf, uint32_t* srcAddr, uint32_t* dstAddr, uint32_t copySize,
|
||||
bool waitForCompletion);
|
||||
|
||||
protected:
|
||||
/// @brief Append an instance of Gpu command into input command buffer stream.
|
||||
///
|
||||
/// @param cmdbuf CommandWriter object appended with anohter Gpu command
|
||||
///
|
||||
/// @param cmd Gpu command to be appended into command buffer
|
||||
///
|
||||
/// @return void
|
||||
template <class T> void AppendCommand(CmdBuf* cmdbuf, const T& cmd);
|
||||
|
||||
private:
|
||||
/// @brief Initializes a Gpu command which can be used to
|
||||
/// reference a Gpu command stream indirectly
|
||||
void InitializeLaunchTemplate();
|
||||
|
||||
/// @brief Initializes a Gpu command which can be used to
|
||||
/// flush Gpu caches and write to a user configurable address
|
||||
/// to indicate an end of kernel
|
||||
void InitializeEndOfKernelNotifyTemplate();
|
||||
|
||||
/// @brief Initializes a Gpu command to perform atomic operations
|
||||
////
|
||||
void InitializeAtomicTemplate();
|
||||
|
||||
/// @brief Initializes a Gpu command to allow conditional execution
|
||||
/// of a Gpu command stream
|
||||
void InitializeConditionalTemplate();
|
||||
|
||||
/// @brief Initializes a Gpu command to let command processor
|
||||
/// wait for some update before letting other commands to be
|
||||
/// processed
|
||||
void InitializeWaitRegMemTemplate();
|
||||
|
||||
/// @brief Initializes the template for Barrier command.
|
||||
/// Applications can use Barrier command to ensure their
|
||||
/// command is executed only after all other commands have
|
||||
/// completed their execution.
|
||||
void InitializeBarrierTemplate();
|
||||
|
||||
void BuildUpdateHostAddress(CmdBuf* cmdbuf, uint64_t* addr, int64_t value);
|
||||
|
||||
/// @brief Initializes Acquire Memory command template. Users
|
||||
/// can submit this command to invalidate Gpu caches - L1 and
|
||||
/// or L2.
|
||||
void InitializeAcquireMemTemplate();
|
||||
|
||||
/// @brief Initializes an instance of Write Data command
|
||||
/// for use by an application
|
||||
void InitializeWriteDataTemplate();
|
||||
void InitializeWriteData64Template();
|
||||
void InitializeWriteDataTemplate(PM4MEC_WRITE_DATA* write_data, bool bit32);
|
||||
|
||||
/// @brief Builds wait_reg_mem with EQUALS condition
|
||||
void BuildWaitRegMemCommand(CmdBuf* cmdbuf, uint64_t wait_addr, uint32_t wait_value);
|
||||
|
||||
/// @brief Instance of Gpu command to reference dispatch commands
|
||||
LaunchTemplate launch_template_;
|
||||
|
||||
/// @brief Instance of Gpu command to use in determing end of kernel
|
||||
EndofKernelNotifyTemplate notify_template_;
|
||||
|
||||
/// @brief Instance of Gpu command to use in performing atomic operations
|
||||
AtomicTemplate atomic_template_;
|
||||
|
||||
/// @brief Instance of Pm4 command WRITE_DATA
|
||||
WriteDataTemplate write_data_template_;
|
||||
WriteData64Template write_data64_template_;
|
||||
|
||||
/// @brief Instance of Pm4 command EVENT_WRITE
|
||||
BarrierTemplate pending_dispatch_template_;
|
||||
|
||||
/// @brief Instance of Pm4 command ACQUIRE_MEM
|
||||
AcquireMemTemplate invalidate_cache_template_;
|
||||
|
||||
/// @brief Instance of Pm4 command WAIT_REG_MEM
|
||||
WaitRegMemTemplate wait_reg_mem_template_;
|
||||
|
||||
/// @brief ATC support.
|
||||
bool atc_support_;
|
||||
|
||||
/// @brief PCIe atomic support.
|
||||
bool pcie_atomic_support_;
|
||||
};
|
||||
|
||||
} // gfx9
|
||||
|
||||
} // pm4_profile
|
||||
|
||||
#endif // _GFX9_CMDWRITER_H_
|
||||
@@ -1,20 +0,0 @@
|
||||
#
# Source files of the library built by this script
#
set ( LIB_SRC
      aql_profile.cpp
      populate_aql.cpp
      gfx8_factory.cpp
      gfx9_factory.cpp
      legacy_pm4.cpp )

#
# Header files include path(s).
# ROCR_INC_DIR is read from the environment at configure time.
# NOTE(review): these paths are directory-scoped; consider moving them to
# target_include_directories once it is confirmed that no other target relies on them.
#
include_directories ( $ENV{ROCR_INC_DIR}
                      ${PROJ_DIR}/perfcounter
                      ${PROJ_DIR}/threadtrace
                      ${PROJ_DIR}/commandwriter
                      ${API_DIR} )

#
# Build the library as a dynamic Library object and link it against
# the perfcounter, threadtrace and commandwriter libraries.
# NOTE(review): target_link_libraries is used without a PRIVATE/PUBLIC keyword;
# confirm what consumers expect before adding one.
#
set ( LIB_LIST ${PMC_LIB} ${SQTT_LIB} ${CMDWRITER_LIB} )
add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} )
target_link_libraries ( ${TARGET_LIB} ${LIB_LIST} c stdc++ dl pthread rt )
|
||||
@@ -1,67 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright 2017 ADVANCED MICRO DEVICES, INC.
|
||||
//
|
||||
// AMD is granting you permission to use this software and documentation(if any)
|
||||
// (collectively, the "Materials") pursuant to the terms and conditions of the
|
||||
// Software License Agreement included with the Materials.If you do not have a
|
||||
// copy of the Software License Agreement, contact your AMD representative for a
|
||||
// copy.
|
||||
//
|
||||
// You agree that you will not reverse engineer or decompile the Materials, in
|
||||
// whole or in part, except as allowed by applicable law.
|
||||
//
|
||||
// WARRANTY DISCLAIMER : THE SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF
|
||||
// ANY KIND.AMD DISCLAIMS ALL WARRANTIES, EXPRESS, IMPLIED, OR STATUTORY,
|
||||
// INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE, TITLE, NON - INFRINGEMENT, THAT THE
|
||||
// SOFTWARE WILL RUN UNINTERRUPTED OR ERROR - FREE OR WARRANTIES ARISING FROM
|
||||
// CUSTOM OF TRADE OR COURSE OF USAGE.THE ENTIRE RISK ASSOCIATED WITH THE USE OF
|
||||
// THE SOFTWARE IS ASSUMED BY YOU.Some jurisdictions do not allow the exclusion
|
||||
// of implied warranties, so the above exclusion may not apply to You.
|
||||
//
|
||||
// LIMITATION OF LIABILITY AND INDEMNIFICATION : AMD AND ITS LICENSORS WILL NOT,
|
||||
// UNDER ANY CIRCUMSTANCES BE LIABLE TO YOU FOR ANY PUNITIVE, DIRECT,
|
||||
// INCIDENTAL, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM USE OF
|
||||
// THE SOFTWARE OR THIS AGREEMENT EVEN IF AMD AND ITS LICENSORS HAVE BEEN
|
||||
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.In no event shall AMD's total
|
||||
// liability to You for all damages, losses, and causes of action (whether in
|
||||
// contract, tort (including negligence) or otherwise) exceed the amount of $100
|
||||
// USD. You agree to defend, indemnify and hold harmless AMD and its licensors,
|
||||
// and any of their directors, officers, employees, affiliates or agents from
|
||||
// and against any and all loss, damage, liability and other expenses (including
|
||||
// reasonable attorneys' fees), resulting from Your use of the Software or
|
||||
// violation of the terms and conditions of this Agreement.
|
||||
//
|
||||
// U.S.GOVERNMENT RESTRICTED RIGHTS : The Materials are provided with
|
||||
// "RESTRICTED RIGHTS." Use, duplication, or disclosure by the Government is
|
||||
// subject to the restrictions as set forth in FAR 52.227 - 14 and DFAR252.227 -
|
||||
// 7013, et seq., or its successor.Use of the Materials by the Government
|
||||
// constitutes acknowledgement of AMD's proprietary rights in them.
|
||||
//
|
||||
// EXPORT RESTRICTIONS: The Materials may be subject to export restrictions as
|
||||
// stated in the Software License Agreement.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _AMD_AQL_PM4_IB_PACKET_H_
|
||||
#define _AMD_AQL_PM4_IB_PACKET_H_
|
||||
|
||||
// Constants describing the amd_aql_pm4_ib_packet_t packet:
// - value of its 'pm4_ib_format' field
static const uint32_t AMD_AQL_PM4_IB_FORMAT = 1;
// - value of its 'dw_count_remain' field
static const uint32_t AMD_AQL_PM4_IB_DW_COUNT_REMAIN = 10;
// - number of elements in its 'reserved' array
static const uint32_t AMD_AQL_PM4_IB_RESERVED_COUNT = 8;
|
||||
|
||||
// AQL Vendor Specific Packet which carry PM4 IB command
|
||||
typedef struct {
|
||||
uint16_t header;
|
||||
uint16_t pm4_ib_format;
|
||||
uint32_t pm4_ib_command[4];
|
||||
uint32_t dw_count_remain;
|
||||
uint32_t reserved[AMD_AQL_PM4_IB_RESERVED_COUNT];
|
||||
hsa_signal_t completion_signal;
|
||||
} amd_aql_pm4_ib_packet_t;
|
||||
|
||||
#endif // _AMD_AQL_PM4_IB_PACKET_H_
|
||||
@@ -1,503 +0,0 @@
|
||||
#include "aql_profile.h"
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "pm4_factory.h"
|
||||
#include "cmdwriter.h" // commandwriter
|
||||
#include "perf_counter.h" // perfcounter
|
||||
#include "thread_trace.h" // threadtrace
|
||||
#include "gpu_block_info.h"
|
||||
#include "logger.h"
|
||||
|
||||
// Gives a symbol default visibility
#define PUBLIC_API __attribute__((visibility("default")))
// Applies the 'destructor' function attribute
#define DESTRUCTOR_API __attribute__((destructor))
// When 'cond' holds, streams 'msg' into ERR_LOGGING and returns 'err' from the
// enclosing function.
// NOTE(review): the macro expands to a bare brace block, so it does not compose
// with an unbraced if/else followed by ';' - confirm the call sites before
// switching to the do { } while (0) form.
#define ERR_CHECK(cond, err, msg) \
  {                               \
    if (cond) {                   \
      ERR_LOGGING << msg;         \
      return err;                 \
    }                             \
  }
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Command buffer partitioning manager
|
||||
// Supports Pre/Post commands partitioning
|
||||
// and postfix control partition
|
||||
class CommandBufferMgr {
|
||||
const static uint32_t align_size = 0x100;
|
||||
const static uint32_t align_mask = align_size - 1;
|
||||
|
||||
struct info_t {
|
||||
uint32_t precmds_size;
|
||||
uint32_t postcmds_size;
|
||||
};
|
||||
|
||||
descriptor_t buffer;
|
||||
uint32_t postfix_size;
|
||||
info_t* info;
|
||||
|
||||
uint32_t align(const uint32_t& size) { return (size + align_mask) & ~align_mask; }
|
||||
|
||||
public:
|
||||
explicit CommandBufferMgr(const profile_t* profile)
|
||||
: buffer(profile->command_buffer), postfix_size(0), info(NULL) {
|
||||
info = (info_t*)setPostfix(sizeof(info_t));
|
||||
}
|
||||
|
||||
uint32_t getSize() { return buffer.size; }
|
||||
|
||||
void* setPostfix(const uint32_t& size) {
|
||||
if (size > postfix_size) {
|
||||
const uint32_t delta = size - postfix_size;
|
||||
postfix_size = size;
|
||||
buffer.size -= (delta < buffer.size) ? delta : buffer.size;
|
||||
}
|
||||
if (buffer.size == 0)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::setPostfix(): buffer size set to zero");
|
||||
return (buffer.size != 0) ? buffer.ptr + buffer.size : NULL;
|
||||
}
|
||||
|
||||
bool setPreSize(const uint32_t& size) {
|
||||
bool suc = (size <= buffer.size);
|
||||
if (suc) info->precmds_size = size;
|
||||
if (!suc)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::setPreSize(): size set out of the buffer");
|
||||
return suc;
|
||||
}
|
||||
|
||||
uint32_t getPostOffset() { return align(info->precmds_size); }
|
||||
|
||||
bool checkTotalSize(const uint32_t& size) {
|
||||
bool suc = (size <= buffer.size);
|
||||
if (suc) suc = (size >= info->precmds_size);
|
||||
if (suc) {
|
||||
info->postcmds_size = size - info->precmds_size;
|
||||
suc = ((getPostOffset() + info->postcmds_size) <= buffer.size);
|
||||
}
|
||||
if (!suc)
|
||||
throw aql_profile_exc_msg("CommandBufferMgr::checkTotalSize(): size set out of the buffer");
|
||||
return suc;
|
||||
}
|
||||
|
||||
descriptor_t getPreDescr() {
|
||||
descriptor_t descr;
|
||||
descr.ptr = buffer.ptr;
|
||||
descr.size = info->precmds_size;
|
||||
return descr;
|
||||
}
|
||||
|
||||
descriptor_t getPostDescr() {
|
||||
descriptor_t descr;
|
||||
descr.ptr = buffer.ptr + getPostOffset();
|
||||
descr.size = info->postcmds_size;
|
||||
return descr;
|
||||
}
|
||||
};
|
||||
|
||||
// Groups the counter ids of the profile events by the block id reported by
// the PM4 factory, keeping the order of the events within every block.
static inline pm4_profile::CountersMap CountersMapCreate(const profile_t* profile,
                                                         const Pm4Factory* pm4_factory) {
  pm4_profile::CountersMap blocks;
  const hsa_ven_amd_aqlprofile_event_t* const events_end = profile->events + profile->event_count;
  const hsa_ven_amd_aqlprofile_event_t* event = profile->events;
  while (event < events_end) {
    blocks[pm4_factory->getBlockId(event)].push_back(event->counter_id);
    ++event;
  }
  return blocks;
}
|
||||
|
||||
typedef std::vector<const event_t*> EventsVec;
|
||||
static inline EventsVec EventsVecCreate(const profile_t* profile, const Pm4Factory* pm4_factory) {
|
||||
pm4_profile::CountersMap countersMap = CountersMapCreate(profile, pm4_factory);
|
||||
|
||||
std::map<uint32_t, const event_t*> id_map;
|
||||
for (const hsa_ven_amd_aqlprofile_event_t* p = profile->events;
|
||||
p < profile->events + profile->event_count; ++p) {
|
||||
id_map.insert(decltype(id_map)::value_type(pm4_factory->getBlockId(p), p));
|
||||
}
|
||||
|
||||
// Iterate through the list of blocks/counters to generate correct order events vector
|
||||
EventsVec eventsVec;
|
||||
for (pm4_profile::CountersMap::const_iterator block_it = countersMap.begin();
|
||||
block_it != countersMap.end(); ++block_it) {
|
||||
const uint32_t block_id = block_it->first;
|
||||
const pm4_profile::CountersVec& counters = block_it->second;
|
||||
const uint32_t counter_count = counters.size();
|
||||
|
||||
for (uint32_t ind = 0; ind < counter_count; ++ind) {
|
||||
eventsVec.push_back(id_map[block_id] + ind);
|
||||
}
|
||||
}
|
||||
|
||||
return eventsVec;
|
||||
}
|
||||
|
||||
static inline bool is_event_match(const event_t& event1, const event_t& event2) {
|
||||
return (event1.block_name == event2.block_name) && (event1.block_index == event2.block_index) &&
|
||||
(event1.counter_id == event2.counter_id);
|
||||
}
|
||||
|
||||
// Default PMC data callback used by hsa_ven_amd_aqlprofile_get_info().
// 'callback_data' points to the caller's info-data record, which names the
// requested event and sample id and receives the result:
//  - sample_id == UINT32_MAX: results of all matching samples are summed;
//  - otherwise the result of the sample with the same id is stored and the
//    iteration is interrupted with HSA_STATUS_INFO_BREAK.
// Anything that is not PMC data, or belongs to another event, is ignored.
hsa_status_t default_pmcdata_callback(hsa_ven_amd_aqlprofile_info_type_t info_type,
                                      hsa_ven_amd_aqlprofile_info_data_t* info_data,
                                      void* callback_data) {
  if (info_type != HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) return HSA_STATUS_SUCCESS;

  hsa_ven_amd_aqlprofile_info_data_t* const request =
      static_cast<hsa_ven_amd_aqlprofile_info_data_t*>(callback_data);
  if (!is_event_match(info_data->pmc_data.event, request->pmc_data.event)) {
    return HSA_STATUS_SUCCESS;
  }

  if (request->sample_id == UINT32_MAX) {
    request->pmc_data.result += info_data->pmc_data.result;
    return HSA_STATUS_SUCCESS;
  }
  if (request->sample_id != info_data->sample_id) return HSA_STATUS_SUCCESS;

  request->pmc_data.result = info_data->pmc_data.result;
  return HSA_STATUS_INFO_BREAK;
}
|
||||
|
||||
// SQTT control record; hsa_ven_amd_aqlprofile_iterate_data() views the SQTT
// control buffer as an array of these, one per ShaderEngine.
struct sqtt_ctrl_t {
  uint32_t status;    // NOTE(review): presumably the thread trace status value - confirm
  uint32_t counter;   // NOTE(review): presumably the thread trace counter value - confirm
  uint32_t writePtr;  // write pointer, multiplied by TT_WRITE_PTR_BLK to get the sample size
};
|
||||
|
||||
// Default SQTT data callback used by hsa_ven_amd_aqlprofile_get_info().
// 'callback_data' points to the caller's info-data record; when the iterated
// sample has the requested sample id, its SQTT descriptor is copied into that
// record and the iteration is interrupted with HSA_STATUS_INFO_BREAK.
hsa_status_t default_sqttdata_callback(hsa_ven_amd_aqlprofile_info_type_t info_type,
                                       hsa_ven_amd_aqlprofile_info_data_t* info_data,
                                       void* callback_data) {
  if (info_type != HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) return HSA_STATUS_SUCCESS;

  hsa_ven_amd_aqlprofile_info_data_t* const request =
      static_cast<hsa_ven_amd_aqlprofile_info_data_t*>(callback_data);
  if (info_data->sample_id != request->sample_id) return HSA_STATUS_SUCCESS;

  request->sqtt_data = info_data->sqtt_data;
  return HSA_STATUS_INFO_BREAK;
}
|
||||
|
||||
std::mutex Logger::mutex;
|
||||
Logger* Logger::instance = NULL;
|
||||
std::mutex Pm4Factory::mutex;
|
||||
Pm4Factory::instances_t Pm4Factory::instances;
|
||||
|
||||
DESTRUCTOR_API void destructor() {
|
||||
Logger::Destroy();
|
||||
Pm4Factory::Destroy();
|
||||
}
|
||||
|
||||
} // aql_profile
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Returns, through 'str', the last message logged for the calling thread.
// The C string is owned by the logger (Logger::LastMessage() returns a
// reference); the caller must not free it.
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_error_string(const char** str) {
  const std::string& last_message = aql_profile::Logger::LastMessage();
  *str = last_message.c_str();
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Check if event is valid for the specific GPU
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_validate_event(
|
||||
hsa_agent_t agent, const hsa_ven_amd_aqlprofile_event_t* event, bool* result) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
*result = false;
|
||||
|
||||
try {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(agent);
|
||||
if (pm4_factory->getBlockInfo(event) != NULL) *result = true;
|
||||
} catch (aql_profile::event_exception& e) {
|
||||
INFO_LOGGING << e.what();
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
status = HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Method to populate the provided AQL packet with profiling start commands
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_start(
|
||||
const hsa_ven_amd_aqlprofile_profile_t* profile, aql_profile::packet_t* aql_start_packet) {
|
||||
try {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
pm4_profile::CommandWriter* cmdWriter = pm4_factory->getCommandWriter();
|
||||
pm4_profile::DefaultCmdBuf commands;
|
||||
aql_profile::CommandBufferMgr cmdBufMgr(profile);
|
||||
|
||||
if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC) {
|
||||
pm4_profile::PerfCounter* pmcMgr = pm4_factory->getPmcMgr();
|
||||
|
||||
// Generate start commands
|
||||
const pm4_profile::CountersMap countersMap = CountersMapCreate(profile, pm4_factory);
|
||||
pmcMgr->begin(&commands, cmdWriter, countersMap);
|
||||
cmdBufMgr.setPreSize(commands.Size());
|
||||
|
||||
// Generate stop commands
|
||||
const uint32_t data_size =
|
||||
pmcMgr->end(&commands, cmdWriter, countersMap, profile->output_buffer.ptr);
|
||||
ERR_CHECK(data_size == 0, HSA_STATUS_ERROR, "PMC mgr end(): data size set to zero");
|
||||
assert(data_size <= profile->output_buffer.size);
|
||||
if (data_size > profile->output_buffer.size) {
|
||||
ERR_LOGGING << "data size assertion failed, data_size(" << data_size << "), buffer size("
|
||||
<< profile->output_buffer.size << ")";
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
} else if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT) {
|
||||
pm4_profile::ThreadTrace* sqttMgr = pm4_factory->getSqttMgr();
|
||||
|
||||
pm4_profile::ThreadTraceConfig sqtt_config;
|
||||
sqttMgr->InitThreadTraceConfig(&sqtt_config);
|
||||
if (profile->parameters) {
|
||||
for (const hsa_ven_amd_aqlprofile_parameter_t* p = profile->parameters;
|
||||
p < (profile->parameters + profile->parameter_count); ++p) {
|
||||
switch (p->parameter_name) {
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET:
|
||||
sqtt_config.threadTraceTargetCu = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK:
|
||||
sqtt_config.threadTraceVmIdMask = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK:
|
||||
sqtt_config.threadTraceMask = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK:
|
||||
sqtt_config.threadTraceTokenMask = p->value;
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2:
|
||||
sqtt_config.threadTraceTokenMask2 = p->value;
|
||||
break;
|
||||
default:
|
||||
ERR_LOGGING << "Bad SQTT parameter name (" << p->parameter_name << ")";
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
}
|
||||
}
|
||||
sqttMgr->Init(&sqtt_config);
|
||||
|
||||
sqttMgr->setSqttDataBuff((uint8_t*)profile->output_buffer.ptr, profile->output_buffer.size);
|
||||
|
||||
// Control buffer registering
|
||||
const uint32_t status_size = sqttMgr->StatusSizeInfo();
|
||||
void* status_ptr = cmdBufMgr.setPostfix(status_size);
|
||||
sqttMgr->setSqttCtrlBuff((uint32_t*)status_ptr);
|
||||
|
||||
// Generate start commands
|
||||
sqttMgr->BeginSession(&commands, cmdWriter);
|
||||
cmdBufMgr.setPreSize(commands.Size());
|
||||
// Generate stop commands
|
||||
sqttMgr->StopSession(&commands, cmdWriter);
|
||||
} else {
|
||||
ERR_LOGGING << "Bad profile type (" << profile->type << ")";
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
cmdBufMgr.checkTotalSize(commands.Size());
|
||||
|
||||
const aql_profile::descriptor_t pre_descr = cmdBufMgr.getPreDescr();
|
||||
const aql_profile::descriptor_t post_descr = cmdBufMgr.getPostDescr();
|
||||
memcpy(pre_descr.ptr, commands.Base(), pre_descr.size);
|
||||
memcpy(post_descr.ptr, commands.Base() + pre_descr.size, post_descr.size);
|
||||
// Populate start aql packet
|
||||
aql_profile::populateAql(pre_descr.ptr, pre_descr.size, cmdWriter, aql_start_packet);
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Method to populate the provided AQL packet with profiling stop commands
|
||||
PUBLIC_API hsa_status_t hsa_ven_amd_aqlprofile_stop(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
aql_profile::packet_t* aql_stop_packet) {
|
||||
try {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
pm4_profile::CommandWriter* cmdWriter = pm4_factory->getCommandWriter();
|
||||
aql_profile::CommandBufferMgr cmdBufMgr(profile);
|
||||
|
||||
// Populate stop aql packet
|
||||
const aql_profile::descriptor_t post_descr = cmdBufMgr.getPostDescr();
|
||||
aql_profile::populateAql(post_descr.ptr, post_descr.size, cmdWriter, aql_stop_packet);
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Legacy devices, converting of the profiling AQL packet to PM4 packet blob
|
||||
PUBLIC_API hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_legacy_get_pm4(const aql_profile::packet_t* aql_packet, void* data) {
|
||||
try {
|
||||
// Populate GFX8 pm4 packet blob
|
||||
// Adding HSA barrier acquire packet
|
||||
data = aql_profile::legacyAqlAcquire(aql_packet, data);
|
||||
// Adding PM4 command packet
|
||||
data = aql_profile::legacyPm4(aql_packet, data);
|
||||
// Adding HSA barrier release packet
|
||||
data = aql_profile::legacyAqlRelease(aql_packet, data);
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Method for getting the profile info
|
||||
PUBLIC_API hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_get_info(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_info_type_t attribute, void* value) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
|
||||
try {
|
||||
switch (attribute) {
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE:
|
||||
*(uint32_t*)value = 0x1000; // a current approximation as 4K is big enaugh
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE:
|
||||
*(uint32_t*)value = 0x1000; // a current approximation as 4K is big enaugh
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA:
|
||||
reinterpret_cast<hsa_ven_amd_aqlprofile_info_data_t*>(value)->pmc_data.result = 0;
|
||||
status = hsa_ven_amd_aqlprofile_iterate_data(profile, aql_profile::default_pmcdata_callback,
|
||||
value);
|
||||
break;
|
||||
case HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA:
|
||||
status = hsa_ven_amd_aqlprofile_iterate_data(profile,
|
||||
aql_profile::default_sqttdata_callback, value);
|
||||
break;
|
||||
default:
|
||||
status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
ERR_LOGGING << "Invalid attribute (" << attribute << ")";
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Method for iterating the events output data
|
||||
PUBLIC_API hsa_status_t
|
||||
hsa_ven_amd_aqlprofile_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* profile,
|
||||
hsa_ven_amd_aqlprofile_data_callback_t callback, void* data) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
|
||||
try {
|
||||
aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile);
|
||||
|
||||
if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC) {
|
||||
uint32_t info_size = 0;
|
||||
void* info_data;
|
||||
uint64_t* samples = (uint64_t*)profile->output_buffer.ptr;
|
||||
const uint32_t sample_count = profile->output_buffer.size / sizeof(uint64_t);
|
||||
uint32_t sample_index = 0;
|
||||
|
||||
pm4_profile::PerfCounter* pmcMgr = pm4_factory->getPmcMgr();
|
||||
|
||||
aql_profile::EventsVec eventsVec = EventsVecCreate(profile, pm4_factory);
|
||||
for (aql_profile::EventsVec::const_iterator it = eventsVec.begin(); it != eventsVec.end();
|
||||
++it) {
|
||||
const hsa_ven_amd_aqlprofile_event_t* p = *it;
|
||||
const pm4_profile::CntlMethod method = pm4_factory->getBlockInfo(p)->method;
|
||||
// A perfcounter data sample per ShaderEngine
|
||||
const uint32_t block_samples_count = (method == pm4_profile::CntlMethodBySe ||
|
||||
method == pm4_profile::CntlMethodBySeAndInstance)
|
||||
? pmcMgr->getNumSe()
|
||||
: 1;
|
||||
for (uint32_t i = 0; i < block_samples_count; ++i) {
|
||||
assert(sample_index < sample_count);
|
||||
if (sample_index >= sample_count) {
|
||||
ERR_LOGGING << "Bad sample index (" << sample_index << "/" << sample_count << ")";
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
hsa_ven_amd_aqlprofile_info_data_t sample_info;
|
||||
sample_info.sample_id = i;
|
||||
sample_info.pmc_data.event = *p;
|
||||
sample_info.pmc_data.result = samples[sample_index];
|
||||
status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA, &sample_info, data);
|
||||
if (status == HSA_STATUS_INFO_BREAK) {
|
||||
status = HSA_STATUS_SUCCESS;
|
||||
break;
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
ERR_LOGGING << "PMC data callback error, sample_id(" << i << ") status(" << status
|
||||
<< ")";
|
||||
break;
|
||||
}
|
||||
++sample_index;
|
||||
}
|
||||
}
|
||||
} else if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT) {
|
||||
pm4_profile::ThreadTrace* sqttMgr = pm4_factory->getSqttMgr();
|
||||
aql_profile::CommandBufferMgr cmdBufMgr(profile);
|
||||
|
||||
// Control buffer was allocated as the CmdBuffer postfix partition
|
||||
const uint32_t status_size = sqttMgr->StatusSizeInfo();
|
||||
void* status_ptr = cmdBufMgr.setPostfix(status_size);
|
||||
// Control buffer registering
|
||||
sqttMgr->setSqttCtrlBuff((uint32_t*)status_ptr);
|
||||
// Validate SQTT status and normalize WRPTR
|
||||
if (sqttMgr->Validate() == false) {
|
||||
ERR_LOGGING << "SQTT data corrupted";
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
const uint32_t se_number = sqttMgr->getNumSe();
|
||||
// Casting status pointer to SQTT control per ShaderEngine array
|
||||
aql_profile::sqtt_ctrl_t* sqtt_ctrl = (aql_profile::sqtt_ctrl_t*)status_ptr;
|
||||
const uint32_t status_size_exp = sizeof(aql_profile::sqtt_ctrl_t) * se_number;
|
||||
assert(status_size == status_size_exp);
|
||||
if (status_size != status_size_exp) {
|
||||
ERR_LOGGING << "Bad SQTT controll data structure"
|
||||
<< ", status_size(" << status_size << "), status_size_exp(" << status_size_exp
|
||||
<< "), se_number(" << se_number << ")";
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
// SQTT output buffer and capacity per ShaderEngine
|
||||
void* sample_ptr = profile->output_buffer.ptr;
|
||||
const uint32_t sample_capacity = profile->output_buffer.size / se_number;
|
||||
// The samples sizes are returned in the control buffer
|
||||
for (int i = 0; i < se_number; ++i) {
|
||||
// WPTR specifies the index in thread trace buffer where next token will be
|
||||
// written by hardware. The index is incremented by size of 32 bytes.
|
||||
uint32_t sample_size = sqtt_ctrl[i].writePtr * TT_WRITE_PTR_BLK;
|
||||
|
||||
hsa_ven_amd_aqlprofile_info_data_t sample_info;
|
||||
sample_info.sample_id = i;
|
||||
sample_info.sqtt_data.ptr = sample_ptr;
|
||||
sample_info.sqtt_data.size = sample_size;
|
||||
status = callback(HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA, &sample_info, data);
|
||||
if (status == HSA_STATUS_INFO_BREAK) {
|
||||
status = HSA_STATUS_SUCCESS;
|
||||
break;
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
ERR_LOGGING << "SQTT data callback error, sample_id(" << i << ") status(" << status
|
||||
<< ")";
|
||||
break;
|
||||
}
|
||||
|
||||
sample_ptr += sample_capacity;
|
||||
}
|
||||
} else {
|
||||
ERR_LOGGING << "Bad profile type (" << profile->type << ")";
|
||||
status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
ERR_LOGGING << e.what();
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
}
|
||||
@@ -1,40 +0,0 @@
|
||||
#ifndef _AQL_PROFILE_H_
|
||||
#define _AQL_PROFILE_H_
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "hsa_ven_amd_aqlprofile.h"
|
||||
#include "aql_profile_exception.h"
|
||||
|
||||
namespace pm4_profile {
|
||||
class CommandWriter;
|
||||
}
|
||||
|
||||
namespace aql_profile {
|
||||
typedef hsa_ven_amd_aqlprofile_descriptor_t descriptor_t;
|
||||
typedef hsa_ven_amd_aqlprofile_profile_t profile_t;
|
||||
typedef hsa_ven_amd_aqlprofile_info_type_t info_type_t;
|
||||
typedef hsa_ven_amd_aqlprofile_data_callback_t data_callback_t;
|
||||
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
|
||||
typedef hsa_ven_amd_aqlprofile_event_t event_t;
|
||||
|
||||
void populateAql(const void* cmd_buffer, uint32_t cmd_size, pm4_profile::CommandWriter* cmd_writer,
|
||||
packet_t* aql_packet);
|
||||
void* legacyAqlAcquire(const packet_t* aql_packet, void* data);
|
||||
void* legacyAqlRelease(const packet_t* aql_packet, void* data);
|
||||
void* legacyPm4(const packet_t* aql_packet, void* data);
|
||||
|
||||
class event_exception : public aql_profile_exc_val<event_t> {
|
||||
public:
|
||||
event_exception(const std::string& m, const event_t& ev) : aql_profile_exc_val(m, ev) {}
|
||||
};
|
||||
|
||||
static std::ostream& operator<<(std::ostream& os, const event_t& ev) {
|
||||
os << "event( block(" << ev.block_name << "." << ev.block_index << "), Id(" << ev.counter_id
|
||||
<< "))";
|
||||
return os;
|
||||
}
|
||||
} // namespace aql_profile
|
||||
|
||||
#endif // _AQL_PROFILE_H_
|
||||
@@ -1,34 +0,0 @@
|
||||
#ifndef _AQL_PROFILE_EXCEPTION_H_
|
||||
#define _AQL_PROFILE_EXCEPTION_H_
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Exception carrying a fixed message supplied by the thrower.
class aql_profile_exc_msg : public std::exception {
 public:
  explicit aql_profile_exc_msg(const std::string& msg) : str(msg) {}
  // 'noexcept override' replaces the deprecated dynamic specification 'throw()'
  const char* what() const noexcept override { return str.c_str(); }

 protected:
  std::string str;  // the message returned by what()
};
|
||||
|
||||
// Exception whose message is "<msg>(<val>)", with 'val' printed through
// operator<< on an output stream.
template <typename T> class aql_profile_exc_val : public std::exception {
 public:
  aql_profile_exc_val(const std::string& msg, const T& val) {
    std::ostringstream oss;
    oss << msg << "(" << val << ")";
    str = oss.str();
  }
  // 'noexcept override' replaces the deprecated dynamic specification 'throw()'
  const char* what() const noexcept override { return str.c_str(); }

 protected:
  std::string str;  // the formatted message returned by what()
};
|
||||
} // namespace aql_profile
|
||||
|
||||
#endif // _AQL_PROFILE_EXCEPTION_H_
|
||||
@@ -1,50 +0,0 @@
|
||||
#include "pm4_factory.h"
|
||||
// Commandwriter includes
|
||||
#include "gfx8_cmdwriter.h"
|
||||
// PMC includes
|
||||
#include "gfx8_perf_counter.h"
|
||||
// SQTT includes
|
||||
#include "gfx8_thread_trace.h"
|
||||
// Block info
|
||||
#include "gfx8_block_info.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// GFX9 block ID mapping table
|
||||
uint32_t Gfx8Factory::block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
|
||||
pm4_profile::kHsaViCounterBlockIdCb0, pm4_profile::kHsaViCounterBlockIdCpf,
|
||||
pm4_profile::kHsaViCounterBlockIdDb0, pm4_profile::kHsaViCounterBlockIdGrbm,
|
||||
pm4_profile::kHsaViCounterBlockIdGrbmSe, pm4_profile::kHsaViCounterBlockIdPaSu,
|
||||
pm4_profile::kHsaViCounterBlockIdPaSc, pm4_profile::kHsaViCounterBlockIdSpi,
|
||||
pm4_profile::kHsaViCounterBlockIdSq, pm4_profile::kHsaViCounterBlockIdSqEs,
|
||||
pm4_profile::kHsaViCounterBlockIdSqGs, pm4_profile::kHsaViCounterBlockIdSqVs,
|
||||
pm4_profile::kHsaViCounterBlockIdSqPs, pm4_profile::kHsaViCounterBlockIdSqLs,
|
||||
pm4_profile::kHsaViCounterBlockIdSqHs, pm4_profile::kHsaViCounterBlockIdSqCs,
|
||||
pm4_profile::kHsaViCounterBlockIdSx, pm4_profile::kHsaViCounterBlockIdTa0,
|
||||
pm4_profile::kHsaViCounterBlockIdTca0, pm4_profile::kHsaViCounterBlockIdTcc0,
|
||||
pm4_profile::kHsaViCounterBlockIdTd0, pm4_profile::kHsaViCounterBlockIdTcp0,
|
||||
pm4_profile::kHsaViCounterBlockIdGds, pm4_profile::kHsaViCounterBlockIdVgt,
|
||||
pm4_profile::kHsaViCounterBlockIdIa, pm4_profile::kHsaViCounterBlockIdMc,
|
||||
pm4_profile::kHsaViCounterBlockIdSrbm, pm4_profile::kHsaViCounterBlockIdTcs,
|
||||
pm4_profile::kHsaViCounterBlockIdWd, pm4_profile::kHsaViCounterBlockIdCpg,
|
||||
pm4_profile::kHsaViCounterBlockIdCpc};
|
||||
|
||||
// Allocates a new GFX8 command writer on every call.
// A failed allocation makes operator new throw std::bad_alloc, so the result
// is never NULL; the former NULL check was unreachable and has been dropped.
// NOTE(review): the object is heap-allocated; confirm which caller deletes it.
pm4_profile::CommandWriter* Gfx8Factory::getCommandWriter() {
  return new pm4_profile::gfx8::Gfx8CmdWriter(false, true);
}
|
||||
|
||||
// Allocates a new GFX8 perfcounter manager on every call.
// A failed allocation makes operator new throw std::bad_alloc, so the result
// is never NULL; the former NULL check was unreachable and has been dropped.
// NOTE(review): the object is heap-allocated; confirm which caller deletes it.
pm4_profile::PerfCounter* Gfx8Factory::getPmcMgr() {
  return new pm4_profile::Gfx8PerfCounter();
}
|
||||
|
||||
// Allocates a new GFX8 thread trace manager on every call.
// A failed allocation makes operator new throw std::bad_alloc, so the result
// is never NULL; the former NULL check was unreachable and has been dropped.
// NOTE(review): the object is heap-allocated; confirm which caller deletes it.
pm4_profile::ThreadTrace* Gfx8Factory::getSqttMgr() {
  return new pm4_profile::Gfx8ThreadTrace();
}
|
||||
|
||||
} // aql_profile
|
||||
@@ -1,65 +0,0 @@
|
||||
#include "pm4_factory.h"
|
||||
// Commandwriter includes
|
||||
#include "gfx9_cmdwriter.h"
|
||||
// PMC includes
|
||||
#include "gfx9_perf_counter.h"
|
||||
// SQTT includes
|
||||
#include "gfx9_thread_trace.h"
|
||||
// Block info
|
||||
#include "gfx9_block_info.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// GFX9 block ID mapping table
|
||||
uint32_t Gfx9Factory::block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER] = {
|
||||
pm4_profile::kHsaAiCounterBlockIdCb0,
|
||||
kBadBlockId /*CPF*/,
|
||||
pm4_profile::kHsaAiCounterBlockIdDb0,
|
||||
pm4_profile::kHsaAiCounterBlockIdGrbm,
|
||||
pm4_profile::kHsaAiCounterBlockIdGrbmSe,
|
||||
pm4_profile::kHsaAiCounterBlockIdPaSu,
|
||||
pm4_profile::kHsaAiCounterBlockIdPaSc,
|
||||
pm4_profile::kHsaAiCounterBlockIdSpi,
|
||||
pm4_profile::kHsaAiCounterBlockIdSq,
|
||||
kBadBlockId /*GFX8:SQES*/,
|
||||
pm4_profile::kHsaAiCounterBlockIdSqGs,
|
||||
pm4_profile::kHsaAiCounterBlockIdSqVs,
|
||||
pm4_profile::kHsaAiCounterBlockIdSqPs,
|
||||
kBadBlockId /*GFX8:SQLS*/,
|
||||
pm4_profile::kHsaAiCounterBlockIdSqHs,
|
||||
pm4_profile::kHsaAiCounterBlockIdSqCs,
|
||||
pm4_profile::kHsaAiCounterBlockIdSx,
|
||||
pm4_profile::kHsaAiCounterBlockIdTa0,
|
||||
pm4_profile::kHsaAiCounterBlockIdTca0,
|
||||
pm4_profile::kHsaAiCounterBlockIdTcc0,
|
||||
pm4_profile::kHsaAiCounterBlockIdTd0,
|
||||
pm4_profile::kHsaAiCounterBlockIdTcp0,
|
||||
pm4_profile::kHsaAiCounterBlockIdGds,
|
||||
pm4_profile::kHsaAiCounterBlockIdVgt,
|
||||
pm4_profile::kHsaAiCounterBlockIdIa,
|
||||
pm4_profile::kHsaAiCounterBlockIdMc,
|
||||
kBadBlockId /*SRBM*/,
|
||||
pm4_profile::kHsaAiCounterBlockIdTcs,
|
||||
pm4_profile::kHsaAiCounterBlockIdWd,
|
||||
kBadBlockId /*CPG*/,
|
||||
pm4_profile::kHsaAiCounterBlockIdCpc};
|
||||
|
||||
// Allocates a new GFX9 command writer on every call.
// A failed allocation makes operator new throw std::bad_alloc, so the result
// is never NULL; the former NULL check was unreachable and has been dropped.
// NOTE(review): the object is heap-allocated; confirm which caller deletes it.
pm4_profile::CommandWriter* Gfx9Factory::getCommandWriter() {
  return new pm4_profile::gfx9::Gfx9CmdWriter(false, true);
}
|
||||
|
||||
// Allocates a new GFX9 perfcounter manager on every call.
// A failed allocation makes operator new throw std::bad_alloc, so the result
// is never NULL; the former NULL check was unreachable and has been dropped.
// NOTE(review): the object is heap-allocated; confirm which caller deletes it.
pm4_profile::PerfCounter* Gfx9Factory::getPmcMgr() {
  return new pm4_profile::Gfx9PerfCounter();
}
|
||||
|
||||
// Allocates a new GFX9 thread trace manager on every call.
// A failed allocation makes operator new throw std::bad_alloc, so the result
// is never NULL; the former NULL check was unreachable and has been dropped.
// NOTE(review): the object is heap-allocated; confirm which caller deletes it.
pm4_profile::ThreadTrace* Gfx9Factory::getSqttMgr() {
  return new pm4_profile::Gfx9ThreadTrace();
}
|
||||
|
||||
} // aql_profile
|
||||
@@ -1,97 +0,0 @@
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
|
||||
#include "aql_profile.h"
|
||||
#include "amd_aql_pm4_ib_packet.h"
|
||||
#include "gfxip/gfx8/si_pm4defs.h"
|
||||
#include "gfxip/gfx8/si_ci_vi_merged_pm4_it_opcodes.h"
|
||||
#include "gfxip/gfx8/si_ci_vi_merged_pm4cmds.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
typedef uint16_t aql_packet_header_t;
|
||||
|
||||
void* legacyAqlAcquire(const packet_t* aql_packet, void* data) {
|
||||
hsa_barrier_and_packet_t* aql_barrier = reinterpret_cast<hsa_barrier_and_packet_t*>(data);
|
||||
memset(aql_barrier, 0, sizeof(hsa_barrier_and_packet_t));
|
||||
const aql_packet_header_t aql_header_type = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE;
|
||||
const aql_packet_header_t aql_header_barrier = 1ul << HSA_PACKET_HEADER_BARRIER;
|
||||
const aql_packet_header_t aql_header_acquire = HSA_FENCE_SCOPE_SYSTEM
|
||||
<< HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE;
|
||||
aql_barrier->header |= aql_header_type;
|
||||
aql_barrier->header |= aql_header_barrier;
|
||||
aql_barrier->header |= aql_header_acquire;
|
||||
return data + sizeof(hsa_barrier_and_packet_t);
|
||||
}
|
||||
|
||||
void* legacyAqlRelease(const packet_t* aql_packet, void* data) {
|
||||
hsa_barrier_and_packet_t* aql_barrier = reinterpret_cast<hsa_barrier_and_packet_t*>(data);
|
||||
memset(aql_barrier, 0, sizeof(hsa_barrier_and_packet_t));
|
||||
const aql_packet_header_t aql_header_type = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE;
|
||||
const aql_packet_header_t aql_header_barrier = 1ul << HSA_PACKET_HEADER_BARRIER;
|
||||
const aql_packet_header_t aql_header_release = HSA_FENCE_SCOPE_SYSTEM
|
||||
<< HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE;
|
||||
aql_barrier->header |= aql_header_type;
|
||||
aql_barrier->header |= aql_header_barrier;
|
||||
aql_barrier->header |= aql_header_release;
|
||||
aql_barrier->completion_signal = aql_packet->completion_signal;
|
||||
return data + sizeof(hsa_barrier_and_packet_t);
|
||||
}
|
||||
|
||||
void* legacyPm4(const packet_t* aql_packet, void* data) {
|
||||
constexpr uint32_t major_version = 8;
|
||||
constexpr uint32_t slot_size_b = 0x40;
|
||||
constexpr uint32_t slot_size_dw = uint32_t(slot_size_b / sizeof(uint32_t));
|
||||
constexpr uint32_t ib_jump_size_dw = 4;
|
||||
constexpr uint32_t rel_mem_size_dw = 7;
|
||||
constexpr uint32_t nop_pad_size_dw = slot_size_dw - (ib_jump_size_dw + rel_mem_size_dw);
|
||||
|
||||
// Construct a set of PM4 to fit inside the AQL packet slot.
|
||||
const amd_aql_pm4_ib_packet_t* aql_pm4_ib =
|
||||
reinterpret_cast<const amd_aql_pm4_ib_packet_t*>(aql_packet);
|
||||
uint32_t* const slot_data = (uint32_t*)data;
|
||||
uint32_t slot_dw_idx = 0;
|
||||
|
||||
// Construct a no-op command to pad the queue slot.
|
||||
uint32_t* nop_pad = &slot_data[slot_dw_idx];
|
||||
slot_dw_idx += nop_pad_size_dw;
|
||||
nop_pad[0] = PM4_CMD(IT_NOP, nop_pad_size_dw);
|
||||
for (int i = 1; i < nop_pad_size_dw; ++i) {
|
||||
nop_pad[i] = 0;
|
||||
}
|
||||
|
||||
// Copy in command to execute the IB.
|
||||
assert(slot_dw_idx + ib_jump_size_dw <= slot_size_dw);
|
||||
uint32_t* ib_jump = &slot_data[slot_dw_idx];
|
||||
slot_dw_idx += ib_jump_size_dw;
|
||||
assert(ib_jump_size_dw == sizeof(aql_pm4_ib->pm4_ib_command) / sizeof(uint32_t));
|
||||
memcpy(ib_jump, aql_pm4_ib->pm4_ib_command, sizeof(aql_pm4_ib->pm4_ib_command));
|
||||
|
||||
// Construct a command to advance the read index and invalidate the packet
|
||||
// header. This must be the last command since this releases the queue slot
|
||||
// for writing.
|
||||
assert(slot_dw_idx + rel_mem_size_dw <= slot_size_dw);
|
||||
PM4CMDRELEASEMEM* rel_mem = reinterpret_cast<PM4CMDRELEASEMEM*>(&slot_data[slot_dw_idx]);
|
||||
assert(rel_mem_size_dw == sizeof(*rel_mem) / sizeof(uint32_t));
|
||||
memset(rel_mem, 0, sizeof(*rel_mem));
|
||||
rel_mem->ordinal1 = PM4_CMD(IT_RELEASE_MEM__CI__VI, rel_mem_size_dw);
|
||||
rel_mem->eventIndex = EVENT_WRITE_INDEX_CACHE_FLUSH_EVENT;
|
||||
|
||||
#if !defined(NDEBUG)
|
||||
std::ostringstream oss;
|
||||
oss << "AQL 'Legacy PM4' size(" << slot_size_dw << ")";
|
||||
std::clog << std::setw(40) << std::left << oss.str() << ":";
|
||||
for (int idx = 0; idx < 16; idx++) {
|
||||
std::clog << " " << std::hex << std::setw(8) << std::setfill('0') << slot_data[idx];
|
||||
}
|
||||
std::clog << std::setfill(' ') << std::endl;
|
||||
#endif
|
||||
|
||||
return data + slot_size_b;
|
||||
}
|
||||
|
||||
} // aql_profile
|
||||
@@ -1,137 +0,0 @@
|
||||
#ifndef _LOGGER_H_
|
||||
#define _LOGGER_H_
|
||||
|
||||
#include <time.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/file.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <exception>
|
||||
#include <mutex>
|
||||
#include <map>
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Process-wide logger singleton.
// Streamed values are appended to the log file (when enabled) and, once the
// 'begm' manipulator has been streamed, also to the calling thread's "last
// message" returned by LastMessage(). The 'endl' manipulator ends the record.
// File logging is enabled when the environment variable
// HSA_VEN_AMD_AQLPROFILE_LOG is set; the file path is fixed, the variable's
// value is not used as a path.
// NOTE(review): the 'streaming' and 'messaging' flags are shared by all
// threads and are read in operator<< without holding the mutex - confirm
// whether concurrent logging is expected.
class Logger {
 public:
  // Streams one value: the first value of a record goes through log(), which
  // adds the timestamp/pid/tid prefix; following values are appended as is.
  template <typename T> Logger& operator<<(const T& m) {
    std::ostringstream oss;
    oss << m;
    if (!streaming)
      log(oss.str());
    else
      put(oss.str());
    streaming = true;
    return *this;
  }

  // Manipulators are member functions applied to the logger itself
  typedef void (Logger::*manip_t)();
  Logger& operator<<(manip_t f) {
    (this->*f)();
    return *this;
  }

  // Starts capturing the streamed text as the thread's last message
  void begm() { messaging = true; }
  // Ends the current record
  void endl() { resetStreaming(); }

  // Returns the last message captured for the calling thread
  static const std::string& LastMessage() {
    Logger& logger = Instance();
    std::lock_guard<std::mutex> lck(mutex);
    return logger.message[GetTid()];
  }

  // Returns the singleton, creating it on first use
  static Logger& Instance() {
    std::lock_guard<std::mutex> lck(mutex);
    if (instance == NULL) instance = new Logger();
    return *instance;
  }

  // Deletes the singleton, if any
  static void Destroy() {
    std::lock_guard<std::mutex> lck(mutex);
    if (instance != NULL) delete instance;
    instance = NULL;
  }

 private:
  static uint32_t GetPid() { return syscall(__NR_getpid); }
  static uint32_t GetTid() { return syscall(__NR_gettid); }

  // Runs with 'mutex' already held by Instance(), so it must not lock it.
  // The flags are fully set by the initializer list; calling resetStreaming()
  // here would relock the non-recursive mutex, which is undefined behaviour.
  Logger() : file(NULL), dirty(false), streaming(false), messaging(false) {
    const char* path = getenv("HSA_VEN_AMD_AQLPROFILE_LOG");
    if (path != NULL) {
      file = fopen("/tmp/aql_profile_log.txt", "a");
    }
  }

  // Runs with 'mutex' already held by Destroy(), so the final newline is
  // written with the non-locking helper instead of put().
  ~Logger() {
    if (file != NULL) {
      if (dirty) writeFile("\n");
      fclose(file);
    }
  }

  // Ends the current record; clears the thread's last message if one was
  // being captured
  void resetStreaming() {
    std::lock_guard<std::mutex> lck(mutex);
    if (messaging) {
      message[GetTid()] = "";
    }
    messaging = false;
    streaming = false;
  }

  // Writes to the log file under an exclusive file lock. The caller must hold
  // 'mutex' and 'file' must be open.
  void writeFile(const std::string& m) {
    dirty = true;
    flock(fileno(file), LOCK_EX);
    fprintf(file, "%s", m.c_str());
    fflush(file);
    flock(fileno(file), LOCK_UN);
  }

  // Appends text to the thread's last message (when capturing) and to the
  // log file (when enabled)
  void put(const std::string& m) {
    std::lock_guard<std::mutex> lck(mutex);
    if (messaging) {
      message[GetTid()] += m;
    }
    if (file != NULL) {
      writeFile(m);
    }
  }

  // Starts a new record: "\n<date time pidN tidM> " followed by the text
  void log(const std::string& m) {
    const time_t rawtime = time(NULL);
    tm tm_info;
    localtime_r(&rawtime, &tm_info);
    char tm_str[26];
    strftime(tm_str, 26, "%Y-%m-%d %H:%M:%S", &tm_info);
    std::ostringstream oss;
    oss << "\n<" << tm_str << std::dec << " pid" << GetPid() << " tid" << GetTid() << "> " << m;
    put(oss.str());
  }

  FILE* file;      // log file, NULL when file logging is disabled
  bool dirty;      // set once something has been written to the file
  bool streaming;  // true while a record is in progress
  bool messaging;  // true while text is captured as the last message

  static std::mutex mutex;
  static Logger* instance;
  std::map<uint32_t, std::string> message;  // last message per thread id
};
|
||||
|
||||
} // namespace aql_profile
|
||||
|
||||
// Start a new log record, "Error: <function>(): " or "Info: <function>(): ";
// the text streamed after the macro is also captured as the thread's last
// message. The manipulators are passed as '&Class::member': a pointer to a
// member function can only be formed with the address-of operator.
#define ERR_LOGGING                                                                                \
  (aql_profile::Logger::Instance() << &aql_profile::Logger::endl                                   \
                                   << "Error: " << __FUNCTION__                                    \
                                   << "(): " << &aql_profile::Logger::begm)
#define INFO_LOGGING                                                                               \
  (aql_profile::Logger::Instance() << &aql_profile::Logger::endl                                   \
                                   << "Info: " << __FUNCTION__                                     \
                                   << "(): " << &aql_profile::Logger::begm)
|
||||
|
||||
#endif // _LOGGER_H_
|
||||
@@ -1,157 +0,0 @@
|
||||
#ifndef _PM4_FACTORY_H_
|
||||
#define _PM4_FACTORY_H_
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <climits>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
|
||||
#include "aql_profile.h"
|
||||
#include "gpu_block_info.h"
|
||||
#include "aql_profile_exception.h"
|
||||
|
||||
namespace pm4_profile {
|
||||
class CommandWriter;
|
||||
class PerfCounter;
|
||||
class ThreadTrace;
|
||||
extern GpuBlockInfo Gfx9HwBlocks[];
|
||||
extern const uint32_t Gfx9HwBlockCount;
|
||||
extern GpuBlockInfo Gfx8HwBlocks[];
|
||||
extern const uint32_t Gfx8HwBlockCount;
|
||||
}
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
class BlockMap {
|
||||
public:
|
||||
typedef std::map<uint32_t, const pm4_profile::GpuBlockInfo*> map_t;
|
||||
typedef map_t::const_iterator iter_t;
|
||||
|
||||
void init(uint32_t* id_table, pm4_profile::GpuBlockInfo* info_table, const uint32_t& info_count) {
|
||||
if (block_map.size() == 0) fill(id_table, info_table, info_count);
|
||||
}
|
||||
|
||||
const pm4_profile::GpuBlockInfo* get(const uint32_t& id) const {
|
||||
iter_t it = block_map.find(id);
|
||||
return (it != block_map.end()) ? it->second : NULL;
|
||||
}
|
||||
|
||||
private:
|
||||
void fill(uint32_t* id_table, pm4_profile::GpuBlockInfo* info_table, const uint32_t& info_count) {
|
||||
map_t info_map;
|
||||
for (uint32_t i = 0; i < info_count; ++i) {
|
||||
const pm4_profile::GpuBlockInfo& entry = info_table[i];
|
||||
info_map[entry.counterGroupId] = &entry;
|
||||
}
|
||||
for (uint32_t i = 0; i < HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER; ++i) {
|
||||
iter_t it = info_map.find(id_table[i]);
|
||||
if (it != info_map.end()) block_map[i] = it->second;
|
||||
}
|
||||
}
|
||||
|
||||
map_t block_map;
|
||||
};
|
||||
|
||||
class Pm4Factory {
|
||||
public:
|
||||
enum { kBadBlockId = UINT_MAX };
|
||||
|
||||
static Pm4Factory* Create(const hsa_agent_t agent);
|
||||
static Pm4Factory* Create(const profile_t* profile) { return Create(profile->agent); }
|
||||
static void Destroy();
|
||||
|
||||
virtual pm4_profile::CommandWriter* getCommandWriter() = 0;
|
||||
virtual pm4_profile::PerfCounter* getPmcMgr() = 0;
|
||||
virtual pm4_profile::ThreadTrace* getSqttMgr() = 0;
|
||||
|
||||
const pm4_profile::GpuBlockInfo* getBlockInfo(const event_t* event) const {
|
||||
const pm4_profile::GpuBlockInfo* info = block_map.get(event->block_name);
|
||||
if (info == NULL) throw event_exception(std::string("Bad block, "), *event);
|
||||
if (event->block_index >= info->maxInstanceCount)
|
||||
throw event_exception(std::string("Bad block index, "), *event);
|
||||
if (event->counter_id > info->maxEventId)
|
||||
throw event_exception(std::string("Bad event ID, "), *event);
|
||||
return info;
|
||||
}
|
||||
|
||||
uint32_t getBlockId(const event_t* event) const {
|
||||
return getBlockInfo(event)->counterGroupId + event->block_index;
|
||||
}
|
||||
|
||||
protected:
|
||||
explicit Pm4Factory(const BlockMap& map) : block_map(map) {}
|
||||
virtual ~Pm4Factory() {}
|
||||
|
||||
private:
|
||||
typedef std::map<std::string, Pm4Factory*> instances_t;
|
||||
|
||||
static std::mutex mutex;
|
||||
static instances_t instances;
|
||||
const BlockMap& block_map;
|
||||
};
|
||||
|
||||
class Gfx8Factory : public Pm4Factory {
|
||||
public:
|
||||
Gfx8Factory() : Pm4Factory(block_map) {
|
||||
block_map.init(block_id_table, pm4_profile::Gfx8HwBlocks, pm4_profile::Gfx8HwBlockCount);
|
||||
}
|
||||
pm4_profile::CommandWriter* getCommandWriter();
|
||||
pm4_profile::PerfCounter* getPmcMgr();
|
||||
pm4_profile::ThreadTrace* getSqttMgr();
|
||||
|
||||
private:
|
||||
static uint32_t block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
BlockMap block_map;
|
||||
};
|
||||
|
||||
class Gfx9Factory : public Pm4Factory {
|
||||
public:
|
||||
Gfx9Factory() : Pm4Factory(block_map) {
|
||||
block_map.init(block_id_table, pm4_profile::Gfx9HwBlocks, pm4_profile::Gfx9HwBlockCount);
|
||||
}
|
||||
pm4_profile::CommandWriter* getCommandWriter();
|
||||
pm4_profile::PerfCounter* getPmcMgr();
|
||||
pm4_profile::ThreadTrace* getSqttMgr();
|
||||
|
||||
private:
|
||||
static uint32_t block_id_table[HSA_VEN_AMD_AQLPROFILE_BLOCKS_NUMBER];
|
||||
BlockMap block_map;
|
||||
};
|
||||
|
||||
// Returns the factory registered under the agent's name, creating and
// registering it on first use. Agents reporting the same name share one factory.
//
// Throws aql_profile_exc_val<std::string> for "gfx801" agents and for any name
// that starts with neither "gfx8" nor "gfx9"; throws aql_profile_exc_msg if no
// factory object could be obtained. The registry is only modified after a
// factory has been built successfully, so a failed call leaves it unchanged.
inline Pm4Factory* Pm4Factory::Create(const hsa_agent_t agent) {
  std::lock_guard<std::mutex> lck(mutex);

  // Zero-initialised so that a failed query yields an empty name, which is
  // rejected below as unsupported, instead of reading indeterminate bytes.
  char agent_name[64] = {};
  hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_name);
  instances_t::iterator it = instances.find(agent_name);

  if (it == instances.end()) {
    // Build the factory first; 'it' is the end iterator here and must not be
    // dereferenced until a real entry has been inserted.
    Pm4Factory* factory = NULL;
    if (strncmp(agent_name, "gfx801", 6) == 0) {
      throw aql_profile_exc_val<std::string>(std::string("GFX8 Carrizo is not supported "),
                                             agent_name);
    } else if (strncmp(agent_name, "gfx8", 4) == 0) {
      factory = new Gfx8Factory();
    } else if (strncmp(agent_name, "gfx9", 4) == 0) {
      factory = new Gfx9Factory();
    } else {
      throw aql_profile_exc_val<std::string>("Unsupported GFXIP", agent_name);
    }

    if (factory == NULL) throw aql_profile_exc_msg("Pm4Factory allocation failed");
    it = instances.insert(instances_t::value_type(agent_name, factory)).first;
  }

  if (it->second == NULL) throw aql_profile_exc_msg("Pm4Factory allocation failed");
  return it->second;
}
|
||||
|
||||
inline void Pm4Factory::Destroy() {
|
||||
std::lock_guard<std::mutex> lck(mutex);
|
||||
for (auto it : instances) delete it.second;
|
||||
instances.clear();
|
||||
}
|
||||
|
||||
} // namespace aql_profile
|
||||
|
||||
#endif // _PM4_FACTORY_H_
|
||||
@@ -1,49 +0,0 @@
|
||||
#include <assert.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
|
||||
#include "aql_profile.h"
|
||||
#include "cmdwriter.h"
|
||||
#include "amd_aql_pm4_ib_packet.h"
|
||||
|
||||
namespace aql_profile {
|
||||
|
||||
// Fills 'aql_packet' as a PM4 indirect-buffer AQL packet.
//   ib_packet  - the four command dwords copied into pm4_ib_command
//   aql_packet - destination packet, written through an amd_aql_pm4_ib_packet_t view
// In builds without NDEBUG the resulting packet dwords are also dumped to std::clog.
void populateAql(const uint32_t* ib_packet, packet_t* aql_packet) {
  // Populate relevant fields of Aql pkt
  // Size of IB pkt is four DWords
  // Header and completion signal are not set
  amd_aql_pm4_ib_packet_t* aql_pm4_ib = reinterpret_cast<amd_aql_pm4_ib_packet_t*>(aql_packet);
  aql_pm4_ib->pm4_ib_format = AMD_AQL_PM4_IB_FORMAT;
  for (int i = 0; i < 4; ++i) {
    aql_pm4_ib->pm4_ib_command[i] = ib_packet[i];
  }
  aql_pm4_ib->dw_count_remain = AMD_AQL_PM4_IB_DW_COUNT_REMAIN;
  for (int i = 0; i < AMD_AQL_PM4_IB_RESERVED_COUNT; ++i) {
    aql_pm4_ib->reserved[i] = 0;
  }

#if !defined(NDEBUG)
  const uint32_t* dwords = reinterpret_cast<const uint32_t*>(aql_packet);
  const uint32_t dword_count = sizeof(*aql_packet) / sizeof(uint32_t);

  // The label carries the real dword count instead of a hard-coded number.
  std::ostringstream label;
  label << "AQL 'IB' size(" << dword_count << ")";

  // Format into a local stream so no formatting flags are left set on
  // std::clog for unrelated output that follows.
  std::ostringstream dump;
  dump << std::setw(40) << std::left << label.str() << ":";
  // Switch back to right alignment: with std::left still active the zero fill
  // would be appended after the digits and change the printed value.
  dump << std::right << std::hex << std::setfill('0');
  for (uint32_t idx = 0; idx < dword_count; ++idx) {
    dump << " " << std::setw(8) << dwords[idx];
  }
  std::clog << dump.str() << std::endl;
#endif
}
|
||||
|
||||
void populateAql(const void* cmd_buffer, uint32_t cmd_size, pm4_profile::CommandWriter* cmd_writer,
|
||||
packet_t* aql_packet) {
|
||||
pm4_profile::DefaultCmdBuf ib_buffer;
|
||||
cmd_writer->BuildIndirectBufferCmd(&ib_buffer, cmd_buffer, (size_t)cmd_size);
|
||||
populateAql((const uint32_t*)ib_buffer.Base(), aql_packet);
|
||||
}
|
||||
|
||||
} // aql_profile
|
||||
@@ -1,17 +0,0 @@
|
||||
#
# Source files for Rocr PerfCntr
#
set ( LIB_SRC
  gfx8_perf_counter.cpp
  gfx9_perf_counter.cpp
  gfx8_block_info.cpp
  gfx9_block_info.cpp
)

#
# Build PerfCntr as a Static Library object
#
add_library ( ${PMC_LIB} STATIC ${LIB_SRC} )

#
# Header files include path(s), attached to the library target only
# so they do not leak into other targets of this directory.
#
target_include_directories ( ${PMC_LIB} PRIVATE "${PROJ_DIR}/commandwriter" )
|
||||
@@ -1,624 +0,0 @@
|
||||
#include "gfx8_block_info.h"
|
||||
#include "gfxip/gfx8/si_ci_vi_merged_offset.h"
|
||||
|
||||
namespace pm4_profile {
|
||||
/**
 * Table of counter groups describing the VI (GFX8) hardware blocks, one row
 * per block, laid out as defined by the \ref GpuBlockInfo structure.
 * The last row has an empty name and marks the end of the table.
 */
|
||||
GpuBlockInfo Gfx8HwBlocks[] = {
|
||||
// Counter block CB
|
||||
{"VI_CB0", kHsaViCounterBlockIdCb0, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_CB,
|
||||
CntlMethodBySeAndInstance, 395, VI_COUNTER_NUM_PER_CB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_CB1", kHsaViCounterBlockIdCb1, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_CB,
|
||||
CntlMethodBySeAndInstance, 395, VI_COUNTER_NUM_PER_CB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_CB2", kHsaViCounterBlockIdCb2, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_CB,
|
||||
CntlMethodBySeAndInstance, 395, VI_COUNTER_NUM_PER_CB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_CB3", kHsaViCounterBlockIdCb3, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_CB,
|
||||
CntlMethodBySeAndInstance, 395, VI_COUNTER_NUM_PER_CB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block CPF
|
||||
{"VI_CPF", kHsaViCounterBlockIdCpf, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 19,
|
||||
VI_COUNTER_NUM_PER_CPF, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block DB
|
||||
{"VI_DB0", kHsaViCounterBlockIdDb0, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_DB,
|
||||
CntlMethodBySeAndInstance, 256, VI_COUNTER_NUM_PER_DB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_DB1", kHsaViCounterBlockIdDb1, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_DB,
|
||||
CntlMethodBySeAndInstance, 256, VI_COUNTER_NUM_PER_DB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_DB2", kHsaViCounterBlockIdDb2, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_DB,
|
||||
CntlMethodBySeAndInstance, 256, VI_COUNTER_NUM_PER_DB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_DB3", kHsaViCounterBlockIdDb3, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_DB,
|
||||
CntlMethodBySeAndInstance, 256, VI_COUNTER_NUM_PER_DB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block GRBM
|
||||
{"VI_GRBM", kHsaViCounterBlockIdGrbm, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 33,
|
||||
VI_COUNTER_NUM_PER_GRBM, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block GRBMSE
|
||||
{"VI_GRBMSE", kHsaViCounterBlockIdGrbmSe, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 14,
|
||||
VI_COUNTER_NUM_PER_GRBMSE, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block PA_SU
|
||||
{"VI_PA_SU", kHsaViCounterBlockIdPaSu, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 152,
|
||||
VI_COUNTER_NUM_PER_PA_SU, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block PA_SC
|
||||
{"VI_PA_SC", kHsaViCounterBlockIdPaSc, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 396,
|
||||
VI_COUNTER_NUM_PER_PA_SC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block SPI
|
||||
{"VI_SPI", kHsaViCounterBlockIdSpi, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 196,
|
||||
VI_COUNTER_NUM_PER_SPI, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block SQ
|
||||
{"VI_SQ", kHsaViCounterBlockIdSq, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
|
||||
VI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_SQ_ES", kHsaViCounterBlockIdSqEs, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
|
||||
VI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_SQ_GS", kHsaViCounterBlockIdSqGs, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
|
||||
VI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_SQ_VS", kHsaViCounterBlockIdSqVs, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
|
||||
VI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_SQ_PS", kHsaViCounterBlockIdSqPs, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
|
||||
VI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_SQ_LS", kHsaViCounterBlockIdSqLs, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
|
||||
VI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_SQ_HS", kHsaViCounterBlockIdSqHs, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
|
||||
VI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_SQ_CS", kHsaViCounterBlockIdSqCs, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
|
||||
VI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block SX
|
||||
{"VI_SX", kHsaViCounterBlockIdSx, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 33,
|
||||
VI_COUNTER_NUM_PER_SX, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block TA
|
||||
{"VI_TA0", kHsaViCounterBlockIdTa0, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TA1", kHsaViCounterBlockIdTa1, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TA2", kHsaViCounterBlockIdTa2, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TA3", kHsaViCounterBlockIdTa3, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TA4", kHsaViCounterBlockIdTa4, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TA5", kHsaViCounterBlockIdTa5, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TA6", kHsaViCounterBlockIdTa6, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TA7", kHsaViCounterBlockIdTa7, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TA8", kHsaViCounterBlockIdTa8, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TA9", kHsaViCounterBlockIdTa9, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TA10", kHsaViCounterBlockIdTa10, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TA11", kHsaViCounterBlockIdTa11, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TA12", kHsaViCounterBlockIdTa12, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TA13", kHsaViCounterBlockIdTa13, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TA14", kHsaViCounterBlockIdTa14, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TA15", kHsaViCounterBlockIdTa15, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, VI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block TCA
|
||||
{"VI_TCA0", kHsaViCounterBlockIdTca0, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCA,
|
||||
CntlMethodByInstance, 34, VI_COUNTER_NUM_PER_TCA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCA1", kHsaViCounterBlockIdTca1, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCA,
|
||||
CntlMethodByInstance, 34, VI_COUNTER_NUM_PER_TCA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block TCC
|
||||
{"VI_TCC0", kHsaViCounterBlockIdTcc0, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCC1", kHsaViCounterBlockIdTcc1, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCC2", kHsaViCounterBlockIdTcc2, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCC3", kHsaViCounterBlockIdTcc3, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCC4", kHsaViCounterBlockIdTcc4, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCC5", kHsaViCounterBlockIdTcc5, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCC6", kHsaViCounterBlockIdTcc6, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCC7", kHsaViCounterBlockIdTcc7, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCC8", kHsaViCounterBlockIdTcc8, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCC9", kHsaViCounterBlockIdTcc9, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCC10", kHsaViCounterBlockIdTcc10, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCC11", kHsaViCounterBlockIdTcc11, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCC12", kHsaViCounterBlockIdTcc12, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCC13", kHsaViCounterBlockIdTcc13, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCC14", kHsaViCounterBlockIdTcc14, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCC15", kHsaViCounterBlockIdTcc15, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, VI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block TD
|
||||
{"VI_TD0", kHsaViCounterBlockIdTd0, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TD1", kHsaViCounterBlockIdTd1, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TD2", kHsaViCounterBlockIdTd2, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TD3", kHsaViCounterBlockIdTd3, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TD4", kHsaViCounterBlockIdTd4, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TD5", kHsaViCounterBlockIdTd5, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TD6", kHsaViCounterBlockIdTd6, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TD7", kHsaViCounterBlockIdTd7, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TD8", kHsaViCounterBlockIdTd8, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TD9", kHsaViCounterBlockIdTd9, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TD10", kHsaViCounterBlockIdTd10, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TD11", kHsaViCounterBlockIdTd11, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TD12", kHsaViCounterBlockIdTd12, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TD13", kHsaViCounterBlockIdTd13, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TD14", kHsaViCounterBlockIdTd14, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TD15", kHsaViCounterBlockIdTd15, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, VI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block TCP
|
||||
{"VI_TCP0", kHsaViCounterBlockIdTcp0, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCP1", kHsaViCounterBlockIdTcp1, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCP2", kHsaViCounterBlockIdTcp2, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCP3", kHsaViCounterBlockIdTcp3, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCP4", kHsaViCounterBlockIdTcp4, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCP5", kHsaViCounterBlockIdTcp5, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCP6", kHsaViCounterBlockIdTcp6, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCP7", kHsaViCounterBlockIdTcp7, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCP8", kHsaViCounterBlockIdTcp8, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCP9", kHsaViCounterBlockIdTcp9, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCP10", kHsaViCounterBlockIdTcp10, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCP11", kHsaViCounterBlockIdTcp11, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCP12", kHsaViCounterBlockIdTcp12, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCP13", kHsaViCounterBlockIdTcp13, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCP14", kHsaViCounterBlockIdTcp14, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"VI_TCP15", kHsaViCounterBlockIdTcp15, VI_MAX_NUM_SHADER_ENGINES, 2, VI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, VI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block GDS
|
||||
{"VI_GDS", kHsaViCounterBlockIdGds, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 120,
|
||||
VI_COUNTER_NUM_PER_GDS, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block VGT
|
||||
{"VI_VGT", kHsaViCounterBlockIdVgt, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 145,
|
||||
VI_COUNTER_NUM_PER_VGT, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block IA
|
||||
{"VI_IA", kHsaViCounterBlockIdIa, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 23,
|
||||
VI_COUNTER_NUM_PER_IA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block MC
|
||||
{"VI_MC", kHsaViCounterBlockIdMc, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 22,
|
||||
VI_COUNTER_NUM_PER_MC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block SRBM
|
||||
{"VI_SRBM", kHsaViCounterBlockIdSrbm, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 19,
|
||||
VI_COUNTER_NUM_PER_SRBM, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block WD
|
||||
{"VI_WD", kHsaViCounterBlockIdWd, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 36,
|
||||
VI_COUNTER_NUM_PER_WD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block CPG
|
||||
{"VI_CPG", kHsaViCounterBlockIdCpg, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 48,
|
||||
VI_COUNTER_NUM_PER_CPG, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block CPC
|
||||
{"VI_CPC", kHsaViCounterBlockIdCpc, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 24,
|
||||
VI_COUNTER_NUM_PER_CPC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block IOMMUV2
|
||||
{"VI_IOMMUV2", kHsaViCounterBlockIdIommuV2, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 25,
|
||||
8, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block KernelDriver
|
||||
{"VI_KD", kHsaViCounterBlockIdKernelDriver, VI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 0,
|
||||
0, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Name of the last line should be empty to indicate end of all counter groups
|
||||
{"", kHsaViCounterBlockIdBlocksLast, 0, 0, 0, CntlMethodNone, 0, 0, 0, 0, false, 0, 0, false, 0,
|
||||
0}};
|
||||
|
||||
// Number of rows in Gfx8HwBlocks, counted from the array itself; this includes
// the empty-named row that closes the table.
extern const uint32_t Gfx8HwBlockCount = sizeof(Gfx8HwBlocks) / sizeof(Gfx8HwBlocks[0]);
|
||||
|
||||
/*
|
||||
* The following tables contain the register addresses of the performance
* counter registers, one table per hardware block (SQ first)
|
||||
*/
|
||||
|
||||
/*
|
||||
* SQ
|
||||
*/
|
||||
GpuCounterRegInfo ViSqCounterRegAddr[] = {
|
||||
{mmSQ_PERFCOUNTER0_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI, mmSQ_PERFCOUNTER0_LO__CI__VI,
|
||||
mmSQ_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmSQ_PERFCOUNTER1_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI, mmSQ_PERFCOUNTER1_LO__CI__VI,
|
||||
mmSQ_PERFCOUNTER1_HI__CI__VI},
|
||||
{mmSQ_PERFCOUNTER2_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI, mmSQ_PERFCOUNTER2_LO__CI__VI,
|
||||
mmSQ_PERFCOUNTER2_HI__CI__VI},
|
||||
{mmSQ_PERFCOUNTER3_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI, mmSQ_PERFCOUNTER3_LO__CI__VI,
|
||||
mmSQ_PERFCOUNTER3_HI__CI__VI},
|
||||
{mmSQ_PERFCOUNTER4_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI, mmSQ_PERFCOUNTER4_LO__CI__VI,
|
||||
mmSQ_PERFCOUNTER4_HI__CI__VI},
|
||||
{mmSQ_PERFCOUNTER5_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI, mmSQ_PERFCOUNTER5_LO__CI__VI,
|
||||
mmSQ_PERFCOUNTER5_HI__CI__VI},
|
||||
{mmSQ_PERFCOUNTER6_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI, mmSQ_PERFCOUNTER6_LO__CI__VI,
|
||||
mmSQ_PERFCOUNTER6_HI__CI__VI},
|
||||
{mmSQ_PERFCOUNTER7_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI, mmSQ_PERFCOUNTER7_LO__CI__VI,
|
||||
mmSQ_PERFCOUNTER7_HI__CI__VI},
|
||||
{mmSQ_PERFCOUNTER8_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI, mmSQ_PERFCOUNTER8_LO__CI__VI,
|
||||
mmSQ_PERFCOUNTER8_HI__CI__VI},
|
||||
{mmSQ_PERFCOUNTER9_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI, mmSQ_PERFCOUNTER9_LO__CI__VI,
|
||||
mmSQ_PERFCOUNTER9_HI__CI__VI},
|
||||
{mmSQ_PERFCOUNTER10_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI,
|
||||
mmSQ_PERFCOUNTER10_LO__CI__VI, mmSQ_PERFCOUNTER10_HI__CI__VI},
|
||||
{mmSQ_PERFCOUNTER11_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI,
|
||||
mmSQ_PERFCOUNTER11_LO__CI__VI, mmSQ_PERFCOUNTER11_HI__CI__VI},
|
||||
{mmSQ_PERFCOUNTER12_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI,
|
||||
mmSQ_PERFCOUNTER12_LO__CI__VI, mmSQ_PERFCOUNTER12_HI__CI__VI},
|
||||
{mmSQ_PERFCOUNTER13_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI,
|
||||
mmSQ_PERFCOUNTER13_LO__CI__VI, mmSQ_PERFCOUNTER13_HI__CI__VI},
|
||||
{mmSQ_PERFCOUNTER14_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI,
|
||||
mmSQ_PERFCOUNTER14_LO__CI__VI, mmSQ_PERFCOUNTER14_HI__CI__VI},
|
||||
{mmSQ_PERFCOUNTER15_SELECT__CI__VI, mmSQ_PERFCOUNTER_CTRL__CI__VI,
|
||||
mmSQ_PERFCOUNTER15_LO__CI__VI, mmSQ_PERFCOUNTER15_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* DRMDMA
|
||||
*/
|
||||
GpuCounterRegInfo ViDrmdmaCounterRegAddr[] = {
|
||||
{mmSDMA0_PERFMON_CNTL__VI, 0, mmSDMA0_PERFCOUNTER0_RESULT__VI, 0},
|
||||
{mmSDMA0_PERFMON_CNTL__VI, 0, mmSDMA0_PERFCOUNTER1_RESULT__VI, 0},
|
||||
{mmSDMA1_PERFMON_CNTL__VI, 0, mmSDMA1_PERFCOUNTER0_RESULT__VI, 0},
|
||||
{mmSDMA1_PERFMON_CNTL__VI, 0, mmSDMA1_PERFCOUNTER1_RESULT__VI, 0},
|
||||
};
|
||||
|
||||
/*
|
||||
* IH
|
||||
*/
|
||||
GpuCounterRegInfo ViIhCounterRegAddr[] = {
|
||||
{mmIH_PERFMON_CNTL__VI, 0, mmIH_PERFCOUNTER0_RESULT__VI, 0},
|
||||
{mmIH_PERFMON_CNTL__VI, 0, mmIH_PERFCOUNTER1_RESULT__VI, 0}};
|
||||
|
||||
/*
|
||||
* CPF
|
||||
*/
|
||||
GpuCounterRegInfo ViCpfCounterRegAddr[] = {
|
||||
{mmCPF_PERFCOUNTER0_SELECT__CI__VI, 0, mmCPF_PERFCOUNTER0_LO__CI__VI,
|
||||
mmCPF_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmCPF_PERFCOUNTER1_SELECT__CI__VI, 0, mmCPF_PERFCOUNTER1_LO__CI__VI,
|
||||
mmCPF_PERFCOUNTER1_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* DRM
|
||||
*/
|
||||
GpuCounterRegInfo ViDrmCounterRegAddr[] = {
|
||||
{mmDRM_PERFCOUNTER1_SELECT, 0, mmDRM_PERFCOUNTER1_LO, mmDRM_PERFCOUNTER1_HI},
|
||||
{mmDRM_PERFCOUNTER2_SELECT, 0, mmDRM_PERFCOUNTER2_LO, mmDRM_PERFCOUNTER2_HI}};
|
||||
|
||||
/*
|
||||
* GRBM
|
||||
*/
|
||||
GpuCounterRegInfo ViGrbmCounterRegAddr[] = {
|
||||
{mmGRBM_PERFCOUNTER0_SELECT__CI__VI, 0, mmGRBM_PERFCOUNTER0_LO__CI__VI,
|
||||
mmGRBM_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmGRBM_PERFCOUNTER1_SELECT__CI__VI, 0, mmGRBM_PERFCOUNTER1_LO__CI__VI,
|
||||
mmGRBM_PERFCOUNTER1_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* GRBM_SE
|
||||
*/
|
||||
GpuCounterRegInfo ViGrbmSeCounterRegAddr[] = {
|
||||
{mmGRBM_SE0_PERFCOUNTER_SELECT__CI__VI, 0, mmGRBM_SE0_PERFCOUNTER_LO__CI__VI,
|
||||
mmGRBM_SE0_PERFCOUNTER_HI__CI__VI},
|
||||
{mmGRBM_SE1_PERFCOUNTER_SELECT__CI__VI, 0, mmGRBM_SE1_PERFCOUNTER_LO__CI__VI,
|
||||
mmGRBM_SE1_PERFCOUNTER_HI__CI__VI},
|
||||
{mmGRBM_SE2_PERFCOUNTER_SELECT__CI__VI, 0, mmGRBM_SE2_PERFCOUNTER_LO__CI__VI,
|
||||
mmGRBM_SE2_PERFCOUNTER_HI__CI__VI},
|
||||
{mmGRBM_SE3_PERFCOUNTER_SELECT__CI__VI, 0, mmGRBM_SE3_PERFCOUNTER_LO__CI__VI,
|
||||
mmGRBM_SE3_PERFCOUNTER_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* PA_SU
|
||||
*/
|
||||
GpuCounterRegInfo ViPaSuCounterRegAddr[] = {
|
||||
{mmPA_SU_PERFCOUNTER0_SELECT__CI__VI, 0, mmPA_SU_PERFCOUNTER0_LO__CI__VI,
|
||||
mmPA_SU_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmPA_SU_PERFCOUNTER1_SELECT__CI__VI, 0, mmPA_SU_PERFCOUNTER1_LO__CI__VI,
|
||||
mmPA_SU_PERFCOUNTER1_HI__CI__VI},
|
||||
{mmPA_SU_PERFCOUNTER2_SELECT__CI__VI, 0, mmPA_SU_PERFCOUNTER2_LO__CI__VI,
|
||||
mmPA_SU_PERFCOUNTER2_HI__CI__VI},
|
||||
{mmPA_SU_PERFCOUNTER3_SELECT__CI__VI, 0, mmPA_SU_PERFCOUNTER3_LO__CI__VI,
|
||||
mmPA_SU_PERFCOUNTER3_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* PA_SC
|
||||
*/
|
||||
GpuCounterRegInfo ViPaScCounterRegAddr[] = {
|
||||
{mmPA_SC_PERFCOUNTER0_SELECT__CI__VI, 0, mmPA_SC_PERFCOUNTER0_LO__CI__VI,
|
||||
mmPA_SC_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmPA_SC_PERFCOUNTER1_SELECT__CI__VI, 0, mmPA_SC_PERFCOUNTER1_LO__CI__VI,
|
||||
mmPA_SC_PERFCOUNTER1_HI__CI__VI},
|
||||
{mmPA_SC_PERFCOUNTER2_SELECT__CI__VI, 0, mmPA_SC_PERFCOUNTER2_LO__CI__VI,
|
||||
mmPA_SC_PERFCOUNTER2_HI__CI__VI},
|
||||
{mmPA_SC_PERFCOUNTER3_SELECT__CI__VI, 0, mmPA_SC_PERFCOUNTER3_LO__CI__VI,
|
||||
mmPA_SC_PERFCOUNTER3_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* SPI
|
||||
*/
|
||||
GpuCounterRegInfo ViSpiCounterRegAddr[] = {
|
||||
{mmSPI_PERFCOUNTER0_SELECT__CI__VI, 0, mmSPI_PERFCOUNTER0_LO__CI__VI,
|
||||
mmSPI_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmSPI_PERFCOUNTER1_SELECT__CI__VI, 0, mmSPI_PERFCOUNTER1_LO__CI__VI,
|
||||
mmSPI_PERFCOUNTER1_HI__CI__VI},
|
||||
{mmSPI_PERFCOUNTER2_SELECT__CI__VI, 0, mmSPI_PERFCOUNTER2_LO__CI__VI,
|
||||
mmSPI_PERFCOUNTER2_HI__CI__VI},
|
||||
{mmSPI_PERFCOUNTER3_SELECT__CI__VI, 0, mmSPI_PERFCOUNTER3_LO__CI__VI,
|
||||
mmSPI_PERFCOUNTER3_HI__CI__VI},
|
||||
{mmSPI_PERFCOUNTER4_SELECT__CI__VI, 0, mmSPI_PERFCOUNTER4_LO__CI__VI,
|
||||
mmSPI_PERFCOUNTER4_HI__CI__VI},
|
||||
{mmSPI_PERFCOUNTER5_SELECT__CI__VI, 0, mmSPI_PERFCOUNTER5_LO__CI__VI,
|
||||
mmSPI_PERFCOUNTER5_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* TCA
|
||||
*/
|
||||
GpuCounterRegInfo ViTcaCounterRegAddr[] = {
|
||||
{mmTCA_PERFCOUNTER0_SELECT__CI__VI, 0, mmTCA_PERFCOUNTER0_LO__CI__VI,
|
||||
mmTCA_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmTCA_PERFCOUNTER1_SELECT__CI__VI, 0, mmTCA_PERFCOUNTER1_LO__CI__VI,
|
||||
mmTCA_PERFCOUNTER1_HI__CI__VI},
|
||||
{mmTCA_PERFCOUNTER2_SELECT__CI__VI, 0, mmTCA_PERFCOUNTER2_LO__CI__VI,
|
||||
mmTCA_PERFCOUNTER2_HI__CI__VI},
|
||||
{mmTCA_PERFCOUNTER3_SELECT__CI__VI, 0, mmTCA_PERFCOUNTER3_LO__CI__VI,
|
||||
mmTCA_PERFCOUNTER3_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* TCC
|
||||
*/
|
||||
GpuCounterRegInfo ViTccCounterRegAddr[] = {
|
||||
{mmTCC_PERFCOUNTER0_SELECT__CI__VI, 0, mmTCC_PERFCOUNTER0_LO__CI__VI,
|
||||
mmTCC_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmTCC_PERFCOUNTER1_SELECT__CI__VI, 0, mmTCC_PERFCOUNTER1_LO__CI__VI,
|
||||
mmTCC_PERFCOUNTER1_HI__CI__VI},
|
||||
{mmTCC_PERFCOUNTER2_SELECT__CI__VI, 0, mmTCC_PERFCOUNTER2_LO__CI__VI,
|
||||
mmTCC_PERFCOUNTER2_HI__CI__VI},
|
||||
{mmTCC_PERFCOUNTER3_SELECT__CI__VI, 0, mmTCC_PERFCOUNTER3_LO__CI__VI,
|
||||
mmTCC_PERFCOUNTER3_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* TCP
|
||||
*/
|
||||
GpuCounterRegInfo ViTcpCounterRegAddr[] = {
|
||||
{mmTCP_PERFCOUNTER0_SELECT__CI__VI, 0, mmTCP_PERFCOUNTER0_LO__CI__VI,
|
||||
mmTCP_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmTCP_PERFCOUNTER1_SELECT__CI__VI, 0, mmTCP_PERFCOUNTER1_LO__CI__VI,
|
||||
mmTCP_PERFCOUNTER1_HI__CI__VI},
|
||||
{mmTCP_PERFCOUNTER2_SELECT__CI__VI, 0, mmTCP_PERFCOUNTER2_LO__CI__VI,
|
||||
mmTCP_PERFCOUNTER2_HI__CI__VI},
|
||||
{mmTCP_PERFCOUNTER3_SELECT__CI__VI, 0, mmTCP_PERFCOUNTER3_LO__CI__VI,
|
||||
mmTCP_PERFCOUNTER3_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* CB
|
||||
*/
|
||||
GpuCounterRegInfo ViCbCounterRegAddr[] = {
|
||||
{mmCB_PERFCOUNTER0_SELECT__CI__VI, 0, mmCB_PERFCOUNTER0_LO__CI__VI,
|
||||
mmCB_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmCB_PERFCOUNTER1_SELECT__CI__VI, 0, mmCB_PERFCOUNTER1_LO__CI__VI,
|
||||
mmCB_PERFCOUNTER1_HI__CI__VI},
|
||||
{mmCB_PERFCOUNTER2_SELECT__CI__VI, 0, mmCB_PERFCOUNTER2_LO__CI__VI,
|
||||
mmCB_PERFCOUNTER2_HI__CI__VI},
|
||||
{mmCB_PERFCOUNTER3_SELECT__CI__VI, 0, mmCB_PERFCOUNTER3_LO__CI__VI,
|
||||
mmCB_PERFCOUNTER3_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* DB
|
||||
*/
|
||||
GpuCounterRegInfo ViDbCounterRegAddr[] = {
|
||||
{mmDB_PERFCOUNTER0_SELECT__CI__VI, 0, mmDB_PERFCOUNTER0_LO__CI__VI,
|
||||
mmDB_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmDB_PERFCOUNTER1_SELECT__CI__VI, 0, mmDB_PERFCOUNTER1_LO__CI__VI,
|
||||
mmDB_PERFCOUNTER1_HI__CI__VI},
|
||||
{mmDB_PERFCOUNTER2_SELECT__CI__VI, 0, mmDB_PERFCOUNTER2_LO__CI__VI,
|
||||
mmDB_PERFCOUNTER2_HI__CI__VI},
|
||||
{mmDB_PERFCOUNTER3_SELECT__CI__VI, 0, mmDB_PERFCOUNTER3_LO__CI__VI,
|
||||
mmDB_PERFCOUNTER3_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* RLC
|
||||
*/
|
||||
GpuCounterRegInfo ViRlcCounterRegAddr[] = {
|
||||
{mmRLC_PERFCOUNTER0_SELECT__CI__VI, 0, mmRLC_PERFCOUNTER0_LO__CI__VI,
|
||||
mmRLC_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmRLC_PERFCOUNTER1_SELECT__CI__VI, 0, mmRLC_PERFCOUNTER1_LO__CI__VI,
|
||||
mmRLC_PERFCOUNTER1_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* SC
|
||||
*/
|
||||
GpuCounterRegInfo ViScCounterRegAddr[] = {
|
||||
{mmPA_SC_PERFCOUNTER0_SELECT__CI__VI, 0, mmPA_SC_PERFCOUNTER0_LO__CI__VI,
|
||||
mmPA_SC_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmPA_SC_PERFCOUNTER1_SELECT__CI__VI, 0, mmPA_SC_PERFCOUNTER1_LO__CI__VI,
|
||||
mmPA_SC_PERFCOUNTER1_HI__CI__VI},
|
||||
{mmPA_SC_PERFCOUNTER2_SELECT__CI__VI, 0, mmPA_SC_PERFCOUNTER2_LO__CI__VI,
|
||||
mmPA_SC_PERFCOUNTER2_HI__CI__VI},
|
||||
{mmPA_SC_PERFCOUNTER3_SELECT__CI__VI, 0, mmPA_SC_PERFCOUNTER3_LO__CI__VI,
|
||||
mmPA_SC_PERFCOUNTER3_HI__CI__VI},
|
||||
{mmPA_SC_PERFCOUNTER4_SELECT__CI__VI, 0, mmPA_SC_PERFCOUNTER4_LO__CI__VI,
|
||||
mmPA_SC_PERFCOUNTER4_HI__CI__VI},
|
||||
{mmPA_SC_PERFCOUNTER5_SELECT__CI__VI, 0, mmPA_SC_PERFCOUNTER5_LO__CI__VI,
|
||||
mmPA_SC_PERFCOUNTER5_HI__CI__VI},
|
||||
{mmPA_SC_PERFCOUNTER6_SELECT__CI__VI, 0, mmPA_SC_PERFCOUNTER6_LO__CI__VI,
|
||||
mmPA_SC_PERFCOUNTER6_HI__CI__VI},
|
||||
{mmPA_SC_PERFCOUNTER7_SELECT__CI__VI, 0, mmPA_SC_PERFCOUNTER7_LO__CI__VI,
|
||||
mmPA_SC_PERFCOUNTER7_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* SX
|
||||
*/
|
||||
GpuCounterRegInfo ViSxCounterRegAddr[] = {
|
||||
{mmSX_PERFCOUNTER0_SELECT__CI__VI, 0, mmSX_PERFCOUNTER0_LO__CI__VI,
|
||||
mmSX_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmSX_PERFCOUNTER1_SELECT__CI__VI, 0, mmSX_PERFCOUNTER1_LO__CI__VI,
|
||||
mmSX_PERFCOUNTER1_HI__CI__VI},
|
||||
{mmSX_PERFCOUNTER2_SELECT__CI__VI, 0, mmSX_PERFCOUNTER2_LO__CI__VI,
|
||||
mmSX_PERFCOUNTER2_HI__CI__VI},
|
||||
{mmSX_PERFCOUNTER3_SELECT__CI__VI, 0, mmSX_PERFCOUNTER3_LO__CI__VI,
|
||||
mmSX_PERFCOUNTER3_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* TA
|
||||
*/
|
||||
GpuCounterRegInfo ViTaCounterRegAddr[] = {
|
||||
{mmTA_PERFCOUNTER0_SELECT__CI__VI, 0, mmTA_PERFCOUNTER0_LO__CI__VI,
|
||||
mmTA_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmTA_PERFCOUNTER1_SELECT__CI__VI, 0, mmTA_PERFCOUNTER1_LO__CI__VI,
|
||||
mmTA_PERFCOUNTER1_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* TD
|
||||
*/
|
||||
GpuCounterRegInfo ViTdCounterRegAddr[] = {
|
||||
{mmTD_PERFCOUNTER0_SELECT__CI__VI, 0, mmTD_PERFCOUNTER0_LO__CI__VI,
|
||||
mmTD_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmTD_PERFCOUNTER1_SELECT__CI__VI, 0, mmTD_PERFCOUNTER1_LO__CI__VI,
|
||||
mmTD_PERFCOUNTER1_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* GDS
|
||||
*/
|
||||
GpuCounterRegInfo ViGdsCounterRegAddr[] = {
|
||||
{mmGDS_PERFCOUNTER0_SELECT__CI__VI, 0, mmGDS_PERFCOUNTER0_LO__CI__VI,
|
||||
mmGDS_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmGDS_PERFCOUNTER1_SELECT__CI__VI, 0, mmGDS_PERFCOUNTER1_LO__CI__VI,
|
||||
mmGDS_PERFCOUNTER1_HI__CI__VI},
|
||||
{mmGDS_PERFCOUNTER2_SELECT__CI__VI, 0, mmGDS_PERFCOUNTER2_LO__CI__VI,
|
||||
mmGDS_PERFCOUNTER2_HI__CI__VI},
|
||||
{mmGDS_PERFCOUNTER3_SELECT__CI__VI, 0, mmGDS_PERFCOUNTER3_LO__CI__VI,
|
||||
mmGDS_PERFCOUNTER3_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* VGT
|
||||
*/
|
||||
GpuCounterRegInfo ViVgtCounterRegAddr[] = {
|
||||
{mmVGT_PERFCOUNTER0_SELECT__CI__VI, 0, mmVGT_PERFCOUNTER0_LO__CI__VI,
|
||||
mmVGT_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmVGT_PERFCOUNTER1_SELECT__CI__VI, 0, mmVGT_PERFCOUNTER1_LO__CI__VI,
|
||||
mmVGT_PERFCOUNTER1_HI__CI__VI},
|
||||
{mmVGT_PERFCOUNTER2_SELECT__CI__VI, 0, mmVGT_PERFCOUNTER2_LO__CI__VI,
|
||||
mmVGT_PERFCOUNTER2_HI__CI__VI},
|
||||
{mmVGT_PERFCOUNTER3_SELECT__CI__VI, 0, mmVGT_PERFCOUNTER3_LO__CI__VI,
|
||||
mmVGT_PERFCOUNTER3_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* IA
|
||||
*/
|
||||
GpuCounterRegInfo ViIaCounterRegAddr[] = {
|
||||
{mmIA_PERFCOUNTER0_SELECT__CI__VI, 0, mmIA_PERFCOUNTER0_LO__CI__VI,
|
||||
mmIA_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmIA_PERFCOUNTER1_SELECT__CI__VI, 0, mmIA_PERFCOUNTER1_LO__CI__VI,
|
||||
mmIA_PERFCOUNTER1_HI__CI__VI},
|
||||
{mmIA_PERFCOUNTER2_SELECT__CI__VI, 0, mmIA_PERFCOUNTER2_LO__CI__VI,
|
||||
mmIA_PERFCOUNTER2_HI__CI__VI},
|
||||
{mmIA_PERFCOUNTER3_SELECT__CI__VI, 0, mmIA_PERFCOUNTER3_LO__CI__VI,
|
||||
mmIA_PERFCOUNTER3_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* MC
|
||||
*/
|
||||
GpuCounterRegInfo ViMcCounterRegAddr[] = {
|
||||
{mmMC_SEQ_PERF_SEQ_CTL__SI__VI, 0, mmMC_SEQ_PERF_SEQ_CNT_A_I0__VI,
|
||||
mmMC_SEQ_PERF_SEQ_CNT_A_I1__VI},
|
||||
{mmMC_SEQ_PERF_SEQ_CTL__SI__VI, 0, mmMC_SEQ_PERF_SEQ_CNT_B_I0__VI,
|
||||
mmMC_SEQ_PERF_SEQ_CNT_B_I1__VI},
|
||||
{mmMC_SEQ_PERF_SEQ_CTL__SI__VI, 0, mmMC_SEQ_PERF_SEQ_CNT_C_I0__VI,
|
||||
mmMC_SEQ_PERF_SEQ_CNT_C_I1__VI},
|
||||
{mmMC_SEQ_PERF_SEQ_CTL__SI__VI, 0, mmMC_SEQ_PERF_SEQ_CNT_D_I0__VI,
|
||||
mmMC_SEQ_PERF_SEQ_CNT_D_I1__VI}};
|
||||
|
||||
/*
|
||||
* SRBM
|
||||
*/
|
||||
GpuCounterRegInfo ViSrbmCounterRegAddr[] = {
|
||||
{mmSRBM_PERFCOUNTER0_SELECT__VI, 0, mmSRBM_PERFCOUNTER0_LO__VI, mmSRBM_PERFCOUNTER0_HI__VI},
|
||||
{mmSRBM_PERFCOUNTER1_SELECT__VI, 0, mmSRBM_PERFCOUNTER1_LO__VI, mmSRBM_PERFCOUNTER1_HI__VI}};
|
||||
|
||||
/*
|
||||
* WD
|
||||
*/
|
||||
GpuCounterRegInfo ViWdCounterRegAddr[] = {
|
||||
{mmWD_PERFCOUNTER0_SELECT__CI__VI, 0, mmWD_PERFCOUNTER0_LO__CI__VI,
|
||||
mmWD_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmWD_PERFCOUNTER1_SELECT__CI__VI, 0, mmWD_PERFCOUNTER1_LO__CI__VI,
|
||||
mmWD_PERFCOUNTER1_HI__CI__VI},
|
||||
{mmWD_PERFCOUNTER2_SELECT__CI__VI, 0, mmWD_PERFCOUNTER2_LO__CI__VI,
|
||||
mmWD_PERFCOUNTER2_HI__CI__VI},
|
||||
{mmWD_PERFCOUNTER3_SELECT__CI__VI, 0, mmWD_PERFCOUNTER3_LO__CI__VI,
|
||||
mmWD_PERFCOUNTER3_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* CPG
|
||||
*/
|
||||
GpuCounterRegInfo ViCpgCounterRegAddr[] = {
|
||||
{mmCPG_PERFCOUNTER0_SELECT__CI__VI, 0, mmCPG_PERFCOUNTER0_LO__CI__VI,
|
||||
mmCPG_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmCPG_PERFCOUNTER1_SELECT__CI__VI, 0, mmCPG_PERFCOUNTER1_LO__CI__VI,
|
||||
mmCPG_PERFCOUNTER1_HI__CI__VI}};
|
||||
|
||||
/*
|
||||
* CPC
|
||||
*/
|
||||
GpuCounterRegInfo ViCpcCounterRegAddr[] = {
|
||||
{mmCPC_PERFCOUNTER0_SELECT__CI__VI, 0, mmCPC_PERFCOUNTER0_LO__CI__VI,
|
||||
mmCPC_PERFCOUNTER0_HI__CI__VI},
|
||||
{mmCPC_PERFCOUNTER1_SELECT__CI__VI, 0, mmCPC_PERFCOUNTER1_LO__CI__VI,
|
||||
mmCPC_PERFCOUNTER1_HI__CI__VI}};
|
||||
|
||||
// Private counter block identifiers for the SQ, MC, IOMMUv2 and kernel-driver
// blocks. Each identifier is initialised from four hexadecimal words.
GpuPrivCounterBlockId ViBlockIdSq = {
    {0xb5c396b6, 0x47e4d310, 0xc35cfc86, 0x08f53a04},
};
GpuPrivCounterBlockId ViBlockIdMc = {
    {0x13900b57, 0x4d984956, 0x5268d081, 0x9cf53719},
};
GpuPrivCounterBlockId ViBlockIdIommuV2 = {
    {0x80969879, 0x4be6b0f6, 0x636af697, 0x1d10f500},
};
GpuPrivCounterBlockId ViBlockIdKernelDriver = {
    {0xea9b5ae1, 0x44b36c3f, 0xf0da5489, 0x0aa96575},
};
|
||||
|
||||
} // pm4_profile
|
||||
@@ -1,227 +0,0 @@
|
||||
#ifndef _VI_BLOCKINFO_H_
#define _VI_BLOCKINFO_H_

#include "gpu_block_info.h"

namespace pm4_profile {

// MAX number of block instances for VOLCANIC ISLANDS (from Fiji).
// Values are found here //gfxip/gfx8/main/src/meta/features/variant/Fiji/album.dj

// @brief Number of block instances.

// We index per SE and instance
#define VI_NUM_CB 4  // CB has 4 instances per SE
#define VI_NUM_DB 4  // DB has 4 instances per SE

// For TA, TD and TCP, the values below are the same as the number of CUs
// per SH. We index per SE and instance.
// The counts match the 16 per-instance ids (0..15) that HsaViCounterBlockId
// declares for each of these blocks.
#define VI_NUM_TA 16   // TA has 16 instances
#define VI_NUM_TD 16   // TD has 16 instances
#define VI_NUM_TCP 16  // TCP has 16 instances

// These values are per chip, we index directly per instance
#define VI_NUM_TCA 2   // TCA has 2 instances per chip
#define VI_NUM_TCC 16  // TCC has 16 instances per chip
#define VI_NUM_SDMA 2  // There are two SDMA blocks on VI, exposed as 2
                       // instances here

// Number of counter registers per block for volcanic islands
#define VI_COUNTER_NUM_PER_DRM 2
#define VI_COUNTER_NUM_PER_DRMDMA 2
#define VI_COUNTER_NUM_PER_IH 2
#define VI_COUNTER_NUM_PER_SRBM 2
#define VI_COUNTER_NUM_PER_CB 4
#define VI_COUNTER_NUM_PER_CPF 2
#define VI_COUNTER_NUM_PER_DB 4
#define VI_COUNTER_NUM_PER_GRBM 2
#define VI_COUNTER_NUM_PER_GRBMSE 4
#define VI_COUNTER_NUM_PER_PA_SU 4
#define VI_COUNTER_NUM_PER_RLC 2
#define VI_COUNTER_NUM_PER_PA_SC 8
#define VI_COUNTER_NUM_PER_SPI 6  // TODO(Shucai): double check the value
#define VI_COUNTER_NUM_PER_SQ 16
#define VI_COUNTER_NUM_PER_SX 4
#define VI_COUNTER_NUM_PER_TA 2
#define VI_COUNTER_NUM_PER_TCA 4
#define VI_COUNTER_NUM_PER_TCC 4
#define VI_COUNTER_NUM_PER_TD 2  // TODO(Shucai): double check the value
#define VI_COUNTER_NUM_PER_TCP 4
#define VI_COUNTER_NUM_PER_GDS 4
#define VI_COUNTER_NUM_PER_VGT 4
#define VI_COUNTER_NUM_PER_IA 4
#define VI_COUNTER_NUM_PER_MC 4
#define VI_COUNTER_NUM_PER_TCS 4
#define VI_COUNTER_NUM_PER_WD 4
#define VI_COUNTER_NUM_PER_CPG 2
#define VI_COUNTER_NUM_PER_CPC 2
#define VI_COUNTER_NUM_PER_VM 1
#define VI_COUNTER_NUM_PER_VM_MD 1
#define VI_COUNTER_NUM_PER_PIPESTATS 12

#define VI_MAX_NUM_SHADER_ENGINES 1

// Enumeration of VI hardware counter blocks.
// Enumerators are numbered consecutively from 0, so their order is part of
// the interface: do not reorder or insert entries in the middle.
typedef enum HsaViCounterBlockId {
  kHsaViCounterBlockIdCb0 = 0,
  kHsaViCounterBlockIdCb1,
  kHsaViCounterBlockIdCb2,
  kHsaViCounterBlockIdCb3,

  kHsaViCounterBlockIdCpf,

  kHsaViCounterBlockIdDb0,
  kHsaViCounterBlockIdDb1,
  kHsaViCounterBlockIdDb2,
  kHsaViCounterBlockIdDb3,

  kHsaViCounterBlockIdGrbm,
  kHsaViCounterBlockIdGrbmSe,
  kHsaViCounterBlockIdPaSu,
  kHsaViCounterBlockIdPaSc,
  kHsaViCounterBlockIdSpi,

  kHsaViCounterBlockIdSq,
  kHsaViCounterBlockIdSqEs,
  kHsaViCounterBlockIdSqGs,
  kHsaViCounterBlockIdSqVs,
  kHsaViCounterBlockIdSqPs,
  kHsaViCounterBlockIdSqLs,
  kHsaViCounterBlockIdSqHs,
  kHsaViCounterBlockIdSqCs,

  kHsaViCounterBlockIdSx,

  kHsaViCounterBlockIdTa0,
  kHsaViCounterBlockIdTa1,
  kHsaViCounterBlockIdTa2,
  kHsaViCounterBlockIdTa3,
  kHsaViCounterBlockIdTa4,
  kHsaViCounterBlockIdTa5,
  kHsaViCounterBlockIdTa6,
  kHsaViCounterBlockIdTa7,
  kHsaViCounterBlockIdTa8,
  kHsaViCounterBlockIdTa9,
  kHsaViCounterBlockIdTa10,
  kHsaViCounterBlockIdTa11,
  kHsaViCounterBlockIdTa12,
  kHsaViCounterBlockIdTa13,
  kHsaViCounterBlockIdTa14,
  kHsaViCounterBlockIdTa15,

  kHsaViCounterBlockIdTca0,
  kHsaViCounterBlockIdTca1,

  kHsaViCounterBlockIdTcc0,
  kHsaViCounterBlockIdTcc1,
  kHsaViCounterBlockIdTcc2,
  kHsaViCounterBlockIdTcc3,
  kHsaViCounterBlockIdTcc4,
  kHsaViCounterBlockIdTcc5,
  kHsaViCounterBlockIdTcc6,
  kHsaViCounterBlockIdTcc7,
  kHsaViCounterBlockIdTcc8,
  kHsaViCounterBlockIdTcc9,
  kHsaViCounterBlockIdTcc10,
  kHsaViCounterBlockIdTcc11,
  kHsaViCounterBlockIdTcc12,
  kHsaViCounterBlockIdTcc13,
  kHsaViCounterBlockIdTcc14,
  kHsaViCounterBlockIdTcc15,

  kHsaViCounterBlockIdTd0,
  kHsaViCounterBlockIdTd1,
  kHsaViCounterBlockIdTd2,
  kHsaViCounterBlockIdTd3,
  kHsaViCounterBlockIdTd4,
  kHsaViCounterBlockIdTd5,
  kHsaViCounterBlockIdTd6,
  kHsaViCounterBlockIdTd7,
  kHsaViCounterBlockIdTd8,
  kHsaViCounterBlockIdTd9,
  kHsaViCounterBlockIdTd10,
  kHsaViCounterBlockIdTd11,
  kHsaViCounterBlockIdTd12,
  kHsaViCounterBlockIdTd13,
  kHsaViCounterBlockIdTd14,
  kHsaViCounterBlockIdTd15,

  kHsaViCounterBlockIdTcp0,
  kHsaViCounterBlockIdTcp1,
  kHsaViCounterBlockIdTcp2,
  kHsaViCounterBlockIdTcp3,
  kHsaViCounterBlockIdTcp4,
  kHsaViCounterBlockIdTcp5,
  kHsaViCounterBlockIdTcp6,
  kHsaViCounterBlockIdTcp7,
  kHsaViCounterBlockIdTcp8,
  kHsaViCounterBlockIdTcp9,
  kHsaViCounterBlockIdTcp10,
  kHsaViCounterBlockIdTcp11,
  kHsaViCounterBlockIdTcp12,
  kHsaViCounterBlockIdTcp13,
  kHsaViCounterBlockIdTcp14,
  kHsaViCounterBlockIdTcp15,

  kHsaViCounterBlockIdGds,
  kHsaViCounterBlockIdVgt,
  kHsaViCounterBlockIdIa,
  kHsaViCounterBlockIdMc,
  kHsaViCounterBlockIdSrbm,

  kHsaViCounterBlockIdTcs,
  kHsaViCounterBlockIdWd,
  kHsaViCounterBlockIdCpg,
  kHsaViCounterBlockIdCpc,

  // Counters retrieved by KFD
  kHsaViCounterBlockIdIommuV2,
  kHsaViCounterBlockIdKernelDriver,

  kHsaViCounterBlockIdCpPipeStats,
  kHsaViCounterBlockIdHwInfo,

  // Aliases for the first and last ids of the range above
  kHsaViCounterBlockIdBlocksFirst = kHsaViCounterBlockIdCb0,
  kHsaViCounterBlockIdBlocksLast = kHsaViCounterBlockIdHwInfo
} HsaViCounterBlockId;

// Table of VI hardware blocks
extern GpuBlockInfo Gfx8HwBlocks[];

// Per-block counter register address tables.
// Each table is declared exactly once (the original header declared the CPG
// and CPC tables twice).
extern GpuCounterRegInfo ViSqCounterRegAddr[];
extern GpuCounterRegInfo ViCbCounterRegAddr[];
extern GpuCounterRegInfo ViDrmdmaCounterRegAddr[];
extern GpuCounterRegInfo ViIhCounterRegAddr[];
extern GpuCounterRegInfo ViCpfCounterRegAddr[];
extern GpuCounterRegInfo ViCpgCounterRegAddr[];
extern GpuCounterRegInfo ViCpcCounterRegAddr[];
extern GpuCounterRegInfo ViDrmCounterRegAddr[];
extern GpuCounterRegInfo ViGrbmCounterRegAddr[];
extern GpuCounterRegInfo ViGrbmSeCounterRegAddr[];
extern GpuCounterRegInfo ViPaSuCounterRegAddr[];
extern GpuCounterRegInfo ViPaScCounterRegAddr[];
extern GpuCounterRegInfo ViSpiCounterRegAddr[];
extern GpuCounterRegInfo ViTcaCounterRegAddr[];
extern GpuCounterRegInfo ViTccCounterRegAddr[];
extern GpuCounterRegInfo ViTcpCounterRegAddr[];
extern GpuCounterRegInfo ViDbCounterRegAddr[];
extern GpuCounterRegInfo ViRlcCounterRegAddr[];
extern GpuCounterRegInfo ViScCounterRegAddr[];
extern GpuCounterRegInfo ViSxCounterRegAddr[];
extern GpuCounterRegInfo ViTaCounterRegAddr[];
extern GpuCounterRegInfo ViTdCounterRegAddr[];
extern GpuCounterRegInfo ViGdsCounterRegAddr[];
extern GpuCounterRegInfo ViVgtCounterRegAddr[];
extern GpuCounterRegInfo ViIaCounterRegAddr[];
extern GpuCounterRegInfo ViMcCounterRegAddr[];
extern GpuCounterRegInfo ViSrbmCounterRegAddr[];
// No Tcs counter block on VI, hence no ViTcsCounterRegAddr table.
extern GpuCounterRegInfo ViWdCounterRegAddr[];

// Private counter block identifiers
extern GpuPrivCounterBlockId ViBlockIdSq;
extern GpuPrivCounterBlockId ViBlockIdMc;
extern GpuPrivCounterBlockId ViBlockIdIommuV2;
extern GpuPrivCounterBlockId ViBlockIdKernelDriver;

}  // namespace pm4_profile

#endif  // _VI_BLOCKINFO_H_
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,71 +0,0 @@
|
||||
#ifndef _VI_PMU_H_
#define _VI_PMU_H_

#include <stdint.h>

#include "perf_counter.h"

namespace pm4_profile {

class CommandWriter;

// Gfx8PerfCounter implements the VI PMU. It is responsible for setting up the
// CounterGroups that represent each VI hardware block which exposes
// performance counters.
class Gfx8PerfCounter : public pm4_profile::PerfCounter {
 public:
  Gfx8PerfCounter();

  // Number of shader engines per block, for the blocks that are instanced
  // per shader engine.
  uint32_t getNumSe() { return num_se_; }

  void begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
             const CountersMap& countersMap);

  uint32_t end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
               const CountersMap& countersMap, void* dataBuff);

 private:
  void Init();

  // Program the counters of the SQ block
  uint32_t ProgramSQCntrs(uint32_t sqRegIdx, uint32_t* regAddr,
                          uint32_t* regVal, uint32_t blkId,
                          uint32_t blkCntrIdx);

  // Program the counters of the TA block
  uint32_t ProgramTaCntrs(uint32_t taRegIdx, uint32_t* regAddr,
                          uint32_t* regVal, uint32_t blkId,
                          uint32_t blkCntrIdx);

  // Program the counters of the TCA block
  uint32_t ProgramTcaCntrs(uint32_t tcaRegIdx, uint32_t* regAddr,
                           uint32_t* regVal, uint32_t blkId,
                           uint32_t blkCntrIdx);

  // Program the counters of the TCC block
  uint32_t ProgramTccCntrs(uint32_t tccRegIdx, uint32_t* regAddr,
                           uint32_t* regVal, uint32_t blkId,
                           uint32_t blkCntrIdx);

  // Program the counters of the TCP block
  uint32_t ProgramTcpCntrs(uint32_t tcpRegIdx, uint32_t* regAddr,
                           uint32_t* regVal, uint32_t blkId,
                           uint32_t blkCntrIdx);

  // Program the counters of the TD block
  uint32_t ProgramTdCntrs(uint32_t tdRegIdx, uint32_t* regAddr,
                          uint32_t* regVal, uint32_t blkId,
                          uint32_t blkCntrIdx);

  // Build the counter selection register; returns how many registers were
  // built.
  uint32_t BuildCounterSelRegister(uint32_t cntrIdx, uint32_t* regAddr,
                                   uint32_t* regVal, uint32_t blkId,
                                   uint32_t blkCntrIdx);

  // Build the counter read registers.
  // NOTE(review): the comment originally here was a verbatim copy of the
  // BuildCounterSelRegister one; presumably the return value is likewise the
  // number of registers built -- confirm against the implementation.
  uint32_t BuildCounterReadRegisters(uint32_t reg_index, uint32_t block_id,
                                     uint32_t* reg_addr, uint32_t* reg_val);

 private:
  // Number of shader engines present
  uint32_t num_se_;

  // Used to reset GRBM to its default state
  uint32_t reset_grbm_;
};

}  // namespace pm4_profile

#endif  // _VI_PMU_H_
|
||||
@@ -1,553 +0,0 @@
|
||||
#include "gfx9_block_info.h"
|
||||
#include "gfxip/gfx9/gfx9_offset.h"
|
||||
#include "gfxip/gfx9/gfx9_typedef.h"
|
||||
|
||||
namespace pm4_profile {
|
||||
/**
|
||||
* Table containing CounterGroups which represent AI hardware blocks
|
||||
* as defined by \ref GpuBlockInfo structure
|
||||
*/
|
||||
GpuBlockInfo Gfx9HwBlocks[] = {
|
||||
// Counter block CB
|
||||
{"AI_CB0", kHsaAiCounterBlockIdCb0, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_CB,
|
||||
CntlMethodBySeAndInstance, 395, AI_COUNTER_NUM_PER_CB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_CB1", kHsaAiCounterBlockIdCb1, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_CB,
|
||||
CntlMethodBySeAndInstance, 395, AI_COUNTER_NUM_PER_CB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_CB2", kHsaAiCounterBlockIdCb2, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_CB,
|
||||
CntlMethodBySeAndInstance, 395, AI_COUNTER_NUM_PER_CB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_CB3", kHsaAiCounterBlockIdCb3, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_CB,
|
||||
CntlMethodBySeAndInstance, 395, AI_COUNTER_NUM_PER_CB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Temp commented for Vega10
|
||||
// Counter block CPF
|
||||
/*
|
||||
{"AI_CPF", kHsaAiCounterBlockIdCpf, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 19,
|
||||
AI_COUNTER_NUM_PER_CPF, 0, 0, true, 0, 0, false, 0, 0},
|
||||
*/
|
||||
|
||||
// Counter block DB
|
||||
{"AI_DB0", kHsaAiCounterBlockIdDb0, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_DB,
|
||||
CntlMethodBySeAndInstance, 256, AI_COUNTER_NUM_PER_DB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_DB1", kHsaAiCounterBlockIdDb1, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_DB,
|
||||
CntlMethodBySeAndInstance, 256, AI_COUNTER_NUM_PER_DB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_DB2", kHsaAiCounterBlockIdDb2, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_DB,
|
||||
CntlMethodBySeAndInstance, 256, AI_COUNTER_NUM_PER_DB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_DB3", kHsaAiCounterBlockIdDb3, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_DB,
|
||||
CntlMethodBySeAndInstance, 256, AI_COUNTER_NUM_PER_DB, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block GRBM
|
||||
{"AI_GRBM", kHsaAiCounterBlockIdGrbm, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 33,
|
||||
AI_COUNTER_NUM_PER_GRBM, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block GRBMSE
|
||||
{"AI_GRBMSE", kHsaAiCounterBlockIdGrbmSe, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 14,
|
||||
AI_COUNTER_NUM_PER_GRBMSE, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block PA_SU
|
||||
{"AI_PA_SU", kHsaAiCounterBlockIdPaSu, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 152,
|
||||
AI_COUNTER_NUM_PER_PA_SU, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block PA_SC
|
||||
{"AI_PA_SC", kHsaAiCounterBlockIdPaSc, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 396,
|
||||
AI_COUNTER_NUM_PER_PA_SC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block SPI
|
||||
{"AI_SPI", kHsaAiCounterBlockIdSpi, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 196,
|
||||
AI_COUNTER_NUM_PER_SPI, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block SQ
|
||||
{"AI_SQ", kHsaAiCounterBlockIdSq, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 171,
|
||||
AI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_SQ_GS", kHsaAiCounterBlockIdSqGs, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
|
||||
AI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_SQ_VS", kHsaAiCounterBlockIdSqVs, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
|
||||
AI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_SQ_PS", kHsaAiCounterBlockIdSqPs, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
|
||||
AI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_SQ_HS", kHsaAiCounterBlockIdSqHs, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
|
||||
AI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_SQ_CS", kHsaAiCounterBlockIdSqCs, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 298,
|
||||
AI_COUNTER_NUM_PER_SQ, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block SX
|
||||
{"AI_SX", kHsaAiCounterBlockIdSx, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 33,
|
||||
AI_COUNTER_NUM_PER_SX, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block TA
|
||||
{"AI_TA0", kHsaAiCounterBlockIdTa0, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TA1", kHsaAiCounterBlockIdTa1, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TA2", kHsaAiCounterBlockIdTa2, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TA3", kHsaAiCounterBlockIdTa3, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TA4", kHsaAiCounterBlockIdTa4, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TA5", kHsaAiCounterBlockIdTa5, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TA6", kHsaAiCounterBlockIdTa6, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TA7", kHsaAiCounterBlockIdTa7, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TA8", kHsaAiCounterBlockIdTa8, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TA9", kHsaAiCounterBlockIdTa9, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TA10", kHsaAiCounterBlockIdTa10, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TA11", kHsaAiCounterBlockIdTa11, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TA12", kHsaAiCounterBlockIdTa12, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TA13", kHsaAiCounterBlockIdTa13, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TA14", kHsaAiCounterBlockIdTa14, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TA15", kHsaAiCounterBlockIdTa15, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TA,
|
||||
CntlMethodBySeAndInstance, 118, AI_COUNTER_NUM_PER_TA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block TCA
|
||||
{"AI_TCA0", kHsaAiCounterBlockIdTca0, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCA,
|
||||
CntlMethodByInstance, 34, AI_COUNTER_NUM_PER_TCA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCA1", kHsaAiCounterBlockIdTca1, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCA,
|
||||
CntlMethodByInstance, 34, AI_COUNTER_NUM_PER_TCA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block TCC
|
||||
{"AI_TCC0", kHsaAiCounterBlockIdTcc0, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCC1", kHsaAiCounterBlockIdTcc1, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCC2", kHsaAiCounterBlockIdTcc2, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCC3", kHsaAiCounterBlockIdTcc3, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCC4", kHsaAiCounterBlockIdTcc4, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCC5", kHsaAiCounterBlockIdTcc5, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCC6", kHsaAiCounterBlockIdTcc6, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCC7", kHsaAiCounterBlockIdTcc7, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCC8", kHsaAiCounterBlockIdTcc8, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCC9", kHsaAiCounterBlockIdTcc9, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCC10", kHsaAiCounterBlockIdTcc10, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCC11", kHsaAiCounterBlockIdTcc11, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCC12", kHsaAiCounterBlockIdTcc12, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCC13", kHsaAiCounterBlockIdTcc13, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCC14", kHsaAiCounterBlockIdTcc14, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCC15", kHsaAiCounterBlockIdTcc15, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCC,
|
||||
CntlMethodByInstance, 191, AI_COUNTER_NUM_PER_TCC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block TD
|
||||
{"AI_TD0", kHsaAiCounterBlockIdTd0, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TD1", kHsaAiCounterBlockIdTd1, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TD2", kHsaAiCounterBlockIdTd2, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TD3", kHsaAiCounterBlockIdTd3, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TD4", kHsaAiCounterBlockIdTd4, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TD5", kHsaAiCounterBlockIdTd5, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TD6", kHsaAiCounterBlockIdTd6, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TD7", kHsaAiCounterBlockIdTd7, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TD8", kHsaAiCounterBlockIdTd8, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TD9", kHsaAiCounterBlockIdTd9, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TD10", kHsaAiCounterBlockIdTd10, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TD11", kHsaAiCounterBlockIdTd11, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TD12", kHsaAiCounterBlockIdTd12, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TD13", kHsaAiCounterBlockIdTd13, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TD14", kHsaAiCounterBlockIdTd14, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TD15", kHsaAiCounterBlockIdTd15, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TD,
|
||||
CntlMethodBySeAndInstance, 54, AI_COUNTER_NUM_PER_TD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block TCP
|
||||
{"AI_TCP0", kHsaAiCounterBlockIdTcp0, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCP1", kHsaAiCounterBlockIdTcp1, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCP2", kHsaAiCounterBlockIdTcp2, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCP3", kHsaAiCounterBlockIdTcp3, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCP4", kHsaAiCounterBlockIdTcp4, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCP5", kHsaAiCounterBlockIdTcp5, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCP6", kHsaAiCounterBlockIdTcp6, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCP7", kHsaAiCounterBlockIdTcp7, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCP8", kHsaAiCounterBlockIdTcp8, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCP9", kHsaAiCounterBlockIdTcp9, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCP10", kHsaAiCounterBlockIdTcp10, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCP11", kHsaAiCounterBlockIdTcp11, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCP12", kHsaAiCounterBlockIdTcp12, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCP13", kHsaAiCounterBlockIdTcp13, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCP14", kHsaAiCounterBlockIdTcp14, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
{"AI_TCP15", kHsaAiCounterBlockIdTcp15, AI_MAX_NUM_SHADER_ENGINES, 2, AI_NUM_TCP,
|
||||
CntlMethodBySeAndInstance, 182, AI_COUNTER_NUM_PER_TCP, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block GDS
|
||||
{"AI_GDS", kHsaAiCounterBlockIdGds, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 120,
|
||||
AI_COUNTER_NUM_PER_GDS, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block VGT
|
||||
{"AI_VGT", kHsaAiCounterBlockIdVgt, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 145,
|
||||
AI_COUNTER_NUM_PER_VGT, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block IA
|
||||
{"AI_IA", kHsaAiCounterBlockIdIa, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodBySe, 23,
|
||||
AI_COUNTER_NUM_PER_IA, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block MC
|
||||
{"AI_MC", kHsaAiCounterBlockIdMc, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 22,
|
||||
AI_COUNTER_NUM_PER_MC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Temp commented out for Vega10
|
||||
// Counter block SRBM
|
||||
/*
|
||||
{"AI_SRBM", kHsaAiCounterBlockIdSrbm, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 19,
|
||||
AI_COUNTER_NUM_PER_SRBM, 0, 0, true, 0, 0, false, 0, 0},
|
||||
*/
|
||||
|
||||
// Counter block WD
|
||||
{"AI_WD", kHsaAiCounterBlockIdWd, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 36,
|
||||
AI_COUNTER_NUM_PER_WD, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block CPG
|
||||
// Temp commented for Vega10
|
||||
/*
|
||||
{"AI_CPG", kHsaAiCounterBlockIdCpg, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 48,
|
||||
AI_COUNTER_NUM_PER_CPG, 0, 0, true, 0, 0, false, 0, 0},
|
||||
*/
|
||||
|
||||
// Counter block CPC
|
||||
// NOTE: unlike the CPG entry above, the CPC entry below is enabled for Vega10
|
||||
{"AI_CPC", kHsaAiCounterBlockIdCpc, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 34,
|
||||
AI_COUNTER_NUM_PER_CPC, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block IOMMUV2
|
||||
{"AI_IOMMUV2", kHsaAiCounterBlockIdIommuV2, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 25,
|
||||
8, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Counter block KernelDriver
|
||||
{"AI_KD", kHsaAiCounterBlockIdKernelDriver, AI_MAX_NUM_SHADER_ENGINES, 2, 1, CntlMethodNone, 0,
|
||||
0, 0, 0, true, 0, 0, false, 0, 0},
|
||||
|
||||
// Name of the last line should be empty to indicate end of all counter groups
|
||||
{"", kHsaAiCounterBlockIdBlocksLast, 0, 0, 0, CntlMethodNone, 0, 0, 0, 0, false, 0, 0, false, 0,
|
||||
0}};
|
||||
|
||||
// Number of rows in Gfx9HwBlocks. The terminating row with the empty name
// is part of the array, so it is included in this count.
extern const uint32_t Gfx9HwBlockCount = sizeof(Gfx9HwBlocks) / sizeof(Gfx9HwBlocks[0]);
|
||||
|
||||
/*
|
||||
 * The following tables contain the performance counter register addresses, one table per hardware block (SQ first)
|
||||
*/
|
||||
|
||||
/*
|
||||
* SQ
|
||||
*/
|
||||
// SQ perf-counter register table: 16 rows, one per counter (0-15).
// Row i = {mmSQ_PERFCOUNTERi_SELECT, mmSQ_PERFCOUNTER_CTRL,
//          mmSQ_PERFCOUNTERi_LO, mmSQ_PERFCOUNTERi_HI}.
// The same mmSQ_PERFCOUNTER_CTRL register appears in every row.
GpuCounterRegInfo AiSqCounterRegAddr[] = {
    /*  0 */ {mmSQ_PERFCOUNTER0_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER0_LO, mmSQ_PERFCOUNTER0_HI},
    /*  1 */ {mmSQ_PERFCOUNTER1_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER1_LO, mmSQ_PERFCOUNTER1_HI},
    /*  2 */ {mmSQ_PERFCOUNTER2_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER2_LO, mmSQ_PERFCOUNTER2_HI},
    /*  3 */ {mmSQ_PERFCOUNTER3_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER3_LO, mmSQ_PERFCOUNTER3_HI},
    /*  4 */ {mmSQ_PERFCOUNTER4_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER4_LO, mmSQ_PERFCOUNTER4_HI},
    /*  5 */ {mmSQ_PERFCOUNTER5_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER5_LO, mmSQ_PERFCOUNTER5_HI},
    /*  6 */ {mmSQ_PERFCOUNTER6_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER6_LO, mmSQ_PERFCOUNTER6_HI},
    /*  7 */ {mmSQ_PERFCOUNTER7_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER7_LO, mmSQ_PERFCOUNTER7_HI},
    /*  8 */ {mmSQ_PERFCOUNTER8_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER8_LO, mmSQ_PERFCOUNTER8_HI},
    /*  9 */ {mmSQ_PERFCOUNTER9_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER9_LO, mmSQ_PERFCOUNTER9_HI},
    /* 10 */ {mmSQ_PERFCOUNTER10_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER10_LO, mmSQ_PERFCOUNTER10_HI},
    /* 11 */ {mmSQ_PERFCOUNTER11_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER11_LO, mmSQ_PERFCOUNTER11_HI},
    /* 12 */ {mmSQ_PERFCOUNTER12_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER12_LO, mmSQ_PERFCOUNTER12_HI},
    /* 13 */ {mmSQ_PERFCOUNTER13_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER13_LO, mmSQ_PERFCOUNTER13_HI},
    /* 14 */ {mmSQ_PERFCOUNTER14_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER14_LO, mmSQ_PERFCOUNTER14_HI},
    /* 15 */ {mmSQ_PERFCOUNTER15_SELECT, mmSQ_PERFCOUNTER_CTRL,
              mmSQ_PERFCOUNTER15_LO, mmSQ_PERFCOUNTER15_HI},
};
|
||||
|
||||
/*
|
||||
* DRMDMA
|
||||
*/
|
||||
// SDMA ("DRMDMA") perf-counter register table: two rows for SDMA0 followed
// by two rows for SDMA1. Each row pairs the engine's PERFMON_CNTL register
// with one PERFCOUNTERn_RESULT register; the second and fourth fields are 0.
GpuCounterRegInfo AiDrmdmaCounterRegAddr[] = {
    // SDMA0
    /* 0 */ {mmSDMA0_PERFMON_CNTL, 0, mmSDMA0_PERFCOUNTER0_RESULT, 0},
    /* 1 */ {mmSDMA0_PERFMON_CNTL, 0, mmSDMA0_PERFCOUNTER1_RESULT, 0},
    // SDMA1
    /* 2 */ {mmSDMA1_PERFMON_CNTL, 0, mmSDMA1_PERFCOUNTER0_RESULT, 0},
    /* 3 */ {mmSDMA1_PERFMON_CNTL, 0, mmSDMA1_PERFCOUNTER1_RESULT, 0},
};
|
||||
|
||||
/*
|
||||
* IH
|
||||
*/
|
||||
// IH perf-counter register table: two rows, both using mmIH_PERFMON_CNTL
// and each naming one PERFCOUNTERn_RESULT register; the second and fourth
// fields are 0.
GpuCounterRegInfo AiIhCounterRegAddr[] = {
    /* 0 */ {mmIH_PERFMON_CNTL, 0, mmIH_PERFCOUNTER0_RESULT, 0},
    /* 1 */ {mmIH_PERFMON_CNTL, 0, mmIH_PERFCOUNTER1_RESULT, 0},
};
|
||||
|
||||
/*
|
||||
* CPF
|
||||
*/
|
||||
// CPF perf-counter register table: two rows.
// Row i = {mmCPF_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiCpfCounterRegAddr[] = {
    /* 0 */ {mmCPF_PERFCOUNTER0_SELECT, 0, mmCPF_PERFCOUNTER0_LO, mmCPF_PERFCOUNTER0_HI},
    /* 1 */ {mmCPF_PERFCOUNTER1_SELECT, 0, mmCPF_PERFCOUNTER1_LO, mmCPF_PERFCOUNTER1_HI},
};
|
||||
|
||||
/*
|
||||
* DRM
|
||||
*/
|
||||
// DRM perf-counter register table. It currently has no rows; the two
// candidate rows are kept below in disabled form.
// NOTE(review): an array of unknown bound with an empty initializer is a
// zero-length array, which compiles only as a compiler extension.
GpuCounterRegInfo AiDrmCounterRegAddr[] = {
    // {mmDRM_PERFCOUNTER1_SELECT, 0, mmDRM_PERFCOUNTER1_LO, mmDRM_PERFCOUNTER1_HI},
    // {mmDRM_PERFCOUNTER2_SELECT, 0, mmDRM_PERFCOUNTER2_LO, mmDRM_PERFCOUNTER2_HI}
};
|
||||
|
||||
/*
|
||||
* GRBM
|
||||
*/
|
||||
// GRBM perf-counter register table: two rows.
// Row i = {mmGRBM_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiGrbmCounterRegAddr[] = {
    /* 0 */ {mmGRBM_PERFCOUNTER0_SELECT, 0, mmGRBM_PERFCOUNTER0_LO, mmGRBM_PERFCOUNTER0_HI},
    /* 1 */ {mmGRBM_PERFCOUNTER1_SELECT, 0, mmGRBM_PERFCOUNTER1_LO, mmGRBM_PERFCOUNTER1_HI},
};
|
||||
|
||||
/*
|
||||
* GRBM_SE
|
||||
*/
|
||||
// GRBM_SE perf-counter register table: four rows, one per SEn register
// set (SE0-SE3).
// Row n = {mmGRBM_SEn_PERFCOUNTER_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiGrbmSeCounterRegAddr[] = {
    /* SE0 */ {mmGRBM_SE0_PERFCOUNTER_SELECT, 0,
               mmGRBM_SE0_PERFCOUNTER_LO, mmGRBM_SE0_PERFCOUNTER_HI},
    /* SE1 */ {mmGRBM_SE1_PERFCOUNTER_SELECT, 0,
               mmGRBM_SE1_PERFCOUNTER_LO, mmGRBM_SE1_PERFCOUNTER_HI},
    /* SE2 */ {mmGRBM_SE2_PERFCOUNTER_SELECT, 0,
               mmGRBM_SE2_PERFCOUNTER_LO, mmGRBM_SE2_PERFCOUNTER_HI},
    /* SE3 */ {mmGRBM_SE3_PERFCOUNTER_SELECT, 0,
               mmGRBM_SE3_PERFCOUNTER_LO, mmGRBM_SE3_PERFCOUNTER_HI},
};
|
||||
|
||||
/*
|
||||
* PA_SU
|
||||
*/
|
||||
// PA_SU perf-counter register table: four rows.
// Row i = {mmPA_SU_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiPaSuCounterRegAddr[] = {
    /* 0 */ {mmPA_SU_PERFCOUNTER0_SELECT, 0, mmPA_SU_PERFCOUNTER0_LO, mmPA_SU_PERFCOUNTER0_HI},
    /* 1 */ {mmPA_SU_PERFCOUNTER1_SELECT, 0, mmPA_SU_PERFCOUNTER1_LO, mmPA_SU_PERFCOUNTER1_HI},
    /* 2 */ {mmPA_SU_PERFCOUNTER2_SELECT, 0, mmPA_SU_PERFCOUNTER2_LO, mmPA_SU_PERFCOUNTER2_HI},
    /* 3 */ {mmPA_SU_PERFCOUNTER3_SELECT, 0, mmPA_SU_PERFCOUNTER3_LO, mmPA_SU_PERFCOUNTER3_HI},
};
|
||||
|
||||
/*
|
||||
* PA_SC
|
||||
*/
|
||||
// PA_SC perf-counter register table: eight rows, one per counter (0-7).
// Row i = {mmPA_SC_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
//
// The table previously stopped at counter 3 although the PA_SC block is
// described with AI_COUNTER_NUM_PER_PA_SC (8) counters; rows 4-7 are added
// so that indexing by that count stays inside the table. The first four
// rows are unchanged.
GpuCounterRegInfo AiPaScCounterRegAddr[] = {
    /* 0 */ {mmPA_SC_PERFCOUNTER0_SELECT, 0, mmPA_SC_PERFCOUNTER0_LO, mmPA_SC_PERFCOUNTER0_HI},
    /* 1 */ {mmPA_SC_PERFCOUNTER1_SELECT, 0, mmPA_SC_PERFCOUNTER1_LO, mmPA_SC_PERFCOUNTER1_HI},
    /* 2 */ {mmPA_SC_PERFCOUNTER2_SELECT, 0, mmPA_SC_PERFCOUNTER2_LO, mmPA_SC_PERFCOUNTER2_HI},
    /* 3 */ {mmPA_SC_PERFCOUNTER3_SELECT, 0, mmPA_SC_PERFCOUNTER3_LO, mmPA_SC_PERFCOUNTER3_HI},
    /* 4 */ {mmPA_SC_PERFCOUNTER4_SELECT, 0, mmPA_SC_PERFCOUNTER4_LO, mmPA_SC_PERFCOUNTER4_HI},
    /* 5 */ {mmPA_SC_PERFCOUNTER5_SELECT, 0, mmPA_SC_PERFCOUNTER5_LO, mmPA_SC_PERFCOUNTER5_HI},
    /* 6 */ {mmPA_SC_PERFCOUNTER6_SELECT, 0, mmPA_SC_PERFCOUNTER6_LO, mmPA_SC_PERFCOUNTER6_HI},
    /* 7 */ {mmPA_SC_PERFCOUNTER7_SELECT, 0, mmPA_SC_PERFCOUNTER7_LO, mmPA_SC_PERFCOUNTER7_HI},
};
|
||||
|
||||
/*
|
||||
* SPI
|
||||
*/
|
||||
// SPI perf-counter register table: six rows.
// Row i = {mmSPI_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiSpiCounterRegAddr[] = {
    /* 0 */ {mmSPI_PERFCOUNTER0_SELECT, 0, mmSPI_PERFCOUNTER0_LO, mmSPI_PERFCOUNTER0_HI},
    /* 1 */ {mmSPI_PERFCOUNTER1_SELECT, 0, mmSPI_PERFCOUNTER1_LO, mmSPI_PERFCOUNTER1_HI},
    /* 2 */ {mmSPI_PERFCOUNTER2_SELECT, 0, mmSPI_PERFCOUNTER2_LO, mmSPI_PERFCOUNTER2_HI},
    /* 3 */ {mmSPI_PERFCOUNTER3_SELECT, 0, mmSPI_PERFCOUNTER3_LO, mmSPI_PERFCOUNTER3_HI},
    /* 4 */ {mmSPI_PERFCOUNTER4_SELECT, 0, mmSPI_PERFCOUNTER4_LO, mmSPI_PERFCOUNTER4_HI},
    /* 5 */ {mmSPI_PERFCOUNTER5_SELECT, 0, mmSPI_PERFCOUNTER5_LO, mmSPI_PERFCOUNTER5_HI},
};
|
||||
|
||||
/*
|
||||
* TCA
|
||||
*/
|
||||
// TCA perf-counter register table: four rows.
// Row i = {mmTCA_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiTcaCounterRegAddr[] = {
    /* 0 */ {mmTCA_PERFCOUNTER0_SELECT, 0, mmTCA_PERFCOUNTER0_LO, mmTCA_PERFCOUNTER0_HI},
    /* 1 */ {mmTCA_PERFCOUNTER1_SELECT, 0, mmTCA_PERFCOUNTER1_LO, mmTCA_PERFCOUNTER1_HI},
    /* 2 */ {mmTCA_PERFCOUNTER2_SELECT, 0, mmTCA_PERFCOUNTER2_LO, mmTCA_PERFCOUNTER2_HI},
    /* 3 */ {mmTCA_PERFCOUNTER3_SELECT, 0, mmTCA_PERFCOUNTER3_LO, mmTCA_PERFCOUNTER3_HI},
};
|
||||
|
||||
/*
|
||||
* TCC
|
||||
*/
|
||||
// TCC perf-counter register table: four rows.
// Row i = {mmTCC_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiTccCounterRegAddr[] = {
    /* 0 */ {mmTCC_PERFCOUNTER0_SELECT, 0, mmTCC_PERFCOUNTER0_LO, mmTCC_PERFCOUNTER0_HI},
    /* 1 */ {mmTCC_PERFCOUNTER1_SELECT, 0, mmTCC_PERFCOUNTER1_LO, mmTCC_PERFCOUNTER1_HI},
    /* 2 */ {mmTCC_PERFCOUNTER2_SELECT, 0, mmTCC_PERFCOUNTER2_LO, mmTCC_PERFCOUNTER2_HI},
    /* 3 */ {mmTCC_PERFCOUNTER3_SELECT, 0, mmTCC_PERFCOUNTER3_LO, mmTCC_PERFCOUNTER3_HI},
};
|
||||
|
||||
/*
|
||||
* TCP
|
||||
*/
|
||||
// TCP perf-counter register table: four rows.
// Row i = {mmTCP_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiTcpCounterRegAddr[] = {
    /* 0 */ {mmTCP_PERFCOUNTER0_SELECT, 0, mmTCP_PERFCOUNTER0_LO, mmTCP_PERFCOUNTER0_HI},
    /* 1 */ {mmTCP_PERFCOUNTER1_SELECT, 0, mmTCP_PERFCOUNTER1_LO, mmTCP_PERFCOUNTER1_HI},
    /* 2 */ {mmTCP_PERFCOUNTER2_SELECT, 0, mmTCP_PERFCOUNTER2_LO, mmTCP_PERFCOUNTER2_HI},
    /* 3 */ {mmTCP_PERFCOUNTER3_SELECT, 0, mmTCP_PERFCOUNTER3_LO, mmTCP_PERFCOUNTER3_HI},
};
|
||||
|
||||
/*
|
||||
* CB
|
||||
*/
|
||||
// CB perf-counter register table: four rows.
// Row i = {mmCB_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiCbCounterRegAddr[] = {
    /* 0 */ {mmCB_PERFCOUNTER0_SELECT, 0, mmCB_PERFCOUNTER0_LO, mmCB_PERFCOUNTER0_HI},
    /* 1 */ {mmCB_PERFCOUNTER1_SELECT, 0, mmCB_PERFCOUNTER1_LO, mmCB_PERFCOUNTER1_HI},
    /* 2 */ {mmCB_PERFCOUNTER2_SELECT, 0, mmCB_PERFCOUNTER2_LO, mmCB_PERFCOUNTER2_HI},
    /* 3 */ {mmCB_PERFCOUNTER3_SELECT, 0, mmCB_PERFCOUNTER3_LO, mmCB_PERFCOUNTER3_HI},
};
|
||||
|
||||
/*
|
||||
* DB
|
||||
*/
|
||||
// DB perf-counter register table: four rows.
// Row i = {mmDB_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiDbCounterRegAddr[] = {
    /* 0 */ {mmDB_PERFCOUNTER0_SELECT, 0, mmDB_PERFCOUNTER0_LO, mmDB_PERFCOUNTER0_HI},
    /* 1 */ {mmDB_PERFCOUNTER1_SELECT, 0, mmDB_PERFCOUNTER1_LO, mmDB_PERFCOUNTER1_HI},
    /* 2 */ {mmDB_PERFCOUNTER2_SELECT, 0, mmDB_PERFCOUNTER2_LO, mmDB_PERFCOUNTER2_HI},
    /* 3 */ {mmDB_PERFCOUNTER3_SELECT, 0, mmDB_PERFCOUNTER3_LO, mmDB_PERFCOUNTER3_HI},
};
|
||||
|
||||
/*
|
||||
* RLC
|
||||
*/
|
||||
// RLC perf-counter register table: two rows.
// Row i = {mmRLC_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiRlcCounterRegAddr[] = {
    /* 0 */ {mmRLC_PERFCOUNTER0_SELECT, 0, mmRLC_PERFCOUNTER0_LO, mmRLC_PERFCOUNTER0_HI},
    /* 1 */ {mmRLC_PERFCOUNTER1_SELECT, 0, mmRLC_PERFCOUNTER1_LO, mmRLC_PERFCOUNTER1_HI},
};
|
||||
|
||||
/*
|
||||
* SC
|
||||
*/
|
||||
// "SC" perf-counter register table: eight rows built from the PA_SC
// register set, counters 0-7.
// Row i = {mmPA_SC_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiScCounterRegAddr[] = {
    /* 0 */ {mmPA_SC_PERFCOUNTER0_SELECT, 0, mmPA_SC_PERFCOUNTER0_LO, mmPA_SC_PERFCOUNTER0_HI},
    /* 1 */ {mmPA_SC_PERFCOUNTER1_SELECT, 0, mmPA_SC_PERFCOUNTER1_LO, mmPA_SC_PERFCOUNTER1_HI},
    /* 2 */ {mmPA_SC_PERFCOUNTER2_SELECT, 0, mmPA_SC_PERFCOUNTER2_LO, mmPA_SC_PERFCOUNTER2_HI},
    /* 3 */ {mmPA_SC_PERFCOUNTER3_SELECT, 0, mmPA_SC_PERFCOUNTER3_LO, mmPA_SC_PERFCOUNTER3_HI},
    /* 4 */ {mmPA_SC_PERFCOUNTER4_SELECT, 0, mmPA_SC_PERFCOUNTER4_LO, mmPA_SC_PERFCOUNTER4_HI},
    /* 5 */ {mmPA_SC_PERFCOUNTER5_SELECT, 0, mmPA_SC_PERFCOUNTER5_LO, mmPA_SC_PERFCOUNTER5_HI},
    /* 6 */ {mmPA_SC_PERFCOUNTER6_SELECT, 0, mmPA_SC_PERFCOUNTER6_LO, mmPA_SC_PERFCOUNTER6_HI},
    /* 7 */ {mmPA_SC_PERFCOUNTER7_SELECT, 0, mmPA_SC_PERFCOUNTER7_LO, mmPA_SC_PERFCOUNTER7_HI},
};
|
||||
|
||||
/*
|
||||
* SX
|
||||
*/
|
||||
// SX perf-counter register table: four rows.
// Row i = {mmSX_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiSxCounterRegAddr[] = {
    /* 0 */ {mmSX_PERFCOUNTER0_SELECT, 0, mmSX_PERFCOUNTER0_LO, mmSX_PERFCOUNTER0_HI},
    /* 1 */ {mmSX_PERFCOUNTER1_SELECT, 0, mmSX_PERFCOUNTER1_LO, mmSX_PERFCOUNTER1_HI},
    /* 2 */ {mmSX_PERFCOUNTER2_SELECT, 0, mmSX_PERFCOUNTER2_LO, mmSX_PERFCOUNTER2_HI},
    /* 3 */ {mmSX_PERFCOUNTER3_SELECT, 0, mmSX_PERFCOUNTER3_LO, mmSX_PERFCOUNTER3_HI},
};
|
||||
|
||||
/*
|
||||
* TA
|
||||
*/
|
||||
// TA perf-counter register table: two rows.
// Row i = {mmTA_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiTaCounterRegAddr[] = {
    /* 0 */ {mmTA_PERFCOUNTER0_SELECT, 0, mmTA_PERFCOUNTER0_LO, mmTA_PERFCOUNTER0_HI},
    /* 1 */ {mmTA_PERFCOUNTER1_SELECT, 0, mmTA_PERFCOUNTER1_LO, mmTA_PERFCOUNTER1_HI},
};
|
||||
|
||||
/*
|
||||
* TD
|
||||
*/
|
||||
// TD perf-counter register table: two rows.
// Row i = {mmTD_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiTdCounterRegAddr[] = {
    /* 0 */ {mmTD_PERFCOUNTER0_SELECT, 0, mmTD_PERFCOUNTER0_LO, mmTD_PERFCOUNTER0_HI},
    /* 1 */ {mmTD_PERFCOUNTER1_SELECT, 0, mmTD_PERFCOUNTER1_LO, mmTD_PERFCOUNTER1_HI},
};
|
||||
|
||||
/*
|
||||
* GDS
|
||||
*/
|
||||
// GDS perf-counter register table: four rows.
// Row i = {mmGDS_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiGdsCounterRegAddr[] = {
    /* 0 */ {mmGDS_PERFCOUNTER0_SELECT, 0, mmGDS_PERFCOUNTER0_LO, mmGDS_PERFCOUNTER0_HI},
    /* 1 */ {mmGDS_PERFCOUNTER1_SELECT, 0, mmGDS_PERFCOUNTER1_LO, mmGDS_PERFCOUNTER1_HI},
    /* 2 */ {mmGDS_PERFCOUNTER2_SELECT, 0, mmGDS_PERFCOUNTER2_LO, mmGDS_PERFCOUNTER2_HI},
    /* 3 */ {mmGDS_PERFCOUNTER3_SELECT, 0, mmGDS_PERFCOUNTER3_LO, mmGDS_PERFCOUNTER3_HI},
};
|
||||
|
||||
/*
|
||||
* VGT
|
||||
*/
|
||||
// VGT perf-counter register table: four rows.
// Row i = {mmVGT_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiVgtCounterRegAddr[] = {
    /* 0 */ {mmVGT_PERFCOUNTER0_SELECT, 0, mmVGT_PERFCOUNTER0_LO, mmVGT_PERFCOUNTER0_HI},
    /* 1 */ {mmVGT_PERFCOUNTER1_SELECT, 0, mmVGT_PERFCOUNTER1_LO, mmVGT_PERFCOUNTER1_HI},
    /* 2 */ {mmVGT_PERFCOUNTER2_SELECT, 0, mmVGT_PERFCOUNTER2_LO, mmVGT_PERFCOUNTER2_HI},
    /* 3 */ {mmVGT_PERFCOUNTER3_SELECT, 0, mmVGT_PERFCOUNTER3_LO, mmVGT_PERFCOUNTER3_HI},
};
|
||||
|
||||
/*
|
||||
* IA
|
||||
*/
|
||||
// IA perf-counter register table: four rows.
// Row i = {mmIA_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiIaCounterRegAddr[] = {
    /* 0 */ {mmIA_PERFCOUNTER0_SELECT, 0, mmIA_PERFCOUNTER0_LO, mmIA_PERFCOUNTER0_HI},
    /* 1 */ {mmIA_PERFCOUNTER1_SELECT, 0, mmIA_PERFCOUNTER1_LO, mmIA_PERFCOUNTER1_HI},
    /* 2 */ {mmIA_PERFCOUNTER2_SELECT, 0, mmIA_PERFCOUNTER2_LO, mmIA_PERFCOUNTER2_HI},
    /* 3 */ {mmIA_PERFCOUNTER3_SELECT, 0, mmIA_PERFCOUNTER3_LO, mmIA_PERFCOUNTER3_HI},
};
|
||||
|
||||
/*
|
||||
* MC
|
||||
*/
|
||||
// MC perf-counter register table. It currently has no rows; the four
// MC_SEQ rows (counters A-D) are kept below in disabled form.
// NOTE(review): an array of unknown bound with an empty initializer is a
// zero-length array, which compiles only as a compiler extension.
GpuCounterRegInfo AiMcCounterRegAddr[] = {
    // {mmMC_SEQ_PERF_SEQ_CTL__SI__VI, 0, mmMC_SEQ_PERF_SEQ_CNT_A_I0__VI,
    //  mmMC_SEQ_PERF_SEQ_CNT_A_I1__VI},
    // {mmMC_SEQ_PERF_SEQ_CTL__SI__VI, 0, mmMC_SEQ_PERF_SEQ_CNT_B_I0__VI,
    //  mmMC_SEQ_PERF_SEQ_CNT_B_I1__VI},
    // {mmMC_SEQ_PERF_SEQ_CTL__SI__VI, 0, mmMC_SEQ_PERF_SEQ_CNT_C_I0__VI,
    //  mmMC_SEQ_PERF_SEQ_CNT_C_I1__VI},
    // {mmMC_SEQ_PERF_SEQ_CTL__SI__VI, 0, mmMC_SEQ_PERF_SEQ_CNT_D_I0__VI,
    //  mmMC_SEQ_PERF_SEQ_CNT_D_I1__VI}
};
|
||||
|
||||
/*
|
||||
* SRBM
|
||||
*/
|
||||
// SRBM perf-counter register table. It currently has no rows; the two
// candidate rows are kept below in disabled form.
// NOTE(review): an array of unknown bound with an empty initializer is a
// zero-length array, which compiles only as a compiler extension.
GpuCounterRegInfo AiSrbmCounterRegAddr[] = {
    // {mmSRBM_PERFCOUNTER0_SELECT, 0, mmSRBM_PERFCOUNTER0_LO, mmSRBM_PERFCOUNTER0_HI},
    // {mmSRBM_PERFCOUNTER1_SELECT, 0, mmSRBM_PERFCOUNTER1_LO, mmSRBM_PERFCOUNTER1_HI}
};
|
||||
|
||||
/*
|
||||
* WD
|
||||
*/
|
||||
// WD perf-counter register table: four rows.
// Row i = {mmWD_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiWdCounterRegAddr[] = {
    /* 0 */ {mmWD_PERFCOUNTER0_SELECT, 0, mmWD_PERFCOUNTER0_LO, mmWD_PERFCOUNTER0_HI},
    /* 1 */ {mmWD_PERFCOUNTER1_SELECT, 0, mmWD_PERFCOUNTER1_LO, mmWD_PERFCOUNTER1_HI},
    /* 2 */ {mmWD_PERFCOUNTER2_SELECT, 0, mmWD_PERFCOUNTER2_LO, mmWD_PERFCOUNTER2_HI},
    /* 3 */ {mmWD_PERFCOUNTER3_SELECT, 0, mmWD_PERFCOUNTER3_LO, mmWD_PERFCOUNTER3_HI},
};
|
||||
|
||||
/*
|
||||
* CPG
|
||||
*/
|
||||
// CPG perf-counter register table: two rows.
// Row i = {mmCPG_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiCpgCounterRegAddr[] = {
    /* 0 */ {mmCPG_PERFCOUNTER0_SELECT, 0, mmCPG_PERFCOUNTER0_LO, mmCPG_PERFCOUNTER0_HI},
    /* 1 */ {mmCPG_PERFCOUNTER1_SELECT, 0, mmCPG_PERFCOUNTER1_LO, mmCPG_PERFCOUNTER1_HI},
};
|
||||
|
||||
/*
|
||||
* CPC
|
||||
*/
|
||||
// CPC perf-counter register table: two rows.
// Row i = {mmCPC_PERFCOUNTERi_SELECT, 0, ..._LO, ..._HI}.
GpuCounterRegInfo AiCpcCounterRegAddr[] = {
    /* 0 */ {mmCPC_PERFCOUNTER0_SELECT, 0, mmCPC_PERFCOUNTER0_LO, mmCPC_PERFCOUNTER0_HI},
    /* 1 */ {mmCPC_PERFCOUNTER1_SELECT, 0, mmCPC_PERFCOUNTER1_LO, mmCPC_PERFCOUNTER1_HI},
};
|
||||
|
||||
// Private counter-block identifiers for the SQ, MC, IOMMUv2 and kernel
// driver blocks. Each one is initialized from four 32-bit words.
// NOTE(review): presumably these must match the ids used by the kernel
// driver -- confirm before changing any word.
GpuPrivCounterBlockId AiBlockIdSq = {
    {0xb5c396b6, 0x47e4d310, 0xc35cfc86, 0x08f53a04},
};
GpuPrivCounterBlockId AiBlockIdMc = {
    {0x13900b57, 0x4d984956, 0x5268d081, 0x9cf53719},
};
GpuPrivCounterBlockId AiBlockIdIommuV2 = {
    {0x80969879, 0x4be6b0f6, 0x636af697, 0x1d10f500},
};
GpuPrivCounterBlockId AiBlockIdKernelDriver = {
    {0xea9b5ae1, 0x44b36c3f, 0xf0da5489, 0x0aa96575},
};
|
||||
|
||||
} // pm4_profile
|
||||
@@ -1,245 +0,0 @@
|
||||
#ifndef _AI_BLOCKINFO_H_
|
||||
#define _AI_BLOCKINFO_H_
|
||||
|
||||
#include "gpu_block_info.h"
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
// MAX Number of block instances for ARCTIC ISLANDS (From Vega10)
|
||||
// Values are found here //gfxip/gfx8/main/src/meta/features/variant/Fiji/album.dj
|
||||
|
||||
// @brief Number of block instances.
|
||||
|
||||
// ---------------------------------------------------------------------------
// Blocks with several instances: instance count, then perf counters per
// instance.
// ---------------------------------------------------------------------------

// CB: instances per shader engine / perf counters per CB instance
#define AI_NUM_CB                    4
#define AI_COUNTER_NUM_PER_CB        4

// DB: instances per shader engine / perf counters per DB instance
#define AI_NUM_DB                    4
#define AI_COUNTER_NUM_PER_DB        4

// TA: instances per shader engine / perf counters per TA instance
#define AI_NUM_TA                    16
#define AI_COUNTER_NUM_PER_TA        2

// TD: instances per shader engine / perf counters per TD instance
#define AI_NUM_TD                    16
#define AI_COUNTER_NUM_PER_TD        2

// TCP: instances per shader engine / perf counters per TCP instance
#define AI_NUM_TCP                   16
#define AI_COUNTER_NUM_PER_TCP       4

// TCA: instances per chip / perf counters per TCA instance
#define AI_NUM_TCA                   2
#define AI_COUNTER_NUM_PER_TCA       4

// TCC: instances per chip / perf counters per TCC instance
#define AI_NUM_TCC                   16
#define AI_COUNTER_NUM_PER_TCC       4

// SDMA: instances per chip (its per-instance counter count is
// AI_COUNTER_NUM_PER_DRMDMA below)
#define AI_NUM_SDMA                  2

// ---------------------------------------------------------------------------
// Perf counter registers per block for Arctic Islands.
// ---------------------------------------------------------------------------
#define AI_COUNTER_NUM_PER_DRM       2
#define AI_COUNTER_NUM_PER_DRMDMA    2
#define AI_COUNTER_NUM_PER_IH        2
#define AI_COUNTER_NUM_PER_SRBM      2
#define AI_COUNTER_NUM_PER_CPF       2
#define AI_COUNTER_NUM_PER_GRBM      2
#define AI_COUNTER_NUM_PER_GRBMSE    4
#define AI_COUNTER_NUM_PER_PA_SU     4
#define AI_COUNTER_NUM_PER_RLC       2
#define AI_COUNTER_NUM_PER_PA_SC     8
// TODO (carried over from the original author, Shucai): double check the
// SPI value.
#define AI_COUNTER_NUM_PER_SPI       6
#define AI_COUNTER_NUM_PER_SQ        16
#define AI_COUNTER_NUM_PER_SX        4
#define AI_COUNTER_NUM_PER_GDS       4
#define AI_COUNTER_NUM_PER_VGT       4
#define AI_COUNTER_NUM_PER_IA        4
#define AI_COUNTER_NUM_PER_MC        4
#define AI_COUNTER_NUM_PER_TCS       4
#define AI_COUNTER_NUM_PER_WD        4
#define AI_COUNTER_NUM_PER_CPG       2
#define AI_COUNTER_NUM_PER_CPC       2
#define AI_COUNTER_NUM_PER_VM        1
#define AI_COUNTER_NUM_PER_VM_MD     1
#define AI_COUNTER_NUM_PER_PIPESTATS 12

// Shader engine count used as the first numeric field of each
// Gfx9HwBlocks row.
#define AI_MAX_NUM_SHADER_ENGINES    1
|
||||
|
||||
// Identifiers of the AI (Arctic Islands) hardware counter blocks.
// Values are assigned sequentially starting at 0 for Cb0. The Cpf, Srbm and
// Cpg enumerators are disabled for Vega10 and therefore take no value.
typedef enum HsaAiCounterBlockId {
  // CB instances
  kHsaAiCounterBlockIdCb0 = 0,
  kHsaAiCounterBlockIdCb1,
  kHsaAiCounterBlockIdCb2,
  kHsaAiCounterBlockIdCb3,

  // Disabled for Vega10:
  // kHsaAiCounterBlockIdCpf,

  // DB instances
  kHsaAiCounterBlockIdDb0,
  kHsaAiCounterBlockIdDb1,
  kHsaAiCounterBlockIdDb2,
  kHsaAiCounterBlockIdDb3,

  // Single-instance blocks
  kHsaAiCounterBlockIdGrbm,
  kHsaAiCounterBlockIdGrbmSe,
  kHsaAiCounterBlockIdPaSu,
  kHsaAiCounterBlockIdPaSc,
  kHsaAiCounterBlockIdSpi,

  // SQ and its Gs/Vs/Ps/Hs/Cs variants
  kHsaAiCounterBlockIdSq,
  kHsaAiCounterBlockIdSqGs,
  kHsaAiCounterBlockIdSqVs,
  kHsaAiCounterBlockIdSqPs,
  kHsaAiCounterBlockIdSqHs,
  kHsaAiCounterBlockIdSqCs,

  kHsaAiCounterBlockIdSx,

  // TA instances 0-15
  kHsaAiCounterBlockIdTa0,
  kHsaAiCounterBlockIdTa1,
  kHsaAiCounterBlockIdTa2,
  kHsaAiCounterBlockIdTa3,
  kHsaAiCounterBlockIdTa4,
  kHsaAiCounterBlockIdTa5,
  kHsaAiCounterBlockIdTa6,
  kHsaAiCounterBlockIdTa7,
  kHsaAiCounterBlockIdTa8,
  kHsaAiCounterBlockIdTa9,
  kHsaAiCounterBlockIdTa10,
  kHsaAiCounterBlockIdTa11,
  kHsaAiCounterBlockIdTa12,
  kHsaAiCounterBlockIdTa13,
  kHsaAiCounterBlockIdTa14,
  kHsaAiCounterBlockIdTa15,

  // TCA instances 0-1
  kHsaAiCounterBlockIdTca0,
  kHsaAiCounterBlockIdTca1,

  // TCC instances 0-15
  kHsaAiCounterBlockIdTcc0,
  kHsaAiCounterBlockIdTcc1,
  kHsaAiCounterBlockIdTcc2,
  kHsaAiCounterBlockIdTcc3,
  kHsaAiCounterBlockIdTcc4,
  kHsaAiCounterBlockIdTcc5,
  kHsaAiCounterBlockIdTcc6,
  kHsaAiCounterBlockIdTcc7,
  kHsaAiCounterBlockIdTcc8,
  kHsaAiCounterBlockIdTcc9,
  kHsaAiCounterBlockIdTcc10,
  kHsaAiCounterBlockIdTcc11,
  kHsaAiCounterBlockIdTcc12,
  kHsaAiCounterBlockIdTcc13,
  kHsaAiCounterBlockIdTcc14,
  kHsaAiCounterBlockIdTcc15,

  // TD instances 0-15
  kHsaAiCounterBlockIdTd0,
  kHsaAiCounterBlockIdTd1,
  kHsaAiCounterBlockIdTd2,
  kHsaAiCounterBlockIdTd3,
  kHsaAiCounterBlockIdTd4,
  kHsaAiCounterBlockIdTd5,
  kHsaAiCounterBlockIdTd6,
  kHsaAiCounterBlockIdTd7,
  kHsaAiCounterBlockIdTd8,
  kHsaAiCounterBlockIdTd9,
  kHsaAiCounterBlockIdTd10,
  kHsaAiCounterBlockIdTd11,
  kHsaAiCounterBlockIdTd12,
  kHsaAiCounterBlockIdTd13,
  kHsaAiCounterBlockIdTd14,
  kHsaAiCounterBlockIdTd15,

  // TCP instances 0-15
  kHsaAiCounterBlockIdTcp0,
  kHsaAiCounterBlockIdTcp1,
  kHsaAiCounterBlockIdTcp2,
  kHsaAiCounterBlockIdTcp3,
  kHsaAiCounterBlockIdTcp4,
  kHsaAiCounterBlockIdTcp5,
  kHsaAiCounterBlockIdTcp6,
  kHsaAiCounterBlockIdTcp7,
  kHsaAiCounterBlockIdTcp8,
  kHsaAiCounterBlockIdTcp9,
  kHsaAiCounterBlockIdTcp10,
  kHsaAiCounterBlockIdTcp11,
  kHsaAiCounterBlockIdTcp12,
  kHsaAiCounterBlockIdTcp13,
  kHsaAiCounterBlockIdTcp14,
  kHsaAiCounterBlockIdTcp15,

  kHsaAiCounterBlockIdGds,
  kHsaAiCounterBlockIdVgt,
  kHsaAiCounterBlockIdIa,
  kHsaAiCounterBlockIdMc,

  // Disabled for Vega10:
  // kHsaAiCounterBlockIdSrbm,

  kHsaAiCounterBlockIdTcs,
  kHsaAiCounterBlockIdWd,

  // Disabled for Vega10:
  // kHsaAiCounterBlockIdCpg,

  kHsaAiCounterBlockIdCpc,

  // Counters retrieved by KFD
  kHsaAiCounterBlockIdIommuV2,
  kHsaAiCounterBlockIdKernelDriver,

  kHsaAiCounterBlockIdCpPipeStats,
  kHsaAiCounterBlockIdHwInfo,

  // Aliases for the first and last enumerators above
  kHsaAiCounterBlockIdBlocksFirst = kHsaAiCounterBlockIdCb0,
  kHsaAiCounterBlockIdBlocksLast = kHsaAiCounterBlockIdHwInfo
} HsaAiCounterBlockId;
|
||||
|
||||
extern GpuBlockInfo Gfx9HwBlocks[];
|
||||
extern GpuCounterRegInfo AiSqCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiCbCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiDrmdmaCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiIhCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiCpfCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiCpgCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiCpcCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiDrmCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiGrbmCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiGrbmSeCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiPaSuCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiPaScCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiSpiCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiTcaCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiTccCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiTcpCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiDbCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiRlcCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiScCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiSxCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiTaCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiTdCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiGdsCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiVgtCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiIaCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiMcCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiSrbmCounterRegAddr[];
|
||||
// No Tcs Counter block on AI
|
||||
// extern GpuCounterRegInfo AiTcsCounterRegAddr[];
|
||||
extern GpuCounterRegInfo AiWdCounterRegAddr[];
|
||||
|
||||
extern GpuPrivCounterBlockId AiBlockIdSq;
|
||||
extern GpuPrivCounterBlockId AiBlockIdMc;
|
||||
extern GpuPrivCounterBlockId AiBlockIdIommuV2;
|
||||
extern GpuPrivCounterBlockId AiBlockIdKernelDriver;
|
||||
}
|
||||
|
||||
#endif // _AI_BLOCKINFO_H_
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,69 +0,0 @@
|
||||
#ifndef _AI_PMU_H_
|
||||
#define _AI_PMU_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "perf_counter.h"
|
||||
|
||||
namespace pm4_profile {
|
||||
class CommandWriter;
|
||||
|
||||
// This class implement the AI PMU. It is responsible for setting up
|
||||
// CounterGroups to represent each AI hardware block which exposes performance
|
||||
// counters.
|
||||
class Gfx9PerfCounter : public pm4_profile::PerfCounter {
|
||||
public:
|
||||
Gfx9PerfCounter();
|
||||
|
||||
void begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, const CountersMap& countersMap);
|
||||
|
||||
uint32_t end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter, const CountersMap& countersMap,
|
||||
void* dataBuff);
|
||||
|
||||
uint32_t getNumSe() { return num_se_; }
|
||||
|
||||
private:
|
||||
void Init();
|
||||
|
||||
// Program SQ block related counters
|
||||
uint32_t ProgramSQCntrs(uint32_t sqRegIdx, uint32_t* regAddr, uint32_t* regVal, uint32_t blkId,
|
||||
uint32_t blkCntrIdx);
|
||||
|
||||
// Program TA block related counters
|
||||
uint32_t ProgramTaCntrs(uint32_t taRegIdx, uint32_t* regAddr, uint32_t* regVal, uint32_t blkId,
|
||||
uint32_t blkCntrIdx);
|
||||
|
||||
// Program TCA block related counters
|
||||
uint32_t ProgramTcaCntrs(uint32_t tcaRegIdx, uint32_t* regAddr, uint32_t* regVal, uint32_t blkId,
|
||||
uint32_t blkCntrIdx);
|
||||
|
||||
// Program TCC block related counters
|
||||
uint32_t ProgramTccCntrs(uint32_t tccRegIdx, uint32_t* regAddr, uint32_t* regVal, uint32_t blkId,
|
||||
uint32_t blkCntrIdx);
|
||||
|
||||
// Program TCP block related counters
|
||||
uint32_t ProgramTcpCntrs(uint32_t tcpRegIdx, uint32_t* regAddr, uint32_t* regVal, uint32_t blkId,
|
||||
uint32_t blkCntrIdx);
|
||||
|
||||
// Program TD block related counters
|
||||
uint32_t ProgramTdCntrs(uint32_t tdRegIdx, uint32_t* regAddr, uint32_t* regVal, uint32_t blkId,
|
||||
uint32_t blkCntrIdx);
|
||||
|
||||
// Build counter selection register, return how many registers are built
|
||||
uint32_t BuildCounterSelRegister(uint32_t cntrIdx, uint32_t* regAddr, uint32_t* regVal,
|
||||
uint32_t blkId, uint32_t blkCntrIdx);
|
||||
|
||||
// Build counter selection register, return how many registers are built
|
||||
uint32_t BuildCounterReadRegisters(uint32_t reg_index, uint32_t block_id, uint32_t* reg_addr,
|
||||
uint32_t* reg_val);
|
||||
|
||||
private:
|
||||
// Indicates the number of Shader Engines Present
|
||||
uint32_t num_se_;
|
||||
|
||||
// Used to reset GRBM to its default state
|
||||
uint32_t reset_grbm_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // _AI_PMU_H_
|
||||
@@ -1,98 +0,0 @@
|
||||
#ifndef _GPU_BLOCKINFO_H_
|
||||
#define _GPU_BLOCKINFO_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
// Counter control method of a hardware block (see GpuBlockInfo::method).
// The combined value is the bitwise OR of the two single-criterion values.
typedef enum CntlMethod {
  CntlMethodNone = 0,
  CntlMethodByInstance = 1,
  CntlMethodBySe = 2,
  CntlMethodBySeAndInstance = CntlMethodByInstance | CntlMethodBySe  // == 3
} CntlMethod;
|
||||
|
||||
// Structure which contains information about a specific hardware block for CI.
|
||||
#define GPU_BLOCK_NAME_SIZE 15
|
||||
|
||||
typedef struct GpuBlockInfo_ {
|
||||
// Unique string identifier of the block.
|
||||
const char blockName[GPU_BLOCK_NAME_SIZE];
|
||||
|
||||
// Unique string identifier of the block.
|
||||
uint32_t counterGroupId;
|
||||
|
||||
// Maximum number of shader engines
|
||||
uint32_t maxShaderEngineCount;
|
||||
|
||||
// Maximum number of shader arrays
|
||||
uint32_t maxShaderArrayCount;
|
||||
|
||||
// Maximum number of block instances in the group per shader array
|
||||
uint32_t maxInstanceCount;
|
||||
|
||||
// Counter control method
|
||||
CntlMethod method;
|
||||
|
||||
// Maximum counter event ID
|
||||
uint32_t maxEventId;
|
||||
|
||||
// Maximum number of counters that can be enabled at once
|
||||
uint32_t maxSimultaneousCounters;
|
||||
|
||||
// Maximum number of streaming counters that can be enabled at once
|
||||
uint32_t maxStreamingCounters;
|
||||
|
||||
// The number of hardware counters that are shared
|
||||
// between regular and streaming counters.
|
||||
// This is important so that resources are not double-booked
|
||||
// between the two types of counters.
|
||||
uint32_t sharedHWCounters;
|
||||
|
||||
// Block counters can be configured with additional filters
|
||||
bool hasFilters;
|
||||
|
||||
//------------------------------------------
|
||||
// Trace specific stuff regarding when they get locked
|
||||
|
||||
// Buffer size in bytes
|
||||
uint32_t bufferSize;
|
||||
|
||||
// Current write pointer offset from beginning of the buffer
|
||||
uint32_t wptrOffset;
|
||||
|
||||
// Flag that buffer might have wrapped
|
||||
bool wrapped;
|
||||
|
||||
// If buffer has wrapped, this could indicate approximate
|
||||
// total amount of data that was dumpued in the trace buffer
|
||||
uint32_t dataSizeEstimate;
|
||||
|
||||
// Buffer data pointer
|
||||
void* pData;
|
||||
} GpuBlockInfo;
|
||||
|
||||
// Register addresses that belong to a single counter.
typedef struct GpuCounterRegInfo_ {
  uint32_t counterSelRegAddr;     // counter select register address
  uint32_t counterCntlRegAddr;    // counter control register address
  uint32_t counterReadRegAddrLo;  // counter read register address, low
  uint32_t counterReadRegAddrHi;  // counter read register address, high
} GpuCounterRegInfo;
|
||||
|
||||
// GPU privileged block ID. The value should be the same as the one defined
// in KFD.
typedef struct GpuPrivCounterBlockId_ {
  uint32_t items[4];  // the block ID consists of 4 dwords
} GpuPrivCounterBlockId;
|
||||
|
||||
} // pm4_profile
|
||||
#endif
|
||||
@@ -1,35 +0,0 @@
|
||||
#ifndef _HSA_PERF_H_
|
||||
#define _HSA_PERF_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
namespace pm4_profile {
|
||||
class DefaultCmdBuf;
|
||||
class CommandWriter;
|
||||
|
||||
typedef std::vector<uint32_t> CountersVec;
|
||||
typedef std::map<uint32_t, CountersVec> CountersMap;
|
||||
|
||||
class PerfCounter {
|
||||
public:
|
||||
virtual ~PerfCounter() {}
|
||||
|
||||
// Generate start profiling commands.
|
||||
virtual void begin(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
|
||||
const CountersMap& countersMap) = 0;
|
||||
|
||||
// Generate stop profiling commands.
|
||||
// Return actual required data buffer size.
|
||||
virtual uint32_t end(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter,
|
||||
const CountersMap& countersMap, void* dataBuff) = 0;
|
||||
|
||||
// Returns number of shader engines per block
|
||||
// for the blocks featured shader engines instancing
|
||||
virtual uint32_t getNumSe() = 0;
|
||||
};
|
||||
} // namespace pm4_profile
|
||||
#endif // _HSA_PERF_H_
|
||||
@@ -1,16 +0,0 @@
|
||||
#
# Source files for Rocr ThreadTrace
#
set ( LIB_SRC
      thread_trace.cpp
      gfx8_thread_trace.cpp
      gfx9_thread_trace.cpp )

#
# Build ThreadTrace as a Static Library object
#
add_library ( ${SQTT_LIB} STATIC ${LIB_SRC} )

#
# Header files include path(s).
# Attached to the target (instead of the whole directory scope) so the
# command writer headers are only visible to the ThreadTrace sources.
#
target_include_directories ( ${SQTT_LIB} PRIVATE "${PROJ_DIR}/commandwriter" )
|
||||
@@ -1,352 +0,0 @@
|
||||
#include "gfx8_thread_trace.h"
|
||||
|
||||
/// @brief Returns the lower 32-bits of a value (truncating conversion)
inline uint32_t Low32(uint64_t u) { return static_cast<uint32_t>(u); }
|
||||
|
||||
/// @brief Returns the upper 32-bits of a value
inline uint32_t High32(uint64_t u) { return static_cast<uint32_t>(u >> 32); }
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
// Constructs the object with the number of shader engines set to 4 and with
// no control/data buffer attached yet. The status pointer and the per-SE
// buffer size get a defined value here so they are never read uninitialized
// before setSqttCtrlBuff() / setSqttDataBuff() are called.
Gfx8ThreadTrace::Gfx8ThreadTrace()
    : numSE_(4),        // number of shader engines
      ttStatus_(NULL),  // attached later by setSqttCtrlBuff()
      ttBuffSize_(0) {  // computed later by setSqttDataBuff()
}
|
||||
|
||||
// Destructor with an empty body: no explicit cleanup is performed here.
Gfx8ThreadTrace::~Gfx8ThreadTrace() {}
|
||||
|
||||
bool Gfx8ThreadTrace::Init(const ThreadTraceConfig* config) {
|
||||
// Initialize SQTT Configuration and Register objects
|
||||
if (!ThreadTrace::Init(config)) return false;
|
||||
InitThreadTraceCfgRegs();
|
||||
return true;
|
||||
}
|
||||
|
||||
void Gfx8ThreadTrace::InitThreadTraceCfgRegs() {
|
||||
// Indicates the size of buffer to use per Shader Engine instance.
|
||||
// The size is specified in terms of 4KB blocks
|
||||
ttCfgRegs_.ttRegSize.u32All = 0;
|
||||
|
||||
// Indicates various attributes of a thread trace session.
|
||||
//
|
||||
// MASK_CS: Which shader types should be enabled for data collection
|
||||
// Enable CS Shader types.
|
||||
//
|
||||
// WRAP: How trace buffer should be used as a ring buffer or as a linear
|
||||
// buffer - Disable WRAP mode i.e use it as a linear buffer
|
||||
//
|
||||
// MODE: Enables a thread trace session
|
||||
//
|
||||
// CAPTURE_MODE: When thread trace data is collected immediately after MODE
|
||||
// is enabled or wait until a Thread Trace Start event is received
|
||||
//
|
||||
// AUTOFLUSH_EN: Flush thread trace data to buffer often automatically
|
||||
//
|
||||
ttCfgRegs_.ttRegMode.u32All = 0;
|
||||
ttCfgRegs_.ttRegMode.bits.WRAP = 0;
|
||||
ttCfgRegs_.ttRegMode.bits.CAPTURE_MODE = 0;
|
||||
ttCfgRegs_.ttRegMode.bits.MASK_CS = 1;
|
||||
ttCfgRegs_.ttRegMode.bits.AUTOFLUSH_EN = 1;
|
||||
ttCfgRegs_.ttRegMode.bits.MODE = SQ_THREAD_TRACE_MODE_OFF;
|
||||
|
||||
// Enable Thread Trace for all VM Id's
|
||||
// Enable all of the SIMD's of the compute unit
|
||||
// Enable Compute Unit (CU) at index Zero to be used for fine-grained data
|
||||
// Enable Shader Array (SH) at index Zero to be used for fine-grained data
|
||||
//
|
||||
// @note: Not enabling REG_STALL_EN, SPI_STALL_EN and SQ_STALL_EN bits. They
|
||||
// are useful if we wish to program buffer throttling.
|
||||
//
|
||||
ttCfgRegs_.ttRegMask.u32All = 0;
|
||||
ttCfgRegs_.ttRegMask.bits.SH_SEL = 0x0;
|
||||
ttCfgRegs_.ttRegMask.bits.SIMD_EN = 0xF;
|
||||
ttCfgRegs_.ttRegMask.bits.CU_SEL = GetCuId();
|
||||
ttCfgRegs_.ttRegMask.bits.SQ_STALL_EN__CI__VI = 0x1;
|
||||
ttCfgRegs_.ttRegMask.bits.SPI_STALL_EN__CI__VI = 0x1;
|
||||
ttCfgRegs_.ttRegMask.bits.REG_STALL_EN__CI__VI = 0x1;
|
||||
ttCfgRegs_.ttRegMask.bits.VM_ID_MASK = GetVmId();
|
||||
|
||||
// Override Mask value if a user value is available
|
||||
uint32_t ttMask = GetMask();
|
||||
if (ttMask) {
|
||||
ttCfgRegs_.ttRegMask.u32All = ttMask;
|
||||
}
|
||||
|
||||
// Mask of compute units to get thread trace data from
|
||||
ttCfgRegs_.ttRegPerfMask.u32All = 0;
|
||||
ttCfgRegs_.ttRegPerfMask.bits.SH0_MASK = 0xFFFF;
|
||||
ttCfgRegs_.ttRegPerfMask.bits.SH1_MASK = 0xFFFF;
|
||||
|
||||
// Indicate the different TT messages/tokens that should be enabled/logged
|
||||
// Indicate the different TT tokens that specify register operations to be logged
|
||||
ttCfgRegs_.ttRegTokenMask.u32All = 0;
|
||||
ttCfgRegs_.ttRegTokenMask.bits.REG_MASK = 0xFF;
|
||||
ttCfgRegs_.ttRegTokenMask.bits.TOKEN_MASK = 0xFFFF;
|
||||
ttCfgRegs_.ttRegTokenMask.bits.REG_DROP_ON_STALL__CI__VI = 0x1;
|
||||
|
||||
// Override TokenMask1 value if a user value is available
|
||||
uint32_t tokenMask1 = GetTokenMask();
|
||||
if (tokenMask1) {
|
||||
ttCfgRegs_.ttRegTokenMask.u32All = tokenMask1;
|
||||
}
|
||||
|
||||
// Indicate the different TT tokens that specify instruction operations to be logged
|
||||
// Disabling specifically instruction operations updating Program Counter (PC).
|
||||
// @note: The field is defined in the spec incorrectly as a 16-bit value
|
||||
ttCfgRegs_.ttRegTokenMask2.u32All = 0;
|
||||
ttCfgRegs_.ttRegTokenMask2.bits.INST_MASK = 0xFFFFFF7F;
|
||||
|
||||
// Override TokenMask2 value if a user value is available
|
||||
uint32_t tokenMask2 = GetTokenMask2();
|
||||
if (tokenMask2) {
|
||||
ttCfgRegs_.ttRegTokenMask2.u32All = tokenMask2;
|
||||
}
|
||||
}
|
||||
|
||||
void Gfx8ThreadTrace::setSqttDataBuff(uint8_t* sqttBuffer, uint32_t sqttBuffSz) {
|
||||
// Compute the size of buffer available for each shader engine
|
||||
ttBuffSize_ = sqttBuffSz / numSE_;
|
||||
|
||||
// Populate the sqtt buffer array submitted to device
|
||||
for (int idx = 0; idx < numSE_; idx++) {
|
||||
uint64_t sqttSEAddr = uint64_t(sqttBuffer + (ttBuffSize_ * idx));
|
||||
devMemList_.push_back(sqttSEAddr);
|
||||
}
|
||||
|
||||
// Update the size bit-field of sqtt ctrl register
|
||||
ttCfgRegs_.ttRegSize.bits.SIZE = ttBuffSize_ >> TT_BUFF_ALIGN_SHIFT;
|
||||
}
|
||||
|
||||
// Appends to cmdBuff the command stream that configures and then starts a
// thread trace session: the broadcast register setup, the per shader engine
// buffer base/size/ctrl programming and finally the MODE register write with
// MODE set to ON. The MODE image kept in ttCfgRegs_ is put back to OFF
// afterwards so that StopSession() can write it unchanged.
void Gfx8ThreadTrace::BeginSession(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter) {
  // Program Grbm to broadcast messages to all shader engines
  regGRBM_GFX_INDEX grbmIndex;
  grbmIndex.u32All = 0;
  grbmIndex.bitfields.SH_BROADCAST_WRITES = 1;
  grbmIndex.bitfields.SE_BROADCAST_WRITES = 1;
  grbmIndex.bitfields.INSTANCE_BROADCAST_WRITES = 1;
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmGRBM_GFX_INDEX__CI__VI, grbmIndex.u32All);

  // Issue a CSPartialFlush cmd including cache flush
  cmdWriter->BuildWriteWaitIdlePacket(cmdBuff);

  // Disable RLC Perfmon Clock Gating
  // On Vega this is needed to collect Perf Cntrs
  // cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmRLC_PERFMON_CLK_CNTL__VI, 1);

  // Program the Compute register to indicate SQTT is enabled
  /*
  regCOMPUTE_THREAD_TRACE_ENABLE__CI__VI enableTT = {0};
  enableTT.bits.THREAD_TRACE_ENABLE = 1;
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff,
                                        mmCOMPUTE_THREAD_TRACE_ENABLE__CI__VI,
                                        enableTT.u32All);
  */

  // Thread trace mask - specifies SH, CU, SIMD and VM Id masks to apply,
  // including the SQ/SPI/REG_STALL_EN bits
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_MASK__VI,
                                        ttCfgRegs_.ttRegMask.u32All);

  // Thread trace Perf mask
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_PERF_MASK__VI,
                                        ttCfgRegs_.ttRegPerfMask.u32All);

  // Thread trace token mask
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_TOKEN_MASK__VI,
                                        ttCfgRegs_.ttRegTokenMask.u32All);

  // Thread trace token mask2 - the list of instruction tokens to record
  // (INST_PC instruction tokens disabled by default)
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_TOKEN_MASK2__VI,
                                        ttCfgRegs_.ttRegTokenMask2.u32All);

  // Thread trace mode register (MODE is still OFF at this point)
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_MODE__VI,
                                        ttCfgRegs_.ttRegMode.u32All);

  // The HiWaterMark register is only programmed when stalling is in use
  const bool stallingEnabled = (ttCfgRegs_.ttRegMask.bits.SQ_STALL_EN__CI__VI) ||
                               (ttCfgRegs_.ttRegMask.bits.SPI_STALL_EN__CI__VI) ||
                               (ttCfgRegs_.ttRegMask.bits.REG_STALL_EN__CI__VI) ||
                               (ttCfgRegs_.ttRegTokenMask.bits.REG_DROP_ON_STALL__CI__VI);
  if (stallingEnabled) {
    cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_HIWATER__VI, 0x06);
  }

  // For every SE program the register carrying the address of its thread
  // trace buffer, which is aligned to 4KB per thread trace specification
  for (uint32_t seIdx = 0; seIdx < numSE_; seIdx++) {
    // Program Grbm to direct writes to one SE
    grbmIndex.bitfields.SH_INDEX = 0;
    grbmIndex.bitfields.SE_INDEX = seIdx;
    grbmIndex.bitfields.SH_BROADCAST_WRITES = 0;
    grbmIndex.bitfields.SE_BROADCAST_WRITES = 0;
    cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmGRBM_GFX_INDEX__CI__VI, grbmIndex.u32All);

    // Program base2 address of buffer to use for thread trace
    // Encodes ATC bit, so the correct way to program is to use
    // ATC Bit property of the device
    /*
    regSQ_THREAD_TRACE_BASE2__CI__VI sqttBase2 = {};
    sqttBase2.u32All = 0;
    sqttBase2.bits.ATC = 0;
    sqttBase2.bits.ADDR_HI = 0;
    cmdWriter->BuildWriteUConfigRegPacket(cmdBuff,
                                          mmSQ_THREAD_TRACE_BASE2__VI,
                                          sqttBase2.u32All);
    */

    // Base address of this SE's buffer, in TT_BUFF_ALIGN_SHIFT units
    const uint64_t seBaseAddr = devMemList_[seIdx] >> TT_BUFF_ALIGN_SHIFT;

    regSQ_THREAD_TRACE_BASE sqttBase = {};
    sqttBase.bits.ADDR = Low32(seBaseAddr);
    cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_BASE__VI, sqttBase.u32All);

    // Size of thread trace buffer
    cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_SIZE__VI,
                                          ttCfgRegs_.ttRegSize.u32All);

    // Thread trace ctrl register: reset the buffer
    regSQ_THREAD_TRACE_CTRL sqttCtrl = {};
    sqttCtrl.u32All = 0;
    sqttCtrl.bits.RESET_BUFFER = 1;
    cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_CTRL__VI, sqttCtrl.u32All);
  }

  // Reset the GRBM to broadcast mode
  grbmIndex.bitfields.SH_BROADCAST_WRITES = 1;
  grbmIndex.bitfields.SE_BROADCAST_WRITES = 1;
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmGRBM_GFX_INDEX__CI__VI, grbmIndex.u32All);

  // Issue a CSPartialFlush cmd including cache flush
  cmdWriter->BuildWriteWaitIdlePacket(cmdBuff);

  // Write the mode register with MODE = ON, then restore the cached image
  // to OFF
  ttCfgRegs_.ttRegMode.bits.MODE = SQ_THREAD_TRACE_MODE_ON;
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_MODE__VI,
                                        ttCfgRegs_.ttRegMode.u32All);
  ttCfgRegs_.ttRegMode.bits.MODE = SQ_THREAD_TRACE_MODE_OFF;

  // Issue a CSPartialFlush cmd including cache flush
  cmdWriter->BuildWriteWaitIdlePacket(cmdBuff);
}
|
||||
|
||||
// Appends to cmdBuff the command stream that stops a thread trace session:
// writes the MODE register image (MODE is OFF in ttCfgRegs_ outside of
// BeginSession()), copies the STATUS, CNTR and WPTR registers of every shader
// engine into the control buffer ttStatus_ (TT_STATUS_IDX_MAX dwords per SE),
// flushes the caches and finally clears the SIZE register and resets the
// trace buffer.
void Gfx8ThreadTrace::StopSession(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter) {
  // Program Grbm to broadcast messages to all shader engines
  regGRBM_GFX_INDEX grbm_gfx_index;
  grbm_gfx_index.u32All = 0;
  grbm_gfx_index.bitfields.SH_BROADCAST_WRITES = 1;
  grbm_gfx_index.bitfields.SE_BROADCAST_WRITES = 1;
  grbm_gfx_index.bitfields.INSTANCE_BROADCAST_WRITES = 1;
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmGRBM_GFX_INDEX__CI__VI, grbm_gfx_index.u32All);

  // Issue a CSPartialFlush cmd including cache flush
  cmdWriter->BuildWriteWaitIdlePacket(cmdBuff);

  // Program the thread trace mode register to disable thread trace
  // The MODE register is set to disable thread trace by default
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_MODE__VI,
                                        ttCfgRegs_.ttRegMode.u32All);

  // Issue a CSPartialFlush cmd including cache flush
  cmdWriter->BuildWriteWaitIdlePacket(cmdBuff);

  // Iterate through the list of SE's and read the Status, Counter and
  // Write Pointer registers of Thread Trace subsystem
  for (uint32_t idx = 0; idx < numSE_; idx++) {
    // Program Grbm to direct writes to one SE
    grbm_gfx_index.bitfields.SH_INDEX = 0;
    grbm_gfx_index.bitfields.SE_INDEX = idx;
    grbm_gfx_index.bitfields.SH_BROADCAST_WRITES = 0;
    grbm_gfx_index.bitfields.SE_BROADCAST_WRITES = 0;
    cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmGRBM_GFX_INDEX__CI__VI, grbm_gfx_index.u32All);

    // Issue WaitRegMem command to wait until SQTT event has completed
    // NOTE(review): with maskVal 0x40000000 the masked status can never be
    // equal to waitVal 0x01; if funcEq == false selects a "not equal"
    // compare this wait is satisfied immediately - confirm against
    // BuildWaitRegMemCommand() and the STATUS register layout.
    bool funcEq = false;
    bool memSpace = false;
    uint32_t waitVal = 0x01;
    uint32_t maskVal = 0x40000000L;
    uint32_t statusOffset = mmSQ_THREAD_TRACE_STATUS__VI - UCONFIG_SPACE_START__CI__VI;
    cmdWriter->BuildWaitRegMemCommand(cmdBuff, memSpace, statusOffset, funcEq, maskVal, waitVal);

    // Retrieve the values from various status registers into this SE's
    // tuple of the control buffer
    cmdWriter->BuildCopyDataPacket(cmdBuff, COPY_DATA_SEL_SRC_SYS_PERF_COUNTER,
                                   mmSQ_THREAD_TRACE_STATUS__VI, 0,
                                   ttStatus_ + ((TT_STATUS_IDX_MAX * idx) + TT_STATUS_IDX_STATUS),
                                   COPY_DATA_SEL_COUNT_1DW, true);

    cmdWriter->BuildCopyDataPacket(cmdBuff, COPY_DATA_SEL_SRC_SYS_PERF_COUNTER,
                                   mmSQ_THREAD_TRACE_CNTR, 0,
                                   ttStatus_ + ((TT_STATUS_IDX_MAX * idx) + TT_STATUS_IDX_CNTR),
                                   COPY_DATA_SEL_COUNT_1DW, true);

    uint32_t wptrIdx = ((TT_STATUS_IDX_MAX * idx) + TT_STATUS_IDX_WPTR);
    cmdWriter->BuildCopyDataPacket(cmdBuff, COPY_DATA_SEL_SRC_SYS_PERF_COUNTER,
                                   mmSQ_THREAD_TRACE_WPTR__VI, 0, ttStatus_ + wptrIdx,
                                   COPY_DATA_SEL_COUNT_1DW, true);
  }

  // Reset the GRBM to broadcast mode
  grbm_gfx_index.bitfields.SH_BROADCAST_WRITES = 1;
  grbm_gfx_index.bitfields.SE_BROADCAST_WRITES = 1;
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmGRBM_GFX_INDEX__CI__VI, grbm_gfx_index.u32All);

  // Initialize cache flush request object
  FlushCacheOptions flush;
  flush.l1 = true;
  flush.l2 = true;
  flush.icache = true;
  flush.kcache = true;
  cmdWriter->BuildFlushCacheCmd(cmdBuff, &flush, NULL, 0);

  // Program the size of thread trace buffer (cleared to zero)
  regSQ_THREAD_TRACE_SIZE ttRegSize = {0};
  ttRegSize.u32All = 0;
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_SIZE__VI, ttRegSize.u32All);

  // Program the thread trace ctrl register: reset the buffer
  regSQ_THREAD_TRACE_CTRL sqttCtrl = {};
  sqttCtrl.u32All = 0;
  sqttCtrl.bits.RESET_BUFFER = 1;
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_CTRL__VI, sqttCtrl.u32All);

  // Program the compute_thread_trace_enable register
  /*
  regCOMPUTE_THREAD_TRACE_ENABLE__CI__VI disableTT = {0};
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff,
                                        mmCOMPUTE_THREAD_TRACE_ENABLE__CI__VI,
                                        disableTT.u32All);
  */

  // RLC Perfmon Clock Gating control (value 0 here, 1 in BeginSession());
  // the write is currently disabled in both places.
  // On Vega this is needed to collect Perf Cntrs
  // cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmRLC_PERFMON_CLK_CNTL__VI, 0);

  // Issue a CSPartialFlush cmd including cache flush
  cmdWriter->BuildWriteWaitIdlePacket(cmdBuff);
}
|
||||
|
||||
bool Gfx8ThreadTrace::Validate() {
|
||||
// Iterate through the list of SE to verify
|
||||
for (int idx = 0; idx < numSE_; idx++) {
|
||||
// Determine if the buffer has wrapped
|
||||
uint32_t statusIdx = ((TT_STATUS_IDX_MAX * idx) + TT_STATUS_IDX_STATUS);
|
||||
if (ttStatus_[statusIdx] & 0x80000000) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Adjust the value of Write Ptr which is bits [29-0]
|
||||
uint32_t wptrIdx = ((TT_STATUS_IDX_MAX * idx) + TT_STATUS_IDX_WPTR);
|
||||
ttStatus_[wptrIdx] = (ttStatus_[wptrIdx] & TT_WRITE_PTR_MASK);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // pm4_profile
|
||||
@@ -1,99 +0,0 @@
|
||||
#ifndef _GFX8_THREAD_TRACE_H_
|
||||
#define _GFX8_THREAD_TRACE_H_
|
||||
|
||||
#include "gfxip/gfx8/si_ci_vi_merged_typedef.h"
|
||||
#include "gfxip/gfx8/si_ci_vi_merged_offset.h"
|
||||
#include "gfxip/gfx8/si_ci_vi_merged_enum.h"
|
||||
#include "gfxip/gfx8/si_pm4defs.h"
|
||||
#include "thread_trace.h"
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
typedef struct Gfx8ThreadTraceCfgRegs {
|
||||
// Size of thread trace buffer
|
||||
regSQ_THREAD_TRACE_SIZE ttRegSize;
|
||||
// Thread trace mode
|
||||
regSQ_THREAD_TRACE_MODE ttRegMode;
|
||||
// Thread trace wave mask
|
||||
regSQ_THREAD_TRACE_MASK ttRegMask;
|
||||
// Thread trace token mask
|
||||
regSQ_THREAD_TRACE_TOKEN_MASK ttRegTokenMask;
|
||||
// Thread trace token mask2
|
||||
regSQ_THREAD_TRACE_TOKEN_MASK2__VI ttRegTokenMask2;
|
||||
// Thread trace perf mask
|
||||
regSQ_THREAD_TRACE_PERF_MASK ttRegPerfMask;
|
||||
} Gfx8ThreadTraceCfgRegs;
|
||||
|
||||
// Encapsulates the various Api and structures used to enable a thread
|
||||
// trace session and collect its data
|
||||
class Gfx8ThreadTrace : public ThreadTrace {
|
||||
public:
|
||||
Gfx8ThreadTrace();
|
||||
|
||||
~Gfx8ThreadTrace();
|
||||
|
||||
// Initializes various data structures and handles that
|
||||
// are needed to support a thread trace session
|
||||
bool Init(const ThreadTraceConfig* config);
|
||||
|
||||
// Builds Pm4 command stream to program hardware registers that
|
||||
// enable a thread trace session, including the issue of an event
|
||||
// to begin thread session
|
||||
void BeginSession(pm4_profile::DefaultCmdBuf* cmdBuff, pm4_profile::CommandWriter* cmdWriter);
|
||||
|
||||
// Builds Pm4 command stream to program hardware registers that
|
||||
// disable a thread trace session, including the issue of an event
|
||||
// to stop currently ongoing thread session
|
||||
void StopSession(pm4_profile::DefaultCmdBuf* cmdBuff, pm4_profile::CommandWriter* cmdWriter);
|
||||
|
||||
// Validates that thread trace session ran correctly i.e. did not
|
||||
// encounter any errors.
|
||||
bool Validate();
|
||||
|
||||
// Initializes the handle of buffer used to collect SQTT data
|
||||
void setSqttDataBuff(uint8_t* sqttBuffer, uint32_t sqttBuffSz);
|
||||
|
||||
// Initializes the handle of buffer used to read control data of SQTT
|
||||
void setSqttCtrlBuff(uint32_t* ctrlBuff) { ttStatus_ = ctrlBuff; }
|
||||
|
||||
// Return status info size
|
||||
uint32_t StatusSizeInfo() const { return TT_STATUS_IDX_MAX * sizeof(uint32_t) * numSE_; }
|
||||
|
||||
// Return number of Shader Engines
|
||||
uint32_t getNumSe() { return numSE_; }
|
||||
|
||||
private:
|
||||
// Holds number of Shader Engines present on device
|
||||
uint32_t numSE_;
|
||||
|
||||
// Thread traces status register indices to determine
|
||||
// status of thread trace run
|
||||
typedef enum {
|
||||
TT_STATUS_IDX_STATUS = 0,
|
||||
TT_STATUS_IDX_CNTR = 1,
|
||||
TT_STATUS_IDX_WPTR = 2,
|
||||
TT_STATUS_IDX_MAX = 3
|
||||
} TTStatusReg;
|
||||
|
||||
// A list of tuples of TT_STATUS_IDX_MAX size,
|
||||
// giving status of thread trace
|
||||
uint32_t* ttStatus_;
|
||||
|
||||
// Size of thread trace buffer per shader engine
|
||||
uint32_t ttBuffSize_;
|
||||
|
||||
// Handles of Device memory used for thread trace
|
||||
std::vector<uint64_t> devMemList_;
|
||||
|
||||
// Registers that need to be programmed for Thread Trace
|
||||
Gfx8ThreadTraceCfgRegs ttCfgRegs_;
|
||||
|
||||
// Initializes thread trace registers with default parameters.
|
||||
// These are potentially updated based on updates to thread trace
|
||||
// configuration object by user
|
||||
void InitThreadTraceCfgRegs();
|
||||
};
|
||||
|
||||
} // pm4_profile
|
||||
|
||||
#endif // _GFX8_THREAD_TRACE_H_
|
||||
@@ -1,348 +0,0 @@
|
||||
#include "gfx9_thread_trace.h"
|
||||
|
||||
/// @brief Returns the lower 32-bits of a value (truncating conversion)
inline uint32_t Low32(uint64_t u) { return static_cast<uint32_t>(u); }
|
||||
|
||||
/// @brief Returns the upper 32-bits of a value
inline uint32_t High32(uint64_t u) { return static_cast<uint32_t>(u >> 32); }
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
// Constructs the object; the only state set here is the number of shader
// engines, which is fixed to 4.
Gfx9ThreadTrace::Gfx9ThreadTrace() {
  numSE_ = 4;  // number of shader engines
}
|
||||
|
||||
// Destructor with an empty body: no explicit cleanup is performed here.
Gfx9ThreadTrace::~Gfx9ThreadTrace() {}
|
||||
|
||||
bool Gfx9ThreadTrace::Init(const ThreadTraceConfig* config) {
|
||||
// Initialize SQTT Configuration and Register objects
|
||||
if (!ThreadTrace::Init(config)) return false;
|
||||
InitThreadTraceCfgRegs();
|
||||
return true;
|
||||
}
|
||||
|
||||
void Gfx9ThreadTrace::InitThreadTraceCfgRegs() {
|
||||
// Indicates the size of buffer to use per Shader Engine instance.
|
||||
// The size is specified in terms of 4KB blocks
|
||||
ttCfgRegs_.ttRegSize.u32All = 0;
|
||||
|
||||
// Indicates various attributes of a thread trace session.
|
||||
//
|
||||
// MASK_CS: Which shader types should be enabled for data collection
|
||||
// Enable CS Shader types.
|
||||
//
|
||||
// WRAP: How trace buffer should be used as a ring buffer or as a linear
|
||||
// buffer - Disable WRAP mode i.e use it as a linear buffer
|
||||
//
|
||||
// MODE: Enables a thread trace session
|
||||
//
|
||||
// CAPTURE_MODE: When thread trace data is collected immediately after MODE
|
||||
// is enabled or wait until a Thread Trace Start event is received
|
||||
//
|
||||
// AUTOFLUSH_EN: Flush thread trace data to buffer often automatically
|
||||
//
|
||||
ttCfgRegs_.ttRegMode.u32All = 0;
|
||||
ttCfgRegs_.ttRegMode.bits.WRAP = 0;
|
||||
ttCfgRegs_.ttRegMode.bits.CAPTURE_MODE = 0;
|
||||
ttCfgRegs_.ttRegMode.bits.MASK_CS = 1;
|
||||
ttCfgRegs_.ttRegMode.bits.AUTOFLUSH_EN = 1;
|
||||
ttCfgRegs_.ttRegMode.bits.MODE = SQ_THREAD_TRACE_MODE_OFF;
|
||||
|
||||
// Enable Thread Trace for all VM Id's
|
||||
// Enable all of the SIMD's of the compute unit
|
||||
// Enable Compute Unit (CU) at index Zero to be used for fine-grained data
|
||||
// Enable Shader Array (SH) at index Zero to be used for fine-grained data
|
||||
//
|
||||
// @note: The REG_STALL_EN, SPI_STALL_EN and SQ_STALL_EN bits are enabled
// below. They are useful if we wish to program buffer throttling.
|
||||
//
|
||||
ttCfgRegs_.ttRegMask.u32All = 0;
|
||||
ttCfgRegs_.ttRegMask.bits.SH_SEL = 0x0;
|
||||
ttCfgRegs_.ttRegMask.bits.SIMD_EN = 0xF;
|
||||
ttCfgRegs_.ttRegMask.bits.CU_SEL = GetCuId();
|
||||
ttCfgRegs_.ttRegMask.bits.SQ_STALL_EN = 0x1;
|
||||
ttCfgRegs_.ttRegMask.bits.SPI_STALL_EN = 0x1;
|
||||
ttCfgRegs_.ttRegMask.bits.REG_STALL_EN = 0x1;
|
||||
ttCfgRegs_.ttRegMask.bits.VM_ID_MASK = GetVmId();
|
||||
|
||||
// Override Mask value if a user value is available
|
||||
uint32_t ttMask = GetMask();
|
||||
if (ttMask) {
|
||||
ttCfgRegs_.ttRegMask.u32All = ttMask;
|
||||
}
|
||||
|
||||
// Mask of compute units to get thread trace data from
|
||||
ttCfgRegs_.ttRegPerfMask.u32All = 0;
|
||||
ttCfgRegs_.ttRegPerfMask.bits.SH0_MASK = 0xFFFF;
|
||||
ttCfgRegs_.ttRegPerfMask.bits.SH1_MASK = 0xFFFF;
|
||||
|
||||
// Indicate the different TT messages/tokens that should be enabled/logged
|
||||
// Indicate the different TT tokens that specify register operations to be logged
|
||||
ttCfgRegs_.ttRegTokenMask.u32All = 0;
|
||||
ttCfgRegs_.ttRegTokenMask.bits.REG_MASK = 0xFF;
|
||||
ttCfgRegs_.ttRegTokenMask.bits.TOKEN_MASK = 0xFFFF;
|
||||
ttCfgRegs_.ttRegTokenMask.bits.REG_DROP_ON_STALL = 0x1;
|
||||
|
||||
// Override TokenMask1 value if a user value is available
|
||||
uint32_t tokenMask1 = GetTokenMask();
|
||||
if (tokenMask1) {
|
||||
ttCfgRegs_.ttRegTokenMask.u32All = tokenMask1;
|
||||
}
|
||||
|
||||
// Indicate the different TT tokens that specify instruction operations to be logged
|
||||
// Disabling specifically instruction operations updating Program Counter (PC).
|
||||
// @note: The field is defined in the spec incorrectly as a 16-bit value
|
||||
ttCfgRegs_.ttRegTokenMask2.u32All = 0;
|
||||
ttCfgRegs_.ttRegTokenMask2.bits.INST_MASK = 0xFFFFFF7F;
|
||||
|
||||
// Override TokenMask2 value if a user value is available
|
||||
uint32_t tokenMask2 = GetTokenMask2();
|
||||
if (tokenMask2) {
|
||||
ttCfgRegs_.ttRegTokenMask2.u32All = tokenMask2;
|
||||
}
|
||||
}
|
||||
|
||||
void Gfx9ThreadTrace::setSqttDataBuff(uint8_t* sqttBuffer, uint32_t sqttBuffSz) {
|
||||
// Compute the size of buffer available for each shader engine
|
||||
ttBuffSize_ = sqttBuffSz / numSE_;
|
||||
|
||||
// Populate the sqtt buffer array submitted to device
|
||||
for (int idx = 0; idx < numSE_; idx++) {
|
||||
uint64_t sqttSEAddr = uint64_t(sqttBuffer + (ttBuffSize_ * idx));
|
||||
devMemList_.push_back(sqttSEAddr);
|
||||
}
|
||||
|
||||
// Update the size bit-field of sqtt ctrl register
|
||||
ttCfgRegs_.ttRegSize.bits.SIZE = ttBuffSize_ >> TT_BUFF_ALIGN_SHIFT;
|
||||
}
|
||||
|
||||
// Appends to cmdBuff the command sequence that programs the SQ thread trace
// registers, points every shader engine at its slice of the trace buffer and
// finally switches the trace mode on.
//
// cmdBuff   - command buffer the packets are appended to
// cmdWriter - builder used to encode every packet
//
// devMemList_ is indexed once per shader engine without bounds checking, so
// setSqttDataBuff() has to be called before this method.
//
// The writes to COMPUTE_THREAD_TRACE_ENABLE, RLC_PERFMON_CLK_CNTL and
// SQ_THREAD_TRACE_BASE2 were left commented out by the original author and
// are not issued.
void Gfx9ThreadTrace::BeginSession(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter) {
  // Make GRBM broadcast register writes to all shader engines
  regGRBM_GFX_INDEX grbmIndex;
  grbmIndex.u32All = 0;
  grbmIndex.bitfields.SH_BROADCAST_WRITES = 1;
  grbmIndex.bitfields.SE_BROADCAST_WRITES = 1;
  grbmIndex.bitfields.INSTANCE_BROADCAST_WRITES = 1;
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmGRBM_GFX_INDEX, grbmIndex.u32All);

  // Wait-idle packet (documented by the author as a CSPartialFlush including
  // a cache flush)
  cmdWriter->BuildWriteWaitIdlePacket(cmdBuff);

  // Broadcast the session wide register images prepared by
  // InitThreadTraceCfgRegs(): mask, perf mask, token mask, token mask 2 and
  // the mode register (still OFF at this point).
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_MASK,
                                        ttCfgRegs_.ttRegMask.u32All);
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_PERF_MASK,
                                        ttCfgRegs_.ttRegPerfMask.u32All);
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_TOKEN_MASK,
                                        ttCfgRegs_.ttRegTokenMask.u32All);
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_TOKEN_MASK2,
                                        ttCfgRegs_.ttRegTokenMask2.u32All);
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_MODE,
                                        ttCfgRegs_.ttRegMode.u32All);

  // The high water mark is only programmed when some form of stalling or
  // drop-on-stall is enabled.
  const bool stallConfigured = ttCfgRegs_.ttRegMask.bits.SQ_STALL_EN ||
                               ttCfgRegs_.ttRegMask.bits.SPI_STALL_EN ||
                               ttCfgRegs_.ttRegMask.bits.REG_STALL_EN ||
                               ttCfgRegs_.ttRegTokenMask.bits.REG_DROP_ON_STALL;
  if (stallConfigured) {
    cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_HIWATER, 0x06);
  }

  // The control register value is the same for every shader engine: only the
  // buffer reset bit is set.
  regSQ_THREAD_TRACE_CTRL resetCtrl = {};
  resetCtrl.u32All = 0;
  resetCtrl.bits.RESET_BUFFER = 1;

  // Per shader engine programming: buffer base (4KB aligned per the thread
  // trace specification), buffer size and buffer reset.
  for (uint32_t se = 0; se < numSE_; ++se) {
    // Direct the following writes to this shader engine only
    grbmIndex.bitfields.SH_INDEX = 0;
    grbmIndex.bitfields.SE_INDEX = se;
    grbmIndex.bitfields.SH_BROADCAST_WRITES = 0;
    grbmIndex.bitfields.SE_BROADCAST_WRITES = 0;
    cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmGRBM_GFX_INDEX, grbmIndex.u32All);

    // Base address in units of (1 << TT_BUFF_ALIGN_SHIFT) bytes; only the
    // low 32 bits of the shifted value are programmed.
    const uint64_t alignedBase = devMemList_[se] >> TT_BUFF_ALIGN_SHIFT;
    regSQ_THREAD_TRACE_BASE baseReg = {};
    baseReg.bits.ADDR = Low32(alignedBase);
    cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_BASE, baseReg.u32All);

    cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_SIZE,
                                          ttCfgRegs_.ttRegSize.u32All);

    cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_CTRL, resetCtrl.u32All);
  }

  // Back to broadcast mode for the remaining writes
  grbmIndex.bitfields.SH_BROADCAST_WRITES = 1;
  grbmIndex.bitfields.SE_BROADCAST_WRITES = 1;
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmGRBM_GFX_INDEX, grbmIndex.u32All);

  cmdWriter->BuildWriteWaitIdlePacket(cmdBuff);

  // Switch the trace on. The cached mode image is restored to OFF right after
  // the packet is built so that StopSession() can write it back unchanged.
  ttCfgRegs_.ttRegMode.bits.MODE = SQ_THREAD_TRACE_MODE_ON;
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_MODE,
                                        ttCfgRegs_.ttRegMode.u32All);
  ttCfgRegs_.ttRegMode.bits.MODE = SQ_THREAD_TRACE_MODE_OFF;

  cmdWriter->BuildWriteWaitIdlePacket(cmdBuff);
}
|
||||
|
||||
// Appends to cmdBuff the command sequence that switches thread trace off,
// copies the status, counter and write pointer registers of every shader
// engine into ttStatus_, flushes caches and resets the trace buffer.
//
// cmdBuff   - command buffer the packets are appended to
// cmdWriter - builder used to encode every packet
//
// ttStatus_ must point at a buffer of at least StatusSizeInfo() bytes (see
// setSqttCtrlBuff()); each engine owns TT_STATUS_IDX_MAX consecutive dwords.
//
// The writes to COMPUTE_THREAD_TRACE_ENABLE and RLC_PERFMON_CLK_CNTL were
// left commented out by the original author and are not issued.
void Gfx9ThreadTrace::StopSession(DefaultCmdBuf* cmdBuff, CommandWriter* cmdWriter) {
  // Make GRBM broadcast register writes to all shader engines
  regGRBM_GFX_INDEX grbmIndex;
  grbmIndex.u32All = 0;
  grbmIndex.bitfields.SH_BROADCAST_WRITES = 1;
  grbmIndex.bitfields.SE_BROADCAST_WRITES = 1;
  grbmIndex.bitfields.INSTANCE_BROADCAST_WRITES = 1;
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmGRBM_GFX_INDEX, grbmIndex.u32All);

  // Wait-idle packet (documented by the author as a CSPartialFlush including
  // a cache flush)
  cmdWriter->BuildWriteWaitIdlePacket(cmdBuff);

  // The cached mode image is kept with MODE == OFF outside of BeginSession(),
  // so writing it disables the trace.
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_MODE,
                                        ttCfgRegs_.ttRegMode.u32All);

  cmdWriter->BuildWriteWaitIdlePacket(cmdBuff);

  // Read back the Status, Counter and Write Pointer registers of every SE
  for (uint32_t se = 0; se < numSE_; ++se) {
    // Direct the following accesses to this shader engine only
    grbmIndex.bitfields.SH_INDEX = 0;
    grbmIndex.bitfields.SE_INDEX = se;
    grbmIndex.bitfields.SH_BROADCAST_WRITES = 0;
    grbmIndex.bitfields.SE_BROADCAST_WRITES = 0;
    cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmGRBM_GFX_INDEX, grbmIndex.u32All);

    // Wait on the status register until the SQTT event has completed.
    // NOTE(review): with mask 0x40000000 the masked status can only be 0 or
    // 0x40000000, so a comparison against 0x01 looks suspicious - confirm
    // against the semantics of BuildWaitRegMemCommand().
    bool compareEqual = false;
    bool inMemorySpace = false;
    uint32_t referenceValue = 0x01;
    uint32_t statusMask = 0x40000000L;
    uint32_t statusRegOffset = mmSQ_THREAD_TRACE_STATUS - UCONFIG_SPACE_START;
    cmdWriter->BuildWaitRegMemCommand(cmdBuff, inMemorySpace, statusRegOffset, compareEqual,
                                      statusMask, referenceValue);

    // First dword of the status tuple that belongs to this shader engine
    uint32_t* seStatus = ttStatus_ + (TT_STATUS_IDX_MAX * se);

    cmdWriter->BuildCopyDataPacket(cmdBuff, COPY_DATA_SEL_SRC_SYS_PERF_COUNTER,
                                   mmSQ_THREAD_TRACE_STATUS, 0,
                                   seStatus + TT_STATUS_IDX_STATUS,
                                   COPY_DATA_SEL_COUNT_1DW, true);

    cmdWriter->BuildCopyDataPacket(cmdBuff, COPY_DATA_SEL_SRC_SYS_PERF_COUNTER,
                                   mmSQ_THREAD_TRACE_CNTR, 0,
                                   seStatus + TT_STATUS_IDX_CNTR,
                                   COPY_DATA_SEL_COUNT_1DW, true);

    cmdWriter->BuildCopyDataPacket(cmdBuff, COPY_DATA_SEL_SRC_SYS_PERF_COUNTER,
                                   mmSQ_THREAD_TRACE_WPTR, 0,
                                   seStatus + TT_STATUS_IDX_WPTR,
                                   COPY_DATA_SEL_COUNT_1DW, true);
  }

  // Back to broadcast mode for the remaining writes
  grbmIndex.bitfields.SH_BROADCAST_WRITES = 1;
  grbmIndex.bitfields.SE_BROADCAST_WRITES = 1;
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmGRBM_GFX_INDEX, grbmIndex.u32All);

  // Request a flush of the L1, L2, instruction and constant caches
  FlushCacheOptions cacheFlush;
  cacheFlush.l1 = true;
  cacheFlush.l2 = true;
  cacheFlush.icache = true;
  cacheFlush.kcache = true;
  cmdWriter->BuildFlushCacheCmd(cmdBuff, &cacheFlush, NULL, 0);

  // Program a zero sized thread trace buffer
  regSQ_THREAD_TRACE_SIZE zeroSize = {0};
  zeroSize.u32All = 0;
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_SIZE, zeroSize.u32All);

  // Reset the thread trace buffer through the control register
  regSQ_THREAD_TRACE_CTRL resetCtrl = {};
  resetCtrl.u32All = 0;
  resetCtrl.bits.RESET_BUFFER = 1;
  cmdWriter->BuildWriteUConfigRegPacket(cmdBuff, mmSQ_THREAD_TRACE_CTRL, resetCtrl.u32All);

  cmdWriter->BuildWriteWaitIdlePacket(cmdBuff);
}
|
||||
|
||||
bool Gfx9ThreadTrace::Validate() {
|
||||
// Iterate through the list of SE to verify
|
||||
for (int idx = 0; idx < numSE_; idx++) {
|
||||
// Determine if the buffer has wrapped
|
||||
uint32_t statusIdx = ((TT_STATUS_IDX_MAX * idx) + TT_STATUS_IDX_STATUS);
|
||||
if (ttStatus_[statusIdx] & 0x80000000) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Adjust the value of Write Ptr which is bits [29-0]
|
||||
uint32_t wptrIdx = ((TT_STATUS_IDX_MAX * idx) + TT_STATUS_IDX_WPTR);
|
||||
ttStatus_[wptrIdx] = (ttStatus_[wptrIdx] & TT_WRITE_PTR_MASK);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // pm4_profile
|
||||
@@ -1,104 +0,0 @@
|
||||
#ifndef _GFX9_THREAD_TRACE_H_
|
||||
#define _GFX9_THREAD_TRACE_H_
|
||||
|
||||
#include "gfxip/gfx9/gfx9_registers.h"
|
||||
#include "gfxip/gfx9/gfx9_typedef.h"
|
||||
#include "gfxip/gfx9/gfx9_enum.h"
|
||||
#include "gfxip/gfx9/gfx9_offset.h"
|
||||
#include "gfxip/gfx9/gfx9_pm4defs.h"
|
||||
#include "thread_trace.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
using namespace pm4_profile::gfx9;
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
typedef struct Gfx9ThreadTraceCfgRegs {
|
||||
// Size of thread trace buffer
|
||||
regSQ_THREAD_TRACE_SIZE ttRegSize;
|
||||
// Thread trace mode
|
||||
regSQ_THREAD_TRACE_MODE ttRegMode;
|
||||
// Thread trace wave mask
|
||||
regSQ_THREAD_TRACE_MASK ttRegMask;
|
||||
// Thread trace token mask
|
||||
regSQ_THREAD_TRACE_TOKEN_MASK ttRegTokenMask;
|
||||
// Thread trace token mask2
|
||||
regSQ_THREAD_TRACE_TOKEN_MASK2 ttRegTokenMask2;
|
||||
// Thread trace perf mask
|
||||
regSQ_THREAD_TRACE_PERF_MASK ttRegPerfMask;
|
||||
} Gfx9ThreadTraceCfgRegs;
|
||||
|
||||
// Encapsulates the various Api and structures used to enable a thread
|
||||
// trace session and collect its data
|
||||
class Gfx9ThreadTrace : public ThreadTrace {
|
||||
public:
|
||||
Gfx9ThreadTrace();
|
||||
|
||||
~Gfx9ThreadTrace();
|
||||
|
||||
// Initializes various data structures and handles that
|
||||
// are needed to support a thread trace session
|
||||
bool Init(const ThreadTraceConfig* config);
|
||||
|
||||
// Builds Pm4 command stream to program hardware registers that
|
||||
// enable a thread trace session, including the issue of an event
|
||||
// to begin thread session
|
||||
void BeginSession(pm4_profile::DefaultCmdBuf* cmdBuff, pm4_profile::CommandWriter* cmdWriter);
|
||||
|
||||
// Builds Pm4 command stream to program hardware registers that
|
||||
// disable a thread trace session, including the issue of an event
|
||||
// to stop currently ongoing thread session
|
||||
void StopSession(pm4_profile::DefaultCmdBuf* cmdBuff, pm4_profile::CommandWriter* cmdWriter);
|
||||
|
||||
// Validates that thread trace session ran correctly i.e. did not
|
||||
// encounter any errors.
|
||||
bool Validate();
|
||||
|
||||
// Initializes the handle of buffer used to collect SQTT data
|
||||
void setSqttDataBuff(uint8_t* sqttBuffer, uint32_t sqttBuffSz);
|
||||
|
||||
// Initializes the handle of buffer used to read control data of SQTT
|
||||
void setSqttCtrlBuff(uint32_t* ctrlBuff) { ttStatus_ = ctrlBuff; }
|
||||
|
||||
// Return status info size
|
||||
uint32_t StatusSizeInfo() const { return TT_STATUS_IDX_MAX * sizeof(uint32_t) * numSE_; }
|
||||
|
||||
// Return number of Shader Engines
|
||||
uint32_t getNumSe() { return numSE_; }
|
||||
|
||||
private:
|
||||
// Holds number of Shader Engines present on device
|
||||
uint32_t numSE_;
|
||||
|
||||
// Thread traces status register indices to determine
|
||||
// status of thread trace run
|
||||
typedef enum {
|
||||
TT_STATUS_IDX_STATUS = 0,
|
||||
TT_STATUS_IDX_CNTR = 1,
|
||||
TT_STATUS_IDX_WPTR = 2,
|
||||
TT_STATUS_IDX_MAX = 3
|
||||
} TTStatusReg;
|
||||
|
||||
// A list of tuples of TT_STATUS_IDX_MAX size,
|
||||
// giving status of thread trace
|
||||
uint32_t* ttStatus_;
|
||||
|
||||
// Size of thread trace buffer per shader engine
|
||||
uint32_t ttBuffSize_;
|
||||
|
||||
// Handles of Device memory used for thread trace
|
||||
std::vector<uint64_t> devMemList_;
|
||||
|
||||
// Registers that need to be programmed for Thread Trace
|
||||
Gfx9ThreadTraceCfgRegs ttCfgRegs_;
|
||||
|
||||
// Initializes thread trace registers with default parameters.
|
||||
// These are potentially updated based on updates to thread trace
|
||||
// configuration object by user
|
||||
void InitThreadTraceCfgRegs();
|
||||
};
|
||||
|
||||
} // pm4_profile
|
||||
|
||||
#endif // _GFX9_THREAD_TRACE_H_
|
||||
@@ -1,59 +0,0 @@
|
||||
#include <assert.h>
|
||||
|
||||
#include "thread_trace.h"
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
// Stores the thread trace configuration.
//
// config - configuration to copy; when it is a null pointer the stored
//          configuration is reset through InitThreadTraceConfig() instead
//
// Always returns true.
bool ThreadTrace::Init(const ThreadTraceConfig* config) {
  if (!config) {
    InitThreadTraceConfig(&ttConfig_);
    return true;
  }

  ttConfig_ = *config;
  return true;
}
|
||||
|
||||
// Resets a configuration object to its defaults: every field (target CU,
// VM id mask, mask, token mask and token mask 2) becomes zero.
//
// config - object to reset; must not be null
void ThreadTrace::InitThreadTraceConfig(ThreadTraceConfig* config) const {
  // Value-initialization zeroes all members of the plain struct, which makes
  // both a byte-wise clear and per-field zero stores unnecessary.
  *config = ThreadTraceConfig();
}
|
||||
|
||||
uint8_t ThreadTrace::GetCuId() {
|
||||
uint32_t cuId = ttConfig_.threadTraceTargetCu;
|
||||
assert((cuId <= 15) && "Cu Id must be between 0 and 15");
|
||||
return cuId;
|
||||
}
|
||||
|
||||
uint8_t ThreadTrace::GetVmId() {
|
||||
uint32_t vmId = ttConfig_.threadTraceVmIdMask;
|
||||
assert((vmId <= 2) && "VmId must be between 0 and 2");
|
||||
return vmId;
|
||||
}
|
||||
|
||||
uint32_t ThreadTrace::GetMask() {
|
||||
uint32_t ttMask = ttConfig_.threadTraceMask;
|
||||
const uint32_t validMask = 0x00C0D0;
|
||||
assert(((ttMask & validMask) == 0) && "Mask should have bits [4,6,7] set to Zero");
|
||||
return ttMask;
|
||||
}
|
||||
|
||||
uint32_t ThreadTrace::GetTokenMask() {
|
||||
uint32_t tokenMask = ttConfig_.threadTraceTokenMask;
|
||||
const uint32_t validMask = 0xFF000000;
|
||||
assert(((tokenMask & validMask) == 0) && "TokenMask should have bits [31:25] set to Zero");
|
||||
return tokenMask;
|
||||
}
|
||||
|
||||
uint32_t ThreadTrace::GetTokenMask2() {
|
||||
uint32_t tokenMask2 = ttConfig_.threadTraceTokenMask2;
|
||||
const uint32_t validMask = 0xFFFF0000;
|
||||
assert(((tokenMask2 & validMask) == 0) && "TokenMask2 should have bits [31:16] set to Zero");
|
||||
return tokenMask2;
|
||||
}
|
||||
|
||||
} // pm4_profile
|
||||
@@ -1,97 +0,0 @@
|
||||
#ifndef _THREAD_TRACE_H_
|
||||
#define _THREAD_TRACE_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "cmdwriter.h"
|
||||
|
||||
// Move them as static variables later on
|
||||
#define TT_WRITE_PTR_MASK (0x3FFFFFFF)
|
||||
// Size of block, in bytes, per increment in WPTR
|
||||
#define TT_WRITE_PTR_BLK (32)
|
||||
// Factor by which to shift buffer address
|
||||
#define TT_BUFF_ALIGN_SHIFT (12)
|
||||
|
||||
namespace pm4_profile {
|
||||
|
||||
// User tunable parameters of a thread trace session. A zero mask, token mask
// or token mask 2 means "keep the implementation default".
struct ThreadTraceConfig {
  uint32_t threadTraceTargetCu;    // compute unit to trace
  uint32_t threadTraceVmIdMask;    // VM id mask
  uint32_t threadTraceMask;        // override for the thread trace mask
  uint32_t threadTraceTokenMask;   // override for the token mask
  uint32_t threadTraceTokenMask2;  // override for the token mask 2
};
|
||||
|
||||
// Encapsulates the various Api and structures that are used to enable
|
||||
// a thread trace session and collect its data. Implementations of this
|
||||
// interface program device specific registers to realize the functionality
|
||||
class ThreadTrace {
|
||||
// Holds Thread Trace configuration information
|
||||
// @note: Currently not used i.e. is not exposed to users
|
||||
ThreadTraceConfig ttConfig_;
|
||||
|
||||
public:
|
||||
// Destructor of the thread trace service handle
|
||||
virtual ~ThreadTrace(){};
|
||||
|
||||
// Obtain the CU id to use for thread tracing
|
||||
uint8_t GetCuId();
|
||||
|
||||
// Obtain the VM id to use for thread tracing
|
||||
uint8_t GetVmId();
|
||||
|
||||
// Obtain the Mask to use for thread tracing
|
||||
uint32_t GetMask();
|
||||
|
||||
// Obtain the Token Mask 1 to use for thread tracing
|
||||
uint32_t GetTokenMask();
|
||||
|
||||
// Obtain the Token Mask 2 to use for thread tracing
|
||||
uint32_t GetTokenMask2();
|
||||
|
||||
// Initializes various data structures and handles that
|
||||
// are needed to support a thread trace session
|
||||
virtual bool Init(const ThreadTraceConfig* config);
|
||||
|
||||
// Initializes thread trace configuration object with default
|
||||
// parameters, that could potentially be overriden by user
|
||||
// @note: Currently not used i.e. is not exposed to users
|
||||
virtual void InitThreadTraceConfig(ThreadTraceConfig* config) const;
|
||||
|
||||
// Allows user to configure various parameters of a thread trace session
|
||||
// @note: Currently not used i.e. is not exposed to users
|
||||
bool Config(uint32_t key, uint32_t value) { return true; };
|
||||
|
||||
// Builds Pm4 command stream to program hardware registers that
|
||||
// enable a thread trace session, including the issue of an event
|
||||
// to begin thread session
|
||||
virtual void BeginSession(pm4_profile::DefaultCmdBuf* cmdBuff,
|
||||
pm4_profile::CommandWriter* cmdWriter) = 0;
|
||||
|
||||
// Builds Pm4 command stream to program hardware registers that
|
||||
// disable a thread trace session, including the issue of an event
|
||||
// to stop currently ongoing thread session
|
||||
virtual void StopSession(pm4_profile::DefaultCmdBuf* cmdBuff,
|
||||
pm4_profile::CommandWriter* cmdWriter) = 0;
|
||||
|
||||
// Validates that thread trace session ran correctly i.e. did not
|
||||
// encounter any errors.
|
||||
virtual bool Validate() = 0;
|
||||
|
||||
// Initializes the handle of buffer used to collect SQTT data
|
||||
virtual void setSqttDataBuff(uint8_t* sqttBuffer, uint32_t sqttBuffSz) = 0;
|
||||
|
||||
// Initializes the handle of buffer used to read control data of SQTT
|
||||
virtual void setSqttCtrlBuff(uint32_t* ctrlBuff) = 0;
|
||||
|
||||
// Return number of Shader Engines
|
||||
virtual uint32_t getNumSe() = 0;
|
||||
|
||||
// Return status info size
|
||||
virtual uint32_t StatusSizeInfo() const = 0;
|
||||
};
|
||||
|
||||
} // pm4_profile
|
||||
|
||||
#endif // _THREAD_TRACE_H_
|
||||
@@ -1,48 +0,0 @@
|
||||
#
# Header files include path(s).
#
include_directories ( $ENV{ROCR_INC_DIR} )
include_directories ( ${API_DIR} )
include_directories ( ${TEST_DIR}/util )
include_directories ( ${TEST_DIR}/ctrl )

#
# Specify the directory containing the libraries of HsaRt
# to be linked against for building a Hsa Perf application
#
link_directories ( $ENV{ROCR_LIB_DIR} )
find_library ( ROCR_LIB NAMES hsa-runtime64 PATHS $ENV{ROCR_LIB_DIR} )

#
# Set Name for Common library and build it as a
# static library to be linked with others
#
set ( UTIL_LIB "util${ONLY64STR}" )
add_subdirectory ( ${TEST_DIR}/util "${PROJECT_BINARY_DIR}/util" )

#
# Build the test library
#
set ( TEST_NAME simple_convolution )
include_directories ( ${TEST_DIR}/${TEST_NAME} )
set ( LIB_NAME "${TEST_NAME}${ONLY64STR}" )
add_library ( ${LIB_NAME} STATIC ${TEST_DIR}/${TEST_NAME}/${TEST_NAME}.cpp )
# PUBLIC keeps the libraries visible to whatever links the static library,
# which is what the keyword-less signature did.
target_link_libraries ( ${LIB_NAME} PUBLIC c stdc++ )
set ( TEST_LIBS ${LIB_NAME} )

#
# Build the test control
#
set ( SRC_LIST
  ${TEST_DIR}/ctrl/test.cpp
  ${TEST_DIR}/ctrl/test_pmgr.cpp
  ${TEST_DIR}/ctrl/test_hsa.cpp
)
set ( LIB_LIST ${TEST_LIBS} ${UTIL_LIB} ${CORE_UTILS_LIB} ${ROCR_LIB} )
set ( EXE_NAME "ctrl" )
add_executable ( ${EXE_NAME} ${SRC_LIST} )
target_link_libraries ( ${EXE_NAME} PRIVATE ${LIB_LIST} c stdc++ dl pthread rt atomic )

#
# Copy the test files into the build tree at configure time.
# file(COPY) replaces the former `sh -xc "cp ..."` calls: it does not need a
# POSIX shell, copes with spaces in the paths and keeps file permissions.
#
file ( GLOB TEST_HSACO_FILES "${TEST_DIR}/${TEST_NAME}/*.hsaco" )
if ( TEST_HSACO_FILES )
  file ( COPY ${TEST_HSACO_FILES} DESTINATION "${PROJECT_BINARY_DIR}" )
else ()
  message ( WARNING "No *.hsaco files found in ${TEST_DIR}/${TEST_NAME}" )
endif ()

if ( EXISTS "${TEST_DIR}/run.sh" )
  file ( COPY "${TEST_DIR}/run.sh" DESTINATION "${PROJECT_BINARY_DIR}" )
else ()
  message ( WARNING "Test script ${TEST_DIR}/run.sh not found" )
endif ()
|
||||
@@ -1,876 +0,0 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <climits>
|
||||
#include "hsa/hsa.h"
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
|
||||
// Returns from the enclosing function with the given status when it is not
// HSA_STATUS_SUCCESS, after printing the source line, file and status value
// to std::cout.
//
// The argument is evaluated exactly once, and the do/while(0) wrapper makes
// the macro behave like a single statement, so `RET_IF_HSA_ERR(x);` can be
// used safely as the body of an if/else. A trailing semicolon is required.
#define RET_IF_HSA_ERR(err)                                                    \
  do {                                                                         \
    const auto ret_if_hsa_err_status_ = (err);                                 \
    if (ret_if_hsa_err_status_ != HSA_STATUS_SUCCESS) {                        \
      std::cout << "hsa api call failure at line " << __LINE__ << ", file: "   \
                << __FILE__ << ". Call returned " << ret_if_hsa_err_status_    \
                << std::endl;                                                  \
      return ret_if_hsa_err_status_;                                           \
    }                                                                          \
  } while (0)
|
||||
|
||||
static const uint32_t kBinarySearchLength = 512;
|
||||
static const uint32_t kBinarySearchFindMe = 108;
|
||||
static const uint32_t kWorkGroupSize = 256;
|
||||
|
||||
// Hold all the info specific to binary search
|
||||
typedef struct BinarySearch {
|
||||
// Binary Search parameters
|
||||
uint32_t length;
|
||||
uint32_t work_group_size;
|
||||
uint32_t work_grid_size;
|
||||
uint32_t num_sub_divisions;
|
||||
uint32_t find_me;
|
||||
|
||||
// Buffers needed for this application
|
||||
uint32_t* input;
|
||||
uint32_t* input_arr;
|
||||
uint32_t* input_arr_local;
|
||||
uint32_t* output;
|
||||
// Keneral argument buffers and addresses
|
||||
void* kern_arg_buffer; // Begin of allocated memory
|
||||
// this pointer to be deallocated
|
||||
void* kern_arg_address; // Properly aligned address to be used in aql
|
||||
// packet (don't use for deallocation)
|
||||
|
||||
// Kernel code
|
||||
std::string kernel_file_name;
|
||||
std::string kernel_name;
|
||||
uint32_t kernarg_size;
|
||||
uint32_t kernarg_align;
|
||||
|
||||
// HSA/RocR objects needed for this application
|
||||
hsa_agent_t gpu_dev;
|
||||
hsa_agent_t cpu_dev;
|
||||
hsa_signal_t signal;
|
||||
hsa_queue_t* queue;
|
||||
hsa_amd_memory_pool_t cpu_pool;
|
||||
hsa_amd_memory_pool_t gpu_pool;
|
||||
hsa_amd_memory_pool_t kern_arg_pool;
|
||||
|
||||
// Other items we need to populate AQL packet
|
||||
uint64_t kernel_object;
|
||||
uint32_t group_segment_size; ///< Kernel group seg size
|
||||
uint32_t private_segment_size; ///< Kernel private seg size
|
||||
} BinarySearch;
|
||||
|
||||
// Fills in the fixed parameters of the binary search sample: the code object
// file and kernel name, the problem length, the value to look for, the work
// group size and the number of sub-divisions (length / work group size).
//
// bs - structure to initialize; must not be null. Fields not listed above
//      are left untouched.
void InitializeBinarySearch(BinarySearch* bs) {
  bs->kernel_file_name = "./binary_search_kernels.hsaco";
  bs->kernel_name = "binarySearch";

  // Use the file level constants instead of repeating their values here, so
  // the parameters cannot drift apart.
  bs->length = kBinarySearchLength;
  bs->find_me = kBinarySearchFindMe;
  bs->work_group_size = kWorkGroupSize;
  bs->num_sub_divisions = bs->length / bs->work_group_size;
}
|
||||
|
||||
// This function is called by the call-back functions used to find an agent of
|
||||
// the specified hsa_device_type_t. Note that it cannot be called directly from
|
||||
// hsa_iterate_agents() as it does not match the prototype of the call-back
|
||||
// function. It must be wrapped by a function with the correct prototype.
|
||||
//
|
||||
// Return values:
|
||||
// HSA_STATUS_INFO_BREAK -- "agent" is of the specified type (dev_type)
|
||||
// HSA_STATUS_SUCCESS -- "agent" is not of the specified type
|
||||
// Other -- Some error occurred
|
||||
static hsa_status_t FindAgent(hsa_agent_t agent, void* data,
|
||||
hsa_device_type_t dev_type) {
|
||||
if (data == nullptr) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
// See if the provided agent matches the input type (dev_type)
|
||||
hsa_device_type_t hsa_device_type;
|
||||
hsa_status_t hsa_error_code = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE,
|
||||
&hsa_device_type);
|
||||
RET_IF_HSA_ERR(hsa_error_code);
|
||||
|
||||
if (hsa_device_type == dev_type) {
|
||||
*(reinterpret_cast<hsa_agent_t*>(data)) = agent;
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// This is the call-back function used to find a GPU type agent. Note that the
|
||||
// prototype of this function is dictated by the HSA specification
|
||||
hsa_status_t FindGPUDevice(hsa_agent_t agent, void* data) {
|
||||
return FindAgent(agent, data, HSA_DEVICE_TYPE_GPU);
|
||||
}
|
||||
|
||||
// This is the call-back function used to find a CPU type agent. Note that the
|
||||
// prototype of this function is dictated by the HSA specification
|
||||
hsa_status_t FindCPUDevice(hsa_agent_t agent, void* data) {
|
||||
return FindAgent(agent, data, HSA_DEVICE_TYPE_CPU);
|
||||
}
|
||||
|
||||
// Find the CPU and GPU agents we need to run this sample, and save them in the
|
||||
// BinarySearch structure for later use.
|
||||
hsa_status_t FindDevices(BinarySearch* bs) {
|
||||
hsa_status_t err;
|
||||
|
||||
// Note that hsa_iterate_agents iterate through all known agents until
|
||||
// HSA_STATUS_SUCCESS is not returned. The call-backs are implemented such
|
||||
// that HSA_STATUS_INFO_BREAK means we found an agent of the specified type.
|
||||
// This value is returned by hsa_iterate_agents.
|
||||
bs->gpu_dev.handle = 0;
|
||||
err = hsa_iterate_agents(FindGPUDevice, &bs->gpu_dev);
|
||||
|
||||
if (err != HSA_STATUS_INFO_BREAK) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
bs->cpu_dev.handle = 0;
|
||||
err = hsa_iterate_agents(FindCPUDevice, &bs->cpu_dev);
|
||||
|
||||
if (err != HSA_STATUS_INFO_BREAK) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
if (0 == bs->gpu_dev.handle) {
|
||||
std::cout << "GPU Device is not Created properly!" << std::endl;
|
||||
RET_IF_HSA_ERR(HSA_STATUS_ERROR);
|
||||
}
|
||||
|
||||
if (0 == bs->cpu_dev.handle) {
|
||||
std::cout << "CPU Device is not Created properly!" << std::endl;
|
||||
RET_IF_HSA_ERR(HSA_STATUS_ERROR);
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// This function checks to see if the provided
|
||||
// pool has the HSA_AMD_SEGMENT_GLOBAL property. If the kern_arg flag is true,
|
||||
// the function adds an additional requirement that the pool have the
|
||||
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT property. If kern_arg is false,
|
||||
// pools must NOT have this property.
|
||||
// Upon finding a pool that meets these conditions, HSA_STATUS_INFO_BREAK is
|
||||
// returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but
|
||||
// no pool was found meeting the requirements. If an error is encountered, we
|
||||
// return that error.
|
||||
|
||||
// Note that this function does not match the required prototype for the
|
||||
// hsa_amd_agent_iterate_memory_pools call back function, and therefore must be
|
||||
// wrapped by a function with the correct prototype.
|
||||
static hsa_status_t
|
||||
FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) {
|
||||
hsa_status_t err;
|
||||
hsa_amd_segment_t segment;
|
||||
uint32_t flag;
|
||||
|
||||
if (nullptr == data) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
|
||||
&segment);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool,
|
||||
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
|
||||
|
||||
if ((karg_st == 0 && kern_arg) ||
|
||||
(karg_st != 0 && !kern_arg)) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
*(reinterpret_cast<hsa_amd_memory_pool_t*>(data)) = pool;
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
}
|
||||
|
||||
// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that
|
||||
// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that is NOT
|
||||
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT
|
||||
hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
return FindGlobalPool(pool, data, false);
|
||||
}
|
||||
|
||||
// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that
|
||||
// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that IS
|
||||
// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT
|
||||
hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
return FindGlobalPool(pool, data, true);
|
||||
}
|
||||
|
||||
// Find memory pools that we will need to allocate from for this sample
|
||||
// application. We will need memory associated with the host CPU, the GPU
|
||||
// executing the kernels, and for kernel arguments. This function will
|
||||
// save the found pools to the BinarySearch structure for use elsewhere
|
||||
// in this program.
|
||||
hsa_status_t FindPools(BinarySearch* bs) {
|
||||
hsa_status_t err;
|
||||
|
||||
err = hsa_amd_agent_iterate_memory_pools(bs->cpu_dev, FindStandardPool,
|
||||
&bs->cpu_pool);
|
||||
|
||||
if (err != HSA_STATUS_INFO_BREAK) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
err = hsa_amd_agent_iterate_memory_pools(bs->gpu_dev, FindStandardPool,
|
||||
&bs->gpu_pool);
|
||||
|
||||
if (err != HSA_STATUS_INFO_BREAK) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
err = hsa_amd_agent_iterate_memory_pools(bs->cpu_dev,
|
||||
FindKernArgPool, &bs->kern_arg_pool);
|
||||
|
||||
if (err != HSA_STATUS_INFO_BREAK) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Once the needed memory pools have been found and the BinarySearch structure
|
||||
// has been updated with these handles, this function is then used to allocate
|
||||
// memory from those pools.
|
||||
// Devices with which a pool is associated already have access to the pool.
|
||||
// However, other devices may also need to read or write to that memory. Below,
|
||||
// we see how we can grant access to other devices to address this issue.
|
||||
hsa_status_t AllocateAndInitBuffers(BinarySearch* bs) {
|
||||
hsa_status_t err;
|
||||
uint32_t out_length = 4 * sizeof(uint32_t);
|
||||
uint32_t in_length = bs->num_sub_divisions * 2 * sizeof(uint32_t);
|
||||
|
||||
// In all of these examples, we want both the cpu and gpu to have access to
|
||||
// the buffer in question. We use the array of agents below in the susequent
|
||||
// calls to hsa_amd_agents_allow_access() for this purpose.
|
||||
hsa_agent_t ag_list[2] = {bs->gpu_dev, bs->cpu_dev};
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(bs->cpu_pool, in_length, 0,
|
||||
reinterpret_cast<void**>(&bs->input));
|
||||
RET_IF_HSA_ERR(err);
|
||||
err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input);
|
||||
RET_IF_HSA_ERR(err);
|
||||
(void)memset(bs->input, 0, in_length);
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(bs->cpu_pool, out_length, 0,
|
||||
reinterpret_cast<void**>(&bs->output));
|
||||
RET_IF_HSA_ERR(err);
|
||||
err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->output);
|
||||
RET_IF_HSA_ERR(err);
|
||||
(void)memset(bs->input, 0, in_length);
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(bs->cpu_pool, in_length, 0,
|
||||
reinterpret_cast<void**>(&bs->input_arr));
|
||||
RET_IF_HSA_ERR(err);
|
||||
err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input_arr);
|
||||
RET_IF_HSA_ERR(err);
|
||||
(void)memset(bs->input, 0, in_length);
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(bs->cpu_pool, in_length, 0,
|
||||
reinterpret_cast<void**>(&bs->input_arr_local));
|
||||
RET_IF_HSA_ERR(err);
|
||||
err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->input_arr_local);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// Binary-search application specific code...
|
||||
// Initialize input buffer with random values in an increasing order
|
||||
uint32_t max = bs->length * 20;
|
||||
bs->input[0] = 0;
|
||||
|
||||
uint32_t seed = (unsigned int)time(NULL);
|
||||
srand(seed);
|
||||
|
||||
for (uint32_t i = 1; i < bs->length; ++i) {
|
||||
bs->input[i] = bs->input[i - 1] +
|
||||
static_cast<uint32_t>(max * rand_r(&seed) / static_cast<float>(RAND_MAX));
|
||||
}
|
||||
|
||||
// #define VERBOSE 1
|
||||
#ifdef VERBOSE
|
||||
std::cout << "Input array values:" << std::endl;
|
||||
|
||||
for (uint32_t i = 0; i < bs->length; ++i) {
|
||||
std::cout << "input[" << i << "] = " << bs->input[i] << " ";
|
||||
|
||||
if (i % 4 == 0) {
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
#endif
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
// The code in this function illustrates how to load a kernel from
|
||||
// pre-compiled code. The goal is to get a handle that can be later
|
||||
// used in an AQL packet and also to extract information about kernel
|
||||
// that we will need. All of the information hand kernel handle will
|
||||
// be saved to the BinarySearch structure. It will be used when we
|
||||
// populate the AQL packet.
|
||||
hsa_status_t LoadKernelFromObjFile(BinarySearch* bs) {
|
||||
hsa_status_t err;
|
||||
hsa_code_object_reader_t code_obj_rdr = {0};
|
||||
hsa_executable_t executable = {0};
|
||||
|
||||
hsa_file_t file_handle = open(bs->kernel_file_name.c_str(), O_RDONLY);
|
||||
|
||||
if (file_handle == -1) {
|
||||
std::cout << "failed to open " << bs->kernel_file_name.c_str() <<
|
||||
" at line " << __LINE__ << ", errno: " << errno << std::endl;
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
err = hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr);
|
||||
RET_IF_HSA_ERR(err);
|
||||
close(file_handle);
|
||||
|
||||
err = hsa_executable_create_alt(HSA_PROFILE_FULL,
|
||||
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, &executable);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_executable_load_agent_code_object(executable, bs->gpu_dev,
|
||||
code_obj_rdr, NULL, NULL);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_executable_freeze(executable, NULL);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
hsa_executable_symbol_t kern_sym;
|
||||
err = hsa_executable_get_symbol(executable, NULL, bs->kernel_name.c_str(),
|
||||
bs->gpu_dev, 0, &kern_sym);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_executable_symbol_get_info(kern_sym,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
|
||||
&bs->kernel_object);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_executable_symbol_get_info(kern_sym,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
|
||||
&bs->private_segment_size);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_executable_symbol_get_info(kern_sym,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
|
||||
&bs->group_segment_size);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_executable_symbol_get_info(kern_sym,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
|
||||
&bs->kernarg_size);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_executable_symbol_get_info(kern_sym,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT,
|
||||
&bs->kernarg_align);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
// This function shows how to do an asynchronous copy. We have to create a
|
||||
// signal and use the signal to notify us when the copy has completed.
|
||||
hsa_status_t AgentMemcpy(void* dst, const void* src,
|
||||
size_t size, hsa_agent_t dst_ag, hsa_agent_t src_ag) {
|
||||
hsa_signal_t s;
|
||||
hsa_status_t err;
|
||||
|
||||
err = hsa_signal_create(1, 0, NULL, &s);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_amd_memory_async_copy(dst, dst_ag, src, src_ag, size, 0, NULL, s);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
if (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1,
|
||||
UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0) {
|
||||
err = HSA_STATUS_ERROR;
|
||||
std::cout << "Async copy signal error" << std::endl;
|
||||
|
||||
RET_IF_HSA_ERR(err);
|
||||
}
|
||||
|
||||
err = hsa_signal_destroy(s);
|
||||
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
// AlignDown and AlignUp are two small helpers for snapping a value (address)
// to an alignment boundary: AlignDown yields the nearest boundary at or
// below `value`, AlignUp the nearest boundary at or above it. The masking
// relies on `alignment` being a power of two.
static intptr_t
AlignDown(intptr_t value, size_t alignment) {
  const size_t low_bits = alignment - 1;
  return (intptr_t) (value & ~low_bits);
}

static void*
AlignUp(void* value, size_t alignment) {
  // Bump past the next boundary (unless already on one), then round down.
  const uintptr_t bumped = reinterpret_cast<uintptr_t>(value) + alignment - 1;
  return reinterpret_cast<void*>(AlignDown(bumped, alignment));
}
|
||||
|
||||
// This function populates the AQL patch with the information
|
||||
// we have collected and stored in the BinarySearch structure thus far.
|
||||
void PopulateAQLPacket(BinarySearch const* bs,
|
||||
hsa_kernel_dispatch_packet_t* aql) {
|
||||
aql->header = 0; // Dummy val. for now. Set this right before doorbell ring
|
||||
aql->setup = 1;
|
||||
aql->workgroup_size_x = bs->work_group_size;
|
||||
aql->workgroup_size_y = 1;
|
||||
aql->workgroup_size_z = 1;
|
||||
aql->grid_size_x = bs->work_grid_size;
|
||||
aql->grid_size_y = 1;
|
||||
aql->grid_size_z = 1;
|
||||
aql->private_segment_size = bs->private_segment_size;
|
||||
aql->group_segment_size = bs->group_segment_size;
|
||||
aql->kernel_object = bs->kernel_object;
|
||||
aql->kernarg_address = bs->kern_arg_address;
|
||||
aql->completion_signal = bs->signal;
|
||||
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* Write everything in the provided AQL packet to the queue except the first 32
|
||||
* bits which include the header and setup fields. That should be done
|
||||
* last.
|
||||
*/
|
||||
void WriteAQLToQueue(hsa_kernel_dispatch_packet_t const* in_aql,
|
||||
hsa_queue_t* q) {
|
||||
void* queue_base = q->base_address;
|
||||
const uint32_t queue_mask = q->size - 1;
|
||||
uint64_t que_idx = hsa_queue_add_write_index_relaxed(q, 1);
|
||||
|
||||
hsa_kernel_dispatch_packet_t* queue_aql_packet;
|
||||
|
||||
queue_aql_packet =
|
||||
&(reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue_base))
|
||||
[que_idx & queue_mask];
|
||||
|
||||
queue_aql_packet->workgroup_size_x = in_aql->workgroup_size_x;
|
||||
queue_aql_packet->workgroup_size_y = in_aql->workgroup_size_y;
|
||||
queue_aql_packet->workgroup_size_z = in_aql->workgroup_size_z;
|
||||
queue_aql_packet->grid_size_x = in_aql->grid_size_x;
|
||||
queue_aql_packet->grid_size_y = in_aql->grid_size_y;
|
||||
queue_aql_packet->grid_size_z = in_aql->grid_size_z;
|
||||
queue_aql_packet->private_segment_size = in_aql->private_segment_size;
|
||||
queue_aql_packet->group_segment_size = in_aql->group_segment_size;
|
||||
queue_aql_packet->kernel_object = in_aql->kernel_object;
|
||||
queue_aql_packet->kernarg_address = in_aql->kernarg_address;
|
||||
queue_aql_packet->completion_signal = in_aql->completion_signal;
|
||||
}
|
||||
|
||||
// This function allocates memory from the kern_arg pool we already found, and
|
||||
// then sets the argument values needed by the kernel code.
|
||||
hsa_status_t AllocAndSetKernArgs(BinarySearch* bs, void* args,
|
||||
size_t arg_size, void** aql_buf_ptr) {
|
||||
void* kern_arg_buf = nullptr;
|
||||
hsa_status_t err;
|
||||
size_t buf_size;
|
||||
size_t req_align;
|
||||
|
||||
// The kernel code must be written to memory at the correct alignment. We
|
||||
// already queried the executable to get the correct alignment, which is
|
||||
// stored in bs->kernarg_align. In case the memory returned from
|
||||
// hsa_amd_memory_pool is not of the correct alignment, we request a little
|
||||
// more than what we need in case we need to adjust.
|
||||
req_align = bs->kernarg_align;
|
||||
// Allocate enough extra space for alignment adjustments if ncessary
|
||||
buf_size = arg_size + (req_align << 1);
|
||||
|
||||
err = hsa_amd_memory_pool_allocate(bs->kern_arg_pool, buf_size, 0,
|
||||
reinterpret_cast<void**>(&kern_arg_buf));
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// Address of the allocated buffer
|
||||
bs->kern_arg_buffer = kern_arg_buf;
|
||||
|
||||
// Addr. of kern arg start.
|
||||
bs->kern_arg_address = AlignUp(kern_arg_buf, req_align);
|
||||
|
||||
assert(arg_size >= bs->kernarg_size);
|
||||
assert(((uintptr_t)bs->kern_arg_address + arg_size) <
|
||||
((uintptr_t)bs->kern_arg_buffer + buf_size));
|
||||
|
||||
(void)memcpy(bs->kern_arg_address, args, arg_size);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// Make sure both the CPU and GPU can access the kernel arguments
|
||||
hsa_agent_t ag_list[2] = {bs->gpu_dev, bs->cpu_dev};
|
||||
err = hsa_amd_agents_allow_access(2, ag_list, NULL, bs->kern_arg_buffer);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// Save this info in our BinarySearch structure for later.
|
||||
*aql_buf_ptr = bs->kern_arg_address;
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// This wrapper atomically writes the provided header and setup to the
|
||||
// provided AQL packet. The provided AQL packet address should be in the
|
||||
// queue memory space.
|
||||
inline void AtomicSetPacketHeader(uint16_t header, uint16_t setup,
|
||||
hsa_kernel_dispatch_packet_t* queue_packet) {
|
||||
__atomic_store_n(reinterpret_cast<uint32_t*>(queue_packet),
|
||||
header | (setup << 16), __ATOMIC_RELEASE);
|
||||
}
|
||||
|
||||
// Once all the required data for kernel execution is collected (in this
|
||||
// application it is stored in the BinarySearch structure) we can put it in
|
||||
// an AQL packet and ring the queue door bell to tell the command processor to
|
||||
// execute it.
|
||||
hsa_status_t Run(BinarySearch* bs) {
|
||||
hsa_status_t err;
|
||||
|
||||
std::cout << "Executing kernel " << bs->kernel_name << std::endl;
|
||||
|
||||
// Adjust the size of workgroup
|
||||
// This is mostly application specific.
|
||||
if (bs->work_group_size > 64) {
|
||||
bs->work_group_size = 64;
|
||||
bs->num_sub_divisions = bs->length / bs->work_group_size;
|
||||
|
||||
if (bs->num_sub_divisions < bs->work_group_size) {
|
||||
bs->num_sub_divisions = bs->work_group_size;
|
||||
}
|
||||
|
||||
bs->work_grid_size = bs->num_sub_divisions;
|
||||
}
|
||||
|
||||
// Explanation of BinarySearch algorithm.
|
||||
/*
|
||||
* Since a plain binary search on the GPU would not achieve much benefit
|
||||
* over the GPU we are doing an N'ary search. We split the array into N
|
||||
* segments every pass and therefore get log (base N) passes instead of log
|
||||
* (base 2) passes.
|
||||
*
|
||||
* In every pass, only the thread that can potentially have the element we
|
||||
* are looking for writes to the output array. For ex: if we are looking to
|
||||
* find 4567 in the array and every thread is searching over a segment of
|
||||
* 1000 values and the input array is 1, 2, 3, 4,... then the first thread
|
||||
* is searching in 1 to 1000, the second one from 1001 to 2000, etc. The
|
||||
* first one does not write to the output. The second one doesn't either.
|
||||
* The fifth one however is from 4001 to 5000. So it can potentially have
|
||||
* the element 4567 which lies between them.
|
||||
*
|
||||
* This particular thread writes to the output the lower bound, upper bound
|
||||
* and whether the element equals the lower bound element. So, it would be
|
||||
* 4001, 5000, 0
|
||||
*
|
||||
* The next pass would subdivide 4001 to 5000 into smaller segments and
|
||||
* continue the same process from there.
|
||||
*
|
||||
* When a pass returns 1 in the third element, it means the element has been
|
||||
* found and we can stop executing the kernel. If the element is not found,
|
||||
* then the execution stops after looking at segment of size 1.
|
||||
*/
|
||||
|
||||
uint32_t global_lower_bound = 0;
|
||||
uint32_t global_upper_bound = bs->length - 1;
|
||||
uint32_t sub_div_size = (global_upper_bound - global_lower_bound + 1) /
|
||||
bs->num_sub_divisions;
|
||||
|
||||
if ((bs->input[0] > bs->find_me) ||
|
||||
(bs->input[bs->length - 1] < bs->find_me)) {
|
||||
bs->output[0] = 0;
|
||||
bs->output[1] = bs->length - 1;
|
||||
bs->output[2] = 0;
|
||||
std::cout << "Returning too early" << std::endl;
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
bs->output[3] = 1;
|
||||
|
||||
// Setup the kernel args
|
||||
// See the meta-data for the compiled OpenCL kernel code to ascertain
|
||||
// the sizes, padding and alignment required for kernel arguments.
|
||||
// This can be seen by executing
|
||||
// $ amdgcn-amd-amdhsa-readelf -aw ./binary_search_kernels.hsaco
|
||||
// The kernel code will expect the following arguments aligned as shown.
|
||||
typedef uint32_t uint2[2];
|
||||
typedef uint32_t uint4[4];
|
||||
struct __attribute__((aligned(16))) local_args_t {
|
||||
uint4* outputArray;
|
||||
uint2* sortedArray;
|
||||
uint32_t findMe;
|
||||
uint32_t pad;
|
||||
uint64_t global_offset_x;
|
||||
uint64_t global_offset_y;
|
||||
uint64_t global_offset_z;
|
||||
} local_args;
|
||||
|
||||
local_args.outputArray = reinterpret_cast<uint4*>(bs->output);
|
||||
local_args.sortedArray = reinterpret_cast<uint2*>(bs->input_arr_local);
|
||||
local_args.findMe = bs->find_me;
|
||||
local_args.global_offset_x = 0;
|
||||
local_args.global_offset_y = 0;
|
||||
local_args.global_offset_z = 0;
|
||||
|
||||
// Copy the kernel args structure into kernel arg memory
|
||||
err = AllocAndSetKernArgs(bs, &local_args, sizeof(local_args),
|
||||
&bs->kern_arg_address);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// Populate an AQL packet with the info we've gathered
|
||||
hsa_kernel_dispatch_packet_t aql;
|
||||
PopulateAQLPacket(bs, &aql);
|
||||
|
||||
uint32_t in_length = bs->num_sub_divisions * 2 * sizeof(uint32_t);
|
||||
|
||||
while ((sub_div_size > 1) && (bs->output[3] != 0)) {
|
||||
for (uint32_t i = 0 ; i < bs->num_sub_divisions; i++) {
|
||||
int idx1 = i * sub_div_size;
|
||||
int idx2 = ((i + 1) * sub_div_size) - 1;
|
||||
bs->input_arr[2 * i] = bs->input[idx1];
|
||||
bs->input_arr[2 * i + 1] = bs->input[idx2];
|
||||
}
|
||||
|
||||
// Copy kernel parameter from system memory to local memory
|
||||
err = AgentMemcpy(reinterpret_cast<uint8_t*>(bs->input_arr_local),
|
||||
reinterpret_cast<uint8_t*>(bs->input_arr),
|
||||
in_length, bs->gpu_dev, bs->cpu_dev);
|
||||
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// Reset output buffer to zero
|
||||
bs->output[3] = 0;
|
||||
|
||||
// Dispatch kernel with global work size, work group size with ONE dimesion
|
||||
// and wait for kernel to complete
|
||||
|
||||
// Compute the write index of queue and copy Aql packet into it
|
||||
uint64_t que_idx = hsa_queue_load_write_index_relaxed(bs->queue);
|
||||
|
||||
const uint32_t mask = bs->queue->size - 1;
|
||||
|
||||
// This function simply copies the data we've collected so far into our
|
||||
// local AQL packet, except the the setup and header fields.
|
||||
WriteAQLToQueue(&aql, bs->queue);
|
||||
|
||||
uint32_t aql_header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
|
||||
aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
|
||||
HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
|
||||
aql_header |= HSA_FENCE_SCOPE_SYSTEM <<
|
||||
HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
|
||||
|
||||
// Set the packet's type, acquire and release fences. This should be done
|
||||
// atomically after all the other fields have been set, using release
|
||||
// memory ordering to ensure all the fields are set when the door bell
|
||||
// signal is activated.
|
||||
void* q_base = bs->queue->base_address;
|
||||
|
||||
AtomicSetPacketHeader(aql_header, aql.setup,
|
||||
&(reinterpret_cast<hsa_kernel_dispatch_packet_t*>
|
||||
(q_base))[que_idx & mask]);
|
||||
|
||||
// Increment the write index and ring the doorbell to dispatch kernel.
|
||||
hsa_queue_store_write_index_relaxed(bs->queue, (que_idx + 1));
|
||||
hsa_signal_store_relaxed(bs->queue->doorbell_signal, que_idx);
|
||||
|
||||
// Wait on the dispatch signal until the kernel is finished.
|
||||
// Modify the wait condition to HSA_WAIT_STATE_ACTIVE (instead of
|
||||
// HSA_WAIT_STATE_BLOCKED) if polling is needed instead of blocking, as we
|
||||
// have below.
|
||||
// The call below will block until the condition is met. Below we have said
|
||||
// the condition is that the signal value (initiailzed to 1) associated with
|
||||
// the queue is less than 1. When the kernel associated with the queued AQL
|
||||
// packet has completed execution, the signal value is automatically
|
||||
// decremented by the packet processor.
|
||||
hsa_signal_value_t value = hsa_signal_wait_scacquire(bs->signal,
|
||||
HSA_SIGNAL_CONDITION_LT, 1,
|
||||
UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
// value should be 0, or we timed-out
|
||||
if (value) {
|
||||
std::cout << "Timed out waiting for kernel to complete?" << std::endl;
|
||||
RET_IF_HSA_ERR(HSA_STATUS_ERROR);
|
||||
}
|
||||
|
||||
// Reset the signal to its initial value for the next iteration
|
||||
hsa_signal_store_screlease(bs->signal, 1);
|
||||
|
||||
// Binary search algorithm stuff...
|
||||
global_lower_bound = bs->output[0] * sub_div_size;
|
||||
global_upper_bound = global_lower_bound + sub_div_size - 1;
|
||||
sub_div_size = (global_upper_bound - global_lower_bound + 1) /
|
||||
bs->num_sub_divisions;
|
||||
}
|
||||
|
||||
uint32_t element_index = UINT_MAX;
|
||||
|
||||
for (uint32_t i = global_lower_bound; i <= global_upper_bound; i++) {
|
||||
if (bs->input[i] == bs->find_me) {
|
||||
element_index = i;
|
||||
bs->output[0] = i;
|
||||
bs->output[1] = i + 1;
|
||||
bs->output[2] = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
// Element is not found in region specified
|
||||
// by global lower bound to global upper bound
|
||||
bs->output[2] = 0;
|
||||
}
|
||||
|
||||
uint32_t is_elem_found = bs->output[2];
|
||||
|
||||
std::cout << "Lower bound = " << global_lower_bound << std::endl;
|
||||
std::cout << "Upper bound = " << global_upper_bound << std::endl;
|
||||
std::cout << "Element search for = " << bs->find_me << std::endl;
|
||||
|
||||
|
||||
if (is_elem_found == 1) {
|
||||
std::cout << "Element found at index " << element_index << std::endl;
|
||||
} else {
|
||||
std::cout << "Element value " << bs->find_me << " not found" << std::endl;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Release all the RocR resources we have acquired in this application.
|
||||
hsa_status_t CleanUp(BinarySearch* bs) {
|
||||
hsa_status_t err;
|
||||
|
||||
err = hsa_amd_memory_pool_free(bs->input);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_amd_memory_pool_free(bs->output);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_amd_memory_pool_free(bs->input_arr);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_amd_memory_pool_free(bs->kern_arg_buffer);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_queue_destroy(bs->queue);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_signal_destroy(bs->signal);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
err = hsa_shut_down();
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
// This BinarySearch structure (bs) below holds all of the appl. specific
|
||||
// info we need to run the sample. This includes algorithm specific
|
||||
// information as well as handles to RocR/HSA objects.
|
||||
|
||||
// The basic structure of this sample is to fill in this structure with the
|
||||
// required RocR/HSA handles to RocR resources (e.g., agents, memory pools,
|
||||
// queues, etc.) and then dispatch the packets to the queue, and examine the
|
||||
// output.
|
||||
|
||||
BinarySearch bs;
|
||||
hsa_status_t err;
|
||||
|
||||
// Set some working values specific to this application
|
||||
InitializeBinarySearch(&bs);
|
||||
|
||||
// hsa_init() initializes internal data structures and causes devices
|
||||
// (agents), memory pools and other resources to be discovered.
|
||||
err = hsa_init();
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// Find the agents needed for the sample
|
||||
err = FindDevices(&bs);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// Create the completion signal used when dispatching a packet
|
||||
err = hsa_signal_create(1, 0, NULL, &bs.signal);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// Create a queue to submit our binary search AQL packets
|
||||
err = hsa_queue_create(bs.gpu_dev, 128, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
|
||||
UINT32_MAX, UINT32_MAX, &bs.queue);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// Find the HSA memory pools we need to run this sample
|
||||
err = FindPools(&bs);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// Allocate memory from the correct memory pool, and initialize them as
|
||||
// neeeded for the algorihm.
|
||||
err = AllocateAndInitBuffers(&bs);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// Create a kernel object from the pre-compiled kernel, and read some
|
||||
// attributes associated with the kernel that we will need.
|
||||
err = LoadKernelFromObjFile(&bs);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// Fill in the AQL packet, assign the kernel arguments, enqueue the packet,
|
||||
// "ring" the doorbell, and wait for completion.
|
||||
err = Run(&bs);
|
||||
RET_IF_HSA_ERR(err);
|
||||
|
||||
// Release all the RocR resources we've acquired and shutdown HSA.
|
||||
err = CleanUp(&bs);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#undef RET_IF_HSA_ERR
|
||||
@@ -1,127 +0,0 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
 * One instance of this kernel call is a thread (work-item).
 * Each work-item reads the (lower, upper) value pair of the segment that
 * belongs to its global id and checks whether the searched value lies within
 * that closed interval. If it does, this segment becomes the total search
 * space for the next pass.
 *
 * To signal that, the work-item writes its id to outputArray[0].x and sets
 * outputArray[0].w to 1. Work-items whose segment cannot contain the value
 * write nothing.
 */

__kernel void
binarySearch(__global uint4 * outputArray,
             __const __global uint2 * sortedArray,
             const unsigned int findMe) {
    const unsigned int tid = get_global_id(0);

    // (first, last) values of the segment assigned to this work-item
    const uint2 bounds = sortedArray[tid];

    // Only the work-item whose segment brackets the value reports back;
    // its segment is the search space of the next pass.
    if ((bounds.x <= findMe) && (bounds.y >= findMe)) {
        outputArray[0].x = tid;
        outputArray[0].w = 1;
    }
}
|
||||
|
||||
|
||||
/**
 * Multi-key variant: every work-item owns a fixed block of 256 consecutive
 * input elements. For each key it checks whether the key lies between the
 * first and the last element of its block and, if so, stores the block's
 * start index in output[<key index>].
 */
__kernel void
binarySearch_mulkeys(__global int *keys,
                    __global uint *input,
                    const unsigned int numKeys,
                    __global int *output) {
    const int block = get_global_id(0);
    const int first = block * 256;
    const int last = first + 255;

    int k = 0;
    while (k < numKeys) {
        if (keys[k] >= input[first] && keys[k] <= input[last]) {
            output[k] = first;
        }
        k++;
    }
}
|
||||
|
||||
|
||||
/**
 * Concurrent multi-key variant. The global id encodes two things:
 *   gid / numSubdivisions - which key this work-item searches for
 *   gid % numSubdivisions - which slice of the input it searches in
 * Each work-item runs a classic binary search for its key inside its slice
 * of inputSize / numSubdivisions elements and, on a hit, stores the index in
 * output[<key index>]. Nothing is written when the key is not in the slice.
 */
__kernel void
binarySearch_mulkeysConcurrent(__global uint *keys,
                    __global uint *input,
                    const unsigned int inputSize, // num. of inputs
                    const unsigned int numSubdivisions,
                    __global int *output) {
    const unsigned int sliceLen = inputSize / numSubdivisions;
    const unsigned int keyIdx = get_global_id(0) / numSubdivisions;

    int lBound = (get_global_id(0) % numSubdivisions) * sliceLen;
    // The search below treats uBound as INCLUSIVE, so it has to start at the
    // last element of the slice; lBound + sliceLen would read one element
    // past the slice (and past the end of input for the last slice).
    int uBound = lBound + (int)sliceLen - 1;
    const uint myKey = keys[keyIdx];
    int mid;

    while(uBound >= lBound) {
        mid = (lBound + uBound) / 2;
        if(input[mid] == myKey) {
            output[keyIdx] = mid;
            return;
        } else if(input[mid] > myKey) {
            uBound = mid - 1;
        } else {
            lBound = mid + 1;
        }
    }
}
|
||||
@@ -1,91 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "test_assert.h"
|
||||
#include "simple_convolution.h"
|
||||
#include "test_hsa.h"
|
||||
#include "test_pgen_pmc.h"
|
||||
#include "test_pgen_sqtt.h"
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
#if defined(NDEBUG)
|
||||
clog.rdbuf(NULL);
|
||||
#endif
|
||||
|
||||
bool ret_val = true;
|
||||
|
||||
// Create SimpleConvolution test object
|
||||
TestKernel* test_kernel = new SimpleConvolution();
|
||||
TestAql* test_aql = new TestHSA(test_kernel);
|
||||
|
||||
const bool pmc_enable = (getenv("ROCR_ENABLE_PMC") != NULL);
|
||||
const bool sqtt_enable = (getenv("ROCR_ENABLE_SQTT") != NULL);
|
||||
if (pmc_enable)
|
||||
test_aql = new TestPGenPMC(test_aql);
|
||||
else if (sqtt_enable)
|
||||
test_aql = new TestPGenSQTT(test_aql);
|
||||
test_assert(test_aql != NULL);
|
||||
if (test_aql == NULL) return 1;
|
||||
|
||||
// Initialization of Hsa Runtime
|
||||
ret_val = test_aql->initialize(argc, argv);
|
||||
if (ret_val == false) {
|
||||
std::cout << "Error in the test initialization" << std::endl;
|
||||
test_assert(ret_val);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Setup Hsa resources needed for execution
|
||||
ret_val = test_aql->setup();
|
||||
if (ret_val == false) {
|
||||
std::cout << "Error in creating hsa resources" << std::endl;
|
||||
test_assert(ret_val);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Run SimpleConvolution kernel
|
||||
ret_val = test_aql->run();
|
||||
if (ret_val == false) {
|
||||
std::cout << "Error in running the test kernel" << std::endl;
|
||||
test_assert(ret_val);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Verify the results of the execution
|
||||
ret_val = test_aql->verify_results();
|
||||
if (ret_val) {
|
||||
std::cout << "Test : Passed" << std::endl;
|
||||
} else {
|
||||
std::cout << "Test : Failed" << std::endl;
|
||||
}
|
||||
|
||||
// Print time taken by sample
|
||||
test_aql->print_time();
|
||||
test_aql->cleanup();
|
||||
|
||||
return (ret_val) ? 0 : 1;
|
||||
}
|
||||
@@ -1,78 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _TEST_AQL_H_
|
||||
#define _TEST_AQL_H_
|
||||
|
||||
#include "hsa.h"
|
||||
#include "hsa_rsrc_factory.h"
|
||||
#include "hsa_ven_amd_aqlprofile.h"
|
||||
|
||||
// Test AQL interface
|
||||
class TestAql {
|
||||
TestAql* const test_aql;
|
||||
|
||||
public:
|
||||
explicit TestAql(TestAql* t = 0) : test_aql(t) {}
|
||||
virtual ~TestAql() {}
|
||||
|
||||
TestAql* testAql() { return test_aql; }
|
||||
virtual AgentInfo* getAgentInfo() { return (test_aql) ? test_aql->getAgentInfo() : 0; }
|
||||
virtual hsa_queue_t* getQueue() { return (test_aql) ? test_aql->getQueue() : 0; }
|
||||
virtual HsaRsrcFactory* getRsrcFactory() { return (test_aql) ? test_aql->getRsrcFactory() : 0; }
|
||||
|
||||
// Initialize application environment including setting
|
||||
// up of various configuration parameters based on
|
||||
// command line arguments
|
||||
// @return bool true on success and false on failure
|
||||
virtual bool initialize(int argc, char** argv) {
|
||||
return (test_aql) ? test_aql->initialize(argc, argv) : true;
|
||||
}
|
||||
|
||||
// Setup application parameters for exectuion
|
||||
// @return bool true on success and false on failure
|
||||
virtual bool setup() { return (test_aql) ? test_aql->setup() : true; }
|
||||
|
||||
// Run the kernel
|
||||
// @return bool true on success and false on failure
|
||||
virtual bool run() { return (test_aql) ? test_aql->run() : true; }
|
||||
|
||||
// Verify results
|
||||
// @return bool true on success and false on failure
|
||||
virtual bool verify_results() { return (test_aql) ? test_aql->verify_results() : true; }
|
||||
|
||||
// Print to console the time taken to execute kernel
|
||||
virtual void print_time() {
|
||||
if (test_aql) test_aql->print_time();
|
||||
}
|
||||
|
||||
// Release resources e.g. memory allocations
|
||||
// @return bool true on success and false on failure
|
||||
virtual bool cleanup() { return (test_aql) ? test_aql->cleanup() : true; }
|
||||
};
|
||||
|
||||
#endif // _TEST_AQL_H_
|
||||
@@ -1,13 +0,0 @@
|
||||
#ifndef _TEST_ASSERT_H_
|
||||
#define _TEST_ASSERT_H_
|
||||
|
||||
// test_assert(cond): when cond evaluates to false, prints
//   ASSERT FAILED(<cond text>) at "<file>" line <line>
// to std::cout and terminates the process with exit(-1).
//
// The body is wrapped in do { ... } while (0) so that the macro expands to a
// single statement: it must be followed by a semicolon and is safe inside an
// unbraced if/else.
//
// This header includes nothing itself; the including file has to make
// std::cout and exit() visible before using the macro.
#define test_assert(cond)                                                                          \
  do {                                                                                             \
    if (!(cond)) {                                                                                 \
      std::cout << "ASSERT FAILED(" << #cond << ") at \"" << __FILE__ << "\" line " << __LINE__    \
                << std::endl;                                                                      \
      exit(-1);                                                                                    \
    }                                                                                              \
  } while (0)
|
||||
|
||||
#endif // _TEST_ASSERT_H_
|
||||
@@ -1,237 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "test_assert.h"
|
||||
#include <atomic>
|
||||
|
||||
//#include "os.h"
|
||||
#include "helper_funcs.h"
|
||||
#include "hsa_rsrc_factory.h"
|
||||
#include "test_hsa.h"
|
||||
|
||||
bool TestHSA::initialize(int arg_cnt, char** arg_list) {
|
||||
std::cout << "TestHSA::initialize :" << std::endl;
|
||||
// Initialize command line arguments
|
||||
hsa_cmdline_arg_cnt = arg_cnt;
|
||||
hsa_cmdline_arg_list = arg_list;
|
||||
|
||||
// Instantiate a Timer object
|
||||
setup_timer_idx_ = hsa_timer_.CreateTimer();
|
||||
dispatch_timer_idx_ = hsa_timer_.CreateTimer();
|
||||
|
||||
// Instantiate an instance of Hsa Resources Factory
|
||||
hsa_rsrc_ = new HsaRsrcFactory();
|
||||
|
||||
// Print properties of the agents
|
||||
hsa_rsrc_->PrintGpuAgents("> GPU agents");
|
||||
|
||||
// Create an instance of Gpu agent
|
||||
const char* p = getenv("ROCR_AGENT_IND");
|
||||
const uint32_t agent_ind = (p == NULL) ? 0 : atol(p);
|
||||
if (!hsa_rsrc_->GetGpuAgentInfo(agent_ind, &agent_info_)) {
|
||||
std::cout << "> error: agent[" << agent_ind << "] is not found" << std::endl;
|
||||
return false;
|
||||
}
|
||||
std::cout << "> Using agent[" << agent_ind << "] : " << agent_info_->name << std::endl;
|
||||
|
||||
// Create an instance of Aql Queue
|
||||
uint32_t num_pkts = 128;
|
||||
hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_);
|
||||
|
||||
// Obtain handle of signal
|
||||
hsa_rsrc_->CreateSignal(1, &hsa_signal_);
|
||||
|
||||
// Obtain the code object file name
|
||||
std::string agentName(agent_info_->name);
|
||||
if (agentName.compare(0, 4, "gfx8") == 0) {
|
||||
brig_path_obj_.append("gfx8");
|
||||
} else if (agentName.compare(0, 4, "gfx9") == 0) {
|
||||
brig_path_obj_.append("gfx9");
|
||||
} else {
|
||||
test_assert(false);
|
||||
return false;
|
||||
}
|
||||
brig_path_obj_.append("_" + name_ + ".hsaco");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TestHSA::setup() {
|
||||
std::cout << "TestHSA::setup :" << std::endl;
|
||||
|
||||
// Start the timer object
|
||||
hsa_timer_.StartTimer(setup_timer_idx_);
|
||||
|
||||
mem_map_t& mem_map = test_->get_mem_map();
|
||||
for (mem_it_t it = mem_map.begin(); it != mem_map.end(); ++it) {
|
||||
mem_descr_t& des = it->second;
|
||||
void* ptr = (des.local) ? hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size)
|
||||
: hsa_rsrc_->AllocateSysMemory(agent_info_, des.size);
|
||||
des.ptr = ptr;
|
||||
test_assert(ptr != NULL);
|
||||
if (ptr == NULL) return false;
|
||||
}
|
||||
test_->init();
|
||||
|
||||
// Load and Finalize Kernel Code Descriptor
|
||||
char* brig_path = (char*)brig_path_obj_.c_str();
|
||||
const bool ret_val =
|
||||
hsa_rsrc_->LoadAndFinalize(agent_info_, brig_path, strdup(name_.c_str()), &kernel_code_desc_);
|
||||
if (ret_val == false) {
|
||||
std::cout << "Error in loading and finalizing Kernel" << std::endl;
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
// Stop the timer object
|
||||
hsa_timer_.StopTimer(setup_timer_idx_);
|
||||
setup_time_taken_ = hsa_timer_.ReadTimer(setup_timer_idx_);
|
||||
total_time_taken_ = setup_time_taken_;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TestHSA::run() {
|
||||
std::cout << "TestHSA::run :" << std::endl;
|
||||
|
||||
const uint32_t work_group_size = 64;
|
||||
const uint32_t work_grid_size = test_->get_elements_count();
|
||||
uint32_t group_segment_size = 0;
|
||||
uint32_t private_segment_size = 0;
|
||||
const size_t kernarg_segment_size = test_->get_kernarg_size();
|
||||
uint64_t code_handle = 0;
|
||||
|
||||
// Retrieve the amount of group memory needed
|
||||
hsa_executable_symbol_get_info(
|
||||
kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &group_segment_size);
|
||||
|
||||
// Retrieve the amount of private memory needed
|
||||
hsa_executable_symbol_get_info(kernel_code_desc_,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
|
||||
&private_segment_size);
|
||||
|
||||
// Check the kernel args size
|
||||
size_t size_info = 0;
|
||||
hsa_executable_symbol_get_info(
|
||||
kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &size_info);
|
||||
test_assert(kernarg_segment_size == size_info);
|
||||
if (kernarg_segment_size != size_info) return false;
|
||||
|
||||
// Retrieve handle of the code block
|
||||
hsa_executable_symbol_get_info(kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
|
||||
&code_handle);
|
||||
|
||||
// Initialize the dispatch packet.
|
||||
hsa_kernel_dispatch_packet_t aql;
|
||||
memset(&aql, 0, sizeof(aql));
|
||||
// Set the packet's type, barrier bit, acquire and release fences
|
||||
aql.header = HSA_PACKET_TYPE_KERNEL_DISPATCH;
|
||||
aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE;
|
||||
aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE;
|
||||
// Populate Aql packet with default values
|
||||
aql.setup = 1;
|
||||
aql.grid_size_x = work_grid_size;
|
||||
aql.grid_size_y = 1;
|
||||
aql.grid_size_z = 1;
|
||||
aql.workgroup_size_x = work_group_size;
|
||||
aql.workgroup_size_y = 1;
|
||||
aql.workgroup_size_z = 1;
|
||||
// Bind the kernel code descriptor and arguments
|
||||
aql.kernel_object = code_handle;
|
||||
aql.kernarg_address = test_->get_kernarg_ptr();
|
||||
aql.group_segment_size = group_segment_size;
|
||||
aql.private_segment_size = private_segment_size;
|
||||
// Initialize Aql packet with handle of signal
|
||||
aql.completion_signal = hsa_signal_;
|
||||
|
||||
// Compute the write index of queue and copy Aql packet into it
|
||||
const uint64_t que_idx = hsa_queue_load_write_index_relaxed(hsa_queue_);
|
||||
const uint32_t mask = hsa_queue_->size - 1;
|
||||
|
||||
std::cout << "> Executing kernel: \"" << name_ << "\"" << std::endl;
|
||||
|
||||
// Start the timer object
|
||||
hsa_timer_.StartTimer(dispatch_timer_idx_);
|
||||
|
||||
// Disable packet so that submission to HW is complete
|
||||
const auto header = aql.header;
|
||||
const uint8_t packet_type_mask = (1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1;
|
||||
aql.header &= (~packet_type_mask) << HSA_PACKET_HEADER_TYPE;
|
||||
aql.header |= HSA_PACKET_TYPE_INVALID << HSA_PACKET_HEADER_TYPE;
|
||||
|
||||
// Copy Aql packet into queue buffer
|
||||
((hsa_kernel_dispatch_packet_t*)(hsa_queue_->base_address))[que_idx & mask] = aql;
|
||||
|
||||
// After AQL packet is fully copied into queue buffer
|
||||
// update packet header from invalid state to valid state
|
||||
std::atomic_thread_fence(std::memory_order_release);
|
||||
((hsa_kernel_dispatch_packet_t*)(hsa_queue_->base_address))[que_idx & mask].header = header;
|
||||
|
||||
// Increment the write index and ring the doorbell to dispatch the kernel.
|
||||
hsa_queue_store_write_index_relaxed(hsa_queue_, (que_idx + 1));
|
||||
hsa_signal_store_relaxed(hsa_queue_->doorbell_signal, que_idx);
|
||||
|
||||
std::cout << "> Waiting on kernel dispatch signal" << std::endl;
|
||||
|
||||
// Wait on the dispatch signal until the kernel is finished.
|
||||
// Update wait condition to HSA_WAIT_STATE_ACTIVE for Polling
|
||||
hsa_signal_value_t value = hsa_signal_wait_acquire(hsa_signal_, HSA_SIGNAL_CONDITION_LT, 1,
|
||||
(uint64_t)-1, HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
// Stop the timer object
|
||||
hsa_timer_.StopTimer(dispatch_timer_idx_);
|
||||
dispatch_time_taken_ = hsa_timer_.ReadTimer(dispatch_timer_idx_);
|
||||
total_time_taken_ += dispatch_time_taken_;
|
||||
|
||||
// Copy kernel buffers from local memory into system memory
|
||||
hsa_rsrc_->TransferData((uint8_t*)test_->get_output_ptr(), (uint8_t*)test_->get_local_ptr(),
|
||||
test_->get_output_size(), false);
|
||||
test_->print_output();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TestHSA::verify_results() {
|
||||
// Compare the results and see if they match
|
||||
const int32_t cmp_val =
|
||||
memcmp(test_->get_output_ptr(), test_->get_refout_ptr(), test_->get_output_size());
|
||||
return (cmp_val == 0);
|
||||
}
|
||||
|
||||
void TestHSA::print_time() {
|
||||
std::cout << "Time taken for Setup by " << this->name_ << " : " << this->setup_time_taken_
|
||||
<< std::endl;
|
||||
std::cout << "Time taken for Dispatch by " << this->name_ << " : " << this->dispatch_time_taken_
|
||||
<< std::endl;
|
||||
std::cout << "Time taken in Total by " << this->name_ << " : " << this->total_time_taken_
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
bool TestHSA::cleanup() {
|
||||
// shutdown Hsa Runtime system
|
||||
hsa_status_t ret_val = hsa_shut_down();
|
||||
return (HSA_STATUS_SUCCESS == ret_val);
|
||||
}
|
||||
@@ -1,115 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _TEST_HSA_H_
|
||||
#define _TEST_HSA_H_
|
||||
|
||||
#include "test_aql.h"
|
||||
#include "test_kernel.h"
|
||||
#include "hsa_rsrc_factory.h"
|
||||
|
||||
// Class implements HSA test
|
||||
class TestHSA : public TestAql {
|
||||
public:
|
||||
// Constructor
|
||||
explicit TestHSA(TestKernel* test) : test_(test), name_(test->Name()) {
|
||||
total_time_taken_ = 0;
|
||||
setup_time_taken_ = 0;
|
||||
dispatch_time_taken_ = 0;
|
||||
}
|
||||
|
||||
// Get methods for Agent Info, HAS queue, HSA Resourcse Manager
|
||||
AgentInfo* getAgentInfo() { return agent_info_; }
|
||||
hsa_queue_t* getQueue() { return hsa_queue_; }
|
||||
HsaRsrcFactory* getRsrcFactory() { return hsa_rsrc_; }
|
||||
|
||||
// Initialize application environment including setting
|
||||
// up of various configuration parameters based on
|
||||
// command line arguments
|
||||
// @return bool true on success and false on failure
|
||||
bool initialize(int argc, char** argv);
|
||||
|
||||
// Setup application parameters for exectuion
|
||||
// @return bool true on success and false on failure
|
||||
bool setup();
|
||||
|
||||
// Run the BinarySearch kernel
|
||||
// @return bool true on success and false on failure
|
||||
bool run();
|
||||
|
||||
// Verify against reference implementation
|
||||
// @return bool true on success and false on failure
|
||||
bool verify_results();
|
||||
|
||||
// Print to console the time taken to execute kernel
|
||||
void print_time();
|
||||
|
||||
// Release resources e.g. memory allocations
|
||||
// @return bool true on success and false on failure
|
||||
bool cleanup();
|
||||
|
||||
private:
|
||||
typedef TestKernel::mem_descr_t mem_descr_t;
|
||||
typedef TestKernel::mem_map_t mem_map_t;
|
||||
typedef TestKernel::mem_it_t mem_it_t;
|
||||
|
||||
// Test object
|
||||
TestKernel* test_;
|
||||
|
||||
// Path of Brig file
|
||||
std::string brig_path_obj_;
|
||||
|
||||
// Used to track time taken to run the sample
|
||||
double total_time_taken_;
|
||||
double setup_time_taken_;
|
||||
double dispatch_time_taken_;
|
||||
|
||||
// Handle to an Hsa Gpu Agent
|
||||
AgentInfo* agent_info_;
|
||||
|
||||
// Handle to an Hsa Queue
|
||||
hsa_queue_t* hsa_queue_;
|
||||
|
||||
// Handle of signal
|
||||
hsa_signal_t hsa_signal_;
|
||||
|
||||
// Handle of Kernel Code Descriptor
|
||||
hsa_executable_symbol_t kernel_code_desc_;
|
||||
|
||||
// Instance of timer object
|
||||
uint32_t setup_timer_idx_;
|
||||
uint32_t dispatch_timer_idx_;
|
||||
PerfTimer hsa_timer_;
|
||||
|
||||
// Instance of Hsa Resources Factory
|
||||
HsaRsrcFactory* hsa_rsrc_;
|
||||
|
||||
// Test kernel name
|
||||
std::string name_;
|
||||
};
|
||||
|
||||
#endif // _TEST_HSA_H_
|
||||
@@ -1,105 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _TEST_KERNEL_H_
|
||||
#define _TEST_KERNEL_H_
|
||||
|
||||
#include <map>
|
||||
#include <stdint.h>
|
||||
|
||||
// TestKernel: abstract description of a test kernel.
// A concrete kernel registers the buffers it needs as memory descriptors
// (id -> {ptr, size, local}) and implements init(), get_elements_count(),
// print_output() and Name(). The descriptor pointers start out as NULL; the
// map is handed out by reference through get_mem_map() so the owner of the
// allocations can fill them in.
class TestKernel {
 public:
  // Memory descriptors IDs
  enum { INPUT_DES_ID, OUTPUT_DES_ID, LOCAL_DES_ID, MASK_DES_ID, KERNARG_DES_ID, REFOUT_DES_ID };

  // Memory descriptors vector declaration
  struct mem_descr_t {
    void* ptr;      // buffer address, NULL until set
    uint32_t size;  // requested buffer size
    bool local;     // true for a local memory descriptor, false for system memory
  };

  // Memory map declaration
  typedef std::map<uint32_t, mem_descr_t> mem_map_t;
  typedef mem_map_t::iterator mem_it_t;
  typedef mem_map_t::const_iterator mem_const_it_t;

  // Virtual destructor: kernels are handled through TestKernel pointers, so
  // destroying one through the base pointer must reach the derived class
  virtual ~TestKernel() {}

  // Initialize method
  virtual void init() = 0;

  // Return kernel memory map
  mem_map_t& get_mem_map() { return mem_map_; }

  // Return NULL descriptor
  static mem_descr_t null_descriptor() { return {0, 0, 0}; }

  // Methods to get the kernel attributes.
  // A descriptor that was never registered reads as the NULL descriptor,
  // i.e. pointer NULL and size 0.
  void* get_kernarg_ptr() const { return get_descr(KERNARG_DES_ID).ptr; }
  uint32_t get_kernarg_size() const { return get_descr(KERNARG_DES_ID).size; }
  void* get_output_ptr() const { return get_descr(OUTPUT_DES_ID).ptr; }
  uint32_t get_output_size() const { return get_descr(OUTPUT_DES_ID).size; }
  void* get_local_ptr() const { return get_descr(LOCAL_DES_ID).ptr; }
  void* get_refout_ptr() const { return get_descr(REFOUT_DES_ID).ptr; }
  virtual uint32_t get_elements_count() const = 0;

  // Print output
  virtual void print_output() const = 0;

  // Return name
  virtual std::string Name() const = 0;

 protected:
  // Set system memory descriptor
  // @return bool false when a descriptor with this id already exists
  bool set_sys_descr(const uint32_t& id, const uint32_t& size) {
    return set_mem_descr(id, size, false);
  }

  // Set local memory descriptor
  // @return bool false when a descriptor with this id already exists
  bool set_local_descr(const uint32_t& id, const uint32_t& size) {
    return set_mem_descr(id, size, true);
  }

  // Get memory descriptor (a copy), or the NULL descriptor for an unknown id
  mem_descr_t get_descr(const uint32_t& id) const {
    mem_const_it_t it = mem_map_.find(id);
    return (it != mem_map_.end()) ? it->second : null_descriptor();
  }

 private:
  // Set memory descriptor; an existing entry with the same id is kept
  // unchanged and false is returned
  bool set_mem_descr(const uint32_t& id, const uint32_t& size, const bool& local) {
    const mem_descr_t des = {NULL, size, local};
    auto ret = mem_map_.insert(mem_map_t::value_type(id, des));
    return ret.second;
  }

  // Kernel memory map object
  mem_map_t mem_map_;
};
|
||||
|
||||
#endif // _TEST_KERNEL_H_
|
||||
@@ -1,45 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _TEST_PGEN_H_
|
||||
#define _TEST_PGEN_H_
|
||||
|
||||
#include "test_pmgr.h"
|
||||
|
||||
// SimpleConvolution: Class implements OpenCL SimpleConvolution sample
|
||||
class TestPGen : public TestPMgr {
|
||||
protected:
|
||||
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
|
||||
|
||||
packet_t* PrePacket() { return reinterpret_cast<packet_t*>(&prePacket); }
|
||||
packet_t* PostPacket() { return reinterpret_cast<packet_t*>(&postPacket); }
|
||||
|
||||
public:
|
||||
explicit TestPGen(TestAql* t) : TestPMgr(t) {}
|
||||
};
|
||||
|
||||
#endif // _TEST_PGEN_H_
|
||||
@@ -1,163 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _TEST_PGEN_PMC_H_
|
||||
#define _TEST_PGEN_PMC_H_
|
||||
|
||||
#include "test_assert.h"
|
||||
#include "test_pgen.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
// Data-iteration callback: appends a copy of the passed info record to the
// std::vector<hsa_ven_amd_aqlprofile_info_data_t> that callback_data points to.
//
// @param info_type     not used by this callback
// @param info_data     record to copy; must not be null
// @param callback_data address of the vector receiving the records
// @return always HSA_STATUS_SUCCESS
//
// Declared inline because the definition lives in a header: without it every
// translation unit including this header would emit its own external
// definition and the program would fail to link.
inline hsa_status_t TestPGenPMC_Callback(hsa_ven_amd_aqlprofile_info_type_t info_type,
                                         hsa_ven_amd_aqlprofile_info_data_t* info_data,
                                         void* callback_data) {
  typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> passed_data_t;
  passed_data_t* const records = reinterpret_cast<passed_data_t*>(callback_data);
  records->push_back(*info_data);
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// SimpleConvolution: Class implements OpenCL SimpleConvolution sample
|
||||
class TestPGenPMC : public TestPGen {
|
||||
const static uint32_t buffer_alignment = 0x1000; // 4K
|
||||
|
||||
hsa_agent_t agent;
|
||||
hsa_ven_amd_aqlprofile_profile_t profile;
|
||||
hsa_ven_amd_aqlprofile_event_t* events;
|
||||
|
||||
bool buildPackets() { return true; }
|
||||
|
||||
bool dumpData() {
|
||||
std::cout << "TestPGenPMC::dumpData :" << std::endl;
|
||||
|
||||
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> callback_data_t;
|
||||
|
||||
callback_data_t data;
|
||||
api.hsa_ven_amd_aqlprofile_iterate_data(&profile, TestPGenPMC_Callback, &data);
|
||||
for (callback_data_t::iterator it = data.begin(); it != data.end(); ++it) {
|
||||
std::cout << dec << "event( block(" << it->pmc_data.event.block_name << "_"
|
||||
<< it->pmc_data.event.block_index << "), id(" << it->pmc_data.event.counter_id
|
||||
<< ")), sample(" << it->sample_id << "), result(" << it->pmc_data.result << ")"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public:
|
||||
explicit TestPGenPMC(TestAql* t) : TestPGen(t) { std::cout << "Test: PGen PMC" << std::endl; }
|
||||
|
||||
bool initialize(int arg_cnt, char** arg_list) {
|
||||
if (!TestPMgr::initialize(arg_cnt, arg_list)) return false;
|
||||
|
||||
hsa_status_t status;
|
||||
hsa_agent_t agent;
|
||||
uint32_t command_buffer_alignment;
|
||||
uint32_t command_buffer_size;
|
||||
uint32_t output_buffer_alignment;
|
||||
uint32_t output_buffer_size;
|
||||
|
||||
// GPU identificator
|
||||
agent = getAgentInfo()->dev_id;
|
||||
|
||||
// Instantiation of the profile object
|
||||
// //////////////////////////////////////////////////////////////
|
||||
// Set the event fields
|
||||
const hsa_ven_amd_aqlprofile_event_t events_arr[] = {
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 4 /*WAVES*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 14 /*ITEMS*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ, 0, 47 /*WAVE_READY*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2, 1 /*CYCLE*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2, 3 /*REQ*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_TCC, 2, 22 /*WRITEBACK*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0, 0 /*ALWAYS_COUNT*/},
|
||||
{HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_CPC, 0, 8 /*ME1_STALL_WAIT_ON_RCIU_READ*/},
|
||||
};
|
||||
const size_t event_count = sizeof(events_arr) / sizeof(hsa_ven_amd_aqlprofile_event_t);
|
||||
events = new hsa_ven_amd_aqlprofile_event_t[event_count];
|
||||
memcpy(events, events_arr, sizeof(events_arr));
|
||||
|
||||
// Initialization the profile
|
||||
memset(&profile, 0, sizeof(profile));
|
||||
profile.agent = agent;
|
||||
profile.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC;
|
||||
|
||||
// set enabled events list
|
||||
profile.events = events;
|
||||
profile.event_count = event_count;
|
||||
|
||||
// Profile buffers attributes
|
||||
command_buffer_alignment = buffer_alignment;
|
||||
status = api.hsa_ven_amd_aqlprofile_get_info(
|
||||
&profile, HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE, &command_buffer_size);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
const char* str = "";
|
||||
api.hsa_ven_amd_aqlprofile_error_string(&str);
|
||||
std::cout << "aqlprofile err: " << str << std::endl;
|
||||
}
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
output_buffer_alignment = buffer_alignment;
|
||||
status = api.hsa_ven_amd_aqlprofile_get_info(
|
||||
&profile, HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA_SIZE, &output_buffer_size);
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
// Application is allocating the command buffer
|
||||
// Allocate(command_buffer_alignment, command_buffer_size,
|
||||
// MODE_HOST_ACC|MODE_DEV_ACC|MODE_EXEC_DATA)
|
||||
profile.command_buffer.ptr =
|
||||
getRsrcFactory()->AllocateSysMemory(getAgentInfo(), command_buffer_size);
|
||||
profile.command_buffer.size = command_buffer_size;
|
||||
|
||||
// Application is allocating the output buffer
|
||||
// Allocate(output_buffer_alignment, output_buffer_size,
|
||||
// MODE_HOST_ACC|MODE_DEV_ACC)
|
||||
profile.output_buffer.ptr =
|
||||
getRsrcFactory()->AllocateSysMemory(getAgentInfo(), output_buffer_size);
|
||||
profile.output_buffer.size = output_buffer_size;
|
||||
memset(profile.output_buffer.ptr, 0x77, output_buffer_size);
|
||||
|
||||
// Populating the AQL start packet
|
||||
status = api.hsa_ven_amd_aqlprofile_start(&profile, PrePacket());
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
const char* str;
|
||||
api.hsa_ven_amd_aqlprofile_error_string(&str);
|
||||
std::cout << "aqlprofile err: " << str << std::endl;
|
||||
}
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
if (status != HSA_STATUS_SUCCESS) return false;
|
||||
|
||||
// Populating the AQL stop packet
|
||||
status = api.hsa_ven_amd_aqlprofile_stop(&profile, PostPacket());
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
};
|
||||
|
||||
#endif // _TEST_PGEN_PMC_H_
|
||||
@@ -1,162 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _TEST_PGEN_SQTT_H_
|
||||
#define _TEST_PGEN_SQTT_H_
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#include "test_assert.h"
|
||||
#include "test_pgen.h"
|
||||
|
||||
// Data-iteration callback used by TestPGenSQTT::dumpData().
// Appends a copy of '*info_data' to the std::vector passed through
// 'callback_data'; 'info_type' is not inspected.
// Declared 'inline' because the function is defined in a header: without it
// every translation unit including this file emits its own external
// definition and the link fails with duplicate symbols.
// Always returns HSA_STATUS_SUCCESS.
inline hsa_status_t TestPGenSQTT_Callback(hsa_ven_amd_aqlprofile_info_type_t info_type,
                                          hsa_ven_amd_aqlprofile_info_data_t* info_data,
                                          void* callback_data) {
  typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> passed_data_t;
  (void)info_type;
  static_cast<passed_data_t*>(callback_data)->push_back(*info_data);
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// SimpleConvolution: Class implements OpenCL SimpleConvolution sample
|
||||
class TestPGenSQTT : public TestPGen {
|
||||
const static uint32_t buffer_alignment = 0x1000; // 4K
|
||||
const static uint32_t buffer_size = 0x2000000; // 32M
|
||||
|
||||
hsa_agent_t agent;
|
||||
hsa_ven_amd_aqlprofile_profile_t profile;
|
||||
|
||||
bool buildPackets() { return true; }
|
||||
|
||||
bool dumpData() {
|
||||
std::cout << "TestPGenSQTT::dumpData :" << std::endl;
|
||||
|
||||
typedef std::vector<hsa_ven_amd_aqlprofile_info_data_t> callback_data_t;
|
||||
|
||||
callback_data_t data;
|
||||
api.hsa_ven_amd_aqlprofile_iterate_data(&profile, TestPGenSQTT_Callback, &data);
|
||||
for (callback_data_t::iterator it = data.begin(); it != data.end(); ++it) {
|
||||
std::cout << "> sample(" << dec << it->sample_id << ") ptr(" << hex << it->sqtt_data.ptr
|
||||
<< ") size(" << dec << it->sqtt_data.size << ")" << std::endl;
|
||||
|
||||
void* sys_buf = getRsrcFactory()->AllocateSysMemory(getAgentInfo(), it->sqtt_data.size);
|
||||
test_assert(sys_buf != NULL);
|
||||
if (sys_buf == NULL) return HSA_STATUS_ERROR;
|
||||
|
||||
hsa_status_t status = hsa_memory_copy(sys_buf, it->sqtt_data.ptr, it->sqtt_data.size);
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
if (status != HSA_STATUS_SUCCESS) return status;
|
||||
|
||||
std::string file_name;
|
||||
file_name.append("sqtt_dump_");
|
||||
file_name.append(std::to_string(it->sample_id));
|
||||
file_name.append(".txt");
|
||||
std::ofstream out_file;
|
||||
out_file.open(file_name);
|
||||
|
||||
// Write the buffer in terms of shorts (16 bits)
|
||||
short* sqtt_data = (short*)sys_buf;
|
||||
for (int i = 0; i < (it->sqtt_data.size / sizeof(short)); ++i) {
|
||||
out_file << std::setw(4) << std::setfill('0') << std::hex << sqtt_data[i] << "\n";
|
||||
}
|
||||
|
||||
out_file.close();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public:
|
||||
explicit TestPGenSQTT(TestAql* t) : TestPGen(t) { std::cout << "Test: PGen SQTT" << std::endl; }
|
||||
|
||||
bool initialize(int arg_cnt, char** arg_list) {
|
||||
if (!TestPMgr::initialize(arg_cnt, arg_list)) return false;
|
||||
|
||||
hsa_status_t status;
|
||||
hsa_agent_t agent;
|
||||
uint32_t command_buffer_alignment;
|
||||
uint32_t command_buffer_size;
|
||||
uint32_t output_buffer_alignment;
|
||||
uint32_t output_buffer_size;
|
||||
|
||||
// GPU identificator
|
||||
agent = getAgentInfo()->dev_id;
|
||||
|
||||
// Instantiation of the profile object
|
||||
// //////////////////////////////////////////////////////////////
|
||||
// Set the parameters
|
||||
// parameters = ....;
|
||||
|
||||
// Initialization the profile
|
||||
memset(&profile, 0, sizeof(profile));
|
||||
profile.agent = agent;
|
||||
profile.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_SQTT;
|
||||
|
||||
// set parameters
|
||||
// profile.parameters = &event;
|
||||
// profile.parameter_count = 1;
|
||||
|
||||
// Profile buffers attributes
|
||||
command_buffer_alignment = buffer_alignment;
|
||||
status = api.hsa_ven_amd_aqlprofile_get_info(
|
||||
&profile, HSA_VEN_AMD_AQLPROFILE_INFO_COMMAND_BUFFER_SIZE, &command_buffer_size);
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
output_buffer_alignment = buffer_alignment;
|
||||
output_buffer_size = buffer_size;
|
||||
|
||||
// Application is allocating the command buffer
|
||||
// AllocateSystem(command_buffer_alignment, command_buffer_size,
|
||||
// MODE_HOST_ACC|MODE_DEV_ACC|MODE_EXEC_DATA)
|
||||
profile.command_buffer.ptr =
|
||||
getRsrcFactory()->AllocateSysMemory(getAgentInfo(), command_buffer_size);
|
||||
profile.command_buffer.size = command_buffer_size;
|
||||
|
||||
// Application is allocating the output buffer
|
||||
// AllocateLocal(output_buffer_alignment, output_buffer_size,
|
||||
// MODE_DEV_ACC)
|
||||
profile.output_buffer.ptr =
|
||||
getRsrcFactory()->AllocateLocalMemory(getAgentInfo(), output_buffer_size);
|
||||
profile.output_buffer.size = output_buffer_size;
|
||||
|
||||
// Populating the AQL start packet
|
||||
status = api.hsa_ven_amd_aqlprofile_start(&profile, PrePacket());
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
if (status != HSA_STATUS_SUCCESS) return false;
|
||||
|
||||
// Populating the AQL stop packet
|
||||
status = api.hsa_ven_amd_aqlprofile_stop(&profile, PostPacket());
|
||||
test_assert(status == HSA_STATUS_SUCCESS);
|
||||
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
};
|
||||
|
||||
#endif // _TEST_PGEN_SQTT_H_
|
||||
@@ -1,130 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include <atomic>
|
||||
#include "test_assert.h"
|
||||
|
||||
#include "test_pmgr.h"
|
||||
|
||||
bool TestPMgr::addPacketGfx9(const packet_t* packet) {
|
||||
packet_t aql_packet = *packet;
|
||||
|
||||
// Compute the write index of queue and copy Aql packet into it
|
||||
uint64_t que_idx = hsa_queue_load_write_index_relaxed(getQueue());
|
||||
const uint32_t mask = getQueue()->size - 1;
|
||||
|
||||
// Disable packet so that submission to HW is complete
|
||||
const auto header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
|
||||
aql_packet.header &= (~((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1)) << HSA_PACKET_HEADER_TYPE;
|
||||
aql_packet.header |= HSA_PACKET_TYPE_INVALID << HSA_PACKET_HEADER_TYPE;
|
||||
|
||||
// Copy Aql packet into queue buffer
|
||||
((packet_t*)(getQueue()->base_address))[que_idx & mask] = aql_packet;
|
||||
|
||||
// After AQL packet is fully copied into queue buffer
|
||||
// update packet header from invalid state to valid state
|
||||
std::atomic_thread_fence(std::memory_order_release);
|
||||
((packet_t*)(getQueue()->base_address))[que_idx & mask].header = header;
|
||||
|
||||
// Increment the write index and ring the doorbell to dispatch the kernel.
|
||||
hsa_queue_store_write_index_relaxed(getQueue(), (que_idx + 1));
|
||||
hsa_signal_store_relaxed(getQueue()->doorbell_signal, que_idx);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TestPMgr::addPacketGfx8(const packet_t* packet) {
|
||||
// Create legacy devices PM4 data
|
||||
const hsa_ext_amd_aql_pm4_packet_t* aql_packet = (const hsa_ext_amd_aql_pm4_packet_t*)packet;
|
||||
slot_pm4_s data;
|
||||
api.hsa_ven_amd_aqlprofile_legacy_get_pm4(aql_packet, reinterpret_cast<void*>(data.words));
|
||||
|
||||
// Compute the write index of queue and copy Aql packet into it
|
||||
uint64_t que_idx = hsa_queue_load_write_index_relaxed(getQueue());
|
||||
const uint32_t mask = getQueue()->size - 1;
|
||||
|
||||
// Copy Aql packet into queue buffer
|
||||
packet_t* ptr = ((packet_t*)(getQueue()->base_address)) + (que_idx & mask);
|
||||
slot_pm4_t* slot_pm4 = (slot_pm4_t*)ptr;
|
||||
slot_pm4->store(data, std::memory_order_relaxed);
|
||||
|
||||
// Increment the write index and ring the doorbell to dispatch the kernel.
|
||||
hsa_queue_store_write_index_relaxed(getQueue(), (que_idx + SLOT_PM4_SIZE_AQLP));
|
||||
hsa_signal_store_relaxed(getQueue()->doorbell_signal, que_idx + SLOT_PM4_SIZE_AQLP - 1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TestPMgr::addPacket(const packet_t* packet) {
|
||||
const char* agent_name = getAgentInfo()->name;
|
||||
return (strncmp(agent_name, "gfx8", 4) == 0) ? addPacketGfx8(packet) : addPacketGfx9(packet);
|
||||
}
|
||||
|
||||
bool TestPMgr::run() {
|
||||
// Build Aql Pkts
|
||||
const bool active = buildPackets();
|
||||
if (active) {
|
||||
// Submit Pre-Dispatch Aql packet
|
||||
addPacket(&prePacket);
|
||||
}
|
||||
|
||||
testAql()->run();
|
||||
|
||||
if (active) {
|
||||
// Set post packet completion signal
|
||||
postPacket.completion_signal = postSignal;
|
||||
|
||||
// Submit Post-Dispatch Aql packet
|
||||
addPacket(&postPacket);
|
||||
|
||||
// Wait for Post-Dispatch packet to complete
|
||||
hsa_signal_wait_acquire(postSignal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1,
|
||||
HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
// Dumping profiling data
|
||||
dumpData();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Initializes the wrapped test and creates the signal (initial value 1) that
// the post packet completes. Returns false when the base initialization or
// the signal creation fails.
bool TestPMgr::initialize(int argc, char** argv) {
  // A failed base initialization used to be ignored
  if (!TestAql::initialize(argc, argv)) return false;

  hsa_status_t status = hsa_signal_create(1, 0, NULL, &postSignal);
  test_assert(status == HSA_STATUS_SUCCESS);
  return (status == HSA_STATUS_SUCCESS);
}
|
||||
|
||||
// Starts with both signals holding a zero handle, initializes the HSA
// runtime and fetches the aqlprofile extension table (version 1.0) into 'api'.
TestPMgr::TestPMgr(TestAql* t) : TestAql(t), dummySignal(), postSignal(dummySignal) {
  hsa_status_t status = hsa_init();
  test_assert(status == HSA_STATUS_SUCCESS);

  status = hsa_system_get_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, 1, 0, &api);
  test_assert(status == HSA_STATUS_SUCCESS);
}
|
||||
@@ -1,71 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _TEST_PMGR_H_
|
||||
#define _TEST_PMGR_H_
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "hsa.h"
|
||||
#include "test_aql.h"
|
||||
#include "hsa_ven_amd_aqlprofile.h"
|
||||
|
||||
// SimpleConvolution: Class implements OpenCL SimpleConvolution sample
|
||||
class TestPMgr : public TestAql {
|
||||
public:
|
||||
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
|
||||
explicit TestPMgr(TestAql* t);
|
||||
bool run();
|
||||
|
||||
protected:
|
||||
packet_t prePacket;
|
||||
packet_t postPacket;
|
||||
hsa_signal_t dummySignal;
|
||||
hsa_signal_t postSignal;
|
||||
|
||||
hsa_ven_amd_aqlprofile_1_00_pfn_t api;
|
||||
|
||||
virtual bool buildPackets() { return false; }
|
||||
virtual bool dumpData() { return false; }
|
||||
virtual bool initialize(int argc, char** argv);
|
||||
|
||||
private:
|
||||
enum {
|
||||
SLOT_PM4_SIZE_DW = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(uint32_t),
|
||||
SLOT_PM4_SIZE_AQLP = HSA_VEN_AMD_AQLPROFILE_LEGACY_PM4_PACKET_SIZE / sizeof(packet_t)
|
||||
};
|
||||
struct slot_pm4_s {
|
||||
uint32_t words[SLOT_PM4_SIZE_DW];
|
||||
};
|
||||
typedef std::atomic<slot_pm4_s> slot_pm4_t;
|
||||
|
||||
bool addPacket(const packet_t* packet);
|
||||
bool addPacketGfx8(const packet_t* packet);
|
||||
bool addPacketGfx9(const packet_t* packet);
|
||||
};
|
||||
|
||||
#endif // _TEST_PMGR_H_
|
||||
@@ -1,30 +0,0 @@
|
||||
#!/bin/sh
# Runs the test binary three times from the current directory:
# without profiling, with PMC enabled and with SQTT enabled.
# Every command is traced (set -x).
set -x

tbin=./test/ctrl

# Libraries are looked up in the current directory
CDIR=$(pwd)
export LD_LIBRARY_PATH="$CDIR"

export HSA_ENABLE_SDMA=0
export HSA_EMULATE_AQL=1

echo
echo "Run simple convolution kernel"
unset ROCR_ENABLE_PMC
unset ROCR_ENABLE_SQTT
"$tbin"

echo
echo "Run with PMC"
export ROCR_ENABLE_PMC=1
unset ROCR_ENABLE_SQTT
"$tbin"

echo
echo "Run with SQTT"
unset ROCR_ENABLE_PMC
export ROCR_ENABLE_SQTT=1
"$tbin"
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -1,81 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
********************************************************************************/
|
||||
|
||||
/**
|
||||
* SimpleConvolution is where each pixel of the output image
|
||||
* is the weighted sum of the neighborhood pixels of the input image
|
||||
* The neighborhood is defined by the dimensions of the mask and
|
||||
* weight of each neighbor is defined by the mask itself.
|
||||
* @param output Output matrix after performing convolution
|
||||
* @param input Input matrix on which convolution is to be performed
|
||||
* @param mask mask matrix using which convolution was to be performed
|
||||
* @param inputDimensions dimensions of the input matrix
|
||||
* @param maskDimensions dimensions of the mask matrix
|
||||
*/
|
||||
__kernel void simpleConvolution(__global uint* output,
                                __global uint* input,
                                __global float* mask,
                                const uint2 inputDimensions,
                                const uint2 maskDimensions) {
    // One work-item per output pixel
    const uint tid = get_global_id(0);

    const uint width = inputDimensions.x;
    const uint height = inputDimensions.y;
    const uint maskWidth = maskDimensions.x;
    const uint maskHeight = maskDimensions.y;

    // Pixel coordinates of this work-item
    const uint col = tid % width;
    const uint row = tid / width;

    // Half extents of the mask around the pixel
    const uint halfW = (maskWidth - 1) / 2;
    const uint halfH = (maskHeight - 1) / 2;

    // Neighbourhood window clamped to the image boundaries
    const uint left = (col >= halfW) ? (col - halfW) : 0;
    const uint right = ((col + halfW) < width) ? (col + halfW) : (width - 1);
    const uint top = (row >= halfH) ? (row - halfH) : 0;
    const uint bottom = ((row + halfH) < height) ? (row + halfH) : (height - 1);

    // Weighted sum over the window
    float acc = 0;
    for (uint i = left; i <= right; ++i) {
        for (uint j = top; j <= bottom; ++j) {
            // Position of (i, j) inside the mask
            const uint maskIndex = (j - (row - halfH)) * maskWidth + (i - (col - halfW));
            const uint index = j * width + i;
            acc += ((float)input[index] * mask[maskIndex]);
        }
    }

    // Adding 0.5 before the truncating conversion rounds to the nearest integer
    acc += 0.5f;
    output[tid] = (uint)acc;
}
|
||||
@@ -1,160 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <string.h>
|
||||
|
||||
#include "helper_funcs.h"
|
||||
#include "simple_convolution.h"
|
||||
|
||||
// Chooses the problem size (64x64 image, 3x3 mask) and registers the memory
// descriptors for the kernel arguments, input, output, local, mask and
// reference-output buffers.
SimpleConvolution::SimpleConvolution() {
  width_ = 64;
  height_ = 64;
  mask_width_ = 3;
  mask_height_ = mask_width_;

  // Image dimensions are adjusted to powers of two
  if (!isPowerOf2(width_)) width_ = roundToPowerOf2(width_);
  if (!isPowerOf2(height_)) height_ = roundToPowerOf2(height_);

  // Mask dimensions are made odd
  if ((mask_width_ % 2) == 0) mask_width_ += 1;
  if ((mask_height_ % 2) == 0) mask_height_ += 1;

  // Images below 256 pixels fall back to 64x64
  if (width_ * height_ < 256) {
    width_ = 64;
    height_ = 64;
  }

  const uint32_t image_bytes = width_ * height_ * sizeof(uint32_t);
  const uint32_t mask_bytes = mask_width_ * mask_height_ * sizeof(float);

  set_sys_descr(KERNARG_DES_ID, sizeof(kernel_args_t));
  set_sys_descr(INPUT_DES_ID, image_bytes);
  set_sys_descr(OUTPUT_DES_ID, image_bytes);
  set_local_descr(LOCAL_DES_ID, image_bytes);
  set_sys_descr(MASK_DES_ID, mask_bytes);
  set_sys_descr(REFOUT_DES_ID, image_bytes);
}
|
||||
|
||||
void SimpleConvolution::init() {
|
||||
std::cout << "SimpleConvolution::init :" << std::endl;
|
||||
|
||||
mem_descr_t input_des = get_descr(INPUT_DES_ID);
|
||||
mem_descr_t local_des = get_descr(LOCAL_DES_ID);
|
||||
mem_descr_t mask_des = get_descr(MASK_DES_ID);
|
||||
mem_descr_t refout_des = get_descr(REFOUT_DES_ID);
|
||||
mem_descr_t kernarg_des = get_descr(KERNARG_DES_ID);
|
||||
|
||||
uint32_t* input = (uint32_t*)input_des.ptr;
|
||||
uint32_t* output_local = (uint32_t*)local_des.ptr;
|
||||
float* mask = (float*)mask_des.ptr;
|
||||
kernel_args_t* kernel_args = (kernel_args_t*)kernarg_des.ptr;
|
||||
|
||||
// random initialisation of input
|
||||
fillRandom<uint32_t>(input, width_, height_, 0, 255);
|
||||
|
||||
// Fill a blurr filter or some other filter of your choice
|
||||
const float val = 1.0f / (mask_width_ * 2.0f - 1.0f);
|
||||
for (uint32_t i = 0; i < (mask_width_ * mask_height_); i++) {
|
||||
mask[i] = 0;
|
||||
}
|
||||
for (uint32_t i = 0; i < mask_width_; i++) {
|
||||
uint32_t y = mask_height_ / 2;
|
||||
mask[y * mask_width_ + i] = val;
|
||||
}
|
||||
for (uint32_t i = 0; i < mask_height_; i++) {
|
||||
uint32_t x = mask_width_ / 2;
|
||||
mask[i * mask_width_ + x] = val;
|
||||
}
|
||||
|
||||
// Print the INPUT array.
|
||||
printArray<uint32_t>("> Input[0]", input, width_, 1);
|
||||
printArray<float>("> Mask", mask, mask_width_, mask_height_);
|
||||
|
||||
// Fill the kernel args
|
||||
kernel_args->arg1 = output_local;
|
||||
kernel_args->arg2 = input;
|
||||
kernel_args->arg3 = mask;
|
||||
kernel_args->arg4 = width_;
|
||||
kernel_args->arg41 = height_;
|
||||
kernel_args->arg5 = mask_width_;
|
||||
kernel_args->arg51 = mask_height_;
|
||||
|
||||
// Calculate the reference output
|
||||
memset(refout_des.ptr, 0, refout_des.size);
|
||||
reference_impl((uint32_t*)refout_des.ptr, input, mask, width_, height_, mask_width_,
|
||||
mask_height_);
|
||||
}
|
||||
|
||||
// Prints the first row (width_ elements) of the output buffer.
void SimpleConvolution::print_output() const {
  uint32_t* const first_row = (uint32_t*)get_output_ptr();
  printArray<uint32_t>("> Output[0]", first_row, width_, 1);
}
|
||||
|
||||
// Host reference of the convolution: every output pixel is the mask-weighted
// sum of the input pixels in the window around it, clamped to the image,
// with 0.5 added before the truncating conversion to an integer.
// Always returns true.
bool SimpleConvolution::reference_impl(uint32_t* output, const uint32_t* input, const float* mask,
                                       const uint32_t width, const uint32_t height,
                                       const uint32_t mask_width, const uint32_t mask_height) {
  // Half extents of the mask around a pixel
  const uint32_t half_w = (mask_width - 1) / 2;
  const uint32_t half_h = (mask_height - 1) / 2;

  for (uint32_t col = 0; col < width; col++) {
    for (uint32_t row = 0; row < height; row++) {
      // Window around (col, row), clamped so that it stays inside the image
      const uint32_t left = (col >= half_w) ? (col - half_w) : 0;
      const uint32_t right = ((col + half_w) < width) ? (col + half_w) : (width - 1);
      const uint32_t top = (row >= half_h) ? (row - half_h) : 0;
      const uint32_t bottom = ((row + half_h) < height) ? (row + half_h) : (height - 1);

      // Weighted sum over the window
      float acc = 0;
      for (uint32_t i = left; i <= right; ++i) {
        for (uint32_t j = top; j <= bottom; ++j) {
          // Position of (i, j) inside the mask
          const uint32_t mask_idx = (j - (row - half_h)) * mask_width + (i - (col - half_w));
          const uint32_t index = j * width + i;
          acc += ((float)input[index] * mask[mask_idx]);
        }
      }

      // Round to the nearest integer
      acc += 0.5f;
      output[row * width + col] = uint32_t(acc);
    }
  }

  return true;
}
|
||||
@@ -1,90 +0,0 @@
|
||||
/******************************************************************************
|
||||
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list
|
||||
of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _SIMPLE_CONVOLUTION_H_
|
||||
#define _SIMPLE_CONVOLUTION_H_
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
#include "test_kernel.h"
|
||||
|
||||
// SimpleConvolution: Class implements OpenCL SimpleConvolution sample
|
||||
class SimpleConvolution : public TestKernel {
|
||||
public:
|
||||
// Constructor
|
||||
SimpleConvolution();
|
||||
|
||||
// Initialize method
|
||||
void init();
|
||||
|
||||
// Return number of compute elements
|
||||
uint32_t get_elements_count() const { return width_ * height_; }
|
||||
|
||||
// Print output
|
||||
void print_output() const;
|
||||
|
||||
// Return name
|
||||
std::string Name() const { return std::string("simpleConvolution"); }
|
||||
|
||||
private:
|
||||
// Local kernel arguments declaration
|
||||
struct kernel_args_t {
|
||||
void* arg1;
|
||||
void* arg2;
|
||||
void* arg3;
|
||||
uint32_t arg4;
|
||||
uint32_t arg41;
|
||||
uint32_t arg5;
|
||||
uint32_t arg51;
|
||||
};
|
||||
|
||||
// Width of the Input array
|
||||
uint32_t width_;
|
||||
|
||||
// Height of the Input array
|
||||
uint32_t height_;
|
||||
|
||||
// Width of the mask
|
||||
uint32_t mask_width_;
|
||||
|
||||
// Height of the mask
|
||||
uint32_t mask_height_;
|
||||
|
||||
// Reference CPU implementation of Simple Convolution
|
||||
// @param output Output matrix after performing convolution
|
||||
// @param input Input matrix on which convolution is to be performed
|
||||
// @param mask mask matrix with which the convolution is performed
|
||||
// @param width, height dimensions of the input matrix
|
||||
// @param maskWidth, maskHeight dimensions of the mask matrix
|
||||
// @return bool true on success and false on failure
|
||||
bool reference_impl(uint32_t* output, const uint32_t* input, const float* mask,
|
||||
const uint32_t width, const uint32_t height, const uint32_t maskWidth,
|
||||
const uint32_t maskHeight);
|
||||
};
|
||||
|
||||
#endif // _SIMPLE_CONVOLUTION_H_
|
||||
@@ -1,154 +0,0 @@
|
||||
module &m:1:0:$full:$large:$default;
|
||||
extension "amd:gcn";
|
||||
extension "IMAGE";
|
||||
|
||||
decl prog function &abort()();
|
||||
|
||||
prog kernel &__OpenCL_SimpleConvolution(kernarg_u64 %__global_offset_0,
|
||||
kernarg_u64 %output,
|
||||
kernarg_u64 %input,
|
||||
kernarg_u64 %mask,
|
||||
kernarg_u32 %inputDimensions[2],
|
||||
kernarg_u32 %maskDimensions[2]) {
|
||||
|
||||
pragma "AMD RTI", "ARGSTART:__OpenCL_SimpleConvolution";
|
||||
pragma "AMD RTI", "version:3:1:104";
|
||||
pragma "AMD RTI", "device:generic";
|
||||
pragma "AMD RTI", "uniqueid:1024";
|
||||
pragma "AMD RTI", "memory:private:0";
|
||||
pragma "AMD RTI", "memory:region:0";
|
||||
pragma "AMD RTI", "memory:local:0";
|
||||
pragma "AMD RTI", "value:__global_offset_0:u64:1:1:0";
|
||||
pragma "AMD RTI", "pointer:output:u32:1:1:96:uav:7:4:RW:0:0:0";
|
||||
pragma "AMD RTI", "pointer:input:u32:1:1:112:uav:7:4:RW:0:0:0";
|
||||
pragma "AMD RTI", "pointer:mask:float:1:1:128:uav:7:4:RW:0:0:0";
|
||||
pragma "AMD RTI", "value:inputDimensions:u32:2:1:144";
|
||||
pragma "AMD RTI", "constarg:4:inputDimensions";
|
||||
pragma "AMD RTI", "value:maskDimensions:u32:2:1:160";
|
||||
pragma "AMD RTI", "constarg:5:maskDimensions";
|
||||
pragma "AMD RTI", "function:1:0";
|
||||
pragma "AMD RTI", "memory:64bitABI";
|
||||
pragma "AMD RTI", "privateid:8";
|
||||
pragma "AMD RTI", "enqueue_kernel:0";
|
||||
pragma "AMD RTI", "kernel_index:0";
|
||||
pragma "AMD RTI", "reflection:0:size_t";
|
||||
pragma "AMD RTI", "reflection:1:uint*";
|
||||
pragma "AMD RTI", "reflection:2:uint*";
|
||||
pragma "AMD RTI", "reflection:3:float*";
|
||||
pragma "AMD RTI", "reflection:4:uint2";
|
||||
pragma "AMD RTI", "reflection:5:uint2";
|
||||
pragma "AMD RTI", "ARGEND:__OpenCL_SimpleConvolution";
|
||||
|
||||
@__OpenCL_SimpleConvolution_Entry:
|
||||
|
||||
// BB#0: // %entry
|
||||
|
||||
workitemabsid_u32 $s6, 0;
|
||||
cvt_u64_u32 $d0, $s6;
|
||||
ld_kernarg_align(8)_width(all)_u64 $d4, [%__global_offset_0];
|
||||
add_u64 $d0, $d0, $d4;
|
||||
cvt_u32_u64 $s5, $d0;
|
||||
ld_v2_kernarg_align(4)_width(all)_u32 ($s0, $s4), [%inputDimensions];
|
||||
ld_v2_kernarg_align(4)_width(all)_u32 ($s1, $s9), [%maskDimensions];
|
||||
rem_u32 $s7, $s5, $s0;
|
||||
add_u32 $s2, $s1, 4294967295;
|
||||
shr_u32 $s8, $s2, 1;
|
||||
add_u32 $s2, $s7, $s8;
|
||||
add_u32 $s3, $s0, 4294967295;
|
||||
cmp_ge_b1_u32 $c0, $s2, $s0;
|
||||
cmov_b32 $s2, $c0, $s3, $s2;
|
||||
sub_u32 $s3, $s7, $s8;
|
||||
cmp_lt_b1_u32 $c0, $s7, $s8;
|
||||
cmov_b32 $s3, $c0, 0, $s3;
|
||||
ld_kernarg_align(8)_width(all)_u64 $d1, [%output];
|
||||
cmp_le_b1_u32 $c0, $s3, $s2;
|
||||
cbr_b1 $c0, @BB0_2;
|
||||
|
||||
// BB#1:
|
||||
|
||||
mov_b32 $s6, 0;
|
||||
br @BB0_6;
|
||||
|
||||
// @BB0_2: // %for.cond32.preheader.lr.ph
|
||||
|
||||
@BB0_2:
|
||||
|
||||
div_u32 $s5, $s5, $s0;
|
||||
add_u32 $s9, $s9, 4294967295;
|
||||
shr_u32 $s9, $s9, 1;
|
||||
add_u32 $s10, $s5, $s9;
|
||||
add_u32 $s11, $s4, 4294967295;
|
||||
cmp_ge_b1_u32 $c0, $s10, $s4;
|
||||
cmov_b32 $s4, $c0, $s11, $s10;
|
||||
sub_u32 $s10, $s5, $s9;
|
||||
cmp_lt_b1_u32 $c0, $s5, $s9;
|
||||
cmov_b32 $s5, $c0, 0, $s10;
|
||||
ld_kernarg_align(8)_width(all)_u64 $d2, [%mask];
|
||||
ld_kernarg_align(8)_width(all)_u64 $d3, [%input];
|
||||
cvt_u64_u32 $d5, $s6;
|
||||
add_u64 $d4, $d4, $d5;
|
||||
cvt_u32_u64 $s6, $d4;
|
||||
div_u32 $s6, $s6, $s0;
|
||||
max_u32 $s10, $s9, $s6;
|
||||
sub_u32 $s12, $s10, $s6;
|
||||
max_u32 $s11, $s7, $s8;
|
||||
mov_b32 $s6, 0;
|
||||
mad_u32 $s12, $s1, $s12, $s11;
|
||||
sub_u32 $s7, $s12, $s7;
|
||||
sub_u32 $s9, $s10, $s9;
|
||||
mad_u32 $s9, $s0, $s9, $s11;
|
||||
sub_u32 $s8, $s9, $s8;
|
||||
|
||||
// @BB0_3: // %for.cond32.preheader
|
||||
|
||||
@BB0_3:
|
||||
|
||||
cmp_gt_b1_u32 $c0, $s5, $s4;
|
||||
mov_b32 $s9, $s7;
|
||||
mov_b32 $s10, $s8;
|
||||
mov_b32 $s11, $s5;
|
||||
cbr_b1 $c0, @BB0_5;
|
||||
|
||||
// @BB0_4: // %for.body35
|
||||
|
||||
@BB0_4:
|
||||
|
||||
cvt_u64_u32 $d4, $s9;
|
||||
shl_u64 $d4, $d4, 2;
|
||||
add_u64 $d4, $d2, $d4;
|
||||
ld_global_align(4)_f32 $s12, [$d4];
|
||||
cvt_u64_u32 $d4, $s10;
|
||||
shl_u64 $d4, $d4, 2;
|
||||
add_u64 $d4, $d3, $d4;
|
||||
ld_global_align(4)_u32 $s13, [$d4];
|
||||
cvt_f32_u32 $s13, $s13;
|
||||
mul_ftz_f32 $s12, $s13, $s12;
|
||||
add_u32 $s9, $s9, $s1;
|
||||
add_u32 $s10, $s10, $s0;
|
||||
add_u32 $s11, $s11, 1;
|
||||
add_ftz_f32 $s6, $s6, $s12;
|
||||
cmp_le_b1_u32 $c0, $s11, $s4;
|
||||
cbr_b1 $c0, @BB0_4;
|
||||
|
||||
// @BB0_5: // %for.inc48
|
||||
|
||||
@BB0_5:
|
||||
|
||||
add_u32 $s7, $s7, 1;
|
||||
add_u32 $s8, $s8, 1;
|
||||
add_u32 $s3, $s3, 1;
|
||||
cmp_le_b1_u32 $c0, $s3, $s2;
|
||||
cbr_b1 $c0, @BB0_3;
|
||||
|
||||
// @BB0_6: // %for.end50
|
||||
|
||||
@BB0_6:
|
||||
|
||||
and_b64 $d0, $d0, 4294967295;
|
||||
shl_u64 $d0, $d0, 2;
|
||||
add_u64 $d0, $d1, $d0;
|
||||
add_ftz_f32 $s0, $s6, 0F3f000000;
|
||||
cvt_ftz_u32_f32 $s0, $s0;
|
||||
st_global_align(4)_u32 $s0, [$d0];
|
||||
ret;
|
||||
};
|
||||
@@ -1,15 +0,0 @@
|
||||
#
|
||||
# Source files for the ROCr utils static library.
# UTIL_LIB (the target name) is expected to be set by the including scope.
#
file( GLOB MODULE_SRC "*.cpp" )

#
# Build Utils as a Static Library object
#
add_library( ${UTIL_LIB} STATIC ${MODULE_SRC} )

#
# Header files include path(s).
# ROCR_INC_DIR is read from the environment at configure time; the path is
# attached to the target (and its consumers) and skipped when the variable
# is not set.
#
if ( NOT "$ENV{ROCR_INC_DIR}" STREQUAL "" )
  target_include_directories ( ${UTIL_LIB} PUBLIC $ENV{ROCR_INC_DIR} )
endif ()

#
# System libraries. PUBLIC keeps them on the link line of consumers, as the
# keyword-less signature did.
#
target_link_libraries( ${UTIL_LIB} PUBLIC c stdc++ dl pthread rt )
|
||||
@@ -1,230 +0,0 @@
|
||||
/**********************************************************************
|
||||
Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted
|
||||
provided that the following conditions are met:
|
||||
|
||||
• Redistributions of source code must retain the above copyright notice, this list of
|
||||
conditions and the following disclaimer.
|
||||
• Redistributions in binary form must reproduce the above copyright notice, this list of
|
||||
conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
********************************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <cmath>
|
||||
#include <time.h>
|
||||
|
||||
#include "helper_funcs.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
// Writes errorMsg to standard output, prefixed with "Error: ".
void error(std::string errorMsg) {
  std::cout << "Error: ";
  std::cout << errorMsg << std::endl;
}
|
||||
|
||||
/*
 * Prints a width x height array stored row by row.
 * The header is printed first (followed by " :"), then one line per row,
 * each starting with "> " and listing that row's elements separated by spaces.
 */
template <typename T>
void printArray(const std::string header, const T* data, const int width, const int height) {
  std::cout << header << " :\n";

  for (int row = 0; row < height; ++row) {
    std::cout << "> ";
    for (int col = 0; col < width; ++col) {
      const T& element = data[row * width + col];
      std::cout << element << " ";
    }
    std::cout << "\n";
  }
}
|
||||
|
||||
template <typename T>
|
||||
bool fillRandom(T* arrayPtr, const int width, const int height, const T rangeMin, const T rangeMax,
|
||||
unsigned int seed) {
|
||||
if (!arrayPtr) {
|
||||
error("Cannot fill array. NULL pointer.");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!seed) seed = (unsigned int)time(NULL);
|
||||
|
||||
srand(seed);
|
||||
double range = double(rangeMax - rangeMin) + 1.0;
|
||||
|
||||
/* random initialisation of input */
|
||||
for (int i = 0; i < height; i++)
|
||||
for (int j = 0; j < width; j++) {
|
||||
int index = i * width + j;
|
||||
arrayPtr[index] = rangeMin + T(range * rand() / (RAND_MAX + 1.0));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T> bool fillPos(T* arrayPtr, const int width, const int height) {
|
||||
if (!arrayPtr) {
|
||||
error("Cannot fill array. NULL pointer.");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* initialisation of input with positions*/
|
||||
for (T i = 0; i < height; i++)
|
||||
for (T j = 0; j < width; j++) {
|
||||
T index = i * width + j;
|
||||
arrayPtr[index] = index;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool fillConstant(T* arrayPtr, const int width, const int height, const T val) {
|
||||
if (!arrayPtr) {
|
||||
error("Cannot fill array. NULL pointer.");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* initialisation of input with constant value*/
|
||||
for (int i = 0; i < height; i++)
|
||||
for (int j = 0; j < width; j++) {
|
||||
int index = i * width + j;
|
||||
arrayPtr[index] = val;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Rounds val up to the next power of two; a value that already is a power of
 * two is returned unchanged.
 *
 * Works by smearing the highest set bit of (val - 1) into every lower bit and
 * adding one. The shift distance doubles each step (1, 2, 4, ...) and stops
 * below the bit width of T, so every bit of T is covered and no shift ever
 * reaches or exceeds the width of the type.
 */
template <typename T> T roundToPowerOf2(T val) {
  // Bit width of T, assuming 8-bit bytes
  const int bits = static_cast<int>(sizeof(T)) * 8;

  val--;
  for (int shift = 1; shift < bits; shift <<= 1) val |= val >> shift;
  val++;

  return val;
}
|
||||
|
||||
/*
 * Returns true when val is a power of two.
 * Zero and negative values are not powers of two.
 */
template <typename T> bool isPowerOf2(T val) {
  const long long wide = val;
  if (wide == 0) {
    return false;
  }

  // wide & -wide isolates the lowest set bit; a power of two has only that bit
  const long long lowest_bit = wide & (-wide);
  return lowest_bit == wide;
}
|
||||
|
||||
/*
 * Formats t as a string after applying the stream manipulator r
 * (for example std::dec or std::hex) to the output stream.
 */
template <typename T> std::string toString(T t, std::ios_base& (*r)(std::ios_base&)) {
  std::ostringstream formatted;
  formatted << r;
  formatted << t;
  return formatted.str();
}
|
||||
|
||||
/*
 * Compares data against refData using the relative L2 error
 *   sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2))
 * taken over all `length` elements, starting at index 0.
 *
 * Returns true when that relative error is below epsilon.
 * Returns false when the sum of squares of the reference is below 1e-7,
 * because the error cannot be normalized against it.
 */
bool compare(const float* refData, const float* data, const int length, const float epsilon) {
  float sqError = 0.0f;
  float sqRef = 0.0f;

  for (int i = 0; i < length; ++i) {
    const float diff = refData[i] - data[i];
    sqError += diff * diff;
    sqRef += refData[i] * refData[i];
  }

  if (::fabs(sqRef) < 1e-7f) {
    return false;
  }

  const float normRef = ::sqrtf(sqRef);
  const float normError = ::sqrtf(sqError);
  const float relError = normError / normRef;

  return relError < epsilon;
}
|
||||
|
||||
/*
 * Compares data against refData using the relative L2 error
 *   sqrt(sum((ref - data)^2)) / sqrt(sum(ref^2))
 * taken over all `length` elements, starting at index 0.
 *
 * Returns true when that relative error is below epsilon.
 * Returns false when the sum of squares of the reference is below 1e-7,
 * because the error cannot be normalized against it.
 */
bool compare(const double* refData, const double* data, const int length, const double epsilon) {
  double sqError = 0.0;
  double sqRef = 0.0;

  for (int i = 0; i < length; ++i) {
    const double diff = refData[i] - data[i];
    sqError += diff * diff;
    sqRef += refData[i] * refData[i];
  }

  if (::fabs(sqRef) < 1e-7) {
    return false;
  }

  const double normRef = ::sqrt(sqRef);
  const double normError = ::sqrt(sqError);
  const double relError = normError / normRef;

  return relError < epsilon;
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Template Instantiations
|
||||
/////////////////////////////////////////////////////////////////
|
||||
template void printArray<short>(const std::string, const short*, int, int);
|
||||
template void printArray<unsigned char>(const std::string, const unsigned char*, int, int);
|
||||
template void printArray<unsigned int>(const std::string, const unsigned int*, int, int);
|
||||
template void printArray<int>(const std::string, const int*, int, int);
|
||||
template void printArray<long>(const std::string, const long*, int, int);
|
||||
template void printArray<float>(const std::string, const float*, int, int);
|
||||
template void printArray<double>(const std::string, const double*, int, int);
|
||||
|
||||
template bool fillRandom<unsigned char>(unsigned char* arrayPtr, const int width, const int height,
|
||||
unsigned char rangeMin, unsigned char rangeMax,
|
||||
unsigned int seed);
|
||||
template bool fillRandom<unsigned int>(unsigned int* arrayPtr, const int width, const int height,
|
||||
unsigned int rangeMin, unsigned int rangeMax,
|
||||
unsigned int seed);
|
||||
template bool fillRandom<int>(int* arrayPtr, const int width, const int height, int rangeMin,
|
||||
int rangeMax, unsigned int seed);
|
||||
template bool fillRandom<long>(long* arrayPtr, const int width, const int height, long rangeMin,
|
||||
long rangeMax, unsigned int seed);
|
||||
template bool fillRandom<float>(float* arrayPtr, const int width, const int height, float rangeMin,
|
||||
float rangeMax, unsigned int seed);
|
||||
template bool fillRandom<double>(double* arrayPtr, const int width, const int height,
|
||||
double rangeMin, double rangeMax, unsigned int seed);
|
||||
|
||||
template short roundToPowerOf2<short>(short val);
|
||||
template unsigned int roundToPowerOf2<unsigned int>(unsigned int val);
|
||||
template int roundToPowerOf2<int>(int val);
|
||||
template long roundToPowerOf2<long>(long val);
|
||||
|
||||
template bool isPowerOf2<short>(short val);
|
||||
template bool isPowerOf2<unsigned int>(unsigned int val);
|
||||
template bool isPowerOf2<int>(int val);
|
||||
template bool isPowerOf2<long>(long val);
|
||||
|
||||
template <> bool fillPos<short>(short* arrayPtr, const int width, const int height);
|
||||
template <> bool fillPos<unsigned int>(unsigned int* arrayPtr, const int width, const int height);
|
||||
template <> bool fillPos<int>(int* arrayPtr, const int width, const int height);
|
||||
template <> bool fillPos<long>(long* arrayPtr, const int width, const int height);
|
||||
|
||||
template <>
|
||||
bool fillConstant<short>(short* arrayPtr, const int width, const int height, const short val);
|
||||
template <>
|
||||
bool fillConstant(unsigned int* arrayPtr, const int width, const int height,
|
||||
const unsigned int val);
|
||||
template <> bool fillConstant(int* arrayPtr, const int width, const int height, const int val);
|
||||
template <> bool fillConstant(long* arrayPtr, const int width, const int height, const long val);
|
||||
template <> bool fillConstant(long* arrayPtr, const int width, const int height, const long val);
|
||||
template <> bool fillConstant(long* arrayPtr, const int width, const int height, const long val);
|
||||
|
||||
template std::string toString<char>(char t, std::ios_base& (*r)(std::ios_base&));
|
||||
template std::string toString<short>(short t, std::ios_base& (*r)(std::ios_base&));
|
||||
template std::string toString<unsigned int>(unsigned int t, std::ios_base& (*r)(std::ios_base&));
|
||||
template std::string toString<int>(int t, std::ios_base& (*r)(std::ios_base&));
|
||||
template std::string toString<long>(long t, std::ios_base& (*r)(std::ios_base&));
|
||||
template std::string toString<float>(float t, std::ios_base& (*r)(std::ios_base&));
|
||||
template std::string toString<double>(double t, std::ios_base& (*r)(std::ios_base&));
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user