Merge branch 'sp3-llvm-transistion' into amd-staging
Transition KFDTest to use open source LLVM compiler instead of SP3
compiler
Change-Id: I26fff6a958bc48cb1f5509a11ec194d2ececf0ce
[ROCm/ROCR-Runtime commit: b9651d3118]
Этот коммит содержится в:
@@ -95,12 +95,42 @@ endif()
|
||||
|
||||
message ( "Find libhsakmt at ${HSAKMT_LIBRARY_DIRS}" )
|
||||
|
||||
set ( SP3_DIR ${PROJECT_SOURCE_DIR}/sp3 )
|
||||
if ( POLICY CMP0074 )
|
||||
cmake_policy( SET CMP0074 NEW )
|
||||
endif()
|
||||
|
||||
find_path( LIGHTNING_CMAKE_DIR NAMES LLVMConfig.cmake
|
||||
PATHS $ENV{OUT_DIR}/llvm/lib/cmake/llvm NO_CACHE NO_DEFAULT_PATH)
|
||||
|
||||
if ( DEFINED LIGHTNING_CMAKE_DIR AND EXISTS ${LIGHTNING_CMAKE_DIR} )
|
||||
set ( LLVM_DIR ${LIGHTNING_CMAKE_DIR} )
|
||||
else()
|
||||
message( WARNING "Couldn't find Lightning build. "
|
||||
"Attempting to use system LLVM install..." )
|
||||
endif()
|
||||
|
||||
find_package( LLVM REQUIRED CONFIG )
|
||||
|
||||
if( ${LLVM_PACKAGE_VERSION} VERSION_LESS "7.0" )
|
||||
message( FATAL_ERROR "Requires LLVM 7.0 or greater "
|
||||
"(found ${LLVM_PACKAGE_VERSION})" )
|
||||
elseif( ${LLVM_PACKAGE_VERSION} VERSION_LESS "14.0" )
|
||||
message( WARNING "Not using latest LLVM version. "
|
||||
"Some ASIC targets may not work!" )
|
||||
endif()
|
||||
|
||||
message( STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}" )
|
||||
message( STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}" )
|
||||
|
||||
include_directories(${LLVM_INCLUDE_DIRS})
|
||||
separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
|
||||
add_definitions(${LLVM_DEFINITIONS_LIST})
|
||||
|
||||
llvm_map_components_to_libnames(llvm_libs AMDGPUAsmParser Core Support)
|
||||
|
||||
include_directories(${PROJECT_SOURCE_DIR}/gtest-1.6.0)
|
||||
include_directories(${PROJECT_SOURCE_DIR}/include)
|
||||
include_directories(${PROJECT_SOURCE_DIR}/../../include)
|
||||
include_directories(${SP3_DIR})
|
||||
|
||||
include_directories(${DRM_INCLUDE_DIRS})
|
||||
|
||||
@@ -112,12 +142,8 @@ set (SRC_FILES gtest-1.6.0/gtest-all.cpp
|
||||
src/Dispatch.cpp
|
||||
src/GoogleTestExtension.cpp
|
||||
src/IndirectBuffer.cpp
|
||||
src/IsaGenerator.cpp
|
||||
src/IsaGenerator_Aldebaran.cpp
|
||||
src/IsaGenerator_Gfx10.cpp
|
||||
src/IsaGenerator_Gfx72.cpp
|
||||
src/IsaGenerator_Gfx8.cpp
|
||||
src/IsaGenerator_Gfx9.cpp
|
||||
src/Assemble.cpp
|
||||
src/ShaderStore.cpp
|
||||
src/LinuxOSWrapper.cpp
|
||||
src/PM4Packet.cpp
|
||||
src/PM4Queue.cpp
|
||||
@@ -143,6 +169,7 @@ set (SRC_FILES gtest-1.6.0/gtest-all.cpp
|
||||
src/KFDDBGTest.cpp
|
||||
src/KFDGWSTest.cpp
|
||||
src/KFDIPCTest.cpp
|
||||
src/KFDASMTest.cpp
|
||||
|
||||
src/KFDEvictTest.cpp
|
||||
src/KFDHWSTest.cpp
|
||||
@@ -163,7 +190,7 @@ message( STATUS "PROJECT_SOURCE_DIR:" ${PROJECT_SOURCE_DIR} )
|
||||
|
||||
if ( "${CMAKE_C_COMPILER_VERSION}" STRGREATER "4.8.0")
|
||||
## Add --enable-new-dtags to generate DT_RUNPATH
|
||||
set ( CMAKE_CXX_FLAGS "-std=gnu++11 -Wl,--enable-new-dtags" )
|
||||
set ( CMAKE_CXX_FLAGS "-std=gnu++14 -Wl,--enable-new-dtags" )
|
||||
endif()
|
||||
if ( "${CMAKE_BUILD_TYPE}" STREQUAL Release )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2" )
|
||||
@@ -181,11 +208,10 @@ endif ()
|
||||
# The modules found by pkg_check_modules() in the default pkg config
|
||||
# path do not need to use link_directories() here.
|
||||
link_directories(${HSAKMT_LIBRARY_DIRS})
|
||||
link_directories(${SP3_DIR})
|
||||
|
||||
add_executable(kfdtest ${SRC_FILES})
|
||||
|
||||
target_link_libraries(kfdtest ${HSAKMT_LIBRARIES} ${DRM_LDFLAGS} ${DRM_AMDGPU_LDFLAGS} pthread m stdc++ rt amdsp3 numa)
|
||||
target_link_libraries(kfdtest ${HSAKMT_LIBRARIES} ${DRM_LDFLAGS} ${DRM_AMDGPU_LDFLAGS} ${llvm_libs} pthread m stdc++ rt numa)
|
||||
|
||||
configure_file ( scripts/kfdtest.exclude kfdtest.exclude COPYONLY )
|
||||
configure_file ( scripts/run_kfdtest.sh run_kfdtest.sh COPYONLY )
|
||||
|
||||
@@ -224,26 +224,10 @@ FILTER[aldebaran]=\
|
||||
"KFDMemoryTest.PtraceAccess:"\
|
||||
"KFDMemoryTest.DeviceHdpFlush"
|
||||
|
||||
# SP3 Compiler needs to be updated for GFX10. Temporarily disable all tests
|
||||
# that require shader compiler
|
||||
# Adding KFDSVMEvictTest as SVM/HMM was never validated on GFX10
|
||||
TEMP_GFX10_BLACKLIST=\
|
||||
"KFDMemoryTest.FlatScratchAccess:"\
|
||||
"KFDMemoryTest.PtraceAccessInvisibleVram:"\
|
||||
"KFDQMTest.QueuePriorityOnDifferentPipe:"\
|
||||
"KFDQMTest.QueuePriorityOnSamePipe:"\
|
||||
"KFDCWSRTest.BasicTest:"\
|
||||
"KFDQMTest.BasicCuMaskingEven:"\
|
||||
"KFDEvictTest.QueueTest:"\
|
||||
"KFDMemoryTest.MapUnmapToNodes:"\
|
||||
"KFDMemoryTest.HostHdpFlush:"\
|
||||
"KFDMemoryTest.DeviceHdpFlush:"\
|
||||
"KFDSVMEvictTest.*"
|
||||
|
||||
FILTER[navi10]=\
|
||||
"$BLACKLIST_ALL_ASICS:"\
|
||||
"$TEMP_GFX10_BLACKLIST:"\
|
||||
"KFDMemoryTest.MMBench"
|
||||
"KFDMemoryTest.MMBench:"\
|
||||
"KFDSVMEvictTest.*"
|
||||
|
||||
# Need to verify the following failed tests on another machine:
|
||||
# Exceptions not being received during exception tests
|
||||
@@ -254,42 +238,42 @@ FILTER[navi12]=\
|
||||
"KFDExceptionTest.*:"\
|
||||
"KFDPerfCountersTest.*:"\
|
||||
"KFDPerformanceTest.P2PBandWidthTest:"\
|
||||
"$TEMP_GFX10_BLACKLIST"
|
||||
"KFDSVMEvictTest.*"
|
||||
|
||||
FILTER[navi14]=\
|
||||
"$BLACKLIST_ALL_ASICS:"\
|
||||
"$TEMP_GFX10_BLACKLIST"
|
||||
"KFDSVMEvictTest.*"
|
||||
|
||||
FILTER[sienna_cichlid]=\
|
||||
"$BLACKLIST_ALL_ASICS:"\
|
||||
"$TEMP_GFX10_BLACKLIST:"\
|
||||
"KFDQMTest.BasicCuMaskingEven:"\
|
||||
"KFDDBGTest.*:"\
|
||||
"KFDPerfCountersTest.*:"\
|
||||
"KFDSVMEvictTest.*"
|
||||
|
||||
FILTER[navy_flounder]=\
|
||||
"$BLACKLIST_ALL_ASICS:"\
|
||||
"$TEMP_GFX10_BLACKLIST:"\
|
||||
"KFDQMTest.BasicCuMaskingEven:"\
|
||||
"KFDDBGTest.*:"\
|
||||
"KFDPerfCountersTest.*:"\
|
||||
"KFDSVMEvictTest.*"
|
||||
|
||||
FILTER[dimgrey_cavefish]=\
|
||||
"$BLACKLIST_ALL_ASICS:"\
|
||||
"$TEMP_GFX10_BLACKLIST:"\
|
||||
"KFDQMTest.BasicCuMaskingEven:"\
|
||||
"KFDDBGTest.*:"\
|
||||
"KFDPerfCountersTest.*:"\
|
||||
"KFDSVMEvictTest.*"
|
||||
|
||||
FILTER[beige_goby]=\
|
||||
"$BLACKLIST_ALL_ASICS:"\
|
||||
"$TEMP_GFX10_BLACKLIST:"\
|
||||
"KFDQMTest.BasicCuMaskingEven:"\
|
||||
"KFDDBGTest.*:"\
|
||||
"KFDPerfCountersTest.*:"\
|
||||
"KFDSVMEvictTest.*"
|
||||
|
||||
FILTER[yellow_carp]=\
|
||||
"$BLACKLIST_ALL_ASICS:"\
|
||||
"$TEMP_GFX10_BLACKLIST:"\
|
||||
"KFDQMTest.BasicCuMaskingEven:"\
|
||||
"KFDIPCTest.CMABasicTest"
|
||||
"KFDIPCTest.CMABasicTest:"\
|
||||
"KFDSVMEvictTest.*"
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
Note: This folder is primarily intended for AMD internal developers.
|
||||
|
||||
The folder lib_helper contains the script to generate SP3 library libamdsp3.a
|
||||
and the associated header files in the current folder for kfdtest to use.
|
||||
cmake is required for the script to run. Just run ./build_sp3.sh after setting
|
||||
up the environment variables (source build/envsetup.sh).
|
||||
@@ -1,79 +0,0 @@
|
||||
#
|
||||
# Copyright (C) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
#
|
||||
|
||||
cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
|
||||
|
||||
project(amdsp3)
|
||||
|
||||
#set ( CMAKE_VERBOSE_MAKEFILE on )
|
||||
|
||||
find_package(PkgConfig)
|
||||
|
||||
set ( P4_PATH $ENV{WORK_ROOT}/p4/driver/drivers )
|
||||
|
||||
set ( SCLIB_SRC ${PROJECT_SOURCE_DIR} )
|
||||
#if( DEFINED ENV{SCLIB_SRC} )
|
||||
# set ( SCLIB_SRC $ENV{SCLIB_SRC} )
|
||||
#else()
|
||||
# set ( SCLIB_SRC ${P4_PATH}/sc/Chip )
|
||||
#endif()
|
||||
|
||||
include_directories(${SCLIB_SRC}/sp3)
|
||||
#include_directories(${SCLIB_SRC}/sp3/release_headers)
|
||||
include_directories(${SCLIB_SRC}/sp3/gen)
|
||||
|
||||
set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-asic.c )
|
||||
set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-dispatch.c )
|
||||
set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-eval.c )
|
||||
set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-gc.c )
|
||||
set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-int.c )
|
||||
set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-lib.c )
|
||||
set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-native.c )
|
||||
set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-cipher.c )
|
||||
set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-vm.c )
|
||||
|
||||
aux_source_directory(${SCLIB_SRC}/sp3/gen SRC_FILES)
|
||||
aux_source_directory(${SCLIB_SRC}/sp3/backend/si/lib SRC_FILES)
|
||||
aux_source_directory(${SCLIB_SRC}/sp3/backend/ci/lib SRC_FILES)
|
||||
aux_source_directory(${SCLIB_SRC}/sp3/backend/gfx8/lib SRC_FILES)
|
||||
aux_source_directory(${SCLIB_SRC}/sp3/backend/gfx81/lib SRC_FILES)
|
||||
aux_source_directory(${SCLIB_SRC}/sp3/backend/gfx9/lib SRC_FILES)
|
||||
aux_source_directory(${SCLIB_SRC}/sp3/backend/gfx10/lib SRC_FILES)
|
||||
aux_source_directory(${SCLIB_SRC}/sp3/backend/aldbrn/lib SRC_FILES)
|
||||
aux_source_directory(${SCLIB_SRC}/sp3/backend/gfx81/arch SRC_FILES)
|
||||
aux_source_directory(${SCLIB_SRC}/sp3/backend/gfx9/arch SRC_FILES)
|
||||
aux_source_directory(${SCLIB_SRC}/sp3/backend/gfx10/arch SRC_FILES)
|
||||
aux_source_directory(${SCLIB_SRC}/sp3/backend/aldbrn/arch SRC_FILES)
|
||||
|
||||
|
||||
message( STATUS "PROJECT_SOURCE_DIR:" ${PROJECT_SOURCE_DIR} )
|
||||
#message( STATUS "SRC_FILES: ")
|
||||
#foreach(file ${SRC_FILES})
|
||||
# message(STATUS "${file}")
|
||||
#endforeach()
|
||||
|
||||
set ( CMAKE_C_FLAGS "-DSP3_STATIC_LIB -Wno-error -DPUBLIC_RELEASE -DLITTLEENDIAN_CPU -fPIC -DGFX101_BUILD -DALDBRN_BUILD" )
|
||||
|
||||
add_library(amdsp3 ${SRC_FILES})
|
||||
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
#
|
||||
# Copyright (C) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
#
|
||||
|
||||
#!/bin/bash
|
||||
|
||||
if [ "$KFDTEST_ROOT" == "" ] || [ "$P4_ROOT" == "" ]; then
|
||||
echo "Environment variables should be set before running this script"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cd $KFDTEST_ROOT/sp3/lib_helper
|
||||
|
||||
SP3_PROJECT=$P4_ROOT/driver/drivers/sc/Chip/
|
||||
LIB_OUTPUT=$KFDTEST_ROOT/sp3/
|
||||
|
||||
cp CMakeLists_sp3.txt $SP3_PROJECT/CMakeLists.txt
|
||||
|
||||
mkdir -p build
|
||||
echo "Building SP3 lib"
|
||||
pushd build
|
||||
cmake $SP3_PROJECT/
|
||||
make
|
||||
popd
|
||||
|
||||
rsync --progress -a build/libamdsp3.a $LIB_OUTPUT
|
||||
# Put the intermediate header files in the current folder for further processing
|
||||
rsync --progress -a $SP3_PROJECT/sp3/public/lib/sp3.h .
|
||||
|
||||
# Remove the build folder and the CMakeLists.txt that was copied into the SP3 source folder
|
||||
rm -r build
|
||||
rm $SP3_PROJECT/CMakeLists.txt
|
||||
|
||||
# Replace the license statement in the header files
|
||||
{ cat AMD_opensource_license.txt; sed -e '1,/#ifndef/ { /#ifndef/b; d }' sp3.h; } > $LIB_OUTPUT/sp3.h
|
||||
|
||||
# Delete the intermediate header files
|
||||
rm sp3.h
|
||||
@@ -1,643 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __SP3_H__
|
||||
#define __SP3_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/// @file sp3.h
|
||||
/// @brief sp3 API
|
||||
#include <stdint.h>
|
||||
|
||||
// Export tags
|
||||
#define SP3_EXPORT
|
||||
|
||||
|
||||
/// @defgroup sp3main SP3 Main API
|
||||
///
|
||||
/// Main API to assemble and disassemble SP3 shaders.
|
||||
///
|
||||
/// @{
|
||||
|
||||
|
||||
/// Valid shader stages.
|
||||
enum sp3_shtype {
|
||||
SP3_SHTYPE_NONE = -1,
|
||||
SP3_SHTYPE_PS = 0,
|
||||
SP3_SHTYPE_VS = 1,
|
||||
SP3_SHTYPE_GS = 2,
|
||||
SP3_SHTYPE_ES = 3,
|
||||
SP3_SHTYPE_HS = 4,
|
||||
SP3_SHTYPE_LS = 5,
|
||||
SP3_SHTYPE_CS = 6,
|
||||
#ifdef NAVI10LITE_BUILD
|
||||
SP3_SHTYPE_ACV = 7,
|
||||
#endif
|
||||
};
|
||||
|
||||
/// Assorted constants used by sp3 API.
|
||||
enum sp3_count {
|
||||
SP3_NUM_MRT = 8, ///< Maximum number of render targets supported.
|
||||
SP3_NUM_STRM = 4, ///< Maximum number of streams supported.
|
||||
};
|
||||
|
||||
/// Disassembly flags. Bitwise-OR flags to set options.
|
||||
enum sp3_flag {
|
||||
SP3DIS_NO_STATE = 0x01, ///< Do not include state header at top of shader.
|
||||
SP3DIS_NO_BINARY = 0x02, ///< Do not include comments with raw binary microcode.
|
||||
SP3DIS_COMMENTS = 0x04, ///< Do not include comments.
|
||||
SP3DIS_NO_GPR_COUNT = 0x08, ///< Do not include GPR allocation counts.
|
||||
SP3DIS_FORCEVALID = 0x10, ///< Force all bytes of microcode to be disassembled.
|
||||
SP3DIS_NO_ASIC = 0x20, ///< Do not emit the asic header at top of shader.
|
||||
};
|
||||
|
||||
/// Shader context. Contains no user-visible fields.
|
||||
struct sp3_context;
|
||||
|
||||
/// Memory object. Contains no user-visible fields.
|
||||
struct sp3_vma;
|
||||
|
||||
/// VM addresses are 64-bit and the address unit is 32 bits
|
||||
typedef uint64_t sp3_vmaddr;
|
||||
|
||||
/// Storage entry for register streams.
|
||||
struct sp3_reg {
|
||||
uint32_t index; ///< One of the MM aperture register addresses.
|
||||
uint32_t value; ///< 32-bit register data.
|
||||
};
|
||||
|
||||
/// Bits for a single instruction.
|
||||
struct sp3_inst_bits {
|
||||
uint32_t val[5]; ///< Largest single instruction in any backend is 5 dwords.
|
||||
};
|
||||
|
||||
/// Wrapped shader metadata.
|
||||
///
|
||||
/// After generation, shaders are encapsulated in sp3_shader structures.
|
||||
///
|
||||
/// Those structures contain the shader binary, its register stream, constants and constant
|
||||
/// buffers and metadata needed for SC compatibility.
|
||||
///
|
||||
struct sp3_shader {
|
||||
enum sp3_shtype type; ///< One of the SHTYPE_* constants.
|
||||
uint32_t asic_int; ///< Internal ASIC index. Do not use.
|
||||
char asic[0x100]; ///< ASIC name as a string ("RV870" etc).
|
||||
uint32_t size; ///< Size of the compiled shader, in 32-bit words.
|
||||
uint32_t nsgprs; ///< Number of scalar GPRs used.
|
||||
uint32_t nvgprs; ///< Number of vector GPRs used.
|
||||
uint32_t nsvgprs; ///< Number of shared vector GPRs used (only available in certain projects).
|
||||
uint32_t naccvgprs; ///< Number of accumulator vector GPRs used (only available in certain projects).
|
||||
uint32_t nsgprs_manual_alloc;
|
||||
uint32_t nvgprs_manual_alloc;
|
||||
uint32_t nsvgprs_manual_alloc;
|
||||
uint32_t naccvgprs_manual_alloc;
|
||||
uint32_t trap_present;
|
||||
uint32_t user_sgpr_count;
|
||||
uint32_t scratch_en;
|
||||
uint32_t dispatch_draw_en;
|
||||
uint32_t so_en;
|
||||
uint32_t so_base0_en;
|
||||
uint32_t so_base1_en;
|
||||
uint32_t so_base2_en;
|
||||
uint32_t so_base3_en;
|
||||
uint32_t oc_lds_en;
|
||||
uint32_t tg_size_en;
|
||||
uint32_t tidig_comp_cnt; ///< Number of components(-1) enabled for thread id in group
|
||||
uint32_t tgid_x_en;
|
||||
uint32_t tgid_y_en;
|
||||
uint32_t tgid_z_en;
|
||||
uint32_t wave_cnt_en;
|
||||
uint32_t primgen_en;
|
||||
uint32_t pc_base_en;
|
||||
uint32_t sgpr_scratch;
|
||||
uint32_t sgpr_psvs_state;
|
||||
uint32_t sgpr_gs2vs_offset;
|
||||
uint32_t sgpr_so_write_index;
|
||||
uint32_t sgpr_so_base_offset0;
|
||||
uint32_t sgpr_so_base_offset1;
|
||||
uint32_t sgpr_so_base_offset2;
|
||||
uint32_t sgpr_so_base_offset3;
|
||||
uint32_t sgpr_offchip_lds;
|
||||
uint32_t sgpr_is_offchip;
|
||||
uint32_t sgpr_ring_offset;
|
||||
uint32_t sgpr_gs_wave_id;
|
||||
uint32_t sgpr_global_wave_id;
|
||||
uint32_t sgpr_tg_size;
|
||||
uint32_t sgpr_tgid_x;
|
||||
uint32_t sgpr_tgid_y;
|
||||
uint32_t sgpr_tgid_z;
|
||||
uint32_t sgpr_tf_base;
|
||||
uint32_t sgpr_pc_base;
|
||||
uint32_t sgpr_wave_cnt;
|
||||
uint32_t wave_size; ///< Number of threads in a wavefront (only certain ASICs; 0 = don't care).
|
||||
uint32_t pc_exports; ///< Range of parameters exported (if VS).
|
||||
uint32_t pos_export; ///< Shader executes a position export (if VS).
|
||||
uint32_t cb_exports; ///< Range of MRTs exported (if PS).
|
||||
uint32_t mrtz_export_format;///< Export format of the mrtz export.
|
||||
uint32_t z_export; ///< Shader executes a Z export (if PS).
|
||||
uint32_t pops_en; ///< Shader is POPS (PS)
|
||||
uint32_t pops_num_samples; ///< (PS)
|
||||
uint32_t load_collision_waveid; ///< Shader sets load collision waveid (if PS).
|
||||
uint32_t load_intrawave_collision; ///< Shader is in intrawave mode (if PS).
|
||||
uint32_t stencil_test_export; ///< Shader exports stencil (if PS).
|
||||
uint32_t stencil_op_export; ///< Shader exports stencil (if PS).
|
||||
uint32_t kill_used; ///< Shader executes ALU KILL operations.
|
||||
uint32_t cb_masks[SP3_NUM_MRT]; ///< Component masks for each MRT exported (if PS).
|
||||
uint32_t emit_used; ///< EMIT opcodes used (if GS).
|
||||
uint32_t covmask_export; ///< Shader exports coverage mask (if PS).
|
||||
uint32_t mask_export; ///< Shader exports mask (if PS).
|
||||
uint32_t strm_used[SP3_NUM_STRM]; ///< Streamout operations used (map).
|
||||
uint32_t scratch_used; ///< Scratch SMX exports used.
|
||||
uint32_t scratch_itemsize; ///< Scratch ring item size.
|
||||
uint32_t reduction_used; ///< Reduction SMX exports used.
|
||||
uint32_t ring_used; ///< ESGS/GSVS ring SMX exports used.
|
||||
uint32_t ring_itemsize; ///< ESGS/GSVS ring item size (for ES/GS respectively).
|
||||
uint32_t vertex_size[4]; ///< GSVS ring vertex size (for GS).
|
||||
uint32_t mem_used; ///< Raw memory SMX exports used.
|
||||
uint32_t rats_used; ///< Mask of RATs (UAVs) used
|
||||
uint32_t group_size[3]; ///< Wavefront group size (for ELF files).
|
||||
uint32_t alloc_lds; ///< Number of LDS bytes allocated for wave group. (translates to lds_size in CS and LS)
|
||||
uint32_t *data; ///< Shader binary data.
|
||||
uint32_t nregs; ///< Number of register writes in the stream.
|
||||
uint64_t crc64; ///< CRC64 of compiled shader, may be used for identification/fingerprinting.
|
||||
uint32_t crc32; ///< 32-bit CRC of compiled shader (based on crc64), may be used for identification/fingerprinting.
|
||||
struct sp3_reg *regs; ///< Register writes (index-value pairs).
|
||||
struct sp3_shader *merged_2nd_shader; ///< Merged es/gs, ls/hs shader, this points to start of the second shader (only certain ASICs).
|
||||
};
|
||||
|
||||
/// Comment callback.
|
||||
typedef const char *(*sp3_comment_cb)(void *, int);
|
||||
|
||||
|
||||
/// Get version of the sp3 library.
|
||||
///
|
||||
/// @return String containing the version number.
|
||||
///
|
||||
SP3_EXPORT const char *sp3_version(void);
|
||||
|
||||
/// Create a new sp3 context.
|
||||
///
|
||||
/// @return A new context for use in assembling and disassembling shaders. Free with sp3_close().
|
||||
///
|
||||
SP3_EXPORT struct sp3_context *sp3_new(void);
|
||||
|
||||
/// Set option for sp3.
|
||||
///
|
||||
/// @param state sp3 context.
|
||||
/// @param option Option name. Unknown options will raise an error.
|
||||
/// @param value Option value. NULL is used to represent value-less options.
|
||||
///
|
||||
/// Currently supported options:
|
||||
///
|
||||
/// stdlib (string) -- absolute path to standard library files. May be a colon-separated list
|
||||
/// of paths that will be used to search for stdlib files. Used by sp3_parse_library().
|
||||
///
|
||||
/// The following options are deprecated because they take integer arguments; you should use
|
||||
/// sp3_set_option_int() for these settings going forward. They will continue to be accepted by
|
||||
/// this API to support legacy users.
|
||||
///
|
||||
/// Werror (boolean) -- indicates whether warnings should be treated as errors.
|
||||
///
|
||||
/// wave_size (integer) -- sets the wave size being used by the draw calls that will be using
|
||||
/// this shader. Ignored in certain ASICs. You may set this to 32, 64 or the special value 0
|
||||
/// to indicate no preference on wave size. The shader will be checked to ensure it is
|
||||
/// compatible with the size specified here.
|
||||
///
|
||||
/// omit_version (boolean) -- omit generation of the S_VERSION opcode.
|
||||
///
|
||||
/// omit_code_end (boolean) -- omit generation of the S_CODE_END footer.
|
||||
///
|
||||
/// allow_raw_bits (boolean) -- allow use of the raw_bits() function in sp3 shaders. This is a
|
||||
/// dangerous option to allow in general so you must explicitly enable this option, otherwise
|
||||
/// the raw_bits() function will always error out.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_option(
|
||||
struct sp3_context *state,
|
||||
const char *option,
|
||||
const char *value);
|
||||
|
||||
/// Set option for sp3.
|
||||
///
|
||||
/// @param state sp3 context.
|
||||
/// @param option Option name. Unknown options will raise an error.
|
||||
/// @param value Option value.
|
||||
///
|
||||
/// Currently supported options:
|
||||
///
|
||||
/// Werror (boolean) -- indicates whether warnings should be treated as errors.
|
||||
///
|
||||
/// wave_size (integer) -- sets the wave size being used by the draw calls that will be using
|
||||
/// this shader. Ignored in certain ASICs. You may set this to 32, 64 or the special value 0
|
||||
/// to indicate no preference on wave size. The shader will be checked to ensure it is
|
||||
/// compatible with the size specified here.
|
||||
///
|
||||
/// omit_version (boolean) -- omit generation of the S_VERSION opcode.
|
||||
///
|
||||
/// omit_code_end (boolean) -- omit generation of the S_CODE_END footer.
|
||||
///
|
||||
/// allow_raw_bits (boolean) -- allow use of the raw_bits() function in sp3 shaders. This is a
|
||||
/// dangerous option to allow in general so you must explicitly enable this option, otherwise
|
||||
/// the raw_bits() function will always error out.
|
||||
///
|
||||
/// secure_mode (boolean) -- run in secure mode. Disables macro language features in assembly
|
||||
/// path including calls to custom functions. Useful if sp3 is used as a backend to a web-based
|
||||
/// assembly tool.
|
||||
///
|
||||
/// debug_encoding (boolean) -- if true, debug encoding selection logic for assembly. Only
|
||||
/// supported in 10.4+ backends.
|
||||
///
|
||||
/// no_vs_export_check (boolean) -- if true, disable VS export sanity check. Only supported in
|
||||
/// 10.4+ backends.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_option_int(
|
||||
struct sp3_context *state,
|
||||
const char *option,
|
||||
int32_t value);
|
||||
|
||||
/// Parse a file into a context.
|
||||
///
|
||||
/// Use sp3_compile to generate binary microcode after the shader is parsed.
|
||||
///
|
||||
/// @param state Context to use for parsing.
|
||||
/// @param file File to read. If NULL, parse from stdin.
|
||||
///
|
||||
SP3_EXPORT void sp3_parse_file(struct sp3_context *state, const char *file);
|
||||
|
||||
/// Parse a string into a context.
|
||||
///
|
||||
/// Use sp3_compile to generate binary microcode after the shader is parsed.
|
||||
///
|
||||
/// @param state Context to use for parsing.
|
||||
/// @param string String to parse.
|
||||
///
|
||||
SP3_EXPORT void sp3_parse_string(struct sp3_context *state, const char *string);
|
||||
|
||||
/// Parse a file from the standard library into a context.
|
||||
///
|
||||
/// Use sp3_compile to generate binary microcode after the shader is parsed.
|
||||
///
|
||||
/// @param state Context to use for parsing.
|
||||
/// @param name Path to the standard library; files in this directory are parsed.
|
||||
///
|
||||
SP3_EXPORT void sp3_parse_library(struct sp3_context *state, const char *name);
|
||||
|
||||
/// Call a sp3 function.
|
||||
///
|
||||
SP3_EXPORT void sp3_call(struct sp3_context *state, const char *func);
|
||||
|
||||
/// Compile a shader program that has been parsed into the context.
|
||||
///
|
||||
/// @param state sp3 context.
|
||||
/// @param cffunc Name of clause to call. By convention, this is "main".
|
||||
/// @return A compiled and linked shader. Free memory with sp3_free_shader().
|
||||
///
|
||||
SP3_EXPORT struct sp3_shader *sp3_compile(
|
||||
struct sp3_context *state,
|
||||
const char *cffunc);
|
||||
|
||||
/// Free a sp3_shader.
|
||||
///
|
||||
/// @param sh Shader object to delete.
|
||||
///
|
||||
SP3_EXPORT void sp3_free_shader(struct sp3_shader *sh);
|
||||
|
||||
/// Get current ASIC name set for a context.
|
||||
///
|
||||
/// @param state Context to query.
|
||||
/// @return Name of ASIC.
|
||||
///
|
||||
SP3_EXPORT const char *sp3_getasic(struct sp3_context *state);
|
||||
|
||||
/// Set current ASIC name for a context.
|
||||
///
|
||||
/// @param state Context to modify.
|
||||
/// @param chip Case-insensitive string representing the ASIC to compile or disassemble for.
|
||||
///
|
||||
SP3_EXPORT void sp3_setasic(struct sp3_context *state, const char *chip);
|
||||
|
||||
/// Set global variable in context to an integer.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_param_int(
|
||||
struct sp3_context *state,
|
||||
const char *name,
|
||||
int32_t value);
|
||||
|
||||
/// Set global variable in context to an integer vector.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_param_intvec(
|
||||
struct sp3_context *state,
|
||||
const char *name,
|
||||
uint32_t size,
|
||||
const int32_t *value);
|
||||
|
||||
/// Set global variable in context to a float.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_param_float(
|
||||
struct sp3_context *state,
|
||||
const char *name,
|
||||
float value);
|
||||
|
||||
/// Set global variable in context to a float vector.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_param_floatvec(
|
||||
struct sp3_context *state,
|
||||
const char *name,
|
||||
uint32_t size,
|
||||
const float *value);
|
||||
|
||||
/// Set error message header.
|
||||
///
|
||||
/// @param state Context to modify.
|
||||
/// @param str Text to include in error message header.
|
||||
///
|
||||
SP3_EXPORT void sp3_set_error_header(struct sp3_context *state, const char *str);
|
||||
|
||||
/// Get ASIC metrics for the ASIC in current state.
|
||||
///
|
||||
/// Used by ELF tools to fill in some CAL fields.
|
||||
///
|
||||
/// @param state Context to query.
|
||||
/// @param name Name of ASIC metric.
|
||||
/// @return Value of ASIC metric.
|
||||
///
|
||||
SP3_EXPORT int sp3_asicinfo(struct sp3_context *state, const char *name);
|
||||
|
||||
/// Free a context allocated by sp3_new/open/parse.
|
||||
///
|
||||
/// @param state Context to delete.
|
||||
///
|
||||
SP3_EXPORT void sp3_close(struct sp3_context *state);
|
||||
|
||||
/// Disassemble a shader.
|
||||
///
|
||||
/// This call is likely to change to something that will take a filled sp3_shader structure
|
||||
/// later on.
|
||||
///
|
||||
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
|
||||
/// @param bin Memory map with the opcodes (see sp3-vm.h).
|
||||
/// @param base Start of the shader in the memory map (in VM entries, i.e. 32-bit words).
|
||||
/// @param name Name to give the disassembled shader.
|
||||
/// @param shader_type One of the SHTYPE_* constants.
|
||||
/// @param include Literal text to include in the CF clause (NULL includes nothing).
|
||||
/// @param max_len Maximum length of CF clause. Matters if SP3DIS_FORCEVALID is set.
|
||||
/// @param flags A bitmask of SP3DIS_* flags.
|
||||
///
|
||||
/// @return Shader disassembly as a string. Free memory with sp3_free().
|
||||
///
|
||||
SP3_EXPORT char *sp3_disasm(
|
||||
struct sp3_context *state,
|
||||
struct sp3_vma *bin,
|
||||
sp3_vmaddr base,
|
||||
const char *name,
|
||||
enum sp3_shtype shader_type,
|
||||
const char *include,
|
||||
uint32_t max_len,
|
||||
uint32_t flags);
|
||||
|
||||
/// Disassemble a single shader instruction.
|
||||
///
|
||||
/// This call is likely to change to something that will take a filled sp3_shader structure
|
||||
/// later on.
|
||||
///
|
||||
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
|
||||
/// @param inst Pointer to dwords containing instruction (exact number of dwords required depends on instruction).
|
||||
/// @param base Start of the shader in the memory map (in VM entries, i.e. 32-bit words).
|
||||
/// @param addr Address of the instruction being disassembled (in VM entries, i.e. 32-bit words).
|
||||
/// @param shader_type One of the SHTYPE_* constants.
|
||||
/// @param flags A mask of SP3DIS_* flags.
|
||||
///
|
||||
/// @return Shader disassembly as a string. Free memory with sp3_free().
|
||||
///
|
||||
SP3_EXPORT char *sp3_disasm_inst(
|
||||
struct sp3_context *state,
|
||||
const struct sp3_inst_bits *inst,
|
||||
sp3_vmaddr base,
|
||||
sp3_vmaddr addr,
|
||||
enum sp3_shtype shader_type,
|
||||
uint32_t flags);
|
||||
|
||||
/// Parse a register stream.
|
||||
///
|
||||
/// Can be called before sp3_disasm to preset things like ALU, boolean and loop constants.
|
||||
///
|
||||
/// This call is likely to merge with sp3_disasm later on.
|
||||
///
|
||||
/// @param state sp3 context to fill with state.
|
||||
/// @param nregs Number of register entries.
|
||||
/// @param regs Register stream to parse.
|
||||
/// @param shader_type One of the SHTYPE_* constants.
|
||||
///
|
||||
SP3_EXPORT void sp3_setregs(
|
||||
struct sp3_context *state,
|
||||
uint32_t nregs,
|
||||
const struct sp3_reg *regs,
|
||||
enum sp3_shtype shader_type);
|
||||
|
||||
|
||||
/// Set shader comments
|
||||
///
|
||||
/// @param state sp3 context.
|
||||
/// @param map Map of comments (0 for no comment, other values will be passed to the callback).
|
||||
/// @param f_top Callback returning comment to place above the opcode.
|
||||
/// @param f_right Callback returning comment to place to the right of the opcode.
|
||||
/// @param ctx Void pointer to pass to comment callbacks.
|
||||
///
|
||||
SP3_EXPORT void sp3_setcomments(
|
||||
struct sp3_context *state,
|
||||
struct sp3_vma *map,
|
||||
sp3_comment_cb f_top,
|
||||
sp3_comment_cb f_right,
|
||||
void *ctx);
|
||||
|
||||
/// Set alternate shader entry points
|
||||
///
|
||||
/// Used for disassembly; this marks an additional location in memory
|
||||
/// (besides the start address) where shader code may be found. Generally
|
||||
/// required for jump tables and any case where the shader may perform
|
||||
/// indirect jumps to ensure that disassembly locates all shader
|
||||
/// instructions.
|
||||
///
|
||||
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
|
||||
/// @param addr Address of the additional entry point (in VM entries, i.e. 32-bit words).
|
||||
///
|
||||
SP3_EXPORT void sp3_setentrypoint(
|
||||
struct sp3_context *state,
|
||||
sp3_vmaddr addr);
|
||||
|
||||
/// Clear alternate shader entry points.
|
||||
///
|
||||
/// Clear all entry points previously set with sp3_setentrypoint.
|
||||
///
|
||||
/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC).
|
||||
///
|
||||
SP3_EXPORT void sp3_clearentrypoints(struct sp3_context *state);
|
||||
|
||||
/// Free memory allocated by sp3.
|
||||
///
|
||||
/// Windows DLLs that allocate memory have to free it. This function
|
||||
/// should be used to free the result of sp3_disasm, sp3_compile etc.
|
||||
///
|
||||
SP3_EXPORT void sp3_free(void *ptr);
|
||||
|
||||
/// SP3 API to merge two shaders given file names as input.
|
||||
///
|
||||
SP3_EXPORT struct sp3_shader* sp3_merge_shaders(
|
||||
struct sp3_context *pointer,
|
||||
const char *first_file,
|
||||
const char *second_file);
|
||||
|
||||
/// SP3 API to merge two shaders given shader strings as input.
|
||||
///
|
||||
SP3_EXPORT struct sp3_shader* sp3_merge_shader_strings(
|
||||
struct sp3_context *pointer,
|
||||
const char *first_string,
|
||||
const char *second_string);
|
||||
|
||||
|
||||
/// @}
|
||||
|
||||
|
||||
/// @defgroup sp3vm SP3 Memory Objects
|
||||
///
|
||||
/// The VM API is used to manage virtual memory maps. Those maps are used for binary storage
|
||||
/// for disassembly, as they can naturally mirror the GPU's memory map (so no register
|
||||
/// translation is needed).
|
||||
///
|
||||
/// @{
|
||||
|
||||
/// Callback function that will fill a VMA on demand
|
||||
///
|
||||
/// The VMA to be filled will be specified through the request address.
|
||||
/// The callback should fill the VMA using sp3_vm_write calls.
|
||||
///
|
||||
typedef void (* sp3_vmfill)(struct sp3_vma *vm, sp3_vmaddr addr, void *ctx);
|
||||
|
||||
/// Create a new VM that is empty.
|
||||
///
|
||||
/// Free the object with sp3_vm_free().
|
||||
///
|
||||
/// @return New VM object.
|
||||
///
|
||||
SP3_EXPORT
|
||||
struct sp3_vma *sp3_vm_new(void);
|
||||
|
||||
/// Create a new VM that has a sp3_vmfill callback.
|
||||
///
|
||||
/// Free the object with sp3_vm_free().
|
||||
///
|
||||
/// @param fill Function used to populate data in VM. The function will be passed the new VM object, the address and a context.
|
||||
/// @param ctx User-specified context. Passed to the fill function and not used by sp3 itself.
|
||||
/// @return New VM object.
|
||||
///
|
||||
SP3_EXPORT
|
||||
struct sp3_vma *sp3_vm_new_fill(sp3_vmfill fill, void *ctx);
|
||||
|
||||
/// Create a new VM from an array of words.
|
||||
///
|
||||
/// Free the object with sp3_vm_free().
|
||||
///
|
||||
/// @param base VM address to load array at.
|
||||
/// @param len Number of 32-bit words in the array.
|
||||
/// @param data Pointer to the array.
|
||||
/// @return New VM object.
|
||||
///
|
||||
SP3_EXPORT
|
||||
struct sp3_vma *sp3_vm_new_ptr(sp3_vmaddr base, sp3_vmaddr len, const uint32_t *data);
|
||||
|
||||
/// Find a VMA, optionally adding it.
|
||||
///
|
||||
/// @param vm VM to search in.
|
||||
/// @param addr Address to search for.
|
||||
/// @param add Flag indicating whether a failure should result in adding a new VMA.
|
||||
/// @return VM object matching the specified address.
|
||||
///
|
||||
SP3_EXPORT
|
||||
struct sp3_vma *sp3_vm_find(struct sp3_vma *vm, sp3_vmaddr addr, uint32_t add);
|
||||
|
||||
/// Write a word to a VM.
|
||||
///
|
||||
/// @param vm VM to write.
|
||||
/// @param addr Address to write.
|
||||
/// @param val 32-bits of data to write.
|
||||
///
|
||||
SP3_EXPORT
|
||||
void sp3_vm_write(struct sp3_vma *vm, sp3_vmaddr addr, uint32_t val);
|
||||
|
||||
/// Read a word from a VM.
|
||||
///
|
||||
/// @param vm VM to read.
|
||||
/// @param addr Address to read.
|
||||
/// @return 32-bits of data at specified address.
|
||||
///
|
||||
SP3_EXPORT
|
||||
uint32_t sp3_vm_read(struct sp3_vma *vm, sp3_vmaddr addr);
|
||||
|
||||
/// Probe VM for presence.
|
||||
///
|
||||
/// @param vm VM to probe.
|
||||
/// @param addr Address to search for.
|
||||
/// @return 1 if the specified address is backed in the VM, 0 otherwise.
|
||||
///
|
||||
SP3_EXPORT
|
||||
int sp3_vm_present(struct sp3_vma *vm, sp3_vmaddr addr);
|
||||
|
||||
/// Return base address of VM.
|
||||
///
|
||||
/// @param vm VM to query.
|
||||
/// @return Base address.
|
||||
///
|
||||
SP3_EXPORT
|
||||
sp3_vmaddr sp3_vm_base(struct sp3_vma *vm);
|
||||
|
||||
/// Return next VM.
|
||||
///
|
||||
/// @param vm VM to query.
|
||||
/// @return Next VM in list.
|
||||
///
|
||||
SP3_EXPORT
|
||||
struct sp3_vma *sp3_vm_next(struct sp3_vma *vm);
|
||||
|
||||
/// Free a VM and all its storage.
|
||||
///
|
||||
/// Use this function to free memory allocated by sp3_vm_new, sp3_vm_new_fill and
|
||||
/// sp3_vm_new_ptr.
|
||||
///
|
||||
/// @param vm VM to free.
|
||||
///
|
||||
SP3_EXPORT
|
||||
void sp3_vm_free(struct sp3_vma *vm);
|
||||
|
||||
|
||||
/// @}
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __SP3_H__ */
|
||||
@@ -0,0 +1,379 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/**
|
||||
* Self-contained assembler that uses the LLVM MC API to assemble AMDGCN
|
||||
* instructions
|
||||
*/
|
||||
|
||||
#include <llvm/Config/llvm-config.h>
|
||||
#include <llvm/MC/MCAsmBackend.h>
|
||||
#include <llvm/MC/MCAsmInfo.h>
|
||||
#include <llvm/MC/MCCodeEmitter.h>
|
||||
#include <llvm/MC/MCContext.h>
|
||||
#include <llvm/MC/MCInstPrinter.h>
|
||||
#include <llvm/MC/MCInstrInfo.h>
|
||||
#include <llvm/MC/MCObjectFileInfo.h>
|
||||
#include <llvm/MC/MCObjectWriter.h>
|
||||
#include <llvm/MC/MCParser/AsmLexer.h>
|
||||
#include <llvm/MC/MCParser/MCTargetAsmParser.h>
|
||||
#include <llvm/MC/MCRegisterInfo.h>
|
||||
#include <llvm/MC/MCStreamer.h>
|
||||
#include <llvm/MC/MCSubtargetInfo.h>
|
||||
#include <llvm/Support/CommandLine.h>
|
||||
#include <llvm/Support/InitLLVM.h>
|
||||
#include <llvm/Support/MemoryBuffer.h>
|
||||
#include <llvm/Support/SourceMgr.h>
|
||||
#include <llvm/Support/TargetSelect.h>
|
||||
#if LLVM_VERSION_MAJOR > 13
|
||||
#include <llvm/MC/TargetRegistry.h>
|
||||
#else
|
||||
#include <llvm/Support/TargetRegistry.h>
|
||||
#endif
|
||||
|
||||
#include <linux/elf.h>
|
||||
#include "OSWrapper.hpp"
|
||||
#include "Assemble.hpp"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
/**
 * Construct an assembler for the ASIC identified by a GFX version word.
 *
 * Starts with no instruction stream, derives the MCPU name from Gfxv and
 * registers the AMDGPU LLVM components.
 *
 * @param Gfxv GFX version word, decoded by SetTargetAsic()
 */
Assembler::Assembler(const uint32_t Gfxv)
    : TextData(nullptr),
      TextSize(0) {
    SetTargetAsic(Gfxv);
    LLVMInit();
}
|
||||
|
||||
/**
 * Release the stored instruction stream and shut LLVM down.
 *
 * NOTE(review): llvm_shutdown() appears to affect LLVM state shared by the
 * whole process, not just this object -- confirm that no other Assembler
 * instance is expected to stay usable after one has been destroyed.
 */
Assembler::~Assembler() {
    // Same steps as FlushText(): drop the .text copy and reset the size.
    delete[] TextData;
    TextData = nullptr;
    TextSize = 0;

    llvm_shutdown();
}
|
||||
|
||||
const char* Assembler::GetInstrStream() {
|
||||
return TextData;
|
||||
}
|
||||
|
||||
const size_t Assembler::GetInstrStreamSize() {
|
||||
return TextSize;
|
||||
}
|
||||
|
||||
/**
 * Copy the stored instruction stream into a caller-provided buffer.
 *
 * @param OutBuf  Destination buffer
 * @param BufSize Capacity of OutBuf in bytes
 * @return 0 on success (including when there is nothing to copy),
 *         -2 if the stream is larger than BufSize,
 *         -1 if there is data to copy but OutBuf is null
 */
int Assembler::CopyInstrStream(char* OutBuf, const size_t BufSize) {
    if (TextSize > BufSize)
        return -2;

    // Nothing stored: succeed without touching OutBuf
    if (!TextData || TextSize == 0)
        return 0;

    // Never write through a null destination
    if (!OutBuf)
        return -1;

    std::copy(TextData, TextData + TextSize, OutBuf);
    return 0;
}
|
||||
|
||||
const char* Assembler::GetTargetAsic() {
|
||||
return MCPU;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set MCPU via GFX Version from Thunk
|
||||
* LLVM Target IDs use decimal for Maj/Min, hex for Step
|
||||
*/
|
||||
void Assembler::SetTargetAsic(const uint32_t Gfxv) {
|
||||
const uint8_t Major = (Gfxv >> 16) & 0xff;
|
||||
const uint8_t Minor = (Gfxv >> 8) & 0xff;
|
||||
const uint8_t Step = Gfxv & 0xff;
|
||||
|
||||
snprintf(MCPU, ASM_MCPU_LEN, "gfx%d%d%x", Major, Minor, Step);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize LLVM targets and assembly printers/parsers
|
||||
*/
|
||||
void Assembler::LLVMInit() {
|
||||
LLVMInitializeAMDGPUTargetInfo();
|
||||
LLVMInitializeAMDGPUTargetMC();
|
||||
LLVMInitializeAMDGPUAsmParser();
|
||||
}
|
||||
|
||||
/**
|
||||
* Flush/reset TextData and TextSize to initial state
|
||||
*/
|
||||
void Assembler::FlushText() {
|
||||
if (TextData)
|
||||
delete[] TextData;
|
||||
TextData = nullptr;
|
||||
TextSize = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Print hex of ELF object to stdout (debug)
|
||||
*/
|
||||
void Assembler::PrintELFHex(const std::string Data) {
|
||||
outs() << "ASM Info: assembled ELF hex data (length " << Data.length() << "):\n";
|
||||
outs() << "0x00:\t";
|
||||
for (size_t i = 0; i < Data.length(); ++i) {
|
||||
char c = Data[i];
|
||||
outs() << format_hex(static_cast<uint8_t>(c), 4);
|
||||
if ((i+1) % 16 == 0)
|
||||
outs() << "\n" << format_hex(i+1, 4) << ":\t";
|
||||
else
|
||||
outs() << " ";
|
||||
}
|
||||
outs() << "\n";
|
||||
}
|
||||
|
||||
/**
|
||||
* Print hex of raw instruction stream to stdout (debug)
|
||||
*/
|
||||
void Assembler::PrintTextHex() {
|
||||
outs() << "ASM Info: assembled .text hex data (length " << TextSize << "):\n";
|
||||
outs() << "0x00:\t";
|
||||
for (size_t i = 0; i < TextSize; i++) {
|
||||
outs() << format_hex(static_cast<uint8_t>(TextData[i]), 4);
|
||||
if ((i+1) % 16 == 0)
|
||||
outs() << "\n" << format_hex(i+1, 4) << ":\t";
|
||||
else
|
||||
outs() << " ";
|
||||
}
|
||||
outs() << "\n";
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract raw instruction stream from .text section in ELF object
|
||||
*
|
||||
* @param RawData Raw C string of ELF object
|
||||
* @return 0 on success
|
||||
*/
|
||||
int Assembler::ExtractELFText(const char* RawData) {
|
||||
const Elf64_Ehdr* ElfHeader;
|
||||
const Elf64_Shdr* SectHeader;
|
||||
const Elf64_Shdr* SectStrTable;
|
||||
const char* SectStrAddr;
|
||||
unsigned NumSects, SectIdx;
|
||||
|
||||
if (!(ElfHeader = reinterpret_cast<const Elf64_Ehdr*>(RawData))) {
|
||||
outs() << "ASM Error: elf data is invalid or corrupted\n";
|
||||
return -1;
|
||||
}
|
||||
if (ElfHeader->e_ident[EI_CLASS] != ELFCLASS64) {
|
||||
outs() << "ASM Error: elf object must be of 64-bit type\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
SectHeader = reinterpret_cast<const Elf64_Shdr*>(RawData + ElfHeader->e_shoff);
|
||||
SectStrTable = &SectHeader[ElfHeader->e_shstrndx];
|
||||
SectStrAddr = static_cast<const char*>(RawData + SectStrTable->sh_offset);
|
||||
|
||||
// Loop through sections, break on .text
|
||||
NumSects = ElfHeader->e_shnum;
|
||||
for (SectIdx = 0; SectIdx < NumSects; SectIdx++) {
|
||||
std::string SectName = std::string(SectStrAddr + SectHeader[SectIdx].sh_name);
|
||||
if (SectName == std::string(".text")) {
|
||||
TextSize = SectHeader[SectIdx].sh_size;
|
||||
TextData = new char[TextSize];
|
||||
memcpy(TextData, RawData + SectHeader[SectIdx].sh_offset, TextSize);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (SectIdx >= NumSects) {
|
||||
outs() << "ASM Error: couldn't locate .text section\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Assemble shader, fill member vars, and copy to output buffer
|
||||
*
|
||||
* @param AssemblySource Shader source represented as a raw C string
|
||||
* @param OutBuf Raw instruction stream output buffer
|
||||
* @param BufSize Size of OutBuf (defaults to PAGE_SIZE)
|
||||
* @return Value of RunAssemble() (0 on success)
|
||||
*/
|
||||
int Assembler::RunAssembleBuf(const char* const AssemblySource, char* OutBuf,
|
||||
const size_t BufSize) {
|
||||
int ret = RunAssemble(AssemblySource);
|
||||
return ret ? ret : CopyInstrStream(OutBuf, BufSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Assemble shader and fill member vars
|
||||
*
|
||||
* @param AssemblySource Shader source represented as a raw C string
|
||||
* @return 0 on success
|
||||
*/
|
||||
int Assembler::RunAssemble(const char* const AssemblySource) {
|
||||
// Ensure target ASIC has been set
|
||||
if (!MCPU) {
|
||||
outs() << "ASM Error: target asic is uninitialized\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Delete TextData for any previous runs
|
||||
FlushText();
|
||||
|
||||
#if 0
|
||||
outs() << "ASM Info: running assembly for target: " << MCPU << "\n";
|
||||
outs() << "ASM Info: source:\n";
|
||||
outs() << AssemblySource << "\n";
|
||||
#endif
|
||||
|
||||
// Initialize MCOptions and target triple
|
||||
const MCTargetOptions MCOptions;
|
||||
Triple TheTriple;
|
||||
|
||||
const Target* TheTarget =
|
||||
TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
|
||||
if (!TheTarget) {
|
||||
outs() << Error;
|
||||
return -1;
|
||||
}
|
||||
|
||||
TheTriple.setArchName(ArchName);
|
||||
TheTriple.setVendorName(VendorName);
|
||||
TheTriple.setOSName(OSName);
|
||||
|
||||
TripleName = TheTriple.getTriple();
|
||||
TheTriple.setTriple(Triple::normalize(TripleName));
|
||||
|
||||
// Create MemoryBuffer for assembly source
|
||||
StringRef AssemblyRef(AssemblySource);
|
||||
std::unique_ptr<MemoryBuffer> BufferPtr =
|
||||
MemoryBuffer::getMemBuffer(AssemblyRef, "", false);
|
||||
if (!BufferPtr->getBufferSize()) {
|
||||
outs() << "ASM Error: assembly source is empty\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Instantiate SrcMgr and transfer BufferPtr ownership
|
||||
SourceMgr SrcMgr;
|
||||
SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc());
|
||||
|
||||
// Initialize MC interfaces and base class objects
|
||||
std::unique_ptr<const MCRegisterInfo> MRI(
|
||||
TheTarget->createMCRegInfo(TripleName));
|
||||
if (!MRI) {
|
||||
outs() << "ASM Error: no register info for target " << MCPU << "\n";
|
||||
return -1;
|
||||
}
|
||||
#if LLVM_VERSION_MAJOR > 9
|
||||
std::unique_ptr<const MCAsmInfo> MAI(
|
||||
TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
|
||||
#else
|
||||
std::unique_ptr<const MCAsmInfo> MAI(
|
||||
TheTarget->createMCAsmInfo(*MRI, TripleName));
|
||||
#endif
|
||||
if (!MAI) {
|
||||
outs() << "ASM Error: no assembly info for target " << MCPU << "\n";
|
||||
return -1;
|
||||
}
|
||||
std::unique_ptr<MCInstrInfo> MCII(
|
||||
TheTarget->createMCInstrInfo());
|
||||
if (!MCII) {
|
||||
outs() << "ASM Error: no instruction info for target " << MCPU << "\n";
|
||||
return -1;
|
||||
}
|
||||
std::unique_ptr<MCSubtargetInfo> STI(
|
||||
TheTarget->createMCSubtargetInfo(TripleName, MCPU, std::string()));
|
||||
if (!STI || !STI->isCPUStringValid(MCPU)) {
|
||||
outs() << "ASM Error: no subtarget info for target " << MCPU << "\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Set up the MCContext for creating symbols and MCExpr's
|
||||
#if LLVM_VERSION_MAJOR > 12
|
||||
MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr, &MCOptions);
|
||||
#else
|
||||
MCObjectFileInfo MOFI;
|
||||
MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr, &MCOptions);
|
||||
MOFI.InitMCObjectFileInfo(TheTriple, true, Ctx);
|
||||
#endif
|
||||
|
||||
// Finalize setup for output object code stream
|
||||
std::string Data;
|
||||
std::unique_ptr<raw_string_ostream> DataStream(std::make_unique<raw_string_ostream>(Data));
|
||||
std::unique_ptr<buffer_ostream> BOS(std::make_unique<buffer_ostream>(*DataStream));
|
||||
raw_pwrite_stream* OS = BOS.get();
|
||||
|
||||
#if LLVM_VERSION_MAJOR > 14
|
||||
MCCodeEmitter* CE = TheTarget->createMCCodeEmitter(*MCII, Ctx);
|
||||
#else
|
||||
MCCodeEmitter* CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
|
||||
#endif
|
||||
MCAsmBackend* MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions);
|
||||
|
||||
std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
|
||||
TheTriple, Ctx,
|
||||
std::unique_ptr<MCAsmBackend>(MAB), MAB->createObjectWriter(*OS),
|
||||
std::unique_ptr<MCCodeEmitter>(CE), *STI, MCOptions.MCRelaxAll,
|
||||
MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ false));
|
||||
|
||||
std::unique_ptr<MCAsmParser> Parser(
|
||||
createMCAsmParser(SrcMgr, Ctx, *Streamer, *MAI));
|
||||
|
||||
// Set parser to target parser and run
|
||||
std::unique_ptr<MCTargetAsmParser> TAP(
|
||||
TheTarget->createMCAsmParser(*STI, *Parser, *MCII, MCOptions));
|
||||
if (!TAP) {
|
||||
outs() << "ASM Error: no assembly parsing support for target " << MCPU << "\n";
|
||||
return -1;
|
||||
}
|
||||
Parser->setTargetParser(*TAP);
|
||||
|
||||
if (Parser->Run(true)) {
|
||||
outs() << "ASM Error: assembly parser failed\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
BOS.reset();
|
||||
DataStream->flush();
|
||||
|
||||
int ret = ExtractELFText(Data.data());
|
||||
if (ret < 0 || !TextData) {
|
||||
outs() << "ASM Error: .text extraction failed\n";
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if 0
|
||||
PrintELFHex(Data);
|
||||
PrintTextHex();
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,86 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _ASSEMBLE_H_
|
||||
#define _ASSEMBLE_H_
|
||||
|
||||
#include "OSWrapper.hpp"
|
||||
|
||||
#define ASM_MCPU_LEN 16
|
||||
|
||||
class Assembler {
|
||||
private:
|
||||
const char* ArchName = "amdgcn";
|
||||
const char* VendorName = "amd";
|
||||
const char* OSName = "amdhsa";
|
||||
char MCPU[ASM_MCPU_LEN];
|
||||
|
||||
std::string TripleName;
|
||||
std::string Error;
|
||||
|
||||
char* TextData;
|
||||
size_t TextSize;
|
||||
|
||||
void SetTargetAsic(const uint32_t Gfxv);
|
||||
|
||||
void LLVMInit();
|
||||
void FlushText();
|
||||
void PrintELFHex(const std::string Data);
|
||||
int ExtractELFText(const char* RawData);
|
||||
|
||||
public:
|
||||
Assembler(const uint32_t Gfxv);
|
||||
~Assembler();
|
||||
|
||||
void PrintTextHex();
|
||||
const char* GetTargetAsic();
|
||||
|
||||
const char* GetInstrStream();
|
||||
const size_t GetInstrStreamSize();
|
||||
int CopyInstrStream(char* OutBuf, const size_t BufSize = PAGE_SIZE);
|
||||
|
||||
int RunAssemble(const char* const AssemblySource);
|
||||
int RunAssembleBuf(const char* const AssemblySource, char* OutBuf,
|
||||
const size_t BufSize = PAGE_SIZE);
|
||||
};
|
||||
|
||||
#endif // _ASSEMBLE_H_
|
||||
@@ -1,126 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "IsaGenerator.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
||||
#include "IsaGenerator_Gfx72.hpp"
|
||||
#include "IsaGenerator_Gfx8.hpp"
|
||||
#include "IsaGenerator_Gfx9.hpp"
|
||||
#include "IsaGenerator_Gfx10.hpp"
|
||||
#include "IsaGenerator_Aldebaran.hpp"
|
||||
|
||||
#include "GoogleTestExtension.hpp"
|
||||
|
||||
#include "sp3.h"
|
||||
|
||||
/* SP3 source text of the trap handler shader that GetAwTrapHandler() compiles
 * under the shader name "main". The register usage is described by the
 * comment embedded in the shader text itself.
 */
const std::string IsaGenerator::ADDRESS_WATCH_SP3(
    "var REG_TRAPSTS_EXCP_MASK = 0x000001ff\n"
    "var WAVE_COUNT_OFFSET = 12\n"
    "var TMA_CYCLE_OFFSET = 16\n"
    "\n"
    "/*\n"
    " * ttmp[0:1] -- The ISA address that triggered this trap handler\n"
    " * ttmp[10:11] -- The TMA user provided, used to store the debug info in this shader\n"
    " * v[10:14] ttmp[7:8] -- temp use inside this shader\n"
    " * s5 -- store the counts that this trap been triggered\n"
    " * Each time when the trap is triggered , this shader will write\n"
    " * ttmp[0] : ttmp[1] : Trap_Status : [reserved]\n"
    " * to TMA + (trap count * TMA_CYCLE_OFFSET)\n"
    " * The TMA + WAVE_COUNT_OFFSET(the first [reserved] address)\n"
    " * used to store the total triggered trap count.\n"
    " */\n"
    "shader main\n"
    "\n"
    " asic(VI)\n"
    "\n"
    " type(CS)\n"
    " v_mov_b32 v10, ttmp10\n"
    " v_mov_b32 v11, ttmp11\n"
    " s_mov_b32 ttmp7, s5\n"
    " s_mulk_i32 ttmp7, TMA_CYCLE_OFFSET\n"
    " s_addk_i32 s5, 1\n"
    " v_mov_b32 v12, ttmp0\n"
    " v_add_u32 v10, vcc, ttmp7, v10\n"
    " flat_store_dword v[10,11], v12 slc glc\n"
    " v_mov_b32 v12, ttmp1\n"
    " v_add_u32 v10, vcc, 4, v10\n"
    " flat_store_dword v[10,11], v12 slc glc\n"
    " s_getreg_b32 ttmp8, hwreg(HW_REG_TRAPSTS)\n"
    " s_and_b32 ttmp8, ttmp8, REG_TRAPSTS_EXCP_MASK\n"
    " v_mov_b32 v12, ttmp8\n"
    " v_add_u32 v10, vcc, 4, v10\n"
    " flat_store_dword v[10,11], v12 glc\n"
    " v_mov_b32 v10, ttmp10\n"
    " v_add_u32 v10, vcc, WAVE_COUNT_OFFSET, v10\n"
    " v_mov_b32 v13, 1\n"
    " flat_atomic_add v14, v[10:11], v13 slc glc\n"
    " s_and_b32 ttmp1, ttmp1, 0xffff\n"
    " s_rfe_b64 [ttmp0,ttmp1]\n"
    "end\n"
);
|
||||
|
||||
/* Factory: return a newly allocated ISA generator matching the given family
 * id. An unrecognised id is logged and NULL is returned.
 */
IsaGenerator* IsaGenerator::Create(unsigned int familyId) {
    IsaGenerator* pGenerator = NULL;

    switch (familyId) {
    case FAMILY_CI:
    case FAMILY_KV:
        pGenerator = new IsaGenerator_Gfx72;
        break;

    case FAMILY_VI:
    case FAMILY_CZ:
        pGenerator = new IsaGenerator_Gfx8;
        break;

    case FAMILY_AI:
    case FAMILY_RV:
    case FAMILY_AR:
        pGenerator = new IsaGenerator_Gfx9;
        break;

    case FAMILY_AL:
        pGenerator = new IsaGenerator_Aldbrn;
        break;

    case FAMILY_NV:
        pGenerator = new IsaGenerator_Gfx10;
        break;

    default:
        LOG() << "Error: Invalid ISA" << std::endl;
        break;
    }

    return pGenerator;
}
|
||||
|
||||
/* Compile the ADDRESS_WATCH_SP3 source (shader "main") into rBuf. */
void IsaGenerator::GetAwTrapHandler(HsaMemoryBuffer& rBuf) {
    const char* const pSource = ADDRESS_WATCH_SP3.c_str();
    CompileShader(pSource, "main", rBuf);
}
|
||||
|
||||
void IsaGenerator::CompileShader(const char* shaderCode, const char* shaderName, HsaMemoryBuffer& rBuf) {
|
||||
sp3_context* pSp3 = sp3_new();
|
||||
sp3_setasic(pSp3, GetAsicName().c_str());
|
||||
sp3_parse_string(pSp3, shaderCode);
|
||||
sp3_shader* pShader = sp3_compile(pSp3, shaderName);
|
||||
|
||||
std::copy(pShader->data, pShader->data + pShader->size, rBuf.As<unsigned int*>());
|
||||
sp3_free_shader(pShader);
|
||||
|
||||
/** Inside this close function, there is an unknown reason of free memory not used by compiler.
|
||||
* Comment out this as a workaround. System will do the garbage collection after this
|
||||
* application is closed.
|
||||
*/
|
||||
// sp3_close(pSp3);
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _ISAGENERATOR_H_
|
||||
#define _ISAGENERATOR_H_
|
||||
|
||||
#include "KFDTestUtil.hpp"
|
||||
|
||||
/* isa generation class - interface */
|
||||
class IsaGenerator {
|
||||
public:
|
||||
static IsaGenerator* Create(unsigned int familyId);
|
||||
|
||||
virtual ~IsaGenerator() {}
|
||||
|
||||
virtual void GetNoopIsa(HsaMemoryBuffer& rBuf) = 0;
|
||||
virtual void GetCopyDwordIsa(HsaMemoryBuffer& rBuf) = 0;
|
||||
virtual void GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf) = 0;
|
||||
virtual void GetAtomicIncIsa(HsaMemoryBuffer& rBuf) = 0;
|
||||
virtual void GetCwsrTrapHandler(HsaMemoryBuffer& rBuf) {}
|
||||
virtual void GetAwTrapHandler(HsaMemoryBuffer& rBuf);
|
||||
|
||||
void CompileShader(const char* shaderCode, const char* shaderName, HsaMemoryBuffer& rBuf);
|
||||
|
||||
protected:
|
||||
virtual const std::string& GetAsicName() = 0;
|
||||
|
||||
private:
|
||||
static const std::string ADDRESS_WATCH_SP3;
|
||||
};
|
||||
|
||||
#endif // _ISAGENERATOR_H_
|
||||
@@ -1,113 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "IsaGenerator_Aldebaran.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
||||
/* Name returned by IsaGenerator_Aldbrn::GetAsicName(). */
const std::string IsaGenerator_Aldbrn::ASIC_NAME("ALDEBARAN");
|
||||
|
||||
/* The binaries are generated from following ISA */
|
||||
#if 0
|
||||
/* flat_atomic_inc is not supported by some PCIe configurations, so flat_atomic_add is used instead */
|
||||
shader atomic_add
|
||||
asic(ALDEBARAN)
|
||||
type(CS)
|
||||
v_mov_b32 v0, s0
|
||||
v_mov_b32 v1, s1
|
||||
v_mov_b32 v2, 1
|
||||
flat_atomic_add v3, v[0:1], v2 slc glc scc
|
||||
s_waitcnt 0
|
||||
s_endpgm
|
||||
end
|
||||
|
||||
shader copy_dword
|
||||
asic(ALDEBARAN)
|
||||
type(CS)
|
||||
/* copy the parameters from scalar registers to vector registers */
|
||||
v_mov_b32 v0, s0
|
||||
v_mov_b32 v1, s1
|
||||
v_mov_b32 v2, s2
|
||||
v_mov_b32 v3, s3
|
||||
/* copy a dword between the passed addresses */
|
||||
flat_load_dword v4, v[0:1] slc glc
|
||||
s_waitcnt 0
|
||||
flat_store_dword v[2:3], v4 slc glc
|
||||
s_endpgm
|
||||
end
|
||||
|
||||
shader main
|
||||
asic(ALDEBARAN)
|
||||
type(CS)
|
||||
loop:
|
||||
s_branch loop
|
||||
s_endpgm
|
||||
end
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
/* One-word shader; 0xbf810000 is also the word that ends the other shaders in this file. */
const uint32_t IsaGenerator_Aldbrn::NOOP_ISA[] = { 0xbf810000 };
|
||||
|
||||
/*
 * Binary of the copy_dword shader. The per-word notes follow the copy_dword
 * listing kept in the #if 0 block of this file (same instruction order).
 */
const uint32_t IsaGenerator_Aldbrn::COPY_DWORD_ISA[] = {
    0x7e000200,              /* v_mov_b32 v0, s0 */
    0x7e020201,              /* v_mov_b32 v1, s1 */
    0x7e040202,              /* v_mov_b32 v2, s2 */
    0x7e060203,              /* v_mov_b32 v3, s3 */
    0xdc530000, 0x047f0000,  /* flat_load_dword v4, v[0:1] slc glc */
    0xbf8c0000,              /* s_waitcnt 0 */
    0xdc730000, 0x007f0402,  /* flat_store_dword v[2:3], v4 slc glc */
    0xbf810000               /* s_endpgm */
};
|
||||
|
||||
/* Binary of the "loop: s_branch loop; s_endpgm" shader listed in the #if 0 block of this file. */
const uint32_t IsaGenerator_Aldbrn::INFINITE_LOOP_ISA[] = {
    0xbf82ffff,  /* s_branch loop */
    0xbf810000   /* s_endpgm */
};
|
||||
|
||||
/*
 * Binary of the atomic_add shader. The per-word notes follow the atomic_add
 * listing kept in the #if 0 block of this file.
 */
const uint32_t IsaGenerator_Aldbrn::ATOMIC_ADD_ISA[] = {
    0x7e000200,              /* v_mov_b32 v0, s0 */
    0x7e020201,              /* v_mov_b32 v1, s1 */
    0x7e040281,              /* v_mov_b32 v2, 1 */
    0xdf0b0000, 0x037f0200,  /* flat_atomic_add v3, v[0:1], v2 slc glc scc */
    0xbf8c0000,              /* s_waitcnt 0 */
    0xbf810000,              /* s_endpgm */
    0x00000000               /* trailing zero word; not part of the listing */
};
|
||||
|
||||
/* Copies every word of NOOP_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Aldbrn::GetNoopIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = NOOP_ISA + ARRAY_SIZE(NOOP_ISA);

    std::copy(NOOP_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Copies every word of COPY_DWORD_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Aldbrn::GetCopyDwordIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = COPY_DWORD_ISA + ARRAY_SIZE(COPY_DWORD_ISA);

    std::copy(COPY_DWORD_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Copies every word of INFINITE_LOOP_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Aldbrn::GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = INFINITE_LOOP_ISA + ARRAY_SIZE(INFINITE_LOOP_ISA);

    std::copy(INFINITE_LOOP_ISA, srcEnd, dst);
}
|
||||
|
||||
/*
 * Copies every word of ATOMIC_ADD_ISA to the address rBuf exposes as uint32_t*.
 * Despite the method name, the table copied is the atomic-add shader.
 */
void IsaGenerator_Aldbrn::GetAtomicIncIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = ATOMIC_ADD_ISA + ARRAY_SIZE(ATOMIC_ADD_ISA);

    std::copy(ATOMIC_ADD_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Returns a reference to the class-wide ASIC_NAME string. */
const std::string& IsaGenerator_Aldbrn::GetAsicName() {
    return IsaGenerator_Aldbrn::ASIC_NAME;
}
|
||||
|
||||
@@ -1,142 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "IsaGenerator_Gfx10.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
||||
/* The binaries are generated from following ISA */
|
||||
/* Name returned by IsaGenerator_Gfx10::GetAsicName(). */
const std::string IsaGenerator_Gfx10::ASIC_NAME("GFX10");
|
||||
#if 0
|
||||
static const char * atomic_add = \
|
||||
"\
|
||||
shader atomic_add \n\
|
||||
asic(GFX10) \n\
|
||||
wave_size(32) \n\
|
||||
type(CS) \n\
|
||||
v_mov_b32 v0, s0 \n\
|
||||
v_mov_b32 v1, s1 \n\
|
||||
v_mov_b32 v2, 1 \n\
|
||||
flat_atomic_add v3, v[0:1], v2 slc glc \n\
|
||||
s_waitcnt 0 \n\
|
||||
s_endpgm \n\
|
||||
end \n\
|
||||
";
|
||||
|
||||
static const char * copy_dword = \
|
||||
"\
|
||||
shader copy_dword \n\
|
||||
asic(GFX10) \n\
|
||||
wave_size(32) \n\
|
||||
type(CS) \n\
|
||||
v_mov_b32 v0, s0 \n\
|
||||
v_mov_b32 v1, s1 \n\
|
||||
v_mov_b32 v2, s2 \n\
|
||||
v_mov_b32 v3, s3 \n\
|
||||
flat_load_dword v4, v[0:1] slc glc \n\
|
||||
s_waitcnt 0 \n\
|
||||
flat_store_dword v[2:3], v4 slc glc \n\
|
||||
s_endpgm \n\
|
||||
end \n\
|
||||
";
|
||||
|
||||
static const char * loop= \
|
||||
"\
|
||||
shader loop \n\
|
||||
asic(GFX10) \n\
|
||||
type(CS) \n\
|
||||
wave_size(32) \n\
|
||||
loop: \n\
|
||||
s_branch loop \n\
|
||||
s_endpgm \n\
|
||||
end \n\
|
||||
";
|
||||
|
||||
static const char * noop= \
|
||||
"\
|
||||
shader noop \n\
|
||||
asic(GFX10) \n\
|
||||
type(CS) \n\
|
||||
wave_size(32) \n\
|
||||
s_endpgm \n\
|
||||
end \n\
|
||||
";
|
||||
#endif
|
||||
|
||||
/*
 * Binary of the noop shader.
 * NOTE(review): the leading 0xb0804004 word and the trailing 0xbf9f0000 words
 * do not appear in the shader listing; presumably they are emitted by the
 * assembler (version marker / end-of-code padding) - confirm.
 */
const uint32_t IsaGenerator_Gfx10::NOOP_ISA[] = {
    0xb0804004,
    0xbf810000,
    0xbf9f0000,
    0xbf9f0000,
    0xbf9f0000,
    0xbf9f0000,
    0xbf9f0000
};
|
||||
|
||||
/*
 * Binary of the copy_dword shader (see the copy_dword listing in the #if 0
 * block of this file). Literals are written with all eight hex digits; the
 * values are unchanged.
 * NOTE(review): the leading 0xb0804004 and trailing 0xbf9f0000 words are not
 * in the listing; presumably assembler-emitted - confirm.
 */
const uint32_t IsaGenerator_Gfx10::COPY_DWORD_ISA[] = {
    0xb0804004,
    0x7e000200,
    0x7e020201,
    0x7e040202,
    0x7e060203,
    0xdc330000,
    0x047d0000,
    0xbf8c0000,
    0xdc730000,
    0x007d0402,
    0xbf810000,
    0xbf9f0000,
    0xbf9f0000,
    0xbf9f0000,
    0xbf9f0000,
    0xbf9f0000
};
|
||||
|
||||
/*
 * Binary of the loop shader (see the loop listing in the #if 0 block of this file).
 * NOTE(review): 0xb0804004 and the trailing 0xbf9f0000 words are not in the
 * listing; presumably assembler-emitted - confirm.
 */
const uint32_t IsaGenerator_Gfx10::INFINITE_LOOP_ISA[] = {
    0xbf82ffff,
    0xb0804004,
    0xbf810000,
    0xbf9f0000,
    0xbf9f0000,
    0xbf9f0000,
    0xbf9f0000,
    0xbf9f0000
};
|
||||
|
||||
/*
 * Binary of the atomic_add shader (see the atomic_add listing in the #if 0
 * block of this file). Literals are written with all eight hex digits; the
 * values are unchanged.
 * NOTE(review): the leading 0xb0804004 and trailing 0xbf9f0000 words are not
 * in the listing; presumably assembler-emitted - confirm.
 */
const uint32_t IsaGenerator_Gfx10::ATOMIC_ADD_ISA[] = {
    0xb0804004,
    0x7e000200,
    0x7e020201,
    0x7e040281,
    0xdccb0000,
    0x037d0200,
    0xbf8c0000,
    0xbf810000,
    0xbf9f0000,
    0xbf9f0000,
    0xbf9f0000,
    0xbf9f0000,
    0xbf9f0000
};
|
||||
|
||||
|
||||
/* Copies every word of NOOP_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Gfx10::GetNoopIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = NOOP_ISA + ARRAY_SIZE(NOOP_ISA);

    std::copy(NOOP_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Copies every word of COPY_DWORD_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Gfx10::GetCopyDwordIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = COPY_DWORD_ISA + ARRAY_SIZE(COPY_DWORD_ISA);

    std::copy(COPY_DWORD_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Copies every word of INFINITE_LOOP_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Gfx10::GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = INFINITE_LOOP_ISA + ARRAY_SIZE(INFINITE_LOOP_ISA);

    std::copy(INFINITE_LOOP_ISA, srcEnd, dst);
}
|
||||
|
||||
/*
 * Copies every word of ATOMIC_ADD_ISA to the address rBuf exposes as uint32_t*.
 * Despite the method name, the table copied is the atomic-add shader.
 */
void IsaGenerator_Gfx10::GetAtomicIncIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = ATOMIC_ADD_ISA + ARRAY_SIZE(ATOMIC_ADD_ISA);

    std::copy(ATOMIC_ADD_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Returns a reference to the class-wide ASIC_NAME string. */
const std::string& IsaGenerator_Gfx10::GetAsicName() {
    return IsaGenerator_Gfx10::ASIC_NAME;
}
|
||||
|
||||
@@ -1,123 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "IsaGenerator_Gfx72.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
||||
/* Name returned by IsaGenerator_Gfx72::GetAsicName(). */
const std::string IsaGenerator_Gfx72::ASIC_NAME("CI");
|
||||
|
||||
/* One-word shader consisting only of S_ENDPGM. */
const uint32_t IsaGenerator_Gfx72::NOOP_ISA[] = { 0xbf810000 };
|
||||
|
||||
/* The below arrays are filled with hex values in order not to reference
|
||||
* proprietary header files, but we still leave the code here for future
|
||||
* reference.
|
||||
*/
|
||||
#if 0
|
||||
const uint32_t IsaGenerator_Gfx72::COPY_DWORD_ISA[] = {
|
||||
(63u << SQ_VOP1__ENCODING__SHIFT) | (0 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (0 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v0, s0 (VOP1)
|
||||
(63u << SQ_VOP1__ENCODING__SHIFT) | (1 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (1 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v1, s1 (VOP1)
|
||||
(63u << SQ_VOP1__ENCODING__SHIFT) | (2 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (2 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v2, s2 (VOP1)
|
||||
(63u << SQ_VOP1__ENCODING__SHIFT) | (3 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (3 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v3, s3 (VOP1)
|
||||
|
||||
(55u << SQ_FLAT_0__ENCODING__SHIFT) | (SQ_FLAT_LOAD_DWORD << SQ_FLAT_0__OP__SHIFT) | (1 << SQ_FLAT_0__SLC__SHIFT) | (1 << SQ_FLAT_0__GLC__SHIFT)/*(3 << 16)*/, // SQ_FLAT_0, flat_load_dword, slc = 1, glc = 1 (FLAT_0)
|
||||
(4u << SQ_FLAT_1__VDST__SHIFT) | (0 << SQ_FLAT_1__ADDR__SHIFT), // ADDR = V0:V1, VDST = V4 (FLAT_1)
|
||||
|
||||
(383u << SQ_SOPP__ENCODING__SHIFT) | (SQ_S_WAITCNT << SQ_SOPP__OP__SHIFT) | (0 << SQ_SOPP__SIMM16__SHIFT), // s_waitcnt 0 (SOPP)
|
||||
|
||||
(55u << SQ_FLAT_0__ENCODING__SHIFT) | (SQ_FLAT_STORE_DWORD << SQ_FLAT_0__OP__SHIFT) | (1 << SQ_FLAT_0__SLC__SHIFT) | (1 << SQ_FLAT_0__GLC__SHIFT), // SQ_FLAT_0, flat_store_dword, slc = 1, glc = 1 (FLAT_0)
|
||||
(4u << SQ_FLAT_1__DATA__SHIFT) | (2 << SQ_FLAT_1__ADDR__SHIFT), // ADDR = V2:V3, DATA = V4 (FLAT_1)
|
||||
|
||||
0xBF810000u // s_endpgm, note that we rely on the implicit s_waitcnt 0,0,0
|
||||
};
|
||||
|
||||
const uint32_t IsaGenerator_Gfx72::INFINITE_LOOP_ISA[] = {
|
||||
(0x17F << SQ_SOPP__ENCODING__SHIFT) | (SQ_S_BRANCH << SQ_SOPP__OP__SHIFT) | ( (const uint32_t)-1 << SQ_SOPP__SIMM16__SHIFT), // s_branch -1 (PC <- PC + SIMM*4)+4
|
||||
0xBF810000u // S_ENDPGM
|
||||
};
|
||||
|
||||
const uint32_t IsaGenerator_Gfx72::ATOMIC_INC_ISA[] = {
|
||||
(63u << SQ_VOP1__ENCODING__SHIFT) | (0 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (0 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v0, s0 (VOP1)
|
||||
(63u << SQ_VOP1__ENCODING__SHIFT) | (1 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (1 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v1, s1 (VOP1)
|
||||
(63u << SQ_VOP1__ENCODING__SHIFT) | (2 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (0xC1 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v2, -1 (SRC0 0xC1 is the inline constant -1, i.e. 0xFFFFFFFF) (VOP1)
|
||||
|
||||
(55u << SQ_FLAT_0__ENCODING__SHIFT) | (SQ_FLAT_ATOMIC_INC << SQ_FLAT_0__OP__SHIFT) | (1 << SQ_FLAT_0__SLC__SHIFT) | (0 << SQ_FLAT_0__GLC__SHIFT), // SQ_FLAT_0, flat_atomic_inc, slc = 1, glc = 0 (FLAT_0)
|
||||
(3u << SQ_FLAT_1__VDST__SHIFT) | (2u << SQ_FLAT_1__DATA__SHIFT) | (0 << SQ_FLAT_1__ADDR__SHIFT), // ADDR/dst = V0:V1, VDST/ret = V3, DATA/src=V2 (FLAT_1)
|
||||
0xBF810000u // s_endpgm, note that we rely on the implicit s_waitcnt 0,0,0
|
||||
};
|
||||
#endif
|
||||
|
||||
/* Hex form of the copy-dword shader: load a dword from v[0:1], store it to v[2:3]. */
const uint32_t IsaGenerator_Gfx72::COPY_DWORD_ISA[] = {
    /* v_mov_b32 v0..v3, s0..s3 (VOP1) */
    0x7e000200, 0x7e020201, 0x7e040202, 0x7e060203,

    /* flat_load_dword, slc = 1, glc = 1: FLAT_0, then FLAT_1 (ADDR = V0:V1, VDST = V4) */
    0xdc330000, 0x04000000,

    /* s_waitcnt 0 (SOPP) */
    0xbf8c0000,

    /* flat_store_dword, slc = 1, glc = 1: FLAT_0, then FLAT_1 (ADDR = V2:V3, DATA = V4) */
    0xdc730000, 0x00000402,

    /* s_endpgm, note that we rely on the implicit s_waitcnt 0,0,0 */
    0xbf810000
};
|
||||
|
||||
/* Hex form of the infinite-loop shader. */
const uint32_t IsaGenerator_Gfx72::INFINITE_LOOP_ISA[] = {
    /* s_branch -1 (PC <- PC + SIMM*4)+4 */
    0xbf82ffff,
    /* S_ENDPGM */
    0xbf810000
};
|
||||
|
||||
/* Hex form of the atomic-increment shader. */
const uint32_t IsaGenerator_Gfx72::ATOMIC_INC_ISA[] = {
    /* v_mov_b32 v0, s0 and v_mov_b32 v1, s1 (VOP1) */
    0x7e000200, 0x7e020201,

    /* v_mov_b32 v2, -1 (VOP1): VDST = 2, SRC0 = 0xC1, the inline constant -1 (0xFFFFFFFF) */
    0x7e0402c1,

    /* flat_atomic_inc, slc = 1, glc = 0: FLAT_0, then FLAT_1 (ADDR/dst = V0:V1, VDST/ret = V3, DATA/src = V2) */
    0xdcf20000, 0x03000200,

    /* s_endpgm, note that we rely on the implicit s_waitcnt 0,0,0 */
    0xbf810000
};
|
||||
|
||||
/* Copies every word of NOOP_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Gfx72::GetNoopIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = NOOP_ISA + ARRAY_SIZE(NOOP_ISA);

    std::copy(NOOP_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Copies every word of COPY_DWORD_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Gfx72::GetCopyDwordIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = COPY_DWORD_ISA + ARRAY_SIZE(COPY_DWORD_ISA);

    std::copy(COPY_DWORD_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Copies every word of INFINITE_LOOP_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Gfx72::GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = INFINITE_LOOP_ISA + ARRAY_SIZE(INFINITE_LOOP_ISA);

    std::copy(INFINITE_LOOP_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Copies every word of ATOMIC_INC_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Gfx72::GetAtomicIncIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = ATOMIC_INC_ISA + ARRAY_SIZE(ATOMIC_INC_ISA);

    std::copy(ATOMIC_INC_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Returns a reference to the class-wide ASIC_NAME string. */
const std::string& IsaGenerator_Gfx72::GetAsicName() {
    return IsaGenerator_Gfx72::ASIC_NAME;
}
|
||||
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _ISAGENERATOR_GFX72_H_
|
||||
#define _ISAGENERATOR_GFX72_H_
|
||||
|
||||
#include <string>
|
||||
#include "IsaGenerator.hpp"
|
||||
|
||||
class IsaGenerator_Gfx72 : public IsaGenerator {
|
||||
public:
|
||||
virtual void GetNoopIsa(HsaMemoryBuffer& rBuf);
|
||||
virtual void GetCopyDwordIsa(HsaMemoryBuffer& rBuf);
|
||||
virtual void GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf);
|
||||
virtual void GetAtomicIncIsa(HsaMemoryBuffer& rBuf);
|
||||
|
||||
protected:
|
||||
virtual const std::string& GetAsicName();
|
||||
|
||||
private:
|
||||
static const std::string ASIC_NAME;
|
||||
|
||||
static const uint32_t NOOP_ISA[];
|
||||
static const uint32_t COPY_DWORD_ISA[];
|
||||
static const uint32_t INFINITE_LOOP_ISA[];
|
||||
static const uint32_t ATOMIC_INC_ISA[];
|
||||
};
|
||||
|
||||
#endif // _ISAGENERATOR_GFX72_H_
|
||||
@@ -1,128 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "IsaGenerator_Gfx8.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
||||
/* Name returned by IsaGenerator_Gfx8::GetAsicName(). */
const std::string IsaGenerator_Gfx8::ASIC_NAME("VI");
|
||||
|
||||
/* One-word shader consisting only of S_ENDPGM. */
const uint32_t IsaGenerator_Gfx8::NOOP_ISA[] = { 0xbf810000 };
|
||||
|
||||
/** The below arrays are filled with hex values in order not to reference
|
||||
* proprietary header files, but we still leave the code here for future
|
||||
* reference.
|
||||
*/
|
||||
#if 0
|
||||
const uint32_t IsaGenerator_Gfx8::COPY_DWORD_ISA[] = {
|
||||
(63u << SQ_VOP1__ENCODING__SHIFT) | (0 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (0 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v0, s0 (VOP1)
|
||||
(63u << SQ_VOP1__ENCODING__SHIFT) | (1 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (1 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v1, s1 (VOP1)
|
||||
(63u << SQ_VOP1__ENCODING__SHIFT) | (2 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (2 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v2, s2 (VOP1)
|
||||
(63u << SQ_VOP1__ENCODING__SHIFT) | (3 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (3 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v3, s3 (VOP1)
|
||||
|
||||
(55u << SQ_FLAT_0__ENCODING__SHIFT) | (SQ_FLAT_LOAD_DWORD << SQ_FLAT_0__OP__SHIFT) | (1 << SQ_FLAT_0__SLC__SHIFT) | (1 << SQ_FLAT_0__GLC__SHIFT)/*(3 << 16)*/, // SQ_FLAT_0, flat_load_dword, slc = 1, glc = 1 (FLAT_0)
|
||||
(4u << SQ_FLAT_1__VDST__SHIFT) | (0 << SQ_FLAT_1__ADDR__SHIFT), // ADDR = V0:V1, VDST = V4 (FLAT_1)
|
||||
|
||||
(383u << SQ_SOPP__ENCODING__SHIFT) | (SQ_S_WAITCNT << SQ_SOPP__OP__SHIFT) | (0 << SQ_SOPP__SIMM16__SHIFT), // s_waitcnt 0 (SOPP)
|
||||
|
||||
(55u << SQ_FLAT_0__ENCODING__SHIFT) | (SQ_FLAT_STORE_DWORD << SQ_FLAT_0__OP__SHIFT) | (1 << SQ_FLAT_0__SLC__SHIFT) | (1 << SQ_FLAT_0__GLC__SHIFT), // SQ_FLAT_0, flat_store_dword, slc = 1, glc = 1 (FLAT_0)
|
||||
(4u << SQ_FLAT_1__DATA__SHIFT) | (2 << SQ_FLAT_1__ADDR__SHIFT), // ADDR = V2:V3, DATA = V4 (FLAT_1)
|
||||
|
||||
0xBF810000u // s_endpgm, note that we rely on the implicit s_waitcnt 0,0,0
|
||||
};
|
||||
|
||||
const uint32_t IsaGenerator_Gfx8::INFINITE_LOOP_ISA[] = {
|
||||
(0x17F << SQ_SOPP__ENCODING__SHIFT) | (SQ_S_BRANCH << SQ_SOPP__OP__SHIFT) | ( (const uint32_t)-1 << SQ_SOPP__SIMM16__SHIFT), // s_branch -1 (PC <- PC + SIMM*4)+4
|
||||
0xBF810000u // S_ENDPGM
|
||||
};
|
||||
#endif
|
||||
|
||||
/* Hex form of the copy-dword shader: load a dword from v[0:1], store it to v[2:3]. */
const uint32_t IsaGenerator_Gfx8::COPY_DWORD_ISA[] = {
    /* v_mov_b32 v0..v3, s0..s3 (VOP1) */
    0x7e000200, 0x7e020201, 0x7e040202, 0x7e060203,

    /* flat_load_dword, slc = 1, glc = 1: FLAT_0, then FLAT_1 (ADDR = V0:V1, VDST = V4) */
    0xdc530000, 0x04000000,

    /* s_waitcnt 0 (SOPP) */
    0xbf8c0000,

    /* flat_store_dword, slc = 1, glc = 1: FLAT_0, then FLAT_1 (ADDR = V2:V3, DATA = V4) */
    0xdc730000, 0x00000402,

    /* s_endpgm, note that we rely on the implicit s_waitcnt 0,0,0 */
    0xbf810000
};
|
||||
|
||||
/* Hex form of the infinite-loop shader. */
const uint32_t IsaGenerator_Gfx8::INFINITE_LOOP_ISA[] = {
    /* s_branch -1 (PC <- PC + SIMM*4)+4 */
    0xbf82ffff,
    /* S_ENDPGM */
    0xbf810000
};
|
||||
|
||||
/**
|
||||
* The atomic_add_isa binary is generated from the following ISA.
|
||||
* The original atomic_inc is not supported by some PCIe configurations, so atomic_add is used instead.
|
||||
*
|
||||
*/
|
||||
/*
|
||||
shader atomic_add
|
||||
asic(VI)
|
||||
type(CS)
|
||||
v_mov_b32 v0, s0
|
||||
v_mov_b32 v1, s1
|
||||
v_mov_b32 v2, 1
|
||||
flat_atomic_add v3, v[0:1], v2 slc glc
|
||||
s_waitcnt 0
|
||||
s_endpgm
|
||||
end
|
||||
*/
|
||||
|
||||
/*
 * Binary of the atomic_add shader. The per-word notes follow the
 * "shader atomic_add" listing in the comment that precedes this table.
 */
const uint32_t IsaGenerator_Gfx8::ATOMIC_ADD_ISA[] = {
    0x7e000200,              /* v_mov_b32 v0, s0 */
    0x7e020201,              /* v_mov_b32 v1, s1 */
    0x7e040281,              /* v_mov_b32 v2, 1 */
    0xdd0b0000, 0x03000200,  /* flat_atomic_add v3, v[0:1], v2 slc glc */
    0xbf8c0000,              /* s_waitcnt 0 */
    0xbf810000,              /* s_endpgm */
    0x00000000               /* trailing zero word; not part of the listing */
};
|
||||
|
||||
/* Copies every word of NOOP_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Gfx8::GetNoopIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = NOOP_ISA + ARRAY_SIZE(NOOP_ISA);

    std::copy(NOOP_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Copies every word of COPY_DWORD_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Gfx8::GetCopyDwordIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = COPY_DWORD_ISA + ARRAY_SIZE(COPY_DWORD_ISA);

    std::copy(COPY_DWORD_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Copies every word of INFINITE_LOOP_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Gfx8::GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = INFINITE_LOOP_ISA + ARRAY_SIZE(INFINITE_LOOP_ISA);

    std::copy(INFINITE_LOOP_ISA, srcEnd, dst);
}
|
||||
|
||||
/*
 * Copies every word of ATOMIC_ADD_ISA to the address rBuf exposes as uint32_t*.
 * Despite the method name, the table copied is the atomic-add shader.
 */
void IsaGenerator_Gfx8::GetAtomicIncIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = ATOMIC_ADD_ISA + ARRAY_SIZE(ATOMIC_ADD_ISA);

    std::copy(ATOMIC_ADD_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Returns a reference to the class-wide ASIC_NAME string. */
const std::string& IsaGenerator_Gfx8::GetAsicName() {
    return IsaGenerator_Gfx8::ASIC_NAME;
}
|
||||
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _ISAGENERATOR_GFX8_H_
|
||||
#define _ISAGENERATOR_GFX8_H_
|
||||
|
||||
#include <string>
|
||||
#include "IsaGenerator.hpp"
|
||||
|
||||
class IsaGenerator_Gfx8 : public IsaGenerator {
|
||||
public:
|
||||
virtual void GetNoopIsa(HsaMemoryBuffer& rBuf);
|
||||
virtual void GetCopyDwordIsa(HsaMemoryBuffer& rBuf);
|
||||
virtual void GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf);
|
||||
virtual void GetAtomicIncIsa(HsaMemoryBuffer& rBuf);
|
||||
|
||||
protected:
|
||||
virtual const std::string& GetAsicName();
|
||||
|
||||
private:
|
||||
static const std::string ASIC_NAME;
|
||||
|
||||
static const uint32_t NOOP_ISA[];
|
||||
static const uint32_t COPY_DWORD_ISA[];
|
||||
static const uint32_t INFINITE_LOOP_ISA[];
|
||||
static const uint32_t ATOMIC_ADD_ISA[];
|
||||
};
|
||||
|
||||
#endif // _ISAGENERATOR_GFX8_H_
|
||||
@@ -1,113 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "IsaGenerator_Gfx9.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
||||
/* Name returned by IsaGenerator_Gfx9::GetAsicName(). */
const std::string IsaGenerator_Gfx9::ASIC_NAME("GFX9");
|
||||
|
||||
/* The binaries are generated from following ISA */
|
||||
#if 0
|
||||
/* flat_atomic_inc is not supported by some PCIe configurations, so flat_atomic_add is used instead */
|
||||
shader atomic_add
|
||||
asic(GFX9)
|
||||
type(CS)
|
||||
v_mov_b32 v0, s0
|
||||
v_mov_b32 v1, s1
|
||||
v_mov_b32 v2, 1
|
||||
flat_atomic_add v3, v[0:1], v2 slc glc
|
||||
s_waitcnt 0
|
||||
s_endpgm
|
||||
end
|
||||
|
||||
shader copy_dword
|
||||
asic(GFX9)
|
||||
type(CS)
|
||||
/* copy the parameters from scalar registers to vector registers */
|
||||
v_mov_b32 v0, s0
|
||||
v_mov_b32 v1, s1
|
||||
v_mov_b32 v2, s2
|
||||
v_mov_b32 v3, s3
|
||||
/* copy a dword between the passed addresses */
|
||||
flat_load_dword v4, v[0:1] slc glc
|
||||
s_waitcnt 0
|
||||
flat_store_dword v[2:3], v4 slc glc
|
||||
s_endpgm
|
||||
end
|
||||
|
||||
shader main
|
||||
asic(GFX9)
|
||||
type(CS)
|
||||
loop:
|
||||
s_branch loop
|
||||
s_endpgm
|
||||
end
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
/* One-word shader; 0xbf810000 is also the word that ends the other shaders in this file. */
const uint32_t IsaGenerator_Gfx9::NOOP_ISA[] = { 0xbf810000 };
|
||||
|
||||
/*
 * Binary of the copy_dword shader. The per-word notes follow the copy_dword
 * listing kept in the #if 0 block of this file (same instruction order).
 */
const uint32_t IsaGenerator_Gfx9::COPY_DWORD_ISA[] = {
    0x7e000200,              /* v_mov_b32 v0, s0 */
    0x7e020201,              /* v_mov_b32 v1, s1 */
    0x7e040202,              /* v_mov_b32 v2, s2 */
    0x7e060203,              /* v_mov_b32 v3, s3 */
    0xdc530000, 0x047f0000,  /* flat_load_dword v4, v[0:1] slc glc */
    0xbf8c0000,              /* s_waitcnt 0 */
    0xdc730000, 0x007f0402,  /* flat_store_dword v[2:3], v4 slc glc */
    0xbf810000               /* s_endpgm */
};
|
||||
|
||||
/* Binary of the "loop: s_branch loop; s_endpgm" shader listed in the #if 0 block of this file. */
const uint32_t IsaGenerator_Gfx9::INFINITE_LOOP_ISA[] = {
    0xbf82ffff,  /* s_branch loop */
    0xbf810000   /* s_endpgm */
};
|
||||
|
||||
/*
 * Binary of the atomic_add shader. The per-word notes follow the atomic_add
 * listing kept in the #if 0 block of this file.
 */
const uint32_t IsaGenerator_Gfx9::ATOMIC_ADD_ISA[] = {
    0x7e000200,              /* v_mov_b32 v0, s0 */
    0x7e020201,              /* v_mov_b32 v1, s1 */
    0x7e040281,              /* v_mov_b32 v2, 1 */
    0xdd0b0000, 0x037f0200,  /* flat_atomic_add v3, v[0:1], v2 slc glc */
    0xbf8c0000,              /* s_waitcnt 0 */
    0xbf810000,              /* s_endpgm */
    0x00000000               /* trailing zero word; not part of the listing */
};
|
||||
|
||||
/* Copies every word of NOOP_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Gfx9::GetNoopIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = NOOP_ISA + ARRAY_SIZE(NOOP_ISA);

    std::copy(NOOP_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Copies every word of COPY_DWORD_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Gfx9::GetCopyDwordIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = COPY_DWORD_ISA + ARRAY_SIZE(COPY_DWORD_ISA);

    std::copy(COPY_DWORD_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Copies every word of INFINITE_LOOP_ISA to the address rBuf exposes as uint32_t*. No size check is done here. */
void IsaGenerator_Gfx9::GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = INFINITE_LOOP_ISA + ARRAY_SIZE(INFINITE_LOOP_ISA);

    std::copy(INFINITE_LOOP_ISA, srcEnd, dst);
}
|
||||
|
||||
/*
 * Copies every word of ATOMIC_ADD_ISA to the address rBuf exposes as uint32_t*.
 * Despite the method name, the table copied is the atomic-add shader.
 */
void IsaGenerator_Gfx9::GetAtomicIncIsa(HsaMemoryBuffer& rBuf) {
    uint32_t* const dst = rBuf.As<uint32_t*>();
    const uint32_t* const srcEnd = ATOMIC_ADD_ISA + ARRAY_SIZE(ATOMIC_ADD_ISA);

    std::copy(ATOMIC_ADD_ISA, srcEnd, dst);
}
|
||||
|
||||
/* Returns a reference to the class-wide ASIC_NAME string. */
const std::string& IsaGenerator_Gfx9::GetAsicName() {
    return IsaGenerator_Gfx9::ASIC_NAME;
}
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _ISAGENERATOR_GFX9_H_
|
||||
#define _ISAGENERATOR_GFX9_H_
|
||||
|
||||
#include <string>
|
||||
#include "IsaGenerator.hpp"
|
||||
|
||||
class IsaGenerator_Gfx9 : public IsaGenerator {
|
||||
public:
|
||||
virtual void GetNoopIsa(HsaMemoryBuffer& rBuf);
|
||||
virtual void GetCopyDwordIsa(HsaMemoryBuffer& rBuf);
|
||||
virtual void GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf);
|
||||
virtual void GetAtomicIncIsa(HsaMemoryBuffer& rBuf);
|
||||
|
||||
protected:
|
||||
virtual const std::string& GetAsicName();
|
||||
|
||||
private:
|
||||
static const std::string ASIC_NAME;
|
||||
|
||||
static const uint32_t NOOP_ISA[];
|
||||
static const uint32_t COPY_DWORD_ISA[];
|
||||
static const uint32_t INFINITE_LOOP_ISA[];
|
||||
static const uint32_t ATOMIC_ADD_ISA[];
|
||||
};
|
||||
|
||||
#endif // _ISAGENERATOR_GFX9_H_
|
||||
+47
-23
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
* Copyright (C) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -21,29 +21,53 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _ISAGENERATOR_ALDEBARAN_H_
|
||||
#define _ISAGENERATOR_ALDEBARAN_H_
|
||||
#include "GoogleTestExtension.hpp"
|
||||
#include "KFDASMTest.hpp"
|
||||
#include "ShaderStore.hpp"
|
||||
#include "Assemble.hpp"
|
||||
|
||||
#include <string>
|
||||
#include "IsaGenerator.hpp"
|
||||
void KFDASMTest::SetUp() {}
|
||||
void KFDASMTest::TearDown() {}
|
||||
|
||||
class IsaGenerator_Aldbrn : public IsaGenerator {
|
||||
public:
|
||||
virtual void GetNoopIsa(HsaMemoryBuffer& rBuf);
|
||||
virtual void GetCopyDwordIsa(HsaMemoryBuffer& rBuf);
|
||||
virtual void GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf);
|
||||
virtual void GetAtomicIncIsa(HsaMemoryBuffer& rBuf);
|
||||
|
||||
protected:
|
||||
virtual const std::string& GetAsicName();
|
||||
|
||||
private:
|
||||
static const std::string ASIC_NAME;
|
||||
|
||||
static const uint32_t NOOP_ISA[];
|
||||
static const uint32_t COPY_DWORD_ISA[];
|
||||
static const uint32_t INFINITE_LOOP_ISA[];
|
||||
static const uint32_t ATOMIC_ADD_ISA[];
|
||||
static const std::vector<uint32_t> TargetList = {
|
||||
0x080001,
|
||||
0x080002,
|
||||
0x080003,
|
||||
0x080005,
|
||||
0x080100,
|
||||
0x090000,
|
||||
0x090002,
|
||||
0x090004,
|
||||
0x090006,
|
||||
0x090008,
|
||||
0x090009,
|
||||
0x09000a,
|
||||
0x09000c,
|
||||
0x0a0100,
|
||||
0x0a0101,
|
||||
0x0a0102,
|
||||
0x0a0103,
|
||||
0x0a0300,
|
||||
0x0a0301,
|
||||
0x0a0302,
|
||||
0x0a0303,
|
||||
0x0a0304,
|
||||
0x0a0305,
|
||||
0x0a0306,
|
||||
};
|
||||
|
||||
#endif // _ISAGENERATOR_ALDEBARAN_H_
|
||||
TEST_F(KFDASMTest, AssembleShaders) {
|
||||
TEST_START(TESTPROFILE_RUNALL)
|
||||
|
||||
for (auto &t : TargetList) {
|
||||
Assembler asmblr(t);
|
||||
|
||||
LOG() << "Running ASM test for target " << asmblr.GetTargetAsic() << std::endl;
|
||||
|
||||
for (auto &s : ShaderList) {
|
||||
EXPECT_SUCCESS(asmblr.RunAssemble(s));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_END
|
||||
}
|
||||
+17
-1
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
* Copyright (C) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -21,3 +21,19 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __KFD_ASM_TEST__H__
|
||||
#define __KFD_ASM_TEST__H__
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
class KFDASMTest : public testing::Test {
|
||||
public:
|
||||
KFDASMTest() {}
|
||||
~KFDASMTest() {}
|
||||
|
||||
protected:
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
};
|
||||
|
||||
#endif // __KFD_ASM_TEST__H__
|
||||
@@ -68,6 +68,8 @@ void KFDBaseComponentTest::SetUp() {
|
||||
|
||||
g_baseTest = this;
|
||||
|
||||
m_pAsm = new Assembler(GetGfxVersion(nodeProperties));
|
||||
|
||||
ROUTINE_END
|
||||
}
|
||||
|
||||
@@ -86,6 +88,10 @@ void KFDBaseComponentTest::TearDown() {
|
||||
EXPECT_SUCCESS(hsaKmtCloseKFD());
|
||||
g_baseTest = NULL;
|
||||
|
||||
if (m_pAsm)
|
||||
delete m_pAsm;
|
||||
m_pAsm = nullptr;
|
||||
|
||||
ROUTINE_END
|
||||
}
|
||||
|
||||
|
||||
@@ -34,6 +34,8 @@
|
||||
#include "hsakmt.h"
|
||||
#include "OSWrapper.hpp"
|
||||
#include "KFDTestUtil.hpp"
|
||||
#include "Assemble.hpp"
|
||||
#include "ShaderStore.hpp"
|
||||
|
||||
// @class KFDBaseComponentTest
|
||||
class KFDBaseComponentTest : public testing::Test {
|
||||
@@ -74,6 +76,7 @@ class KFDBaseComponentTest : public testing::Test {
|
||||
HsaMemFlags m_MemoryFlags;
|
||||
HsaNodeInfo m_NodeInfo;
|
||||
HSAint32 m_xnack;
|
||||
Assembler* m_pAsm;
|
||||
|
||||
// @brief Executed before every test that uses KFDBaseComponentTest class and sets all common settings for the tests.
|
||||
virtual void SetUp();
|
||||
|
||||
@@ -24,90 +24,11 @@
|
||||
#include "KFDCWSRTest.hpp"
|
||||
#include "Dispatch.hpp"
|
||||
|
||||
|
||||
/* Initial state:
|
||||
* s[0:1] - 64 bits iteration number; only the lower 32 bits are useful.
|
||||
* s[2:3] - result buffer base address
|
||||
* s4 - workgroup id
|
||||
* v0 - workitem id, always 0 because
|
||||
* NUM_THREADS_X(number of threads) in workgroup set to 1
|
||||
* Registers:
|
||||
* v0 - calculated workitem = v0 + s4 * NUM_THREADS_X, which is s4
|
||||
* v2 - = s0, 32 bits iteration number
|
||||
* v[4:5] - corresponding output buf address: s[2:3] + v0 * 4
|
||||
* v6 - counter
|
||||
*/
|
||||
|
||||
static const char* iterate_isa_gfx8 = \
|
||||
"\
|
||||
shader iterate_isa\n\
|
||||
wave_size(32)\n\
|
||||
type(CS)\n\
|
||||
// copy the parameters from scalar registers to vector registers\n\
|
||||
v_mov_b32 v2, s0 // v[2:3] = s[0:1] \n\
|
||||
v_mov_b32 v3, s1 // v[2:3] = s[0:1] \n\
|
||||
v_mov_b32 v0, s4 // use workgroup id as index \n\
|
||||
v_lshlrev_b32 v0, 2, v0 // v0 *= 4 \n\
|
||||
v_add_u32 v4, vcc, s2, v0 // v[4:5] = s[2:3] + v0 * 4 \n\
|
||||
v_mov_b32 v5, s3 // v[4:5] = s[2:3] + v0 * 4 \n\
|
||||
v_add_u32 v5, vcc, v5, vcc_lo // v[4:5] = s[2:3] + v0 * 4 \n\
|
||||
v_mov_b32 v6, 0 \n\
|
||||
LOOP: \n\
|
||||
v_add_u32 v6, vcc, 1, v6 \n\
|
||||
// compare the result value (v6) to iteration value (v2), and \n\
|
||||
// jump if equal (i.e. if VCC is not zero after the comparison) \n\
|
||||
v_cmp_lt_u32 vcc, v6, v2 \n\
|
||||
s_cbranch_vccnz LOOP \n\
|
||||
flat_store_dword v[4:5], v6 \n\
|
||||
s_waitcnt vmcnt(0)&lgkmcnt(0) \n\
|
||||
s_endpgm \n\
|
||||
end \n\
|
||||
";
|
||||
|
||||
//This shader can be used by gfx9 and gfx10
|
||||
static const char* iterate_isa_gfx9 = \
|
||||
"\
|
||||
shader iterate_isa\n\
|
||||
wave_size(32)\n\
|
||||
type(CS)\n\
|
||||
// copy the parameters from scalar registers to vector registers\n\
|
||||
v_mov_b32 v2, s0 // v[2:3] = s[0:1] \n\
|
||||
v_mov_b32 v3, s1 // v[2:3] = s[0:1] \n\
|
||||
v_mov_b32 v0, s4 // use workgroup id as index \n\
|
||||
v_lshlrev_b32 v0, 2, v0 // v0 *= 4 \n\
|
||||
v_add_co_u32 v4, vcc, s2, v0 // v[4:5] = s[2:3] + v0 * 4 \n\
|
||||
v_mov_b32 v5, s3 // v[4:5] = s[2:3] + v0 * 4 \n\
|
||||
v_add_co_u32 v5, vcc, v5, vcc_lo // v[4:5] = s[2:3] + v0 * 4 \n\
|
||||
v_mov_b32 v6, 0 \n\
|
||||
LOOP: \n\
|
||||
v_add_co_u32 v6, vcc, 1, v6 \n\
|
||||
// compare the result value (v6) to iteration value (v2), and \n\
|
||||
// jump if equal (i.e. if VCC is not zero after the comparison) \n\
|
||||
v_cmp_lt_u32 vcc, v6, v2 \n\
|
||||
s_cbranch_vccnz LOOP \n\
|
||||
flat_store_dword v[4:5], v6 \n\
|
||||
s_waitcnt vmcnt(0)&lgkmcnt(0) \n\
|
||||
s_endpgm \n\
|
||||
end \n\
|
||||
";
|
||||
|
||||
static const char* infinite_isa = \
|
||||
"\
|
||||
shader infinite_isa \n\
|
||||
wave_size(32) \n\
|
||||
type(CS) \n\
|
||||
LOOP: \n\
|
||||
s_branch LOOP \n\
|
||||
end \n\
|
||||
";
|
||||
|
||||
void KFDCWSRTest::SetUp() {
|
||||
ROUTINE_START
|
||||
|
||||
KFDBaseComponentTest::SetUp();
|
||||
|
||||
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
|
||||
|
||||
wave_number = 1;
|
||||
|
||||
ROUTINE_END
|
||||
@@ -115,9 +36,6 @@ void KFDCWSRTest::SetUp() {
|
||||
|
||||
void KFDCWSRTest::TearDown() {
|
||||
ROUTINE_START
|
||||
if (m_pIsaGen)
|
||||
delete m_pIsaGen;
|
||||
m_pIsaGen = NULL;
|
||||
|
||||
KFDBaseComponentTest::TearDown();
|
||||
|
||||
@@ -153,16 +71,10 @@ TEST_F(KFDCWSRTest, BasicTest) {
|
||||
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
|
||||
|
||||
if ((m_FamilyId >= FAMILY_VI) && (checkCWSREnabled())) {
|
||||
const char *pIterateIsa;
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
HsaMemoryBuffer resultBuf1(PAGE_SIZE, defaultGPUNode, true, false, false);
|
||||
uint64_t count1 = 400000000;
|
||||
|
||||
if (m_FamilyId < FAMILY_AI)
|
||||
pIterateIsa = iterate_isa_gfx8;
|
||||
else
|
||||
pIterateIsa = iterate_isa_gfx9;
|
||||
|
||||
if (isOnEmulator()) {
|
||||
// Divide the iterator times by 10000 so that the test can
|
||||
// finish in a reasonable time.
|
||||
@@ -172,7 +84,7 @@ TEST_F(KFDCWSRTest, BasicTest) {
|
||||
|
||||
unsigned int* result1 = resultBuf1.As<unsigned int*>();
|
||||
|
||||
m_pIsaGen->CompileShader(pIterateIsa, "iterate_isa", isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(IterateIsa, isaBuffer.As<char*>()));
|
||||
|
||||
PM4Queue queue1;
|
||||
|
||||
@@ -236,7 +148,7 @@ TEST_F(KFDCWSRTest, InterruptRestore) {
|
||||
if ((m_FamilyId >= FAMILY_VI) && (checkCWSREnabled())) {
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
|
||||
m_pIsaGen->CompileShader(infinite_isa, "infinite_isa", isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(InfiniteLoopIsa, isaBuffer.As<char*>()));
|
||||
|
||||
PM4Queue queue1, queue2, queue3;
|
||||
|
||||
|
||||
@@ -27,12 +27,11 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "PM4Queue.hpp"
|
||||
#include "IsaGenerator.hpp"
|
||||
#include "KFDBaseComponentTest.hpp"
|
||||
|
||||
class KFDCWSRTest : public KFDBaseComponentTest {
|
||||
public:
|
||||
KFDCWSRTest() :m_pIsaGen(NULL) {}
|
||||
KFDCWSRTest() {}
|
||||
~KFDCWSRTest() {}
|
||||
|
||||
protected:
|
||||
@@ -41,7 +40,6 @@ class KFDCWSRTest : public KFDBaseComponentTest {
|
||||
|
||||
protected: // Members
|
||||
unsigned wave_number;
|
||||
IsaGenerator* m_pIsaGen;
|
||||
};
|
||||
|
||||
#endif // __KFD_CWSR_TEST__H__
|
||||
|
||||
@@ -176,16 +176,11 @@ void KFDDBGTest::SetUp() {
|
||||
|
||||
KFDBaseComponentTest::SetUp();
|
||||
|
||||
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
|
||||
|
||||
ROUTINE_END
|
||||
}
|
||||
|
||||
void KFDDBGTest::TearDown() {
|
||||
ROUTINE_START
|
||||
if (m_pIsaGen)
|
||||
delete m_pIsaGen;
|
||||
m_pIsaGen = NULL;
|
||||
|
||||
/* Reset the user trap handler */
|
||||
hsaKmtSetTrapHandler(m_NodeInfo.HsaDefaultGPUNode(), 0, 0, 0, 0);
|
||||
|
||||
@@ -26,20 +26,16 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "IsaGenerator.hpp"
|
||||
#include "KFDBaseComponentTest.hpp"
|
||||
|
||||
class KFDDBGTest : public KFDBaseComponentTest {
|
||||
public:
|
||||
KFDDBGTest() :m_pIsaGen(NULL) {}
|
||||
KFDDBGTest() {}
|
||||
~KFDDBGTest() {}
|
||||
|
||||
protected:
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
protected: // Members
|
||||
IsaGenerator* m_pIsaGen;
|
||||
};
|
||||
|
||||
#endif // __KFD_DBG_TEST__H__
|
||||
|
||||
@@ -41,18 +41,12 @@ void KFDEvictTest::SetUp() {
|
||||
|
||||
KFDBaseComponentTest::SetUp();
|
||||
|
||||
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
|
||||
|
||||
ROUTINE_END
|
||||
}
|
||||
|
||||
void KFDEvictTest::TearDown() {
|
||||
ROUTINE_START
|
||||
|
||||
if (m_pIsaGen)
|
||||
delete m_pIsaGen;
|
||||
m_pIsaGen = NULL;
|
||||
|
||||
KFDBaseComponentTest::TearDown();
|
||||
|
||||
ROUTINE_END
|
||||
@@ -286,136 +280,6 @@ void KFDEvictTest::AmdgpuCommandSubmissionSdmaNop(int rn, amdgpu_bo_handle handl
|
||||
EXPECT_EQ(0, amdgpu_cs_ctx_free(contextHandle));
|
||||
}
|
||||
|
||||
/* Shader to read local buffers using multiple wavefronts in parallel
|
||||
* until address buffer is filled with specific value 0x5678 by host program,
|
||||
* then each wavefront fills value 0x5678 at corresponding result buffer and quit
|
||||
*
|
||||
* Initial state:
|
||||
* s[0:1] - address buffer base address
|
||||
* s[2:3] - result buffer base address
|
||||
* s4 - workgroup id
|
||||
* v0 - workitem id, always 0 because NUM_THREADS_X(number of threads) in workgroup set to 1
|
||||
* Registers:
|
||||
* v0 - calculated workitem id, v0 = v0 + s4 * NUM_THREADS_X
|
||||
* v[2:3] - address of corresponding local buf address offset: s[0:1] + v0 * 8
|
||||
* v[4:5] - corresponding output buf address: s[2:3] + v0 * 4
|
||||
* v[6:7] - local buf address used for read test
|
||||
*
|
||||
* This shader can be used by gfx9 and gfx10
|
||||
*
|
||||
*/
|
||||
|
||||
static const char* gfx9_ReadMemory =
|
||||
"\
|
||||
shader ReadMemory\n\
|
||||
wave_size(32)\n\
|
||||
type(CS)\n\
|
||||
\n\
|
||||
// compute address of corresponding output buffer\n\
|
||||
v_mov_b32 v0, s4 // use workgroup id as index\n\
|
||||
v_lshlrev_b32 v0, 2, v0 // v0 *= 4\n\
|
||||
v_add_co_u32 v4, vcc, s2, v0 // v[4:5] = s[2:3] + v0 * 4\n\
|
||||
v_mov_b32 v5, s3\n\
|
||||
v_add_co_u32 v5, vcc, v5, vcc_lo\n\
|
||||
\n\
|
||||
// compute input buffer offset used to store corresponding local buffer address\n\
|
||||
v_lshlrev_b32 v0, 1, v0 // v0 *= 8\n\
|
||||
v_add_co_u32 v2, vcc, s0, v0 // v[2:3] = s[0:1] + v0 * 8\n\
|
||||
v_mov_b32 v3, s1\n\
|
||||
v_add_co_u32 v3, vcc, v3, vcc_lo\n\
|
||||
\n\
|
||||
// load 64bit local buffer address stored at v[2:3] to v[6:7]\n\
|
||||
flat_load_dwordx2 v[6:7], v[2:3] slc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\
|
||||
\n\
|
||||
v_mov_b32 v8, 0x5678\n\
|
||||
s_movk_i32 s8, 0x5678\n\
|
||||
L_REPEAT:\n\
|
||||
s_load_dword s16, s[0:1], 0x0 glc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\
|
||||
s_cmp_eq_i32 s16, s8\n\
|
||||
s_cbranch_scc1 L_QUIT // if notified to quit by host\n\
|
||||
// loop read 64M local buffer starting at v[6:7]\n\
|
||||
// every 4k page only read once\n\
|
||||
v_mov_b32 v9, 0\n\
|
||||
v_mov_b32 v10, 0x1000 // 4k page\n\
|
||||
v_mov_b32 v11, 0x4000000 // 64M size\n\
|
||||
v_mov_b32 v12, v6\n\
|
||||
v_mov_b32 v13, v7\n\
|
||||
L_LOOP_READ:\n\
|
||||
flat_load_dwordx2 v[14:15], v[12:13] slc\n\
|
||||
v_add_co_u32 v9, vcc, v9, v10 \n\
|
||||
v_add_co_u32 v12, vcc, v12, v10\n\
|
||||
v_add_co_u32 v13, vcc, v13, vcc_lo\n\
|
||||
v_cmp_lt_u32 vcc, v9, v11\n\
|
||||
s_cbranch_vccnz L_LOOP_READ\n\
|
||||
s_branch L_REPEAT\n\
|
||||
L_QUIT:\n\
|
||||
flat_store_dword v[4:5], v8\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory writes to finish\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
static const char* gfx8_ReadMemory =
|
||||
"\
|
||||
shader ReadMemory\n\
|
||||
asic(VI)\n\
|
||||
type(CS)\n\
|
||||
\n\
|
||||
// compute address of corresponding output buffer\n\
|
||||
v_mov_b32 v0, s4 // use workgroup id as index\n\
|
||||
v_lshlrev_b32 v0, 2, v0 // v0 *= 4\n\
|
||||
v_add_u32 v4, vcc, s2, v0 // v[4:5] = s[2:3] + v0 * 4\n\
|
||||
v_mov_b32 v5, s3\n\
|
||||
v_addc_u32 v5, vcc, v5, 0, vcc\n\
|
||||
\n\
|
||||
// compute input buffer offset used to store corresponding local buffer address\n\
|
||||
v_lshlrev_b32 v0, 1, v0 // v0 *= 8\n\
|
||||
v_add_u32 v2, vcc, s0, v0 // v[2:3] = s[0:1] + v0 * 8\n\
|
||||
v_mov_b32 v3, s1\n\
|
||||
v_addc_u32 v3, vcc, v3, 0, vcc\n\
|
||||
\n\
|
||||
// load 64bit local buffer address stored at v[2:3] to v[6:7]\n\
|
||||
flat_load_dwordx2 v[6:7], v[2:3] slc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\
|
||||
\n\
|
||||
v_mov_b32 v8, 0x5678\n\
|
||||
s_movk_i32 s8, 0x5678\n\
|
||||
L_REPEAT:\n\
|
||||
s_load_dword s16, s[0:1], 0x0 glc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\
|
||||
s_cmp_eq_i32 s16, s8\n\
|
||||
s_cbranch_scc1 L_QUIT // if notified to quit by host\n\
|
||||
// loop read 64M local buffer starting at v[6:7]\n\
|
||||
// every 4k page only read once\n\
|
||||
v_mov_b32 v9, 0\n\
|
||||
v_mov_b32 v10, 0x1000 // 4k page\n\
|
||||
v_mov_b32 v11, 0x4000000 // 64M size\n\
|
||||
v_mov_b32 v12, v6\n\
|
||||
v_mov_b32 v13, v7\n\
|
||||
L_LOOP_READ:\n\
|
||||
flat_load_dwordx2 v[14:15], v[12:13] slc\n\
|
||||
v_add_u32 v9, vcc, v9, v10 \n\
|
||||
v_add_u32 v12, vcc, v12, v10\n\
|
||||
v_addc_u32 v13, vcc, v13, 0, vcc\n\
|
||||
v_cmp_lt_u32 vcc, v9, v11\n\
|
||||
s_cbranch_vccnz L_LOOP_READ\n\
|
||||
s_branch L_REPEAT\n\
|
||||
L_QUIT:\n\
|
||||
flat_store_dword v[4:5], v8\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory writes to finish\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
std::string KFDEvictTest::CreateShader() {
|
||||
if (m_FamilyId < FAMILY_AI)
|
||||
return gfx8_ReadMemory;
|
||||
else
|
||||
return gfx9_ReadMemory;
|
||||
}
|
||||
|
||||
/* Evict and restore procedure basic test
|
||||
*
|
||||
* Use N_PROCESSES processes to allocate vram buf size larger than total vram size
|
||||
@@ -567,7 +431,7 @@ TEST_F(KFDEvictTest, QueueTest) {
|
||||
HsaMemoryBuffer addrBuffer(PAGE_SIZE, defaultGPUNode);
|
||||
HsaMemoryBuffer resultBuffer(PAGE_SIZE, defaultGPUNode);
|
||||
|
||||
m_pIsaGen->CompileShader(CreateShader().c_str(), "ReadMemory", isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(ReadMemoryIsa, isaBuffer.As<char*>()));
|
||||
|
||||
PM4Queue pm4Queue;
|
||||
ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode));
|
||||
|
||||
@@ -27,22 +27,19 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "KFDMultiProcessTest.hpp"
|
||||
#include "IsaGenerator.hpp"
|
||||
#include "PM4Queue.hpp"
|
||||
|
||||
// @class KFDEvictTest
|
||||
// Test eviction and restore procedure using two processes
|
||||
class KFDEvictTest : public KFDMultiProcessTest {
|
||||
public:
|
||||
KFDEvictTest(void): m_pIsaGen(NULL) {}
|
||||
|
||||
KFDEvictTest(void) {}
|
||||
~KFDEvictTest(void) {}
|
||||
|
||||
protected:
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
std::string CreateShader();
|
||||
void AllocBuffers(HSAuint32 defaultGPUNode, HSAuint32 count, HSAuint64 vramBufSize,
|
||||
std::vector<void *> &pBuffers);
|
||||
void FreeBuffers(std::vector<void *> &pBuffers, HSAuint64 vramBufSize);
|
||||
@@ -52,7 +49,6 @@ class KFDEvictTest : public KFDMultiProcessTest {
|
||||
PM4Queue *computeQueue);
|
||||
|
||||
protected: // Members
|
||||
IsaGenerator* m_pIsaGen;
|
||||
HsaMemFlags m_Flags;
|
||||
void* m_pBuf;
|
||||
};
|
||||
|
||||
@@ -33,18 +33,12 @@ void KFDExceptionTest::SetUp() {
|
||||
|
||||
KFDBaseComponentTest::SetUp();
|
||||
|
||||
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
|
||||
|
||||
ROUTINE_END
|
||||
}
|
||||
|
||||
void KFDExceptionTest::TearDown() {
|
||||
ROUTINE_START
|
||||
|
||||
if (m_pIsaGen)
|
||||
delete m_pIsaGen;
|
||||
m_pIsaGen = NULL;
|
||||
|
||||
KFDBaseComponentTest::TearDown();
|
||||
|
||||
// WORKAROUND: This needs to be fixed in the kernel
|
||||
@@ -75,7 +69,8 @@ void KFDExceptionTest::TestMemoryException(int defaultGPUNode, HSAuint64 pSrc,
|
||||
eventDesc.SyncVar.SyncVar.UserData = NULL;
|
||||
eventDesc.SyncVar.SyncVarSize = 0;
|
||||
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
m_ChildStatus = queue.Create(defaultGPUNode);
|
||||
if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
|
||||
WARN() << "Queue create failed" << std::endl;
|
||||
|
||||
@@ -26,12 +26,11 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "IsaGenerator.hpp"
|
||||
#include "KFDBaseComponentTest.hpp"
|
||||
|
||||
class KFDExceptionTest : public KFDBaseComponentTest {
|
||||
public:
|
||||
KFDExceptionTest() :m_pIsaGen(NULL), m_ChildPid(-1) {
|
||||
KFDExceptionTest() : m_ChildPid(-1) {
|
||||
/* Because there could be early return before m_ChildPid is set
|
||||
* by fork(), we should initialize m_ChildPid to a non-zero value
|
||||
* to avoid possible exit of the main process.
|
||||
@@ -59,8 +58,6 @@ class KFDExceptionTest : public KFDBaseComponentTest {
|
||||
protected: // Members
|
||||
pid_t m_ChildPid;
|
||||
HSAKMT_STATUS m_ChildStatus;
|
||||
|
||||
IsaGenerator* m_pIsaGen;
|
||||
};
|
||||
|
||||
#endif // __KFD_EXCEPTION_TEST__H__
|
||||
|
||||
@@ -26,91 +26,17 @@
|
||||
#include "PM4Packet.hpp"
|
||||
#include "Dispatch.hpp"
|
||||
|
||||
/* Shader to initialize gws counter to 1*/
|
||||
const char* gfx9_10_GwsInit =
|
||||
"\
|
||||
shader GwsInit\n\
|
||||
type(CS)\n\
|
||||
wave_size(32)\n\
|
||||
s_mov_b32 m0, 0\n\
|
||||
s_nop 0\n\
|
||||
s_load_dword s16, s[0:1], 0x0 glc\n\
|
||||
s_waitcnt 0\n\
|
||||
v_mov_b32 v0, s16\n\
|
||||
s_waitcnt 0\n\
|
||||
ds_gws_init v0 gds:1 offset0:0\n\
|
||||
s_waitcnt 0\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
/* Atomically increase a value in memory
|
||||
* This is expected to be executed from
|
||||
* multiple work groups simultaneously.
|
||||
* GWS semaphore is used to guarantee
|
||||
* the operation is atomic.
|
||||
*/
|
||||
const char* gfx9_AtomicIncrease =
|
||||
"\
|
||||
shader AtomicIncrease\n\
|
||||
type(CS)\n\
|
||||
/* Assume src address in s0, s1 */\n\
|
||||
s_mov_b32 m0, 0\n\
|
||||
s_nop 0\n\
|
||||
ds_gws_sema_p gds:1 offset0:0\n\
|
||||
s_waitcnt 0\n\
|
||||
s_load_dword s16, s[0:1], 0x0 glc\n\
|
||||
s_waitcnt 0\n\
|
||||
s_add_u32 s16, s16, 1\n\
|
||||
s_store_dword s16, s[0:1], 0x0 glc\n\
|
||||
s_waitcnt lgkmcnt(0)\n\
|
||||
ds_gws_sema_v gds:1 offset0:0\n\
|
||||
s_waitcnt 0\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
const char* gfx10_AtomicIncrease =
|
||||
"\
|
||||
shader AtomicIncrease\n\
|
||||
asic(GFX10)\n\
|
||||
type(CS)\n\
|
||||
wave_size(32)\n\
|
||||
/* Assume src address in s0, s1 */\n\
|
||||
s_mov_b32 m0, 0\n\
|
||||
s_mov_b32 exec_lo, 0x1\n\
|
||||
v_mov_b32 v0, s0\n\
|
||||
v_mov_b32 v1, s1\n\
|
||||
ds_gws_sema_p gds:1 offset0:0\n\
|
||||
s_waitcnt 0\n\
|
||||
flat_load_dword v2, v[0:1] glc:1 dlc:1\n\
|
||||
s_waitcnt 0\n\
|
||||
v_add_nc_u32 v2, v2, 1\n\
|
||||
flat_store_dword v[0:1], v2\n\
|
||||
s_waitcnt_vscnt null, 0\n\
|
||||
ds_gws_sema_v gds:1 offset0:0\n\
|
||||
s_waitcnt 0\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
void KFDGWSTest::SetUp() {
|
||||
ROUTINE_START
|
||||
|
||||
KFDBaseComponentTest::SetUp();
|
||||
|
||||
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
|
||||
|
||||
ROUTINE_END
|
||||
}
|
||||
|
||||
void KFDGWSTest::TearDown() {
|
||||
ROUTINE_START
|
||||
|
||||
if (m_pIsaGen)
|
||||
delete m_pIsaGen;
|
||||
m_pIsaGen = NULL;
|
||||
|
||||
KFDBaseComponentTest::TearDown();
|
||||
|
||||
ROUTINE_END
|
||||
@@ -160,21 +86,15 @@ TEST_F(KFDGWSTest, Semaphore) {
|
||||
pNodeProperties->NumGws,&firstGWS));
|
||||
EXPECT_EQ(0, firstGWS);
|
||||
|
||||
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
|
||||
m_pIsaGen->CompileShader(gfx9_10_GwsInit, "GwsInit", isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(GwsInitIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch0(isaBuffer);
|
||||
buffer.Fill(numResources, 0, 4);
|
||||
dispatch0.SetArgs(buffer.As<void*>(), NULL);
|
||||
dispatch0.Submit(queue);
|
||||
dispatch0.Sync();
|
||||
|
||||
const char *pAtomicIncrease;
|
||||
if (m_FamilyId <= FAMILY_AL)
|
||||
pAtomicIncrease = gfx9_AtomicIncrease;
|
||||
else
|
||||
pAtomicIncrease = gfx10_AtomicIncrease;
|
||||
|
||||
m_pIsaGen->CompileShader(pAtomicIncrease, "AtomicIncrease", isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(GwsAtomicIncreaseIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch(isaBuffer);
|
||||
dispatch.SetArgs(buffer.As<void*>(), NULL);
|
||||
|
||||
@@ -26,20 +26,16 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "IsaGenerator.hpp"
|
||||
#include "KFDBaseComponentTest.hpp"
|
||||
|
||||
class KFDGWSTest : public KFDBaseComponentTest {
|
||||
public:
|
||||
KFDGWSTest() :m_pIsaGen(NULL) {}
|
||||
KFDGWSTest() {}
|
||||
~KFDGWSTest() {}
|
||||
|
||||
protected:
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
protected: // Members
|
||||
IsaGenerator* m_pIsaGen;
|
||||
};
|
||||
|
||||
#endif // __KFD_GWS_TEST__H__
|
||||
|
||||
@@ -101,7 +101,8 @@ TEST_F(KFDGraphicsInterop, RegisterGraphicsHandle) {
|
||||
|
||||
// Copy contents to a system memory buffer for comparison
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
HsaMemoryBuffer dstBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/);
|
||||
|
||||
|
||||
@@ -28,18 +28,12 @@ void KFDHWSTest::SetUp() {
|
||||
|
||||
KFDBaseComponentTest::SetUp();
|
||||
|
||||
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
|
||||
|
||||
ROUTINE_END
|
||||
}
|
||||
|
||||
void KFDHWSTest::TearDown() {
|
||||
ROUTINE_START
|
||||
|
||||
if (m_pIsaGen)
|
||||
delete m_pIsaGen;
|
||||
m_pIsaGen = NULL;
|
||||
|
||||
KFDBaseComponentTest::TearDown();
|
||||
|
||||
ROUTINE_END
|
||||
@@ -70,7 +64,9 @@ void KFDHWSTest::RunTest(unsigned nProcesses, unsigned nQueues, unsigned nLoops)
|
||||
|
||||
// Run work on all queues
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
m_pIsaGen->GetNoopIsa(isaBuffer);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(NoopIsa, isaBuffer.As<char*>()));
|
||||
|
||||
for (l = 0; l < nLoops; l++) {
|
||||
for (q = 0; q < nQueues; q++) {
|
||||
if (dispatch[q])
|
||||
|
||||
@@ -27,14 +27,12 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "PM4Queue.hpp"
|
||||
#include "IsaGenerator.hpp"
|
||||
#include "KFDMultiProcessTest.hpp"
|
||||
#include "Dispatch.hpp"
|
||||
|
||||
class KFDHWSTest : public KFDMultiProcessTest {
|
||||
public:
|
||||
KFDHWSTest():m_pIsaGen(NULL) {}
|
||||
|
||||
KFDHWSTest() {}
|
||||
~KFDHWSTest() {}
|
||||
|
||||
protected:
|
||||
@@ -42,9 +40,6 @@ class KFDHWSTest : public KFDMultiProcessTest {
|
||||
virtual void TearDown();
|
||||
|
||||
void RunTest(unsigned nProcesses, unsigned nQueues, unsigned nLoops);
|
||||
|
||||
protected: // Members
|
||||
IsaGenerator* m_pIsaGen;
|
||||
};
|
||||
|
||||
#endif // __KFD_QCM_TEST__H__
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
|
||||
#include "KFDBaseComponentTest.hpp"
|
||||
#include "BaseQueue.hpp"
|
||||
#include "IsaGenerator.hpp"
|
||||
|
||||
#ifndef __KFD_MEMORY_TEST__H__
|
||||
#define __KFD_MEMORY_TEST__H__
|
||||
|
||||
@@ -33,18 +33,12 @@ void KFDLocalMemoryTest::SetUp() {
|
||||
|
||||
KFDBaseComponentTest::SetUp();
|
||||
|
||||
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
|
||||
|
||||
ROUTINE_END
|
||||
}
|
||||
|
||||
void KFDLocalMemoryTest::TearDown() {
|
||||
ROUTINE_START
|
||||
|
||||
if (m_pIsaGen)
|
||||
delete m_pIsaGen;
|
||||
m_pIsaGen = NULL;
|
||||
|
||||
KFDBaseComponentTest::TearDown();
|
||||
|
||||
ROUTINE_END
|
||||
@@ -107,7 +101,7 @@ TEST_F(KFDLocalMemoryTest, BasicTest) {
|
||||
|
||||
srcSysBuffer.Fill(0x01010101);
|
||||
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
ASSERT_SUCCESS(hsaKmtMapMemoryToGPUNodes(srcLocalBuffer.As<void*>(), srcLocalBuffer.Size(), &AlternateVAGPU,
|
||||
mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode)));
|
||||
@@ -164,7 +158,7 @@ TEST_F(KFDLocalMemoryTest, VerifyContentsAfterUnmapAndMap) {
|
||||
|
||||
SysBufferA.Fill(0x01010101);
|
||||
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
||||
queue.SetSkipWaitConsump(0);
|
||||
@@ -303,7 +297,8 @@ TEST_F(KFDLocalMemoryTest, Fragmentation) {
|
||||
PM4Queue queue;
|
||||
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode);
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
/* Allocate and test memory using the strategy explained at the top */
|
||||
HSAKMT_STATUS status;
|
||||
|
||||
@@ -26,20 +26,16 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "IsaGenerator.hpp"
|
||||
#include "KFDBaseComponentTest.hpp"
|
||||
|
||||
class KFDLocalMemoryTest : public KFDBaseComponentTest {
|
||||
public:
|
||||
KFDLocalMemoryTest() :m_pIsaGen(NULL) {}
|
||||
KFDLocalMemoryTest() {}
|
||||
~KFDLocalMemoryTest() {}
|
||||
|
||||
protected:
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
protected: // Members
|
||||
IsaGenerator* m_pIsaGen;
|
||||
};
|
||||
|
||||
#endif // __KFD_LOCALMEMORY_TEST__H__
|
||||
|
||||
@@ -39,360 +39,17 @@
|
||||
#include "SDMAPacket.hpp"
|
||||
#include "linux/kfd_ioctl.h"
|
||||
|
||||
const char* gfx8_ScratchCopyDword =
|
||||
"\
|
||||
shader ScratchCopyDword\n\
|
||||
asic(VI)\n\
|
||||
type(CS)\n\
|
||||
/*copy the parameters from scalar registers to vector registers*/\n\
|
||||
v_mov_b32 v0, s0\n\
|
||||
v_mov_b32 v1, s1\n\
|
||||
v_mov_b32 v2, s2\n\
|
||||
v_mov_b32 v3, s3\n\
|
||||
/*set up the scratch parameters. This assumes a single 16-reg block.*/\n\
|
||||
s_mov_b32 flat_scratch_lo, 8/*2 dwords of scratch per thread*/\n\
|
||||
s_mov_b32 flat_scratch_hi, 0/*offset in units of 256bytes*/\n\
|
||||
/*copy a dword between the passed addresses*/\n\
|
||||
flat_load_dword v4, v[0:1] slc\n\
|
||||
s_waitcnt vmcnt(0)&lgkmcnt(0)\n\
|
||||
flat_store_dword v[2:3], v4 slc\n\
|
||||
\n\
|
||||
s_endpgm\n\
|
||||
\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
const char* gfx9_ScratchCopyDword =
|
||||
"\
|
||||
shader ScratchCopyDword\n\
|
||||
asic(GFX9)\n\
|
||||
type(CS)\n\
|
||||
/*copy the parameters from scalar registers to vector registers*/\n\
|
||||
v_mov_b32 v0, s0\n\
|
||||
v_mov_b32 v1, s1\n\
|
||||
v_mov_b32 v2, s2\n\
|
||||
v_mov_b32 v3, s3\n\
|
||||
/*set up the scratch parameters. This assumes a single 16-reg block.*/\n\
|
||||
s_mov_b32 flat_scratch_lo, s4\n\
|
||||
s_mov_b32 flat_scratch_hi, s5\n\
|
||||
/*copy a dword between the passed addresses*/\n\
|
||||
flat_load_dword v4, v[0:1] slc\n\
|
||||
s_waitcnt vmcnt(0)&lgkmcnt(0)\n\
|
||||
flat_store_dword v[2:3], v4 slc\n\
|
||||
\n\
|
||||
s_endpgm\n\
|
||||
\n\
|
||||
end\n\
|
||||
";
|
||||
const char* gfx10_ScratchCopyDword =
|
||||
"\
|
||||
shader ScratchCopyDword\n\
|
||||
asic(GFX10)\n\
|
||||
type(CS)\n\
|
||||
wave_size(32)\n\
|
||||
/*copy the parameters from scalar registers to vector registers*/\n\
|
||||
v_mov_b32 v0, s0\n\
|
||||
v_mov_b32 v1, s1\n\
|
||||
v_mov_b32 v2, s2\n\
|
||||
v_mov_b32 v3, s3\n\
|
||||
/*set up the scratch parameters. This assumes a single 16-reg block.*/\n\
|
||||
s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_LO), s4\n\
|
||||
s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s5\n\
|
||||
/*copy a dword between the passed addresses*/\n\
|
||||
flat_load_dword v4, v[0:1] slc\n\
|
||||
s_waitcnt vmcnt(0)&lgkmcnt(0)\n\
|
||||
flat_store_dword v[2:3], v4 slc\n\
|
||||
\n\
|
||||
s_endpgm\n\
|
||||
\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
const char* aldbrn_ScratchCopyDword =
|
||||
"\
|
||||
shader ScratchCopyDword\n\
|
||||
asic(ALDEBARAN)\n\
|
||||
type(CS)\n\
|
||||
/*copy the parameters from scalar registers to vector registers*/\n\
|
||||
v_mov_b32 v0, s0\n\
|
||||
v_mov_b32 v1, s1\n\
|
||||
v_mov_b32 v2, s2\n\
|
||||
v_mov_b32 v3, s3\n\
|
||||
/*set up the scratch parameters. This assumes a single 16-reg block.*/\n\
|
||||
s_mov_b32 flat_scratch_lo, s4\n\
|
||||
s_mov_b32 flat_scratch_hi, s5\n\
|
||||
/*copy a dword between the passed addresses*/\n\
|
||||
flat_load_dword v4, v[0:1] slc\n\
|
||||
s_waitcnt vmcnt(0)&lgkmcnt(0)\n\
|
||||
flat_store_dword v[2:3], v4 slc\n\
|
||||
\n\
|
||||
s_endpgm\n\
|
||||
\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
|
||||
|
||||
/* Continuously poll src buffer and check buffer value
|
||||
* After src buffer is filled with specific value (0x5678,
|
||||
* by host program), fill dst buffer with specific
|
||||
* value(0x5678) and quit
|
||||
*/
|
||||
const char* gfx9_PollMemory =
|
||||
"\
|
||||
shader ReadMemory\n\
|
||||
wave_size(32)\n\
|
||||
type(CS)\n\
|
||||
/* Assume src address in s0, s1 and dst address in s2, s3*/\n\
|
||||
s_movk_i32 s18, 0x5678\n\
|
||||
LOOP:\n\
|
||||
s_load_dword s16, s[0:1], 0x0 glc\n\
|
||||
s_cmp_eq_i32 s16, s18\n\
|
||||
s_cbranch_scc0 LOOP\n\
|
||||
s_store_dword s18, s[2:3], 0x0 glc\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
/* Similar to gfx9_PollMemory except that the buffer
|
||||
* polled can be Non-coherent memory. SCC system-level
|
||||
* cache coherence is not supported in scalar (smem) path.
|
||||
* Use vmem operations with scc
|
||||
*/
|
||||
const char* gfx9_PollNCMemory =
|
||||
"\
|
||||
shader ReadMemory\n\
|
||||
asic(ALDEBARAN)\n\
|
||||
wave_size(32)\n\
|
||||
type(CS)\n\
|
||||
/* Assume src address in s0, s1 and dst address in s2, s3*/\n\
|
||||
v_mov_b32 v6, 0x5678\n\
|
||||
v_mov_b32 v0, s0\n\
|
||||
v_mov_b32 v1, s1\n\
|
||||
LOOP:\n\
|
||||
flat_load_dword v4, v[0:1] scc\n\
|
||||
v_cmp_eq_u32 vcc, v4, v6\n\
|
||||
s_cbranch_vccz LOOP\n\
|
||||
v_mov_b32 v0, s2\n\
|
||||
v_mov_b32 v1, s3\n\
|
||||
flat_store_dword v[0:1], v6 scc\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
/* GFX10 variant of the poll shader.
 * Spins on a scalar load from s[0:1] until the value read equals 0x5678,
 * then writes 0x5678 to the address held in s[2:3] using a flat (vector)
 * store and waits for the store to complete before ending the program.
 */
const char* gfx10_PollMemory =
|
||||
"\
|
||||
shader ReadMemory\n\
|
||||
wave_size(32)\n\
|
||||
type(CS)\n\
|
||||
/* Assume src address in s0, s1 and dst address in s2, s3*/\n\
|
||||
s_movk_i32 s18, 0x5678\n\
|
||||
v_mov_b32 v0, s2\n\
|
||||
v_mov_b32 v1, s3\n\
|
||||
v_mov_b32 v2, 0x5678\n\
|
||||
LOOP:\n\
|
||||
s_load_dword s16, s[0:1], 0x0 glc\n\
|
||||
s_cmp_eq_i32 s16, s18\n\
|
||||
s_cbranch_scc0 LOOP\n\
|
||||
flat_store_dword v[0,1], v2 slc\n\
|
||||
s_waitcnt vmcnt(0)&lgkmcnt(0)\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
/* Input: A buffer of at least 3 dwords.
|
||||
* DW0: used as a signal. 0xcafe means it is signaled
|
||||
* DW1: Input buffer for device to read.
|
||||
* DW2: Output buffer for device to write.
|
||||
* Once the signal is received, the device copies DW1 to DW2
|
||||
* This shader continuously polls the signal buffer.
|
||||
* Once signal buffer is signaled, it copies input buffer
|
||||
* to output buffer
|
||||
*/
|
||||
const char* gfx9_CopyOnSignal =
|
||||
"\
|
||||
shader CopyOnSignal\n\
|
||||
wave_size(32)\n\
|
||||
type(CS)\n\
|
||||
/* Assume input buffer in s0, s1 */\n\
|
||||
s_mov_b32 s18, 0xcafe\n\
|
||||
POLLSIGNAL:\n\
|
||||
s_load_dword s16, s[0:1], 0x0 glc\n\
|
||||
s_cmp_eq_i32 s16, s18\n\
|
||||
s_cbranch_scc0 POLLSIGNAL\n\
|
||||
s_load_dword s17, s[0:1], 0x4 glc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)\n\
|
||||
s_store_dword s17, s[0:1], 0x8 glc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
/* GFX10 variant of CopyOnSignal.
 * Polls DW0 of the buffer at s[0:1] with a scalar load until it equals
 * 0xcafe, then loads DW1 (offset 0x4) and writes it to offset 0x8 with a
 * flat (vector) store through v[4:5], which holds s[0:1] + 8.
 */
const char* gfx10_CopyOnSignal =
|
||||
"\
|
||||
shader CopyOnSignal\n\
|
||||
wave_size(32)\n\
|
||||
type(CS)\n\
|
||||
/* Assume input buffer in s0, s1 */\n\
|
||||
s_add_u32 s2, s0, 0x8\n\
|
||||
s_addc_u32 s3, s1, 0x0\n\
|
||||
s_mov_b32 s18, 0xcafe\n\
|
||||
v_mov_b32 v0, s0\n\
|
||||
v_mov_b32 v1, s1\n\
|
||||
v_mov_b32 v4, s2\n\
|
||||
v_mov_b32 v5, s3\n\
|
||||
POLLSIGNAL:\n\
|
||||
s_load_dword s16, s[0:1], 0x0 glc\n\
|
||||
s_cmp_eq_i32 s16, s18\n\
|
||||
s_cbranch_scc0 POLLSIGNAL\n\
|
||||
s_load_dword s17, s[0:1], 0x4 glc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)\n\
|
||||
v_mov_b32 v2, s17\n\
|
||||
flat_store_dword v[4,5], v2 glc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
/* Input0: A buffer of at least 2 dwords.
|
||||
* DW0: used as a signal. Write 0xcafe to signal
|
||||
* DW1: Write to this buffer for other device to read.
|
||||
* Input1: mmio base address
|
||||
*/
|
||||
const char* gfx9_WriteAndSignal =
|
||||
"\
|
||||
shader WriteAndSignal\n\
|
||||
wave_size(32)\n\
|
||||
type(CS)\n\
|
||||
/* Assume input buffer in s0, s1 */\n\
|
||||
s_mov_b32 s18, 0xbeef\n\
|
||||
s_store_dword s18, s[0:1], 0x4 glc\n\
|
||||
s_mov_b32 s18, 0x1\n\
|
||||
s_store_dword s18, s[2:3], 0 glc\n\
|
||||
s_mov_b32 s18, 0xcafe\n\
|
||||
s_store_dword s18, s[0:1], 0x0 glc\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
/* Continuously poll the flag at src buffer
|
||||
* After the flag at s[0:1] is set to 1,
|
||||
* copy the value from s[0:1]+4 to dst buffer
|
||||
*/
|
||||
const char* gfx9_PollAndCopy =
|
||||
"\
|
||||
shader CopyMemory\n\
|
||||
wave_size(32)\n\
|
||||
type(CS)\n\
|
||||
/* Assume src buffer in s[0:1] and dst buffer in s[2:3]*/\n\
|
||||
s_movk_i32 s18, 0x1\n\
|
||||
LOOP:\n\
|
||||
s_load_dword s16, s[0:1], 0x0 glc\n\
|
||||
s_cmp_eq_i32 s16, s18\n\
|
||||
s_cbranch_scc0 LOOP\n\
|
||||
s_load_dword s17, s[0:1], 0x4 glc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)\n\
|
||||
s_store_dword s17, s[2:3], 0x0 glc:1\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
/* Variant of the PollAndCopy shader that polls the flag at s[0:1] with a
 * flat (vector) load instead of a scalar load. Once the flag equals 1 it
 * issues buffer_invl2, copies the dword at s[0:1]+4 to the address in
 * s[2:3], and issues buffer_wbl2 before ending the program.
 */
const char* gfx9aldbrn_PollAndCopy =
|
||||
"\
|
||||
shader CopyMemory\n\
|
||||
wave_size(32)\n\
|
||||
type(CS)\n\
|
||||
/* Assume src buffer in s[0:1] and dst buffer in s[2:3]*/\n\
|
||||
v_mov_b32 v0, s0\n\
|
||||
v_mov_b32 v1, s1\n\
|
||||
v_mov_b32 v18, 0x1\n\
|
||||
LOOP:\n\
|
||||
flat_load_dword v16, v[0:1] glc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)\n\
|
||||
v_cmp_eq_i32 vcc, v16, v18\n\
|
||||
s_cbranch_vccz LOOP\n\
|
||||
buffer_invl2\n\
|
||||
s_load_dword s17, s[0:1], 0x4 glc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)\n\
|
||||
s_store_dword s17, s[2:3], 0x0 glc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)\n\
|
||||
buffer_wbl2\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
/* Input0: A buffer of at least 2 dwords.
|
||||
* DW0: used as a signal. Write 0x1 to signal
|
||||
* DW1: Write the value from 2nd input buffer
|
||||
* for other device to read.
|
||||
* Input1: A buffer of at least 2 dwords.
|
||||
* DW0: used as the value to be written.
|
||||
*/
|
||||
const char* gfx9aldbrn_WriteFlagAndValue =
|
||||
"\
|
||||
shader WriteMemory\n\
|
||||
wave_size(32)\n\
|
||||
type(CS)\n\
|
||||
/* Assume two inputs buffer in s[0:1] and s[2:3]*/\n\
|
||||
v_mov_b32 v0, s0\n\
|
||||
v_mov_b32 v1, s1\n\
|
||||
s_load_dword s18, s[2:3], 0x0 glc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)\n\
|
||||
s_store_dword s18, s[0:1], 0x4 glc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)\n\
|
||||
buffer_wbl2\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)\n\
|
||||
v_mov_b32 v16, 0x1\n\
|
||||
flat_store_dword v[0:1], v16 glc\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
/* GFX10 variant of WriteAndSignal that uses flat (vector) stores.
 * Writes 0xbeef to s[0:1]+4, then 0x1 to the address in s[2:3], and
 * finally 0xcafe to s[0:1].
 */
const char* gfx10_WriteAndSignal =
|
||||
"\
|
||||
shader WriteAndSignal\n\
|
||||
wave_size(32)\n\
|
||||
type(CS)\n\
|
||||
/* Assume input buffer in s0, s1 */\n\
|
||||
s_add_u32 s4, s0, 0x4\n\
|
||||
s_addc_u32 s5, s1, 0x0\n\
|
||||
v_mov_b32 v0, s0\n\
|
||||
v_mov_b32 v1, s1\n\
|
||||
v_mov_b32 v2, s2\n\
|
||||
v_mov_b32 v3, s3\n\
|
||||
v_mov_b32 v4, s4\n\
|
||||
v_mov_b32 v5, s5\n\
|
||||
v_mov_b32 v18, 0xbeef\n\
|
||||
flat_store_dword v[4:5], v18 glc\n\
|
||||
v_mov_b32 v18, 0x1\n\
|
||||
flat_store_dword v[2:3], v18 glc\n\
|
||||
v_mov_b32 v18, 0xcafe\n\
|
||||
flat_store_dword v[0:1], v18 glc\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
// These gfx9_PollMemory, gfx9_CopyOnSignal, gfx9_WriteAndSignal shaders can be used by both gfx9 and gfx10
|
||||
|
||||
/* Test fixture setup: runs the base component setup first, then creates
 * the ISA generator for the family id stored in m_FamilyId.
 */
void KFDMemoryTest::SetUp() {
|
||||
// ROUTINE_START / ROUTINE_END are macros defined outside this view; they bracket the body.
ROUTINE_START
|
||||
|
||||
KFDBaseComponentTest::SetUp();
|
||||
|
||||
// The generator is stored in m_pIsaGen; TearDown deletes it.
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
|
||||
|
||||
ROUTINE_END
|
||||
}
|
||||
|
||||
void KFDMemoryTest::TearDown() {
|
||||
ROUTINE_START
|
||||
|
||||
if (m_pIsaGen)
|
||||
delete m_pIsaGen;
|
||||
m_pIsaGen = NULL;
|
||||
|
||||
KFDBaseComponentTest::TearDown();
|
||||
|
||||
ROUTINE_END
|
||||
@@ -508,16 +165,13 @@ TEST_F(KFDMemoryTest, MapUnmapToNodes) {
|
||||
HsaMemoryBuffer dstBuffer(PAGE_SIZE, defaultGPUNode);
|
||||
|
||||
const char *pReadMemory;
|
||||
if (m_FamilyId < FAMILY_NV)
|
||||
pReadMemory = gfx9_PollMemory;
|
||||
else
|
||||
pReadMemory = gfx10_PollMemory;
|
||||
|
||||
if (m_NodeInfo.IsNodeXGMItoCPU(defaultGPUNode))
|
||||
/* On A+A system memory is mapped as NC */
|
||||
m_pIsaGen->CompileShader(gfx9_PollNCMemory, "ReadMemory", isaBuffer);
|
||||
pReadMemory = PollNCMemoryIsa;
|
||||
else
|
||||
m_pIsaGen->CompileShader(pReadMemory, "ReadMemory", isaBuffer);
|
||||
pReadMemory = PollMemoryIsa;
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(pReadMemory, isaBuffer.As<char*>()));
|
||||
|
||||
PM4Queue pm4Queue;
|
||||
ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode));
|
||||
@@ -674,7 +328,8 @@ TEST_F(KFDMemoryTest, MemoryRegister) {
|
||||
ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode));
|
||||
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
/* First submit just so the queues are not empty, and to get the
|
||||
* TLB populated (in case we need to flush TLBs somewhere after
|
||||
@@ -855,16 +510,7 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) {
|
||||
// Initialize the srcBuffer to some fixed value
|
||||
srcMemBuffer.Fill(0x01010101);
|
||||
|
||||
const char *pScratchCopyDword;
|
||||
if (m_FamilyId < FAMILY_AI)
|
||||
pScratchCopyDword = gfx8_ScratchCopyDword;
|
||||
else if (m_FamilyId < FAMILY_AL)
|
||||
pScratchCopyDword = gfx9_ScratchCopyDword;
|
||||
else if (m_FamilyId == FAMILY_AL)
|
||||
pScratchCopyDword = aldbrn_ScratchCopyDword;
|
||||
else
|
||||
pScratchCopyDword = gfx10_ScratchCopyDword;
|
||||
m_pIsaGen->CompileShader(pScratchCopyDword, "ScratchCopyDword", isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(ScratchCopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
const HsaNodeProperties *pNodeProperties = m_NodeInfo.GetNodeProperties(defaultGPUNode);
|
||||
|
||||
@@ -1728,17 +1374,8 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) {
|
||||
// dstBuffer is cpu accessible gtt memory
|
||||
HsaMemoryBuffer dstBuffer(PAGE_SIZE, defaultGPUNode);
|
||||
|
||||
const char *pScratchCopyDword;
|
||||
if (m_FamilyId < FAMILY_AI)
|
||||
pScratchCopyDword = gfx8_ScratchCopyDword;
|
||||
else if (m_FamilyId < FAMILY_AL)
|
||||
pScratchCopyDword = gfx9_ScratchCopyDword;
|
||||
else if (m_FamilyId == FAMILY_AL)
|
||||
pScratchCopyDword = aldbrn_ScratchCopyDword;
|
||||
else
|
||||
pScratchCopyDword = gfx10_ScratchCopyDword;
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(ScratchCopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
m_pIsaGen->CompileShader(pScratchCopyDword, "ScratchCopyDword", isaBuffer);
|
||||
Dispatch dispatch0(isaBuffer);
|
||||
dispatch0.SetArgs(mem0, dstBuffer.As<void*>());
|
||||
dispatch0.Submit(queue);
|
||||
@@ -2109,12 +1746,9 @@ TEST_F(KFDMemoryTest, HostHdpFlush) {
|
||||
PM4Queue queue;
|
||||
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
const char *pCopyOnSignal;
|
||||
if (m_FamilyId < FAMILY_NV)
|
||||
pCopyOnSignal = gfx9_CopyOnSignal;
|
||||
else
|
||||
pCopyOnSignal = gfx10_CopyOnSignal;
|
||||
m_pIsaGen->CompileShader(pCopyOnSignal, "CopyOnSignal", isaBuffer);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyOnSignalIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch0(isaBuffer);
|
||||
dispatch0.SetArgs(buffer, NULL);
|
||||
dispatch0.Submit(queue);
|
||||
@@ -2234,12 +1868,9 @@ TEST_F(KFDMemoryTest, DeviceHdpFlush) {
|
||||
PM4Queue queue;
|
||||
ASSERT_SUCCESS(queue.Create(nodes[0]));
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, nodes[0], true/*zero*/, false/*local*/, true/*exec*/);
|
||||
const char *pCopyOnSignal;
|
||||
if (m_FamilyId < FAMILY_NV)
|
||||
pCopyOnSignal = gfx9_CopyOnSignal;
|
||||
else
|
||||
pCopyOnSignal = gfx10_CopyOnSignal;
|
||||
m_pIsaGen->CompileShader(pCopyOnSignal, "CopyOnSignal", isaBuffer);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyOnSignalIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch(isaBuffer);
|
||||
dispatch.SetArgs(buffer, NULL);
|
||||
dispatch.Submit(queue);
|
||||
@@ -2247,12 +1878,9 @@ TEST_F(KFDMemoryTest, DeviceHdpFlush) {
|
||||
PM4Queue queue0;
|
||||
ASSERT_SUCCESS(queue0.Create(nodes[1]));
|
||||
HsaMemoryBuffer isaBuffer0(PAGE_SIZE, nodes[1], true/*zero*/, false/*local*/, true/*exec*/);
|
||||
const char *pWriteAndSignal;
|
||||
if (m_FamilyId < FAMILY_NV)
|
||||
pWriteAndSignal = gfx9_WriteAndSignal;
|
||||
else
|
||||
pWriteAndSignal = gfx10_WriteAndSignal;
|
||||
m_pIsaGen->CompileShader(pWriteAndSignal, "WriteAndSignal", isaBuffer0);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(WriteAndSignalIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch0(isaBuffer0);
|
||||
dispatch0.SetArgs(buffer, mmioBase);
|
||||
dispatch0.Submit(queue0);
|
||||
@@ -2304,7 +1932,9 @@ TEST_F(KFDMemoryTest, CacheInvalidateOnSdmaWrite) {
|
||||
PM4Queue queue;
|
||||
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
m_pIsaGen->CompileShader(gfx9_PollMemory, "ReadMemory", isaBuffer);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(PollMemoryIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch(isaBuffer);
|
||||
dispatch.SetArgs(buffer.As<int*>(), buffer.As<int*>()+dwLocation);
|
||||
dispatch.Submit(queue);
|
||||
@@ -2357,7 +1987,9 @@ TEST_F(KFDMemoryTest, CacheInvalidateOnCPUWrite) {
|
||||
PM4Queue queue;
|
||||
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
m_pIsaGen->CompileShader(gfx9_PollMemory, "ReadMemory", isaBuffer);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(PollMemoryIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch(isaBuffer);
|
||||
dispatch.SetArgs(buffer, buffer+100);
|
||||
dispatch.Submit(queue);
|
||||
@@ -2419,7 +2051,9 @@ TEST_F(KFDMemoryTest, CacheInvalidateOnRemoteWrite) {
|
||||
PM4Queue queue;
|
||||
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
m_pIsaGen->CompileShader(gfx9_PollMemory, "ReadMemory", isaBuffer);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(PollMemoryIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch(isaBuffer);
|
||||
dispatch.SetArgs(buffer.As<int*>(), buffer.As<int*>()+dwLocation);
|
||||
dispatch.Submit(queue);
|
||||
@@ -2434,7 +2068,9 @@ TEST_F(KFDMemoryTest, CacheInvalidateOnRemoteWrite) {
|
||||
ASSERT_SUCCESS(queue1.Create(nondefaultNode));
|
||||
buffer.Fill(0x5678, sdmaQueue, dwLocation1*sizeof(int), 4);
|
||||
HsaMemoryBuffer isaBuffer1(PAGE_SIZE, nondefaultNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBuffer1);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch1(isaBuffer1);
|
||||
dispatch1.SetArgs(buffer.As<int*>()+dwLocation1, buffer.As<int*>());
|
||||
dispatch1.Submit(queue1);
|
||||
@@ -2500,7 +2136,9 @@ TEST_F(KFDMemoryTest, VramCacheCoherenceWithRemoteGPU) {
|
||||
PM4Queue queue;
|
||||
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
m_pIsaGen->CompileShader(gfx9aldbrn_PollAndCopy, "CopyMemory", isaBuffer);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(PollAndCopyIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch(isaBuffer);
|
||||
dispatch.SetArgs(buffer.As<char *>(), buffer.As<char *>()+dwLocation);
|
||||
dispatch.Submit(queue);
|
||||
@@ -2515,7 +2153,9 @@ TEST_F(KFDMemoryTest, VramCacheCoherenceWithRemoteGPU) {
|
||||
PM4Queue queue1;
|
||||
ASSERT_SUCCESS(queue1.Create(nondefaultNode));
|
||||
HsaMemoryBuffer isaBuffer1(PAGE_SIZE, nondefaultNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
m_pIsaGen->CompileShader(gfx9aldbrn_WriteFlagAndValue, "WriteMemory", isaBuffer1);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(WriteFlagAndValueIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch1(isaBuffer1);
|
||||
dispatch1.SetArgs(buffer.As<char *>(), buffer.As<char *>()+dwSource);
|
||||
dispatch1.Submit(queue1);
|
||||
@@ -2569,7 +2209,9 @@ TEST_F(KFDMemoryTest, VramCacheCoherenceWithCPU) {
|
||||
PM4Queue queue;
|
||||
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
m_pIsaGen->CompileShader(gfx9aldbrn_PollAndCopy, "CopyMemory", isaBuffer);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(PollAndCopyIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch(isaBuffer);
|
||||
dispatch.SetArgs(buffer, buffer+dwLocation);
|
||||
dispatch.Submit(queue);
|
||||
@@ -2608,12 +2250,17 @@ TEST_F(KFDMemoryTest, SramCacheCoherenceWithGPU) {
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned int *fineBuffer = NULL;
|
||||
unsigned int tmp;
|
||||
|
||||
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
|
||||
const int dwLocation = 0x80;
|
||||
|
||||
if (!m_NodeInfo.IsNodeXGMItoCPU(defaultGPUNode)) {
|
||||
LOG() << "Skipping test: XGMI link to CPU is required." << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned int *fineBuffer = NULL;
|
||||
unsigned int tmp;
|
||||
|
||||
ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode /* system */, PAGE_SIZE, m_MemoryFlags,
|
||||
reinterpret_cast<void**>(&fineBuffer)));
|
||||
ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(fineBuffer, PAGE_SIZE, NULL));
|
||||
@@ -2627,10 +2274,7 @@ TEST_F(KFDMemoryTest, SramCacheCoherenceWithGPU) {
|
||||
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
|
||||
if (m_NodeInfo.IsNodeXGMItoCPU(defaultGPUNode))
|
||||
m_pIsaGen->CompileShader(gfx9aldbrn_PollAndCopy, "CopyMemory", isaBuffer);
|
||||
else
|
||||
m_pIsaGen->CompileShader(gfx9_PollAndCopy, "CopyMemory", isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(PollAndCopyIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch(isaBuffer);
|
||||
dispatch.SetArgs(fineBuffer, fineBuffer+dwLocation);
|
||||
|
||||
@@ -22,7 +22,6 @@
|
||||
*/
|
||||
|
||||
#include "KFDBaseComponentTest.hpp"
|
||||
#include "IsaGenerator.hpp"
|
||||
|
||||
#ifndef __KFD_MEMORY_TEST__H__
|
||||
#define __KFD_MEMORY_TEST__H__
|
||||
@@ -33,15 +32,13 @@
|
||||
*/
|
||||
class KFDMemoryTest : public KFDBaseComponentTest {
|
||||
public:
|
||||
KFDMemoryTest(void) :m_pIsaGen(NULL) {}
|
||||
KFDMemoryTest(void) {}
|
||||
~KFDMemoryTest(void) {}
|
||||
protected:
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
protected:
|
||||
IsaGenerator* m_pIsaGen;
|
||||
|
||||
void BinarySearchLargestBuffer(int allocNode, const HsaMemFlags &memFlags,
|
||||
HSAuint64 highMB, int nodeToMap,
|
||||
HSAuint64 *lastSizeMB);
|
||||
|
||||
@@ -39,18 +39,12 @@ void KFDQMTest::SetUp() {
|
||||
|
||||
KFDBaseComponentTest::SetUp();
|
||||
|
||||
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
|
||||
|
||||
ROUTINE_END
|
||||
}
|
||||
|
||||
void KFDQMTest::TearDown() {
|
||||
ROUTINE_START
|
||||
|
||||
if (m_pIsaGen)
|
||||
delete m_pIsaGen;
|
||||
m_pIsaGen = NULL;
|
||||
|
||||
KFDBaseComponentTest::TearDown();
|
||||
|
||||
ROUTINE_END
|
||||
@@ -677,111 +671,12 @@ TEST_F(KFDQMTest, OverSubscribeCpQueues) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
/* A simple isa loop program with dense mathematic operations
|
||||
* s1 controls the number of iterations of the loop
|
||||
* This shader can be used by GFX8, GFX9 and GFX10
|
||||
*/
|
||||
static const char *loop_isa = \
|
||||
"\
|
||||
shader loop_isa\n\
|
||||
wave_size(32)\n\
|
||||
type(CS)\n\
|
||||
s_movk_i32 s0, 0x0008\n\
|
||||
s_movk_i32 s1, 0x00ff\n\
|
||||
v_mov_b32 v0, 0\n\
|
||||
v_mov_b32 v1, 0\n\
|
||||
v_mov_b32 v2, 0\n\
|
||||
v_mov_b32 v3, 0\n\
|
||||
v_mov_b32 v4, 0\n\
|
||||
v_mov_b32 v5, 0\n\
|
||||
v_mov_b32 v6, 0\n\
|
||||
v_mov_b32 v7, 0\n\
|
||||
v_mov_b32 v8, 0\n\
|
||||
v_mov_b32 v9, 0\n\
|
||||
v_mov_b32 v10, 0\n\
|
||||
v_mov_b32 v11, 0\n\
|
||||
v_mov_b32 v12, 0\n\
|
||||
v_mov_b32 v13, 0\n\
|
||||
v_mov_b32 v14, 0\n\
|
||||
v_mov_b32 v15, 0\n\
|
||||
v_mov_b32 v16, 0\n\
|
||||
LOOP:\n\
|
||||
s_mov_b32 s8, s4\n\
|
||||
s_mov_b32 s9, s1\n\
|
||||
s_mov_b32 s10, s6\n\
|
||||
s_mov_b32 s11, s7\n\
|
||||
s_cmp_le_i32 s1, s0\n\
|
||||
s_cbranch_scc1 END_OF_PGM\n\
|
||||
s_buffer_load_dwordx8 s[8:15], s[8:11], 0x10\n\
|
||||
v_add_f32 v0, 2.0, v0\n\
|
||||
v_cvt_f32_i32 v17, s1\n\
|
||||
s_waitcnt lgkmcnt(0)\n\
|
||||
v_add_f32 v18, s8, v17\n\
|
||||
v_add_f32 v19, s9, v17\n\
|
||||
v_add_f32 v20, s10, v17\n\
|
||||
v_add_f32 v21, s11, v17\n\
|
||||
v_add_f32 v22, s12, v17\n\
|
||||
v_add_f32 v23, s13, v17\n\
|
||||
v_add_f32 v24, s14, v17\n\
|
||||
v_add_f32 v17, s15, v17\n\
|
||||
v_log_f32 v25, v18\n\
|
||||
v_mul_f32 v25, v22, v25\n\
|
||||
v_exp_f32 v25, v25\n\
|
||||
v_log_f32 v26, v19\n\
|
||||
v_mul_f32 v26, v23, v26\n\
|
||||
v_exp_f32 v26, v26\n\
|
||||
v_log_f32 v27, v20\n\
|
||||
v_mul_f32 v27, v24, v27\n\
|
||||
v_exp_f32 v27, v27\n\
|
||||
v_log_f32 v28, v21\n\
|
||||
v_mul_f32 v28, v17, v28\n\
|
||||
v_exp_f32 v28, v28\n\
|
||||
v_add_f32 v5, v5, v25\n\
|
||||
v_add_f32 v6, v6, v26\n\
|
||||
v_add_f32 v7, v7, v27\n\
|
||||
v_add_f32 v8, v8, v28\n\
|
||||
v_mul_f32 v18, 0x3fb8aa3b, v18\n\
|
||||
v_exp_f32 v18, v18\n\
|
||||
v_mul_f32 v19, 0x3fb8aa3b, v19\n\
|
||||
v_exp_f32 v19, v19\n\
|
||||
v_mul_f32 v20, 0x3fb8aa3b, v20\n\
|
||||
v_exp_f32 v20, v20\n\
|
||||
v_mul_f32 v21, 0x3fb8aa3b, v21\n\
|
||||
v_exp_f32 v21, v21\n\
|
||||
v_add_f32 v9, v9, v18\n\
|
||||
v_add_f32 v10, v10, v19\n\
|
||||
v_add_f32 v11, v11, v20\n\
|
||||
v_add_f32 v12, v12, v21\n\
|
||||
v_sqrt_f32 v18, v22\n\
|
||||
v_sqrt_f32 v19, v23\n\
|
||||
v_sqrt_f32 v20, v24\n\
|
||||
v_sqrt_f32 v21, v17\n\
|
||||
v_add_f32 v13, v13, v18\n\
|
||||
v_add_f32 v14, v14, v19\n\
|
||||
v_add_f32 v15, v15, v20\n\
|
||||
v_add_f32 v16, v16, v21\n\
|
||||
v_rsq_f32 v18, v22\n\
|
||||
v_rsq_f32 v19, v23\n\
|
||||
v_rsq_f32 v20, v24\n\
|
||||
v_rsq_f32 v17, v17\n\
|
||||
v_add_f32 v1, v1, v18\n\
|
||||
v_add_f32 v2, v2, v19\n\
|
||||
v_add_f32 v3, v3, v20\n\
|
||||
v_add_f32 v4, v4, v17\n\
|
||||
s_add_u32 s0, s0, 1\n\
|
||||
s_branch LOOP\n\
|
||||
END_OF_PGM:\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
HSAint64 KFDQMTest::TimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count) {
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, node, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
HsaMemoryBuffer dstBuffer(PAGE_SIZE, node, true, false, false);
|
||||
HsaMemoryBuffer ctlBuffer(PAGE_SIZE, node, true, false, false);
|
||||
|
||||
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
|
||||
m_pIsaGen->CompileShader(loop_isa, "loop_isa", isaBuffer);
|
||||
EXPECT_SUCCESS(m_pAsm->RunAssembleBuf(LoopIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch(isaBuffer);
|
||||
dispatch.SetDim(1024, 16, 16);
|
||||
@@ -838,7 +733,6 @@ TEST_F(KFDQMTest, BasicCuMaskingLinear) {
|
||||
TEST_START(TESTPROFILE_RUNALL);
|
||||
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
|
||||
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
|
||||
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
|
||||
|
||||
if (m_FamilyId >= FAMILY_VI) {
|
||||
const HsaNodeProperties *pNodeProperties = m_NodeInfo.GetNodeProperties(defaultGPUNode);
|
||||
@@ -982,7 +876,7 @@ TEST_F(KFDQMTest, QueuePriorityOnDifferentPipe) {
|
||||
HSAint32 *syncBuffer = syncBuf.As<HSAint32*>();
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, node, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
|
||||
m_pIsaGen->CompileShader(loop_isa, "loop_isa", isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(LoopIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch[2] = {
|
||||
Dispatch(isaBuffer, true),
|
||||
@@ -1047,7 +941,7 @@ TEST_F(KFDQMTest, QueuePriorityOnSamePipe) {
|
||||
HSAint32 *syncBuffer = syncBuf.As<HSAint32*>();
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, node, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
|
||||
m_pIsaGen->CompileShader(loop_isa, "loop_isa", isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(LoopIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch[2] = {
|
||||
Dispatch(isaBuffer, true),
|
||||
@@ -1140,7 +1034,7 @@ TEST_F(KFDQMTest, EmptyDispatch) {
|
||||
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
|
||||
m_pIsaGen->GetNoopIsa(isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(LoopIsa, isaBuffer.As<char*>()));
|
||||
|
||||
SyncDispatch(isaBuffer, NULL, NULL);
|
||||
|
||||
@@ -1159,7 +1053,7 @@ TEST_F(KFDQMTest, SimpleWriteDispatch) {
|
||||
|
||||
srcBuffer.Fill(0x01010101);
|
||||
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
SyncDispatch(isaBuffer, srcBuffer.As<void*>(), destBuffer.As<void*>());
|
||||
|
||||
@@ -1194,7 +1088,7 @@ TEST_F(KFDQMTest, MultipleCpQueuesStressDispatch) {
|
||||
|
||||
destBuffer.Fill(0xFF);
|
||||
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
for (i = 0; i < MAX_CP_QUEUES; ++i)
|
||||
ASSERT_SUCCESS(queues[i].Create(defaultGPUNode)) << " QueueId=" << i;
|
||||
@@ -1533,7 +1427,7 @@ TEST_F(KFDQMTest, Atomics) {
|
||||
|
||||
PM4Queue queue;
|
||||
|
||||
m_pIsaGen->GetAtomicIncIsa(isaBuf);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(AtomicIncIsa, isaBuf.As<char*>()));
|
||||
|
||||
Dispatch dispatch(isaBuf);
|
||||
dispatch.SetArgs(destBuf.As<void*>(), NULL);
|
||||
@@ -1598,10 +1492,12 @@ TEST_F(KFDQMTest, mGPUShareBO) {
|
||||
|
||||
srcNodeMem.Fill(0x05050505);
|
||||
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBufferSrc);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssemble(CopyDwordIsa));
|
||||
|
||||
m_pAsm->CopyInstrStream(isaBufferSrc.As<char*>());
|
||||
SyncDispatch(isaBufferSrc, srcNodeMem.As<void*>(), shared_addr.As<void *>(), src_node);
|
||||
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBufferDst);
|
||||
m_pAsm->CopyInstrStream(isaBufferDst.As<char*>());
|
||||
SyncDispatch(isaBufferDst, shared_addr.As<void *>(), dstNodeMem.As<void*>(), dst_node);
|
||||
|
||||
EXPECT_EQ(dstNodeMem.As<unsigned int*>()[0], 0x05050505);
|
||||
|
||||
@@ -27,13 +27,12 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "PM4Queue.hpp"
|
||||
#include "IsaGenerator.hpp"
|
||||
#include "KFDBaseComponentTest.hpp"
|
||||
#include "Dispatch.hpp"
|
||||
|
||||
class KFDQMTest : public KFDBaseComponentTest {
|
||||
public:
|
||||
KFDQMTest():m_pIsaGen(NULL) {}
|
||||
KFDQMTest() {}
|
||||
|
||||
~KFDQMTest() {}
|
||||
|
||||
@@ -49,7 +48,6 @@ class KFDQMTest : public KFDBaseComponentTest {
|
||||
const double CuVariance = 0.15;
|
||||
const double CuNegVariance = 1.0 - CuVariance;
|
||||
const double CuPosVariance = 1.0 + CuVariance;
|
||||
IsaGenerator* m_pIsaGen;
|
||||
};
|
||||
|
||||
#endif // __KFD_QCM_TEST__H__
|
||||
|
||||
@@ -234,131 +234,6 @@ TEST_F(KFDSVMEvictTest, BasicTest) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
/* Shader to read local buffers using multiple wavefronts in parallel
|
||||
* until address buffer is filled with specific value 0x5678 by host program,
|
||||
* then each wavefront fills value 0x5678 at corresponding result buffer and quit
|
||||
*
|
||||
* initial state:
|
||||
* s[0:1] - address buffer base address
|
||||
* s[2:3] - result buffer base address
|
||||
* s4 - workgroup id
|
||||
* v0 - workitem id, always 0 because NUM_THREADS_X(number of threads) in workgroup set to 1
|
||||
* registers:
|
||||
* v0 - calculated workitem id, v0 = v0 + s4 * NUM_THREADS_X
|
||||
* v[2:3] - address of corresponding local buf address offset: s[0:1] + v0 * 8
|
||||
* v[4:5] - corresponding output buf address: s[2:3] + v0 * 4
|
||||
* v[6:7] - local buf address used for read test
|
||||
*/
|
||||
static const char* gfx9_ReadMemory =
|
||||
"\
|
||||
shader ReadMemory\n\
|
||||
type(CS)\n\
|
||||
\n\
|
||||
// compute address of corresponding output buffer\n\
|
||||
v_mov_b32 v0, s4 // use workgroup id as index\n\
|
||||
v_lshlrev_b32 v0, 2, v0 // v0 *= 4\n\
|
||||
v_add_co_u32 v4, vcc, s2, v0 // v[4:5] = s[2:3] + v0 * 4\n\
|
||||
v_mov_b32 v5, s3\n\
|
||||
v_add_u32 v5, vcc_lo, v5\n\
|
||||
\n\
|
||||
// compute input buffer offset used to store corresponding local buffer address\n\
|
||||
v_lshlrev_b32 v0, 1, v0 // v0 *= 8\n\
|
||||
v_add_co_u32 v2, vcc, s0, v0 // v[2:3] = s[0:1] + v0 * 8\n\
|
||||
v_mov_b32 v3, s1\n\
|
||||
v_add_u32 v3, vcc_lo, v3\n\
|
||||
\n\
|
||||
// load 64bit local buffer address stored at v[2:3] to v[6:7]\n\
|
||||
flat_load_dwordx2 v[6:7], v[2:3] slc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\
|
||||
\n\
|
||||
v_mov_b32 v8, 0x5678\n\
|
||||
s_movk_i32 s8, 0x5678\n\
|
||||
L_REPEAT:\n\
|
||||
s_load_dword s16, s[0:1], 0x0 glc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\
|
||||
s_cmp_eq_i32 s16, s8\n\
|
||||
s_cbranch_scc1 L_QUIT // if notified to quit by host\n\
|
||||
// loop read 64M local buffer starting at v[6:7]\n\
|
||||
// every 4k page only read once\n\
|
||||
v_mov_b32 v9, 0\n\
|
||||
v_mov_b32 v10, 0x1000 // 4k page\n\
|
||||
v_mov_b32 v11, 0x4000000 // 64M size\n\
|
||||
v_mov_b32 v12, v6\n\
|
||||
v_mov_b32 v13, v7\n\
|
||||
L_LOOP_READ:\n\
|
||||
flat_load_dwordx2 v[14:15], v[12:13] slc\n\
|
||||
v_add_u32 v9, v9, v10 \n\
|
||||
v_add_co_u32 v12, vcc, v12, v10\n\
|
||||
v_add_u32 v13, vcc_lo, v13\n\
|
||||
v_cmp_lt_u32 vcc, v9, v11\n\
|
||||
s_cbranch_vccnz L_LOOP_READ\n\
|
||||
s_branch L_REPEAT\n\
|
||||
L_QUIT:\n\
|
||||
flat_store_dword v[4:5], v8\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory writes to finish\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
static const char* gfx8_ReadMemory =
|
||||
"\
|
||||
shader ReadMemory\n\
|
||||
asic(VI)\n\
|
||||
type(CS)\n\
|
||||
\n\
|
||||
// compute address of corresponding output buffer\n\
|
||||
v_mov_b32 v0, s4 // use workgroup id as index\n\
|
||||
v_lshlrev_b32 v0, 2, v0 // v0 *= 4\n\
|
||||
v_add_u32 v4, vcc, s2, v0 // v[4:5] = s[2:3] + v0 * 4\n\
|
||||
v_mov_b32 v5, s3\n\
|
||||
v_addc_u32 v5, vcc, v5, 0, vcc\n\
|
||||
\n\
|
||||
// compute input buffer offset used to store corresponding local buffer address\n\
|
||||
v_lshlrev_b32 v0, 1, v0 // v0 *= 8\n\
|
||||
v_add_u32 v2, vcc, s0, v0 // v[2:3] = s[0:1] + v0 * 8\n\
|
||||
v_mov_b32 v3, s1\n\
|
||||
v_addc_u32 v3, vcc, v3, 0, vcc\n\
|
||||
\n\
|
||||
// load 64bit local buffer address stored at v[2:3] to v[6:7]\n\
|
||||
flat_load_dwordx2 v[6:7], v[2:3] slc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\
|
||||
\n\
|
||||
v_mov_b32 v8, 0x5678\n\
|
||||
s_movk_i32 s8, 0x5678\n\
|
||||
L_REPEAT:\n\
|
||||
s_load_dword s16, s[0:1], 0x0 glc\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\
|
||||
s_cmp_eq_i32 s16, s8\n\
|
||||
s_cbranch_scc1 L_QUIT // if notified to quit by host\n\
|
||||
// loop read 64M local buffer starting at v[6:7]\n\
|
||||
// every 4k page only read once\n\
|
||||
v_mov_b32 v9, 0\n\
|
||||
v_mov_b32 v10, 0x1000 // 4k page\n\
|
||||
v_mov_b32 v11, 0x4000000 // 64M size\n\
|
||||
v_mov_b32 v12, v6\n\
|
||||
v_mov_b32 v13, v7\n\
|
||||
L_LOOP_READ:\n\
|
||||
flat_load_dwordx2 v[14:15], v[12:13] slc\n\
|
||||
v_add_u32 v9, vcc, v9, v10 \n\
|
||||
v_add_u32 v12, vcc, v12, v10\n\
|
||||
v_addc_u32 v13, vcc, v13, 0, vcc\n\
|
||||
v_cmp_lt_u32 vcc, v9, v11\n\
|
||||
s_cbranch_vccnz L_LOOP_READ\n\
|
||||
s_branch L_REPEAT\n\
|
||||
L_QUIT:\n\
|
||||
flat_store_dword v[4:5], v8\n\
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory writes to finish\n\
|
||||
s_endpgm\n\
|
||||
end\n\
|
||||
";
|
||||
|
||||
std::string KFDSVMEvictTest::CreateShader() {
|
||||
if (m_FamilyId >= FAMILY_AI)
|
||||
return gfx9_ReadMemory;
|
||||
else
|
||||
return gfx8_ReadMemory;
|
||||
}
|
||||
|
||||
/* Evict and restore queue test
|
||||
*
|
||||
* N_PROCESSES processes read all local buffers in parallel while buffers are evicted and restored
|
||||
@@ -434,7 +309,7 @@ TEST_F(KFDSVMEvictTest, QueueTest) {
|
||||
for (i = 0; i < wavefront_num; i++)
|
||||
*(localBufAddr + i) = pBuffers[i];
|
||||
|
||||
m_pIsaGen->CompileShader(CreateShader().c_str(), "ReadMemory", isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(ReadMemoryIsa, isaBuffer.As<char*>()));
|
||||
|
||||
PM4Queue pm4Queue;
|
||||
ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode));
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
#include <vector>
|
||||
#include "KFDLocalMemoryTest.hpp"
|
||||
#include "KFDBaseComponentTest.hpp"
|
||||
#include "IsaGenerator.hpp"
|
||||
|
||||
// @class KFDEvictTest
|
||||
// Test eviction and restore procedure using two processes
|
||||
|
||||
@@ -34,8 +34,6 @@ void KFDSVMRangeTest::SetUp() {
|
||||
|
||||
KFDBaseComponentTest::SetUp();
|
||||
|
||||
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
|
||||
|
||||
SVMSetXNACKMode();
|
||||
|
||||
ROUTINE_END
|
||||
@@ -44,10 +42,6 @@ void KFDSVMRangeTest::SetUp() {
|
||||
void KFDSVMRangeTest::TearDown() {
|
||||
ROUTINE_START
|
||||
|
||||
if (m_pIsaGen)
|
||||
delete m_pIsaGen;
|
||||
m_pIsaGen = NULL;
|
||||
|
||||
SVMRestoreXNACKMode();
|
||||
|
||||
KFDBaseComponentTest::TearDown();
|
||||
@@ -80,7 +74,7 @@ TEST_F(KFDSVMRangeTest, BasicSystemMemTest) {
|
||||
|
||||
srcSysBuffer.Fill(0x01010101);
|
||||
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
||||
queue.SetSkipWaitConsump(0);
|
||||
@@ -364,7 +358,8 @@ TEST_F(KFDSVMRangeTest, EvictSystemRangeTest) {
|
||||
ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode));
|
||||
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
Dispatch dispatch0(isaBuffer);
|
||||
dispatch0.SetArgs(srcBuffer.As<void*>(), dstBuffer.As<void*>());
|
||||
@@ -458,7 +453,8 @@ TEST_F(KFDSVMRangeTest, PartialUnmapSysMemTest) {
|
||||
|
||||
munmap(pBuf2, Buf2Size);
|
||||
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
||||
|
||||
Dispatch dispatch(isaBuffer);
|
||||
@@ -507,7 +503,7 @@ TEST_F(KFDSVMRangeTest, BasicVramTest) {
|
||||
|
||||
srcSysBuffer.Fill(0x01010101);
|
||||
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
||||
queue.SetSkipWaitConsump(0);
|
||||
@@ -943,7 +939,9 @@ TEST_F(KFDSVMRangeTest, MigratePolicyTest) {
|
||||
#ifdef USE_PM4_QUEUE_TRIGGER_VM_FAULT
|
||||
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode);
|
||||
PM4Queue queue;
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
|
||||
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
||||
|
||||
for (HSAuint64 i = 0; i < BufferSize / 8; i += 512) {
|
||||
|
||||
@@ -26,21 +26,17 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "IsaGenerator.hpp"
|
||||
#include "KFDBaseComponentTest.hpp"
|
||||
|
||||
class KFDSVMRangeTest : public KFDBaseComponentTest {
|
||||
public:
|
||||
KFDSVMRangeTest() :m_pIsaGen(NULL) {}
|
||||
KFDSVMRangeTest() {}
|
||||
~KFDSVMRangeTest() {}
|
||||
void SplitRangeTest(int defaultGPUNode, int prefetch_location);
|
||||
|
||||
protected:
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
protected: // Members
|
||||
IsaGenerator* m_pIsaGen;
|
||||
};
|
||||
|
||||
#endif // __KFD_LOCALMEMORY_TEST__H__
|
||||
|
||||
@@ -231,6 +231,12 @@ bool isTonga(const HsaNodeProperties *props) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const uint32_t GetGfxVersion(const HsaNodeProperties *props) {
|
||||
return ((props->EngineId.ui32.Major << 16) |
|
||||
(props->EngineId.ui32.Minor << 8) |
|
||||
(props->EngineId.ui32.Stepping));
|
||||
}
|
||||
|
||||
HSAuint64 GetSystemTickCountInMicroSec() {
|
||||
struct timeval t;
|
||||
gettimeofday(&t, 0);
|
||||
|
||||
@@ -52,6 +52,7 @@ bool is_dgpu();
|
||||
bool isTonga(const HsaNodeProperties *props);
|
||||
bool hasPciAtomicsSupport(int node);
|
||||
unsigned int FamilyIdFromNode(const HsaNodeProperties *props);
|
||||
const uint32_t GetGfxVersion(const HsaNodeProperties *props);
|
||||
|
||||
void GetHwQueueInfo(const HsaNodeProperties *props,
|
||||
unsigned int *p_num_cp_queues,
|
||||
|
||||
@@ -34,16 +34,11 @@ void RDMATest::SetUp() {
|
||||
|
||||
KFDBaseComponentTest::SetUp();
|
||||
|
||||
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
|
||||
|
||||
ROUTINE_END
|
||||
}
|
||||
|
||||
void RDMATest::TearDown() {
|
||||
ROUTINE_START
|
||||
if (m_pIsaGen)
|
||||
delete m_pIsaGen;
|
||||
m_pIsaGen = NULL;
|
||||
|
||||
KFDBaseComponentTest::TearDown();
|
||||
|
||||
@@ -77,7 +72,8 @@ TEST_F(RDMATest, GPUDirect) {
|
||||
srcSysBuffer.Fill(0xfe);
|
||||
|
||||
/* Put 'copy dword' command to ISA buffer */
|
||||
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
|
||||
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
|
||||
|
||||
|
||||
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
|
||||
Dispatch dispatch(isaBuffer);
|
||||
|
||||
@@ -26,20 +26,16 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "IsaGenerator.hpp"
|
||||
#include "KFDBaseComponentTest.hpp"
|
||||
|
||||
class RDMATest : public KFDBaseComponentTest {
|
||||
public:
|
||||
RDMATest():m_pIsaGen(NULL) {}
|
||||
RDMATest() {}
|
||||
~RDMATest() {}
|
||||
|
||||
protected:
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
protected: // Members
|
||||
IsaGenerator* m_pIsaGen;
|
||||
};
|
||||
|
||||
#endif // __RDMA_TEST__H__
|
||||
|
||||
@@ -0,0 +1,609 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "ShaderStore.hpp"
|
||||
|
||||
/**
|
||||
* KFDASMTest List
|
||||
*/
|
||||
|
||||
const std::vector<const char*> ShaderList = {
|
||||
NoopIsa,
|
||||
CopyDwordIsa,
|
||||
InfiniteLoopIsa,
|
||||
AtomicIncIsa,
|
||||
ScratchCopyDwordIsa,
|
||||
PollMemoryIsa,
|
||||
CopyOnSignalIsa,
|
||||
PollAndCopyIsa,
|
||||
WriteFlagAndValueIsa,
|
||||
WriteAndSignalIsa,
|
||||
LoopIsa,
|
||||
IterateIsa,
|
||||
ReadMemoryIsa,
|
||||
GwsInitIsa,
|
||||
GwsAtomicIncreaseIsa,
|
||||
};
|
||||
|
||||
/**
|
||||
* Macros
|
||||
*/
|
||||
|
||||
/* Create macro for portable v_add_co_u32, v_add_co_ci_u32,
|
||||
* and v_cmp_lt_u32
|
||||
*/
|
||||
#define SHADER_MACROS \
|
||||
" .text\n"\
|
||||
" .macro V_ADD_CO_U32 vdst, src0, vsrc1\n"\
|
||||
" .if (.amdgcn.gfx_generation_number >= 10)\n"\
|
||||
" v_add_co_u32 \\vdst, vcc_lo, \\src0, \\vsrc1\n"\
|
||||
" .elseif (.amdgcn.gfx_generation_number >= 9)\n"\
|
||||
" v_add_co_u32 \\vdst, vcc, \\src0, \\vsrc1\n"\
|
||||
" .else\n"\
|
||||
" v_add_u32 \\vdst, vcc, \\src0, \\vsrc1\n"\
|
||||
" .endif\n"\
|
||||
" .endm\n"\
|
||||
" .macro V_ADD_CO_CI_U32 vdst, src0, vsrc1\n"\
|
||||
" .if (.amdgcn.gfx_generation_number >= 10)\n"\
|
||||
" v_add_co_ci_u32 \\vdst, vcc_lo, \\src0, \\vsrc1, vcc_lo\n"\
|
||||
" .elseif (.amdgcn.gfx_generation_number >= 9)\n"\
|
||||
" v_addc_co_u32 \\vdst, vcc, \\src0, \\vsrc1, vcc\n"\
|
||||
" .else\n"\
|
||||
" v_addc_u32 \\vdst, vcc, \\src0, \\vsrc1, vcc\n"\
|
||||
" .endif\n"\
|
||||
" .endm\n"\
|
||||
" .macro V_CMP_LT_U32 src0, vsrc1\n"\
|
||||
" .if (.amdgcn.gfx_generation_number >= 10)\n"\
|
||||
" v_cmp_lt_u32 vcc_lo, \\src0, \\vsrc1\n"\
|
||||
" .else\n"\
|
||||
" v_cmp_lt_u32 vcc, \\src0, \\vsrc1\n"\
|
||||
" .endif\n"\
|
||||
" .endm\n"
|
||||
|
||||
/**
|
||||
* Common
|
||||
*/
|
||||
|
||||
const char *NoopIsa = R"(
|
||||
.text
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
const char *CopyDwordIsa = R"(
|
||||
.text
|
||||
v_mov_b32 v0, s0
|
||||
v_mov_b32 v1, s1
|
||||
v_mov_b32 v2, s2
|
||||
v_mov_b32 v3, s3
|
||||
flat_load_dword v4, v[0:1] glc slc
|
||||
s_waitcnt 0
|
||||
flat_store_dword v[2:3], v4 glc slc
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
const char *InfiniteLoopIsa = R"(
|
||||
.text
|
||||
LOOP:
|
||||
s_branch LOOP
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
const char *AtomicIncIsa = R"(
|
||||
.text
|
||||
v_mov_b32 v0, s0
|
||||
v_mov_b32 v1, s1
|
||||
.if (.amdgcn.gfx_generation_number >= 8)
|
||||
v_mov_b32 v2, 1
|
||||
flat_atomic_add v3, v[0:1], v2 glc slc
|
||||
.else
|
||||
v_mov_b32 v2, -1
|
||||
flat_atomic_inc v3, v[0:1], v2 glc slc
|
||||
.endif
|
||||
s_waitcnt 0
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
/**
|
||||
* KFDMemoryTest
|
||||
*/
|
||||
|
||||
const char *ScratchCopyDwordIsa = R"(
|
||||
.text
|
||||
// Copy the parameters from scalar registers to vector registers
|
||||
.if (.amdgcn.gfx_generation_number >= 9)
|
||||
v_mov_b32 v0, s0
|
||||
v_mov_b32 v1, s1
|
||||
v_mov_b32 v2, s2
|
||||
v_mov_b32 v3, s3
|
||||
.else
|
||||
v_mov_b32_e32 v0, s0
|
||||
v_mov_b32_e32 v1, s1
|
||||
v_mov_b32_e32 v2, s2
|
||||
v_mov_b32_e32 v3, s3
|
||||
.endif
|
||||
// Setup the scratch parameters. This assumes a single 16-reg block
|
||||
.if (.amdgcn.gfx_generation_number >= 10)
|
||||
s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
|
||||
s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
|
||||
.elseif (.amdgcn.gfx_generation_number == 9)
|
||||
s_mov_b32 flat_scratch_lo, s4
|
||||
s_mov_b32 flat_scratch_hi, s5
|
||||
.else
|
||||
s_mov_b32 flat_scratch_lo, 8
|
||||
s_mov_b32 flat_scratch_hi, 0
|
||||
.endif
|
||||
// Copy a dword between the passed addresses
|
||||
flat_load_dword v4, v[0:1] slc
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)
|
||||
flat_store_dword v[2:3], v4 slc
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
/* Continuously poll src buffer and check buffer value
|
||||
* After src buffer is filled with specific value (0x5678,
|
||||
* by host program), fill dst buffer with specific
|
||||
* value(0x5678) and quit
|
||||
*/
|
||||
const char *PollMemoryIsa = R"(
|
||||
.text
|
||||
// Assume src address in s0, s1, and dst address in s2, s3
|
||||
s_movk_i32 s18, 0x5678
|
||||
.if (.amdgcn.gfx_generation_number >= 10)
|
||||
v_mov_b32 v0, s2
|
||||
v_mov_b32 v1, s3
|
||||
v_mov_b32 v2, 0x5678
|
||||
.endif
|
||||
LOOP:
|
||||
s_load_dword s16, s[0:1], 0x0 glc
|
||||
s_cmp_eq_i32 s16, s18
|
||||
s_cbranch_scc0 LOOP
|
||||
.if (.amdgcn.gfx_generation_number >= 10)
|
||||
flat_store_dword v[0:1], v2 slc
|
||||
.else
|
||||
s_store_dword s18, s[2:3], 0x0 glc
|
||||
.endif
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
/* Similar to PollMemoryIsa except that the buffer
|
||||
* polled can be non-coherent memory. SCC system-level
|
||||
* cache coherence is not supported in scalar (smem) path.
|
||||
* Use vmem operations with scc
|
||||
*
|
||||
* Note: Only works on Aldebaran, and even then the scc modifier
|
||||
* has been defeatured. This shader is more or less
|
||||
* deprecated.
|
||||
*/
|
||||
const char *PollNCMemoryIsa = R"(
|
||||
.text
|
||||
// Assume src address in s0, s1, and dst address in s2, s3
|
||||
v_mov_b32 v6, 0x5678
|
||||
v_mov_b32 v0, s0
|
||||
v_mov_b32 v1, s1
|
||||
LOOP:
|
||||
flat_load_dword v4, v[0:1] scc
|
||||
v_cmp_eq_u32 vcc, v4, v6
|
||||
s_cbranch_vccz LOOP
|
||||
v_mov_b32 v0, s2
|
||||
v_mov_b32 v1, s3
|
||||
flat_store_dword v[0:1], v6 scc
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
/* Input: A buffer of at least 3 dwords.
|
||||
* DW0: used as a signal. 0xcafe means it is signaled
|
||||
* DW1: Input buffer for device to read.
|
||||
* DW2: Output buffer for device to write.
|
||||
* Once it receives the signal, the device copies DW1 to DW2.
|
||||
* This shader continuously polls the signal buffer.
|
||||
* Once signal buffer is signaled, it copies input buffer
|
||||
* to output buffer
|
||||
*/
|
||||
const char *CopyOnSignalIsa = R"(
|
||||
.text
|
||||
// Assume input buffer in s0, s1
|
||||
.if (.amdgcn.gfx_generation_number >= 10)
|
||||
s_add_u32 s2, s0, 0x8
|
||||
s_addc_u32 s3, s1, 0x0
|
||||
s_mov_b32 s18, 0xcafe
|
||||
v_mov_b32 v0, s0
|
||||
v_mov_b32 v1, s1
|
||||
v_mov_b32 v4, s2
|
||||
v_mov_b32 v5, s3
|
||||
.else
|
||||
s_mov_b32 s18, 0xcafe
|
||||
.endif
|
||||
POLLSIGNAL:
|
||||
s_load_dword s16, s[0:1], 0x0 glc
|
||||
s_cmp_eq_i32 s16, s18
|
||||
s_cbranch_scc0 POLLSIGNAL
|
||||
s_load_dword s17, s[0:1], 0x4 glc
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)
|
||||
.if (.amdgcn.gfx_generation_number >= 10)
|
||||
v_mov_b32 v2, s17
|
||||
flat_store_dword v[4:5], v2 glc
|
||||
.else
|
||||
s_store_dword s17, s[0:1], 0x8 glc
|
||||
.endif
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
/* Continuously poll the flag at src buffer
|
||||
* After the flag at s[0:1] is set to 1,
|
||||
* copy the value from s[0:1]+4 to dst buffer
|
||||
*
|
||||
* Note: Only works on GFX9 (only used in
|
||||
* aldebaran tests)
|
||||
*/
|
||||
const char *PollAndCopyIsa = R"(
|
||||
.text
|
||||
// Assume src buffer in s[0:1] and dst buffer in s[2:3]
|
||||
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_stepping == 10)
|
||||
// Path for Aldebaran
|
||||
v_mov_b32 v0, s0
|
||||
v_mov_b32 v1, s1
|
||||
v_mov_b32 v18, 0x1
|
||||
LOOP_ALDBRN:
|
||||
flat_load_dword v16, v[0:1] glc
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)
|
||||
v_cmp_eq_i32 vcc, v16, v18
|
||||
s_cbranch_vccz LOOP_ALDBRN
|
||||
buffer_invl2
|
||||
s_load_dword s17, s[0:1], 0x4 glc
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)
|
||||
s_store_dword s17, s[2:3], 0x0 glc
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)
|
||||
buffer_wbl2
|
||||
.elseif (.amdgcn.gfx_generation_number == 9)
|
||||
s_movk_i32 s18, 0x1
|
||||
LOOP:
|
||||
s_load_dword s16, s[0:1], 0x0 glc
|
||||
s_cmp_eq_i32 s16, s18
|
||||
s_cbranch_scc0 LOOP
|
||||
s_load_dword s17, s[0:1], 0x4 glc
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)
|
||||
s_store_dword s17, s[2:3], 0x0 glc
|
||||
.endif
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
/* Input0: A buffer of at least 2 dwords.
|
||||
* DW0: used as a signal. Write 0x1 to signal
|
||||
* DW1: Write the value from 2nd input buffer
|
||||
* for other device to read.
|
||||
* Input1: A buffer of at least 2 dwords.
|
||||
* DW0: used as the value to be written.
|
||||
*
|
||||
* Note: Only works on Aldebaran
|
||||
*/
|
||||
const char *WriteFlagAndValueIsa = R"(
|
||||
.text
|
||||
// Assume two inputs buffer in s[0:1] and s[2:3]
|
||||
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_stepping == 10)
|
||||
v_mov_b32 v0, s0
|
||||
v_mov_b32 v1, s1
|
||||
s_load_dword s18, s[2:3], 0x0 glc
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)
|
||||
s_store_dword s18, s[0:1], 0x4 glc
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)
|
||||
buffer_wbl2
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)
|
||||
v_mov_b32 v16, 0x1
|
||||
flat_store_dword v[0:1], v16 glc
|
||||
.endif
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
/* Input0: A buffer of at least 2 dwords.
|
||||
* DW0: used as a signal. Write 0xcafe to signal
|
||||
* DW1: Write to this buffer for other device to read.
|
||||
* Input1: mmio base address
|
||||
*/
|
||||
const char *WriteAndSignalIsa = R"(
|
||||
.text
|
||||
// Assume input buffer in s0, s1
|
||||
.if (.amdgcn.gfx_generation_number >= 10)
|
||||
s_add_u32 s4, s0, 0x4
|
||||
s_addc_u32 s5, s1, 0x0
|
||||
v_mov_b32 v0, s0
|
||||
v_mov_b32 v1, s1
|
||||
v_mov_b32 v2, s2
|
||||
v_mov_b32 v3, s3
|
||||
v_mov_b32 v4, s4
|
||||
v_mov_b32 v5, s5
|
||||
v_mov_b32 v18, 0xbeef
|
||||
flat_store_dword v[4:5], v18 glc
|
||||
v_mov_b32 v18, 0x1
|
||||
flat_store_dword v[2:3], v18 glc
|
||||
v_mov_b32 v18, 0xcafe
|
||||
flat_store_dword v[0:1], v18 glc
|
||||
.else
|
||||
s_mov_b32 s18, 0xbeef
|
||||
s_store_dword s18, s[0:1], 0x4 glc
|
||||
s_mov_b32 s18, 0x1
|
||||
s_store_dword s18, s[2:3], 0 glc
|
||||
s_mov_b32 s18, 0xcafe
|
||||
s_store_dword s18, s[0:1], 0x0 glc
|
||||
.endif
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
/**
|
||||
* KFDQMTest
|
||||
*/
|
||||
|
||||
/* A simple isa loop program with dense mathematic operations
|
||||
* s1 controls the number of iterations of the loop
|
||||
* This shader can be used by GFX8, GFX9 and GFX10
|
||||
*/
|
||||
const char *LoopIsa = R"(
|
||||
.text
|
||||
s_movk_i32 s0, 0x0008
|
||||
s_movk_i32 s1, 0x00ff
|
||||
v_mov_b32 v0, 0
|
||||
v_mov_b32 v1, 0
|
||||
v_mov_b32 v2, 0
|
||||
v_mov_b32 v3, 0
|
||||
v_mov_b32 v4, 0
|
||||
v_mov_b32 v5, 0
|
||||
v_mov_b32 v6, 0
|
||||
v_mov_b32 v7, 0
|
||||
v_mov_b32 v8, 0
|
||||
v_mov_b32 v9, 0
|
||||
v_mov_b32 v10, 0
|
||||
v_mov_b32 v11, 0
|
||||
v_mov_b32 v12, 0
|
||||
v_mov_b32 v13, 0
|
||||
v_mov_b32 v14, 0
|
||||
v_mov_b32 v15, 0
|
||||
v_mov_b32 v16, 0
|
||||
LOOP:
|
||||
s_mov_b32 s8, s4
|
||||
s_mov_b32 s9, s1
|
||||
s_mov_b32 s10, s6
|
||||
s_mov_b32 s11, s7
|
||||
s_cmp_le_i32 s1, s0
|
||||
s_cbranch_scc1 END_OF_PGM
|
||||
s_buffer_load_dwordx8 s[8:15], s[8:11], 0x10
|
||||
v_add_f32 v0, 2.0, v0
|
||||
v_cvt_f32_i32 v17, s1
|
||||
s_waitcnt lgkmcnt(0)
|
||||
v_add_f32 v18, s8, v17
|
||||
v_add_f32 v19, s9, v17
|
||||
v_add_f32 v20, s10, v17
|
||||
v_add_f32 v21, s11, v17
|
||||
v_add_f32 v22, s12, v17
|
||||
v_add_f32 v23, s13, v17
|
||||
v_add_f32 v24, s14, v17
|
||||
v_add_f32 v17, s15, v17
|
||||
v_log_f32 v25, v18
|
||||
v_mul_f32 v25, v22, v25
|
||||
v_exp_f32 v25, v25
|
||||
v_log_f32 v26, v19
|
||||
v_mul_f32 v26, v23, v26
|
||||
v_exp_f32 v26, v26
|
||||
v_log_f32 v27, v20
|
||||
v_mul_f32 v27, v24, v27
|
||||
v_exp_f32 v27, v27
|
||||
v_log_f32 v28, v21
|
||||
v_mul_f32 v28, v17, v28
|
||||
v_exp_f32 v28, v28
|
||||
v_add_f32 v5, v5, v25
|
||||
v_add_f32 v6, v6, v26
|
||||
v_add_f32 v7, v7, v27
|
||||
v_add_f32 v8, v8, v28
|
||||
v_mul_f32 v18, 0x3fb8aa3b, v18
|
||||
v_exp_f32 v18, v18
|
||||
v_mul_f32 v19, 0x3fb8aa3b, v19
|
||||
v_exp_f32 v19, v19
|
||||
v_mul_f32 v20, 0x3fb8aa3b, v20
|
||||
v_exp_f32 v20, v20
|
||||
v_mul_f32 v21, 0x3fb8aa3b, v21
|
||||
v_exp_f32 v21, v21
|
||||
v_add_f32 v9, v9, v18
|
||||
v_add_f32 v10, v10, v19
|
||||
v_add_f32 v11, v11, v20
|
||||
v_add_f32 v12, v12, v21
|
||||
v_sqrt_f32 v18, v22
|
||||
v_sqrt_f32 v19, v23
|
||||
v_sqrt_f32 v20, v24
|
||||
v_sqrt_f32 v21, v17
|
||||
v_add_f32 v13, v13, v18
|
||||
v_add_f32 v14, v14, v19
|
||||
v_add_f32 v15, v15, v20
|
||||
v_add_f32 v16, v16, v21
|
||||
v_rsq_f32 v18, v22
|
||||
v_rsq_f32 v19, v23
|
||||
v_rsq_f32 v20, v24
|
||||
v_rsq_f32 v17, v17
|
||||
v_add_f32 v1, v1, v18
|
||||
v_add_f32 v2, v2, v19
|
||||
v_add_f32 v3, v3, v20
|
||||
v_add_f32 v4, v4, v17
|
||||
s_add_u32 s0, s0, 1
|
||||
s_branch LOOP
|
||||
END_OF_PGM:
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
|
||||
/**
|
||||
* KFDCWSRTest
|
||||
*/
|
||||
|
||||
/* Initial state:
|
||||
* s[0:1] - 64 bits iteration number; only the lower 32 bits are useful.
|
||||
* s[2:3] - result buffer base address
|
||||
* s4 - workgroup id
|
||||
* v0 - workitem id, always 0 because
|
||||
* NUM_THREADS_X(number of threads) in workgroup set to 1
|
||||
* Registers:
|
||||
* v0 - calculated workitem = v0 + s4 * NUM_THREADS_X, which is s4
|
||||
* v2 - = s0, 32 bits iteration number
|
||||
* v[4:5] - corresponding output buf address: s[2:3] + v0 * 4
|
||||
* v6 - counter
|
||||
*/
|
||||
const char *IterateIsa = SHADER_MACROS R"(
|
||||
// Copy the parameters from scalar registers to vector registers
|
||||
v_mov_b32 v2, s0 // v[2:3] = s[0:1]
|
||||
v_mov_b32 v3, s1 // v[2:3] = s[0:1]
|
||||
v_mov_b32 v0, s4 // use workgroup id as index
|
||||
v_lshlrev_b32 v0, 2, v0 // v0 *= 4
|
||||
V_ADD_CO_U32 v4, s2, v0 // v[4:5] = s[2:3] + v0 * 4
|
||||
v_mov_b32 v5, s3 // v[4:5] = s[2:3] + v0 * 4
|
||||
V_ADD_CO_CI_U32 v5, v5, 0 // v[4:5] = s[2:3] + v0 * 4
|
||||
v_mov_b32 v6, 0
|
||||
LOOP:
|
||||
V_ADD_CO_U32 v6, 1, v6
|
||||
|
||||
// Compare the result value (v6) to iteration value (v2), and
|
||||
// jump back to LOOP while v6 < v2 (i.e. if VCC is not zero after the comparison)
|
||||
V_CMP_LT_U32 v6, v2
|
||||
s_cbranch_vccnz LOOP
|
||||
flat_store_dword v[4:5], v6
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
/**
|
||||
* KFDEvictTest
|
||||
*/
|
||||
|
||||
/* Shader to read local buffers using multiple wavefronts in parallel
|
||||
* until address buffer is filled with specific value 0x5678 by host program,
|
||||
* then each wavefront writes 0x5678 to its corresponding result buffer and quits
|
||||
*
|
||||
* Initial state:
|
||||
* s[0:1] - address buffer base address
|
||||
* s[2:3] - result buffer base address
|
||||
* s4 - workgroup id
|
||||
* v0 - workitem id, always 0 because NUM_THREADS_X(number of threads) in workgroup set to 1
|
||||
* Registers:
|
||||
* v0 - calculated workitem id, v0 = v0 + s4 * NUM_THREADS_X
|
||||
* v[2:3] - address of corresponding local buf address offset: s[0:1] + v0 * 8
|
||||
* v[4:5] - corresponding output buf address: s[2:3] + v0 * 4
|
||||
* v[6:7] - local buf address used for read test
|
||||
*/
|
||||
const char *ReadMemoryIsa = SHADER_MACROS R"(
|
||||
// Compute address of corresponding output buffer
|
||||
v_mov_b32 v0, s4 // use workgroup id as index
|
||||
v_lshlrev_b32 v0, 2, v0 // v0 *= 4
|
||||
V_ADD_CO_U32 v4, s2, v0 // v[4:5] = s[2:3] + v0 * 4
|
||||
v_mov_b32 v5, s3 // v[4:5] = s[2:3] + v0 * 4
|
||||
V_ADD_CO_CI_U32 v5, v5, 0 // v[4:5] = s[2:3] + v0 * 4
|
||||
|
||||
// Compute input buffer offset used to store corresponding local buffer address
|
||||
v_lshlrev_b32 v0, 1, v0 // v0 *= 8
|
||||
V_ADD_CO_U32 v2, s0, v0 // v[2:3] = s[0:1] + v0 * 8
|
||||
v_mov_b32 v3, s1 // v[2:3] = s[0:1] + v0 * 8
|
||||
V_ADD_CO_CI_U32 v3, v3, 0 // v[2:3] = s[0:1] + v0 * 8
|
||||
|
||||
// Load 64bit local buffer address stored at v[2:3] to v[6:7]
|
||||
flat_load_dwordx2 v[6:7], v[2:3] slc
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish
|
||||
v_mov_b32 v8, 0x5678
|
||||
s_movk_i32 s8, 0x5678
|
||||
L_REPEAT:
|
||||
s_load_dword s16, s[0:1], 0x0 glc
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish
|
||||
s_cmp_eq_i32 s16, s8
|
||||
s_cbranch_scc1 L_QUIT // if notified to quit by host
|
||||
|
||||
// Loop read 64M local buffer starting at v[6:7]
|
||||
// every 4k page only read once
|
||||
v_mov_b32 v9, 0
|
||||
v_mov_b32 v10, 0x1000 // 4k page
|
||||
v_mov_b32 v11, 0x4000000 // 64M size
|
||||
v_mov_b32 v12, v6
|
||||
v_mov_b32 v13, v7
|
||||
L_LOOP_READ:
|
||||
flat_load_dwordx2 v[14:15], v[12:13] slc
|
||||
V_ADD_CO_U32 v9, v9, v10
|
||||
V_ADD_CO_U32 v12, v12, v10
|
||||
V_ADD_CO_CI_U32 v13, v13, 0
|
||||
V_CMP_LT_U32 v9, v11
|
||||
s_cbranch_vccnz L_LOOP_READ
|
||||
s_branch L_REPEAT
|
||||
L_QUIT:
|
||||
flat_store_dword v[4:5], v8
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory writes to finish
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
/**
|
||||
* KFDGWSTest
|
||||
*/
|
||||
|
||||
/* Shader to initialize gws counter to 1 */
|
||||
const char *GwsInitIsa = R"(
|
||||
.text
|
||||
s_mov_b32 m0, 0
|
||||
s_nop 0
|
||||
s_load_dword s16, s[0:1], 0x0 glc
|
||||
s_waitcnt 0
|
||||
v_mov_b32 v0, s16
|
||||
s_waitcnt 0
|
||||
ds_gws_init v0 offset:0 gds
|
||||
s_waitcnt 0
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
/* Atomically increase a value in memory
|
||||
* This is expected to be executed from
|
||||
* multiple work groups simultaneously.
|
||||
* GWS semaphore is used to guarantee
|
||||
* the operation is atomic.
|
||||
*/
|
||||
const char *GwsAtomicIncreaseIsa = R"(
|
||||
.text
|
||||
// Assume src address in s0, s1
|
||||
.if (.amdgcn.gfx_generation_number >= 10)
|
||||
s_mov_b32 m0, 0
|
||||
s_mov_b32 exec_lo, 0x1
|
||||
v_mov_b32 v0, s0
|
||||
v_mov_b32 v1, s1
|
||||
ds_gws_sema_p offset:0 gds
|
||||
s_waitcnt 0
|
||||
flat_load_dword v2, v[0:1] glc dlc
|
||||
s_waitcnt 0
|
||||
v_add_nc_u32 v2, v2, 1
|
||||
flat_store_dword v[0:1], v2
|
||||
s_waitcnt_vscnt null, 0
|
||||
ds_gws_sema_v offset:0 gds
|
||||
.else
|
||||
s_mov_b32 m0, 0
|
||||
s_nop 0
|
||||
ds_gws_sema_p offset:0 gds
|
||||
s_waitcnt 0
|
||||
s_load_dword s16, s[0:1], 0x0 glc
|
||||
s_waitcnt 0
|
||||
s_add_u32 s16, s16, 1
|
||||
s_store_dword s16, s[0:1], 0x0 glc
|
||||
s_waitcnt lgkmcnt(0)
|
||||
ds_gws_sema_v offset:0 gds
|
||||
.endif
|
||||
s_waitcnt 0
|
||||
s_endpgm
|
||||
)";
|
||||
+32
-21
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
* Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -21,29 +21,40 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _ISAGENERATOR_GFX10_H_
|
||||
#define _ISAGENERATOR_GFX10_H_
|
||||
#ifndef _SHADERSTORE_H_
|
||||
#define _SHADERSTORE_H_
|
||||
|
||||
#include <string>
|
||||
#include "IsaGenerator.hpp"
|
||||
#include <vector>
|
||||
|
||||
class IsaGenerator_Gfx10 : public IsaGenerator {
|
||||
public:
|
||||
virtual void GetNoopIsa(HsaMemoryBuffer& rBuf);
|
||||
virtual void GetCopyDwordIsa(HsaMemoryBuffer& rBuf);
|
||||
virtual void GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf);
|
||||
virtual void GetAtomicIncIsa(HsaMemoryBuffer& rBuf);
|
||||
/* KFDASMTest List */
|
||||
extern const std::vector<const char*> ShaderList;
|
||||
|
||||
protected:
|
||||
virtual const std::string& GetAsicName();
|
||||
/* Common */
|
||||
extern const char *NoopIsa;
|
||||
extern const char *CopyDwordIsa;
|
||||
extern const char *InfiniteLoopIsa;
|
||||
extern const char *AtomicIncIsa;
|
||||
|
||||
private:
|
||||
static const std::string ASIC_NAME;
|
||||
/* KFDMemoryTest */
|
||||
extern const char *ScratchCopyDwordIsa;
|
||||
extern const char *PollMemoryIsa;
|
||||
extern const char *PollNCMemoryIsa;
|
||||
extern const char *CopyOnSignalIsa;
|
||||
extern const char *PollAndCopyIsa;
|
||||
extern const char *WriteFlagAndValueIsa;
|
||||
extern const char *WriteAndSignalIsa;
|
||||
|
||||
static const uint32_t NOOP_ISA[];
|
||||
static const uint32_t COPY_DWORD_ISA[];
|
||||
static const uint32_t INFINITE_LOOP_ISA[];
|
||||
static const uint32_t ATOMIC_ADD_ISA[];
|
||||
};
|
||||
/* KFDQMTest */
|
||||
extern const char *LoopIsa;
|
||||
|
||||
#endif // _ISAGENERATOR_GFX9_H_
|
||||
/* KFDCWSRTest */
|
||||
extern const char *IterateIsa;
|
||||
|
||||
/* KFDEvictTest */
|
||||
extern const char *ReadMemoryIsa;
|
||||
|
||||
/* KFDGWSTest */
|
||||
extern const char *GwsInitIsa;
|
||||
extern const char *GwsAtomicIncreaseIsa;
|
||||
|
||||
#endif // _SHADERSTORE_H_
|
||||
Ссылка в новой задаче
Block a user