diff --git a/projects/rocr-runtime/tests/kfdtest/CMakeLists.txt b/projects/rocr-runtime/tests/kfdtest/CMakeLists.txt
index 05557ec16d..a2b122d42b 100644
--- a/projects/rocr-runtime/tests/kfdtest/CMakeLists.txt
+++ b/projects/rocr-runtime/tests/kfdtest/CMakeLists.txt
@@ -95,12 +95,47 @@ endif()
 
 message ( "Find libhsakmt at ${HSAKMT_LIBRARY_DIRS}" )
 
-set ( SP3_DIR ${PROJECT_SOURCE_DIR}/sp3 )
+# CMP0074 (when available): find_package() honors <PackageName>_ROOT variables.
+if ( POLICY CMP0074 )
+    cmake_policy( SET CMP0074 NEW )
+endif()
+
+# Look for an LLVMConfig.cmake from a Lightning build under $OUT_DIR only.
+find_path( LIGHTNING_CMAKE_DIR NAMES LLVMConfig.cmake
+           PATHS $ENV{OUT_DIR}/llvm/lib/cmake/llvm NO_CACHE NO_DEFAULT_PATH)
+
+# find_path() yields "<var>-NOTFOUND" on failure, which if(<var>) treats as false.
+if ( LIGHTNING_CMAKE_DIR AND EXISTS "${LIGHTNING_CMAKE_DIR}" )
+    set ( LLVM_DIR "${LIGHTNING_CMAKE_DIR}" )
+else()
+    message( WARNING "Couldn't find Lightning build. "
+                     "Attempting to use system LLVM install..." )
+endif()
+
+find_package( LLVM REQUIRED CONFIG )
+
+if( "${LLVM_PACKAGE_VERSION}" VERSION_LESS "7.0" )
+    message( FATAL_ERROR "Requires LLVM 7.0 or greater "
+                         "(found ${LLVM_PACKAGE_VERSION})" )
+elseif( "${LLVM_PACKAGE_VERSION}" VERSION_LESS "14.0" )
+    message( WARNING "Not using latest LLVM version. "
+                     "Some ASIC targets may not work!" )
+endif()
+
+message( STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}" )
+message( STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}" )
+
+# LLVM_DEFINITIONS is split into a list before being passed to add_definitions().
+include_directories(${LLVM_INCLUDE_DIRS})
+separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
+add_definitions(${LLVM_DEFINITIONS_LIST})
+
+# Resolve the listed LLVM components to the library names linked into kfdtest.
+llvm_map_components_to_libnames(llvm_libs AMDGPUAsmParser Core Support)
 
 include_directories(${PROJECT_SOURCE_DIR}/gtest-1.6.0)
 include_directories(${PROJECT_SOURCE_DIR}/include)
 include_directories(${PROJECT_SOURCE_DIR}/../../include)
-include_directories(${SP3_DIR})
 
 include_directories(${DRM_INCLUDE_DIRS})
 
@@ -112,12 +147,8 @@ set (SRC_FILES gtest-1.6.0/gtest-all.cpp
                src/Dispatch.cpp
                src/GoogleTestExtension.cpp
                src/IndirectBuffer.cpp
-               src/IsaGenerator.cpp
-               src/IsaGenerator_Aldebaran.cpp
-               src/IsaGenerator_Gfx10.cpp
-               src/IsaGenerator_Gfx72.cpp
-               src/IsaGenerator_Gfx8.cpp
-               src/IsaGenerator_Gfx9.cpp
+               src/Assemble.cpp
+               src/ShaderStore.cpp
                src/LinuxOSWrapper.cpp
                src/PM4Packet.cpp
                src/PM4Queue.cpp
@@ -143,6 +174,7 @@ set (SRC_FILES gtest-1.6.0/gtest-all.cpp
                src/KFDDBGTest.cpp
                src/KFDGWSTest.cpp
                src/KFDIPCTest.cpp
+               src/KFDASMTest.cpp
                src/KFDEvictTest.cpp
                src/KFDHWSTest.cpp
 
@@ -163,7 +195,8 @@ message( STATUS "PROJECT_SOURCE_DIR:" ${PROJECT_SOURCE_DIR} )
 
-if ( "${CMAKE_C_COMPILER_VERSION}" STRGREATER "4.8.0")
+# Compare as versions; STRGREATER is lexicographic, so "10.2.0" sorts below "4.8.0".
+if ( "${CMAKE_C_COMPILER_VERSION}" VERSION_GREATER "4.8.0")
 ## Add --enable-new-dtags to generate DT_RUNPATH
-set ( CMAKE_CXX_FLAGS "-std=gnu++11 -Wl,--enable-new-dtags" )
+set ( CMAKE_CXX_FLAGS "-std=gnu++14 -Wl,--enable-new-dtags" )
 endif()
 if ( "${CMAKE_BUILD_TYPE}" STREQUAL Release )
     set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2" )
@@ -181,11 +214,10 @@ endif ()
 # The modules found by pkg_check_modules() in the default pkg config
 # path do not need to use link_directories() here.
 link_directories(${HSAKMT_LIBRARY_DIRS})
-link_directories(${SP3_DIR})
 
 add_executable(kfdtest ${SRC_FILES})
 
-target_link_libraries(kfdtest ${HSAKMT_LIBRARIES} ${DRM_LDFLAGS} ${DRM_AMDGPU_LDFLAGS} pthread m stdc++ rt amdsp3 numa)
+target_link_libraries(kfdtest ${HSAKMT_LIBRARIES} ${DRM_LDFLAGS} ${DRM_AMDGPU_LDFLAGS} ${llvm_libs} pthread m stdc++ rt numa)
 
 configure_file ( scripts/kfdtest.exclude kfdtest.exclude COPYONLY )
 configure_file ( scripts/run_kfdtest.sh run_kfdtest.sh COPYONLY )
diff --git a/projects/rocr-runtime/tests/kfdtest/scripts/kfdtest.exclude b/projects/rocr-runtime/tests/kfdtest/scripts/kfdtest.exclude
index 392dba3cde..885054ba50 100644
--- a/projects/rocr-runtime/tests/kfdtest/scripts/kfdtest.exclude
+++ b/projects/rocr-runtime/tests/kfdtest/scripts/kfdtest.exclude
@@ -224,26 +224,10 @@ FILTER[aldebaran]=\
 "KFDMemoryTest.PtraceAccess:"\
 "KFDMemoryTest.DeviceHdpFlush"
 
-# SP3 Compiler needs to be updated for GFX10. Temporarily disable all tests
-# that require shader compiler
-# Adding KFDSVMEvictTest as SVM/HMM was never validated on GFX10
-TEMP_GFX10_BLACKLIST=\
-"KFDMemoryTest.FlatScratchAccess:"\
-"KFDMemoryTest.PtraceAccessInvisibleVram:"\
-"KFDQMTest.QueuePriorityOnDifferentPipe:"\
-"KFDQMTest.QueuePriorityOnSamePipe:"\
-"KFDCWSRTest.BasicTest:"\
-"KFDQMTest.BasicCuMaskingEven:"\
-"KFDEvictTest.QueueTest:"\
-"KFDMemoryTest.MapUnmapToNodes:"\
-"KFDMemoryTest.HostHdpFlush:"\
-"KFDMemoryTest.DeviceHdpFlush:"\
-"KFDSVMEvictTest.*"
-
 FILTER[navi10]=\
 "$BLACKLIST_ALL_ASICS:"\
-"$TEMP_GFX10_BLACKLIST:"\
-"KFDMemoryTest.MMBench"
+"KFDMemoryTest.MMBench:"\
+"KFDSVMEvictTest.*"
 
 # Need to verify the following failed tests on another machine:
 # Exceptions not being received during exception tests
@@ -254,42 +238,42 @@ FILTER[navi12]=\
 "KFDExceptionTest.*:"\
 "KFDPerfCountersTest.*:"\
 "KFDPerformanceTest.P2PBandWidthTest:"\
-"$TEMP_GFX10_BLACKLIST"
+"KFDSVMEvictTest.*"
 
 FILTER[navi14]=\
 "$BLACKLIST_ALL_ASICS:"\
-"$TEMP_GFX10_BLACKLIST"
+"KFDSVMEvictTest.*"
 
FILTER[sienna_cichlid]=\ "$BLACKLIST_ALL_ASICS:"\ -"$TEMP_GFX10_BLACKLIST:"\ "KFDQMTest.BasicCuMaskingEven:"\ "KFDDBGTest.*:"\ "KFDPerfCountersTest.*:"\ +"KFDSVMEvictTest.*" FILTER[navy_flounder]=\ "$BLACKLIST_ALL_ASICS:"\ -"$TEMP_GFX10_BLACKLIST:"\ "KFDQMTest.BasicCuMaskingEven:"\ "KFDDBGTest.*:"\ "KFDPerfCountersTest.*:"\ +"KFDSVMEvictTest.*" FILTER[dimgrey_cavefish]=\ "$BLACKLIST_ALL_ASICS:"\ -"$TEMP_GFX10_BLACKLIST:"\ "KFDQMTest.BasicCuMaskingEven:"\ "KFDDBGTest.*:"\ "KFDPerfCountersTest.*:"\ +"KFDSVMEvictTest.*" FILTER[beige_goby]=\ "$BLACKLIST_ALL_ASICS:"\ -"$TEMP_GFX10_BLACKLIST:"\ "KFDQMTest.BasicCuMaskingEven:"\ "KFDDBGTest.*:"\ "KFDPerfCountersTest.*:"\ +"KFDSVMEvictTest.*" FILTER[yellow_carp]=\ "$BLACKLIST_ALL_ASICS:"\ -"$TEMP_GFX10_BLACKLIST:"\ "KFDQMTest.BasicCuMaskingEven:"\ -"KFDIPCTest.CMABasicTest" +"KFDIPCTest.CMABasicTest:"\ +"KFDSVMEvictTest.*" diff --git a/projects/rocr-runtime/tests/kfdtest/sp3/README.txt b/projects/rocr-runtime/tests/kfdtest/sp3/README.txt deleted file mode 100644 index 7cbe800f50..0000000000 --- a/projects/rocr-runtime/tests/kfdtest/sp3/README.txt +++ /dev/null @@ -1,6 +0,0 @@ -Note: This folder is primarily intended for AMD internal developers. - -The folder lib_helper contains the script to generate SP3 library libamdsp3.a -and the associated header files in the current folder for kfdtest to use. -cmake is required for the script to run. Just run ./build_sp3.sh after setting -up the environment variables (source build/envsetup.sh). diff --git a/projects/rocr-runtime/tests/kfdtest/sp3/lib_helper/CMakeLists_sp3.txt b/projects/rocr-runtime/tests/kfdtest/sp3/lib_helper/CMakeLists_sp3.txt deleted file mode 100644 index ce8a3cb33f..0000000000 --- a/projects/rocr-runtime/tests/kfdtest/sp3/lib_helper/CMakeLists_sp3.txt +++ /dev/null @@ -1,79 +0,0 @@ -# -# Copyright (C) 2018 Advanced Micro Devices, Inc. All Rights Reserved. 
-# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. 
-# -# - -cmake_minimum_required(VERSION 2.8 FATAL_ERROR) - -project(amdsp3) - -#set ( CMAKE_VERBOSE_MAKEFILE on ) - -find_package(PkgConfig) - -set ( P4_PATH $ENV{WORK_ROOT}/p4/driver/drivers ) - -set ( SCLIB_SRC ${PROJECT_SOURCE_DIR} ) -#if( DEFINED ENV{SCLIB_SRC} ) -# set ( SCLIB_SRC $ENV{SCLIB_SRC} ) -#else() -# set ( SCLIB_SRC ${P4_PATH}/sc/Chip ) -#endif() - -include_directories(${SCLIB_SRC}/sp3) -#include_directories(${SCLIB_SRC}/sp3/release_headers) -include_directories(${SCLIB_SRC}/sp3/gen) - -set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-asic.c ) -set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-dispatch.c ) -set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-eval.c ) -set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-gc.c ) -set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-int.c ) -set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-lib.c ) -set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-native.c ) -set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-cipher.c ) -set ( SRC_FILES ${SRC_FILES} ${SCLIB_SRC}/sp3/sp3-vm.c ) - -aux_source_directory(${SCLIB_SRC}/sp3/gen SRC_FILES) -aux_source_directory(${SCLIB_SRC}/sp3/backend/si/lib SRC_FILES) -aux_source_directory(${SCLIB_SRC}/sp3/backend/ci/lib SRC_FILES) -aux_source_directory(${SCLIB_SRC}/sp3/backend/gfx8/lib SRC_FILES) -aux_source_directory(${SCLIB_SRC}/sp3/backend/gfx81/lib SRC_FILES) -aux_source_directory(${SCLIB_SRC}/sp3/backend/gfx9/lib SRC_FILES) -aux_source_directory(${SCLIB_SRC}/sp3/backend/gfx10/lib SRC_FILES) -aux_source_directory(${SCLIB_SRC}/sp3/backend/aldbrn/lib SRC_FILES) -aux_source_directory(${SCLIB_SRC}/sp3/backend/gfx81/arch SRC_FILES) -aux_source_directory(${SCLIB_SRC}/sp3/backend/gfx9/arch SRC_FILES) -aux_source_directory(${SCLIB_SRC}/sp3/backend/gfx10/arch SRC_FILES) -aux_source_directory(${SCLIB_SRC}/sp3/backend/aldbrn/arch SRC_FILES) - - -message( STATUS "PROJECT_SOURCE_DIR:" ${PROJECT_SOURCE_DIR} ) -#message( STATUS "SRC_FILES: ") -#foreach(file ${SRC_FILES}) -# message(STATUS 
"${file}") -#endforeach() - -set ( CMAKE_C_FLAGS "-DSP3_STATIC_LIB -Wno-error -DPUBLIC_RELEASE -DLITTLEENDIAN_CPU -fPIC -DGFX101_BUILD -DALDBRN_BUILD" ) - -add_library(amdsp3 ${SRC_FILES}) - - diff --git a/projects/rocr-runtime/tests/kfdtest/sp3/lib_helper/build_sp3.sh b/projects/rocr-runtime/tests/kfdtest/sp3/lib_helper/build_sp3.sh deleted file mode 100755 index f93f145da6..0000000000 --- a/projects/rocr-runtime/tests/kfdtest/sp3/lib_helper/build_sp3.sh +++ /dev/null @@ -1,57 +0,0 @@ -# -# Copyright (C) 2018 Advanced Micro Devices, Inc. All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. 
-# -# - -#!/bin/bash - -if [ "$KFDTEST_ROOT" == "" ] || [ "$P4_ROOT" == "" ]; then - echo "Environment variables should be set before running this script" - exit 1 -fi - -cd $KFDTEST_ROOT/sp3/lib_helper - -SP3_PROJECT=$P4_ROOT/driver/drivers/sc/Chip/ -LIB_OUTPUT=$KFDTEST_ROOT/sp3/ - -cp CMakeLists_sp3.txt $SP3_PROJECT/CMakeLists.txt - -mkdir -p build -echo "Building SP3 lib" -pushd build -cmake $SP3_PROJECT/ -make -popd - -rsync --progress -a build/libamdsp3.a $LIB_OUTPUT -# Put the intermediate header files in the current folder for further processing -rsync --progress -a $SP3_PROJECT/sp3/public/lib/sp3.h . - -# Remove the build folder and CMakeLists.txt put into SP source folder -rm -r build -rm $SP3_PROJECT/CMakeLists.txt - -# Replace the license statement in the header files -{ cat AMD_opensource_license.txt; sed -e '1,/#ifndef/ { /#ifndef/b; d }' sp3.h; } > $LIB_OUTPUT/sp3.h - -# Delete the intermediate header files -rm sp3.h diff --git a/projects/rocr-runtime/tests/kfdtest/sp3/sp3.h b/projects/rocr-runtime/tests/kfdtest/sp3/sp3.h deleted file mode 100644 index 513167d595..0000000000 --- a/projects/rocr-runtime/tests/kfdtest/sp3/sp3.h +++ /dev/null @@ -1,643 +0,0 @@ -/* - * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __SP3_H__ -#define __SP3_H__ - -#ifdef __cplusplus -extern "C" { -#endif - - -/// @file sp3.h -/// @brief sp3 API -#include - -// Export tags -#define SP3_EXPORT - - -/// @defgroup sp3main SP3 Main API -/// -/// Main API to assemble and disassemble SP3 shaders. -/// -/// @{ - - -/// Valid shader stages. -enum sp3_shtype { - SP3_SHTYPE_NONE = -1, - SP3_SHTYPE_PS = 0, - SP3_SHTYPE_VS = 1, - SP3_SHTYPE_GS = 2, - SP3_SHTYPE_ES = 3, - SP3_SHTYPE_HS = 4, - SP3_SHTYPE_LS = 5, - SP3_SHTYPE_CS = 6, -#ifdef NAVI10LITE_BUILD - SP3_SHTYPE_ACV = 7, -#endif -}; - -/// Assorted constants used by sp3 API. -enum sp3_count { - SP3_NUM_MRT = 8, ///< Maximum number of render targets supported. - SP3_NUM_STRM = 4, ///< Maximum number of streams supported. -}; - -/// Disassembly flags. Bitwise-OR flags to set options. -enum sp3_flag { - SP3DIS_NO_STATE = 0x01, ///< Do not include state header at top of shader. - SP3DIS_NO_BINARY = 0x02, ///< Do not include comments with raw binary microcode. - SP3DIS_COMMENTS = 0x04, ///< Do not include comments. - SP3DIS_NO_GPR_COUNT = 0x08, ///< Do not include GPR allocation counts. - SP3DIS_FORCEVALID = 0x10, ///< Force all bytes of microcode to be disassembled. - SP3DIS_NO_ASIC = 0x20, ///< Do not emit the asic header at top of shader. -}; - -/// Shader context. Contains no user-visible fields. -struct sp3_context; - -/// Memory object. Contains no user-visible fields. 
-struct sp3_vma; - -/// VM addresses are 64-bit and the address unit is 32 bits -typedef uint64_t sp3_vmaddr; - -/// Storage entry for register streams. -struct sp3_reg { - uint32_t index; ///< One of the MM aperture register addresses. - uint32_t value; ///< 32-bit register data. -}; - -/// Bits for a single instruction. -struct sp3_inst_bits { - uint32_t val[5]; ///< Largest single instruction in any backend is 5 dwords. -}; - -/// Wrapped shader metadata. -/// -/// After generation, shaders are encapsulated in sp3_shader structures. -/// -/// Those structures contain the shader binary, its register stream, constants and constant -/// buffers and metadata needed for SC compatibility. -/// -struct sp3_shader { - enum sp3_shtype type; ///< One of the SHTYPE_* constants. - uint32_t asic_int; ///< Internal ASIC index. Do not use. - char asic[0x100]; ///< ASIC name as a string ("RV870" etc). - uint32_t size; ///< Size of the compiled shader, in 32-bit words. - uint32_t nsgprs; ///< Number of scalar GPRs used. - uint32_t nvgprs; ///< Number of vector GPRs used. - uint32_t nsvgprs; ///< Number of shared vector GPRs used (only available in certain projects). - uint32_t naccvgprs; ///< Number of accumulator vector GPRs used (only available in certain projects). 
- uint32_t nsgprs_manual_alloc; - uint32_t nvgprs_manual_alloc; - uint32_t nsvgprs_manual_alloc; - uint32_t naccvgprs_manual_alloc; - uint32_t trap_present; - uint32_t user_sgpr_count; - uint32_t scratch_en; - uint32_t dispatch_draw_en; - uint32_t so_en; - uint32_t so_base0_en; - uint32_t so_base1_en; - uint32_t so_base2_en; - uint32_t so_base3_en; - uint32_t oc_lds_en; - uint32_t tg_size_en; - uint32_t tidig_comp_cnt; ///< Number of components(-1) enabled for thread id in group - uint32_t tgid_x_en; - uint32_t tgid_y_en; - uint32_t tgid_z_en; - uint32_t wave_cnt_en; - uint32_t primgen_en; - uint32_t pc_base_en; - uint32_t sgpr_scratch; - uint32_t sgpr_psvs_state; - uint32_t sgpr_gs2vs_offset; - uint32_t sgpr_so_write_index; - uint32_t sgpr_so_base_offset0; - uint32_t sgpr_so_base_offset1; - uint32_t sgpr_so_base_offset2; - uint32_t sgpr_so_base_offset3; - uint32_t sgpr_offchip_lds; - uint32_t sgpr_is_offchip; - uint32_t sgpr_ring_offset; - uint32_t sgpr_gs_wave_id; - uint32_t sgpr_global_wave_id; - uint32_t sgpr_tg_size; - uint32_t sgpr_tgid_x; - uint32_t sgpr_tgid_y; - uint32_t sgpr_tgid_z; - uint32_t sgpr_tf_base; - uint32_t sgpr_pc_base; - uint32_t sgpr_wave_cnt; - uint32_t wave_size; ///< Number of threads in a wavefront (only certain ASICs; 0 = don't care). - uint32_t pc_exports; ///< Range of parameters exported (if VS). - uint32_t pos_export; ///< Shader executes a position export (if VS). - uint32_t cb_exports; ///< Range of MRTs exported (if PS). - uint32_t mrtz_export_format;///< Export format of the mrtz export. - uint32_t z_export; ///< Shader executes a Z export (if PS). - uint32_t pops_en; ///< Shader is POPS (PS) - uint32_t pops_num_samples; ///< (PS) - uint32_t load_collision_waveid; ///< Shader sets load collision waveid (if PS). - uint32_t load_intrawave_collision; ///< Shader is in intrawave mode (if PS). - uint32_t stencil_test_export; ///< Shader exports stencil (if PS). - uint32_t stencil_op_export; ///< Shader exports stencil (if PS). 
- uint32_t kill_used; ///< Shader executes ALU KILL operations. - uint32_t cb_masks[SP3_NUM_MRT]; ///< Component masks for each MRT exported (if PS). - uint32_t emit_used; ///< EMIT opcodes used (if GS). - uint32_t covmask_export; ///< Shader exports coverage mask (if PS). - uint32_t mask_export; ///< Shader exports mask (if PS). - uint32_t strm_used[SP3_NUM_STRM]; ///< Streamout operations used (map). - uint32_t scratch_used; ///< Scratch SMX exports used. - uint32_t scratch_itemsize; ///< Scratch ring item size. - uint32_t reduction_used; ///< Reduction SMX exports used. - uint32_t ring_used; ///< ESGS/GSVS ring SMX exports used. - uint32_t ring_itemsize; ///< ESGS/GSVS ring item size (for ES/GS respectively). - uint32_t vertex_size[4]; ///< GSVS ring vertex size (for GS). - uint32_t mem_used; ///< Raw memory SMX exports used. - uint32_t rats_used; ///< Mask of RATs (UAVs) used - uint32_t group_size[3]; ///< Wavefront group size (for ELF files). - uint32_t alloc_lds; ///< Number of LDS bytes allocated for wave group. (translates to lds_size in CS and LS) - uint32_t *data; ///< Shader binary data. - uint32_t nregs; ///< Number of register writes in the stream. - uint64_t crc64; ///< CRC64 of compiled shader, may be used for identification/fingerprinting. - uint32_t crc32; ///< 32-bit CRC of compiled shader (based on crc64), may be used for identification/fingerprinting. - struct sp3_reg *regs; ///< Register writes (index-value pairs). - struct sp3_shader *merged_2nd_shader; ///< Merged es/gs, ls/hs shader, this points to start of the second shader (only certain ASICs). -}; - -/// Comment callback. -typedef const char *(*sp3_comment_cb)(void *, int); - - -/// Get version of the sp3 library. -/// -/// @return String containing the version number. -/// -SP3_EXPORT const char *sp3_version(void); - -/// Create a new sp3 context. -/// -/// @return A new context for use in assembling and disassembling shaders. Free with sp3_close(). 
-/// -SP3_EXPORT struct sp3_context *sp3_new(void); - -/// Set option for sp3. -/// -/// @param state sp3 context. -/// @param option Option name. Unknown options will raise an error. -/// @param value Option value. NULL is used to represent value-less options. -/// -/// Currently supported options: -/// -/// stdlib (string) -- absolute path to standard library files. May be a colon-separated list -/// of paths that will be used to search for stdlib files. Used by sp3_parse_library(). -/// -/// The following options are deprecated because they take integer arguments; you should use -/// sp3_set_option_int() for these settings going forward. They will continue to be accepted by -/// this API to support legacy users. -/// -/// Werror (boolean) -- indicates whether warnings should be treated as errors. -/// -/// wave_size (integer) -- sets the wave size being used by the draw calls that will be using -/// this shader. Ignored in certain ASICs. You may set this to 32, 64 or the special value 0 -/// to indicate no preference on wave size. The shader will be checked to ensure it is -/// compatible with the size specified here. -/// -/// omit_version (boolean) -- omit generation of the S_VERSION opcode. -/// -/// omit_code_end (boolean) -- omit generation of the S_CODE_END footer. -/// -/// allow_raw_bits (boolean) -- allow use of the raw_bits() function in sp3 shaders. This is a -/// dangerous option to allow in general so you must explicitly enable this option, otherwise -/// the raw_bits() function will always error out. -/// -SP3_EXPORT void sp3_set_option( - struct sp3_context *state, - const char *option, - const char *value); - -/// Set option for sp3. -/// -/// @param state sp3 context. -/// @param option Option name. Unknown options will raise an error. -/// @param value Option value. -/// -/// Currently supported options: -/// -/// Werror (boolean) -- indicates whether warnings should be treated as errors. 
-/// -/// wave_size (integer) -- sets the wave size being used by the draw calls that will be using -/// this shader. Ignored in certain ASICs. You may set this to 32, 64 or the special value 0 -/// to indicate no preference on wave size. The shader will be checked to ensure it is -/// compatible with the size specified here. -/// -/// omit_version (boolean) -- omit generation of the S_VERSION opcode. -/// -/// omit_code_end (boolean) -- omit generation of the S_CODE_END footer. -/// -/// allow_raw_bits (boolean) -- allow use of the raw_bits() function in sp3 shaders. This is a -/// dangerous option to allow in general so you must explicitly enable this option, otherwise -/// the raw_bits() function will always error out. -/// -/// secure_mode (boolean) -- run in secure mode. Disables macro language features in assembly -/// path including calls to custom functions. Useful if sp3 is used as a backend to a web-based -/// assembly tool. -/// -/// debug_encoding (boolean) -- if true, debug encoding selection logic for assembly. Only -/// supported in 10.4+ backends. -/// -/// no_vs_export_check (boolean) -- if true, disable VS export sanity check. Only supported in -/// 10.4+ backends. -/// -SP3_EXPORT void sp3_set_option_int( - struct sp3_context *state, - const char *option, - int32_t value); - -/// Parse a file into a context. -/// -/// Use sp3_compile to generate binary microcode after the shader is parsed. -/// -/// @param state Context to use for parsing. -/// @param file File to read. If NULL, parse from stdin. -/// -SP3_EXPORT void sp3_parse_file(struct sp3_context *state, const char *file); - -/// Parse a string into a context. -/// -/// Use sp3_compile to generate binary microcode after the shader is parsed. -/// -/// @param state Context to use for parsing. -/// @param string String to parse. -/// -SP3_EXPORT void sp3_parse_string(struct sp3_context *state, const char *string); - -/// Parse a file from the standard library into a context. 
-/// -/// Use sp3_compile to generate binary microcode after the shader is parsed. -/// -/// @param state Context to use for parsing. -/// @param name Path to the standard library; files in this directory are parsed. -/// -SP3_EXPORT void sp3_parse_library(struct sp3_context *state, const char *name); - -/// Call a sp3 function. -/// -SP3_EXPORT void sp3_call(struct sp3_context *state, const char *func); - -/// Compile a shader program that has been parsed into the context. -/// -/// @param state sp3 context. -/// @param cffunc Name of clause to call. By convention, this is "main". -/// @return A compiled and linked shader. Free memory with sp3_free_shader(). -/// -SP3_EXPORT struct sp3_shader *sp3_compile( - struct sp3_context *state, - const char *cffunc); - -/// Free a sp3_shader. -/// -/// @param sh Shader object to delete. -/// -SP3_EXPORT void sp3_free_shader(struct sp3_shader *sh); - -/// Get current ASIC name set for a context. -/// -/// @param state Context to query. -/// @return Name of ASIC. -/// -SP3_EXPORT const char *sp3_getasic(struct sp3_context *state); - -/// Set current ASIC name for a context. -/// -/// @param state Context to modify. -/// @param chip Case-insensitive string representing the ASIC to compile or disassemble for. -/// -SP3_EXPORT void sp3_setasic(struct sp3_context *state, const char *chip); - -/// Set global variable in context to an integer. -/// -SP3_EXPORT void sp3_set_param_int( - struct sp3_context *state, - const char *name, - int32_t value); - -/// Set global variable in context to an integer vector. -/// -SP3_EXPORT void sp3_set_param_intvec( - struct sp3_context *state, - const char *name, - uint32_t size, - const int32_t *value); - -/// Set global variable in context to a float. -/// -SP3_EXPORT void sp3_set_param_float( - struct sp3_context *state, - const char *name, - float value); - -/// Set global variable in context to a float vector. 
-/// -SP3_EXPORT void sp3_set_param_floatvec( - struct sp3_context *state, - const char *name, - uint32_t size, - const float *value); - -/// Set error message header. -/// -/// @param state Context to modify. -/// @param str Text to include in error message header. -/// -SP3_EXPORT void sp3_set_error_header(struct sp3_context *state, const char *str); - -/// Get ASIC metrics for the ASIC in current state. -/// -/// Used by ELF tools to fill in some CAL fields. -/// -/// @param state Context to query. -/// @param name Name of ASIC metric. -/// @return Value of ASIC metric. -/// -SP3_EXPORT int sp3_asicinfo(struct sp3_context *state, const char *name); - -/// Free a context allocated by sp3_new/open/parse. -/// -/// @param state Context to delete. -/// -SP3_EXPORT void sp3_close(struct sp3_context *state); - -/// Disassemble a shader. -/// -/// This call is likely to change to something that will take a filled sp3_shader structure -/// later on. -/// -/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC). -/// @param bin Memory map with the opcodes (see sp3-vm.h). -/// @param base Start of the shader in the memory map (in VM entries, i.e. 32-bit words). -/// @param name Same to give the disassembled shader. -/// @param shader_type One of the SHTYPE_* constants. -/// @param include Literal text to include in the CF clause (NULL includes nothing). -/// @param max_len Maximum length of CF clause. Matters if SP3DIS_FORCEVALID is set. -/// @param flags A bitmask of SP3DIS_* flags. -/// -/// @return Shader disassembly as a string. Free memory with sp3_free(). -/// -SP3_EXPORT char *sp3_disasm( - struct sp3_context *state, - struct sp3_vma *bin, - sp3_vmaddr base, - const char *name, - enum sp3_shtype shader_type, - const char *include, - uint32_t max_len, - uint32_t flags); - -/// Disassemble a single shader instruction. -/// -/// This call is likely to change to something that will take a filled sp3_shader structure -/// later on. 
-/// -/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC). -/// @param inst Pointer to dwords containing instruction (exact number of dwords required depends on instruction). -/// @param base Start of the shader in the memory map (in VM entries, i.e. 32-bit words). -/// @param addr Address of the instruction being disassembled (in VM entries, i.e. 32-bit words). -/// @param shader_type One of the SHTYPE_* constants. -/// @param flags A mask of SP3DIS_* flags. -/// -/// @return Shader disassembly as a string. Free memory with sp3_free(). -/// -SP3_EXPORT char *sp3_disasm_inst( - struct sp3_context *state, - const struct sp3_inst_bits *inst, - sp3_vmaddr base, - sp3_vmaddr addr, - enum sp3_shtype shader_type, - uint32_t flags); - -/// Parse a register stream. -/// -/// Can be called before sp3_disasm to preset things like ALU, boolean and loop constants. -/// -/// This call is likely to merge with sp3_disasm later on. -/// -/// @param state sp3 context to fill with state. -/// @param nregs Number of register entries. -/// @param regs Register stream to parse. -/// @param shader_type One of the SHTYPE_* constants. -/// -SP3_EXPORT void sp3_setregs( - struct sp3_context *state, - uint32_t nregs, - const struct sp3_reg *regs, - enum sp3_shtype shader_type); - - -/// Set shader comments -/// -/// @param state sp3 context. -/// @param map Map of comments (0 for no comment, other values will be passed to the callback). -/// @param f_top Callback returning comment to place above the opcode. -/// @param f_right Callback returning comment to place to the right of the opcode. -/// @param ctx Void pointer to pass to comment callbacks. 
-/// -SP3_EXPORT void sp3_setcomments( - struct sp3_context *state, - struct sp3_vma *map, - sp3_comment_cb f_top, - sp3_comment_cb f_right, - void *ctx); - -/// Set alternate shader entry points -/// -/// Used for disassembly; this marks an additional location in memory -/// (besides the start address) where shader code may be found. Generally -/// required for jump tables and any case where the shader may perform -/// indirect jumps to ensure that disassembly locates all shader -/// instructions. -/// -/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC). -/// @param addr Address of the instruction being disassembled (in VM entries, i.e. 32-bit words). -/// -SP3_EXPORT void sp3_setentrypoint( - struct sp3_context *state, - sp3_vmaddr addr); - -/// Clear alternate shader entry points. -/// -/// Clear all entry points previously set with sp3_setentrypoint. -/// -/// @param state sp3 context (use sp3_new to allocate and sp3_setasic to set ASIC). -/// -SP3_EXPORT void sp3_clearentrypoints(struct sp3_context *state); - -/// Free memory allocated by sp3. -/// -/// Windows DLLs that allocate memory have to free it. This function -/// should be used to free the result of sp3_disasm, sp3_compile etc. -/// -SP3_EXPORT void sp3_free(void *ptr); - -/// SP3 API to merge two shaders given file names as input. -/// -SP3_EXPORT struct sp3_shader* sp3_merge_shaders( - struct sp3_context *pointer, - const char *first_file, - const char *second_file); - -/// SP3 API to merge two shaders given shader strings as input. -/// -SP3_EXPORT struct sp3_shader* sp3_merge_shader_strings( - struct sp3_context *pointer, - const char *first_string, - const char *second_string); - - -/// @} - - -/// @defgroup sp3vm SP3 Memory Objects -/// -/// The VM API is used to manage virtual memory maps. Those maps are used for binary storage -/// for disassembly, as they can naturally mirror the GPU's memory map (so no register -/// translation is needed). 
-/// -/// @{ - -/// Callback function that will fill a VMA on demand -/// -/// The VMA to be filled will be specified through the request address. -/// The callback should fill the VMA using sp3_vm_write calls. -/// -typedef void (* sp3_vmfill)(struct sp3_vma *vm, sp3_vmaddr addr, void *ctx); - -/// Create a new VM that is empty. -/// -/// Free the object with sp3_vm_free(). -/// -/// @return New VM object. -/// -SP3_EXPORT -struct sp3_vma *sp3_vm_new(void); - -/// Create a new VM that has a sp3_vmfill callback. -/// -/// Free the object with sp3_vm_free(). -/// -/// @param fill Function used to populate data in VM. The function will be pass the new VM object, the address and a context. -/// @param ctx User-specified context. Passed to the fill function and not used by sp3 itself. -/// @return New VM object. -/// -SP3_EXPORT -struct sp3_vma *sp3_vm_new_fill(sp3_vmfill fill, void *ctx); - -/// Create a new VM from an array of words. -/// -/// Free the object with sp3_vm_free(). -/// -/// @param base VM address to load array at. -/// @param len Number of 32-bit words in the array. -/// @param data Pointer to the array. -/// @return New VM object. -/// -SP3_EXPORT -struct sp3_vma *sp3_vm_new_ptr(sp3_vmaddr base, sp3_vmaddr len, const uint32_t *data); - -/// Find a VMA, optionally adding it. -/// -/// @param vm VM to search in. -/// @param addr Address to search for. -/// @param add Flag indicating whether a failure should result in adding a new VMA. -/// @return VM object matching the specified address. -/// -SP3_EXPORT -struct sp3_vma *sp3_vm_find(struct sp3_vma *vm, sp3_vmaddr addr, uint32_t add); - -/// Write a word to a VM. -/// -/// @param vm VM to write. -/// @param addr Address to write. -/// @param val 32-bits of data to write. -/// -SP3_EXPORT -void sp3_vm_write(struct sp3_vma *vm, sp3_vmaddr addr, uint32_t val); - -/// Read a word from a VM. -/// -/// @param vm VM to read. -/// @param addr Address to read. -/// @return 32-bits of data at specified address. 
-/// -SP3_EXPORT -uint32_t sp3_vm_read(struct sp3_vma *vm, sp3_vmaddr addr); - -/// Probe VM for presence. -/// -/// @param vm VM to probe. -/// @param addr Address to search for. -/// @return 1 if the specified address is backed in the VM, 0 otherwise. -/// -SP3_EXPORT -int sp3_vm_present(struct sp3_vma *vm, sp3_vmaddr addr); - -/// Return base address of VM. -/// -/// @param vm VM to query. -/// @return Base address. -/// -SP3_EXPORT -sp3_vmaddr sp3_vm_base(struct sp3_vma *vm); - -/// Return next VM. -/// -/// @param vm VM to query. -/// @return Next VM in list. -/// -SP3_EXPORT -struct sp3_vma *sp3_vm_next(struct sp3_vma *vm); - -/// Free a VM and all its storage. -/// -/// Use this function to free memory allocated by sp3_vm_new, sp3_vm_new_fill and -/// sp3_vm_new_ptr. -/// -/// @param vm VM to free. -/// -SP3_EXPORT -void sp3_vm_free(struct sp3_vma *vm); - - -/// @} - - -#ifdef __cplusplus -} -#endif - - -#endif /* __SP3_H__ */ diff --git a/projects/rocr-runtime/tests/kfdtest/src/Assemble.cpp b/projects/rocr-runtime/tests/kfdtest/src/Assemble.cpp new file mode 100644 index 0000000000..cf4b9e7de0 --- /dev/null +++ b/projects/rocr-runtime/tests/kfdtest/src/Assemble.cpp @@ -0,0 +1,379 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2022, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. 
+// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. 
+// +//////////////////////////////////////////////////////////////////////////////// + +/** + * Self-contained assembler that uses the LLVM MC API to assemble AMDGCN + * instructions + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if LLVM_VERSION_MAJOR > 13 +#include +#else +#include +#endif + +#include +#include "OSWrapper.hpp" +#include "Assemble.hpp" + +using namespace llvm; + +Assembler::Assembler(const uint32_t Gfxv) { + SetTargetAsic(Gfxv); + TextData = nullptr; + TextSize = 0; + LLVMInit(); +} + +Assembler::~Assembler() { + FlushText(); + llvm_shutdown(); +} + +const char* Assembler::GetInstrStream() { + return TextData; +} + +const size_t Assembler::GetInstrStreamSize() { + return TextSize; +} + +int Assembler::CopyInstrStream(char* OutBuf, const size_t BufSize) { + if (TextSize > BufSize) + return -2; + + std::copy(TextData, TextData + TextSize, OutBuf); + return 0; +} + +const char* Assembler::GetTargetAsic() { + return MCPU; +} + +/** + * Set MCPU via GFX Version from Thunk + * LLVM Target IDs use decimal for Maj/Min, hex for Step + */ +void Assembler::SetTargetAsic(const uint32_t Gfxv) { + const uint8_t Major = (Gfxv >> 16) & 0xff; + const uint8_t Minor = (Gfxv >> 8) & 0xff; + const uint8_t Step = Gfxv & 0xff; + + snprintf(MCPU, ASM_MCPU_LEN, "gfx%d%d%x", Major, Minor, Step); +} + +/** + * Initialize LLVM targets and assembly printers/parsers + */ +void Assembler::LLVMInit() { + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeAMDGPUTargetMC(); + LLVMInitializeAMDGPUAsmParser(); +} + +/** + * Flush/reset TextData and TextSize to initial state + */ +void Assembler::FlushText() { + if (TextData) + delete[] TextData; + TextData = nullptr; + TextSize = 0; +} + +/** + * Print hex of ELF object to stdout (debug) + */ +void Assembler::PrintELFHex(const std::string Data) { + outs() << "ASM Info: assembled ELF 
hex data (length " << Data.length() << "):\n"; + outs() << "0x00:\t"; + for (size_t i = 0; i < Data.length(); ++i) { + char c = Data[i]; + outs() << format_hex(static_cast(c), 4); + if ((i+1) % 16 == 0) + outs() << "\n" << format_hex(i+1, 4) << ":\t"; + else + outs() << " "; + } + outs() << "\n"; +} + +/** + * Print hex of raw instruction stream to stdout (debug) + */ +void Assembler::PrintTextHex() { + outs() << "ASM Info: assembled .text hex data (length " << TextSize << "):\n"; + outs() << "0x00:\t"; + for (size_t i = 0; i < TextSize; i++) { + outs() << format_hex(static_cast(TextData[i]), 4); + if ((i+1) % 16 == 0) + outs() << "\n" << format_hex(i+1, 4) << ":\t"; + else + outs() << " "; + } + outs() << "\n"; +} + +/** + * Extract raw instruction stream from .text section in ELF object + * + * @param RawData Raw C string of ELF object + * @return 0 on success + */ +int Assembler::ExtractELFText(const char* RawData) { + const Elf64_Ehdr* ElfHeader; + const Elf64_Shdr* SectHeader; + const Elf64_Shdr* SectStrTable; + const char* SectStrAddr; + unsigned NumSects, SectIdx; + + if (!(ElfHeader = reinterpret_cast(RawData))) { + outs() << "ASM Error: elf data is invalid or corrupted\n"; + return -1; + } + if (ElfHeader->e_ident[EI_CLASS] != ELFCLASS64) { + outs() << "ASM Error: elf object must be of 64-bit type\n"; + return -1; + } + + SectHeader = reinterpret_cast(RawData + ElfHeader->e_shoff); + SectStrTable = &SectHeader[ElfHeader->e_shstrndx]; + SectStrAddr = static_cast(RawData + SectStrTable->sh_offset); + + // Loop through sections, break on .text + NumSects = ElfHeader->e_shnum; + for (SectIdx = 0; SectIdx < NumSects; SectIdx++) { + std::string SectName = std::string(SectStrAddr + SectHeader[SectIdx].sh_name); + if (SectName == std::string(".text")) { + TextSize = SectHeader[SectIdx].sh_size; + TextData = new char[TextSize]; + memcpy(TextData, RawData + SectHeader[SectIdx].sh_offset, TextSize); + break; + } + } + + if (SectIdx >= NumSects) { + outs() << "ASM 
Error: couldn't locate .text section\n"; + return -1; + } + + return 0; +} + +/** + * Assemble shader, fill member vars, and copy to output buffer + * + * @param AssemblySource Shader source represented as a raw C string + * @param OutBuf Raw instruction stream output buffer + * @param BufSize Size of OutBuf (defaults to PAGE_SIZE) + * @return Value of RunAssemble() (0 on success) + */ +int Assembler::RunAssembleBuf(const char* const AssemblySource, char* OutBuf, + const size_t BufSize) { + int ret = RunAssemble(AssemblySource); + return ret ? ret : CopyInstrStream(OutBuf, BufSize); +} + +/** + * Assemble shader and fill member vars + * + * @param AssemblySource Shader source represented as a raw C string + * @return 0 on success + */ +int Assembler::RunAssemble(const char* const AssemblySource) { + // Ensure target ASIC has been set + if (!MCPU) { + outs() << "ASM Error: target asic is uninitialized\n"; + return -1; + } + + // Delete TextData for any previous runs + FlushText(); + +#if 0 + outs() << "ASM Info: running assembly for target: " << MCPU << "\n"; + outs() << "ASM Info: source:\n"; + outs() << AssemblySource << "\n"; +#endif + + // Initialize MCOptions and target triple + const MCTargetOptions MCOptions; + Triple TheTriple; + + const Target* TheTarget = + TargetRegistry::lookupTarget(ArchName, TheTriple, Error); + if (!TheTarget) { + outs() << Error; + return -1; + } + + TheTriple.setArchName(ArchName); + TheTriple.setVendorName(VendorName); + TheTriple.setOSName(OSName); + + TripleName = TheTriple.getTriple(); + TheTriple.setTriple(Triple::normalize(TripleName)); + + // Create MemoryBuffer for assembly source + StringRef AssemblyRef(AssemblySource); + std::unique_ptr BufferPtr = + MemoryBuffer::getMemBuffer(AssemblyRef, "", false); + if (!BufferPtr->getBufferSize()) { + outs() << "ASM Error: assembly source is empty\n"; + return -1; + } + + // Instantiate SrcMgr and transfer BufferPtr ownership + SourceMgr SrcMgr; + 
SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc()); + + // Initialize MC interfaces and base class objects + std::unique_ptr MRI( + TheTarget->createMCRegInfo(TripleName)); + if (!MRI) { + outs() << "ASM Error: no register info for target " << MCPU << "\n"; + return -1; + } +#if LLVM_VERSION_MAJOR > 9 + std::unique_ptr MAI( + TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); +#else + std::unique_ptr MAI( + TheTarget->createMCAsmInfo(*MRI, TripleName)); +#endif + if (!MAI) { + outs() << "ASM Error: no assembly info for target " << MCPU << "\n"; + return -1; + } + std::unique_ptr MCII( + TheTarget->createMCInstrInfo()); + if (!MCII) { + outs() << "ASM Error: no instruction info for target " << MCPU << "\n"; + return -1; + } + std::unique_ptr STI( + TheTarget->createMCSubtargetInfo(TripleName, MCPU, std::string())); + if (!STI || !STI->isCPUStringValid(MCPU)) { + outs() << "ASM Error: no subtarget info for target " << MCPU << "\n"; + return -1; + } + + // Set up the MCContext for creating symbols and MCExpr's +#if LLVM_VERSION_MAJOR > 12 + MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr, &MCOptions); +#else + MCObjectFileInfo MOFI; + MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr, &MCOptions); + MOFI.InitMCObjectFileInfo(TheTriple, true, Ctx); +#endif + + // Finalize setup for output object code stream + std::string Data; + std::unique_ptr DataStream(std::make_unique(Data)); + std::unique_ptr BOS(std::make_unique(*DataStream)); + raw_pwrite_stream* OS = BOS.get(); + +#if LLVM_VERSION_MAJOR > 14 + MCCodeEmitter* CE = TheTarget->createMCCodeEmitter(*MCII, Ctx); +#else + MCCodeEmitter* CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx); +#endif + MCAsmBackend* MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions); + + std::unique_ptr Streamer(TheTarget->createMCObjectStreamer( + TheTriple, Ctx, + std::unique_ptr(MAB), MAB->createObjectWriter(*OS), + std::unique_ptr(CE), *STI, MCOptions.MCRelaxAll, + 
MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ false)); + + std::unique_ptr Parser( + createMCAsmParser(SrcMgr, Ctx, *Streamer, *MAI)); + + // Set parser to target parser and run + std::unique_ptr TAP( + TheTarget->createMCAsmParser(*STI, *Parser, *MCII, MCOptions)); + if (!TAP) { + outs() << "ASM Error: no assembly parsing support for target " << MCPU << "\n"; + return -1; + } + Parser->setTargetParser(*TAP); + + if (Parser->Run(true)) { + outs() << "ASM Error: assembly parser failed\n"; + return -1; + } + + BOS.reset(); + DataStream->flush(); + + int ret = ExtractELFText(Data.data()); + if (ret < 0 || !TextData) { + outs() << "ASM Error: .text extraction failed\n"; + return ret; + } + +#if 0 + PrintELFHex(Data); + PrintTextHex(); +#endif + + return 0; +} diff --git a/projects/rocr-runtime/tests/kfdtest/src/Assemble.hpp b/projects/rocr-runtime/tests/kfdtest/src/Assemble.hpp new file mode 100644 index 0000000000..46fb946a84 --- /dev/null +++ b/projects/rocr-runtime/tests/kfdtest/src/Assemble.hpp @@ -0,0 +1,86 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// The University of Illinois/NCSA +// Open Source License (NCSA) +// +// Copyright (c) 2022, Advanced Micro Devices, Inc. All rights reserved. +// +// Developed by: +// +// AMD Research and AMD HSA Software Development +// +// Advanced Micro Devices, Inc. 
+// +// www.amd.com +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal with the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimers. +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimers in +// the documentation and/or other materials provided with the distribution. +// - Neither the names of Advanced Micro Devices, Inc, +// nor the names of its contributors may be used to endorse or promote +// products derived from this Software without specific prior written +// permission. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS WITH THE SOFTWARE. 
+// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef _ASSEMBLE_H_ +#define _ASSEMBLE_H_ + +#include "OSWrapper.hpp" + +#define ASM_MCPU_LEN 16 + +class Assembler { + private: + const char* ArchName = "amdgcn"; + const char* VendorName = "amd"; + const char* OSName = "amdhsa"; + char MCPU[ASM_MCPU_LEN]; + + std::string TripleName; + std::string Error; + + char* TextData; + size_t TextSize; + + void SetTargetAsic(const uint32_t Gfxv); + + void LLVMInit(); + void FlushText(); + void PrintELFHex(const std::string Data); + int ExtractELFText(const char* RawData); + + public: + Assembler(const uint32_t Gfxv); + ~Assembler(); + + void PrintTextHex(); + const char* GetTargetAsic(); + + const char* GetInstrStream(); + const size_t GetInstrStreamSize(); + int CopyInstrStream(char* OutBuf, const size_t BufSize = PAGE_SIZE); + + int RunAssemble(const char* const AssemblySource); + int RunAssembleBuf(const char* const AssemblySource, char* OutBuf, + const size_t BufSize = PAGE_SIZE); +}; + +#endif // _ASSEMBLE_H_ diff --git a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator.cpp b/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator.cpp deleted file mode 100644 index 3e69b5f9df..0000000000 --- a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#include "IsaGenerator.hpp" - -#include -#include - -#include "IsaGenerator_Gfx72.hpp" -#include "IsaGenerator_Gfx8.hpp" -#include "IsaGenerator_Gfx9.hpp" -#include "IsaGenerator_Gfx10.hpp" -#include "IsaGenerator_Aldebaran.hpp" - -#include "GoogleTestExtension.hpp" - -#include "sp3.h" - -const std::string IsaGenerator::ADDRESS_WATCH_SP3( - "var REG_TRAPSTS_EXCP_MASK = 0x000001ff\n" - "var WAVE_COUNT_OFFSET = 12\n" - "var TMA_CYCLE_OFFSET = 16\n" - "\n" - "/*\n" - " * ttmp[0:1] -- The ISA address that triggered this trap handler\n" - " * ttmp[10:11] -- The TMA user provided, used to store the debug info in this shader\n" - " * v[10:14] ttmp[7:8] -- temp use inside this shader\n" - " * s5 -- store the counts that this trap been triggered\n" - " * Each time when the trap is triggered , this shader will write\n" - " * ttmp[0] : ttmp[1] : Trap_Status : [reserved]\n" - " * to TMA + (trap count * TMA_CYCLE_OFFSET)\n" - " * The TMA + WAVE_COUNT_OFFSET(the first [reserved] address)\n" - " * used to store the total triggered trap count.\n" - " */\n" - "shader main\n" - "\n" - " asic(VI)\n" - "\n" - " type(CS)\n" - " v_mov_b32 v10, ttmp10\n" - " v_mov_b32 v11, ttmp11\n" - " s_mov_b32 ttmp7, s5\n" - " s_mulk_i32 ttmp7, TMA_CYCLE_OFFSET\n" - " s_addk_i32 s5, 1\n" - " v_mov_b32 v12, ttmp0\n" - " v_add_u32 v10, vcc, ttmp7, v10\n" - " flat_store_dword v[10,11], v12 slc glc\n" - " v_mov_b32 v12, ttmp1\n" - " v_add_u32 v10, vcc, 4, v10\n" - " flat_store_dword v[10,11], v12 slc glc\n" - " s_getreg_b32 ttmp8, hwreg(HW_REG_TRAPSTS)\n" - " s_and_b32 ttmp8, ttmp8, REG_TRAPSTS_EXCP_MASK\n" - " v_mov_b32 v12, ttmp8\n" - " v_add_u32 v10, vcc, 4, v10\n" - " flat_store_dword v[10,11], v12 glc\n" - " v_mov_b32 v10, ttmp10\n" - " v_add_u32 v10, vcc, WAVE_COUNT_OFFSET, v10\n" - " v_mov_b32 v13, 1\n" - " flat_atomic_add v14, v[10:11], v13 slc glc\n" - " s_and_b32 ttmp1, ttmp1, 0xffff\n" - " s_rfe_b64 [ttmp0,ttmp1]\n" - "end\n" -); - -IsaGenerator* IsaGenerator::Create(unsigned int 
familyId) { - switch (familyId) { - case FAMILY_CI: - case FAMILY_KV: - return new IsaGenerator_Gfx72; - case FAMILY_VI: - case FAMILY_CZ: - return new IsaGenerator_Gfx8; - case FAMILY_AI: - case FAMILY_RV: - case FAMILY_AR: - return new IsaGenerator_Gfx9; - case FAMILY_AL: - return new IsaGenerator_Aldbrn; - case FAMILY_NV: - return new IsaGenerator_Gfx10; - - default: - LOG() << "Error: Invalid ISA" << std::endl; - return NULL; - } -} - -void IsaGenerator::GetAwTrapHandler(HsaMemoryBuffer& rBuf) { - CompileShader(ADDRESS_WATCH_SP3.c_str(), "main", rBuf); -} - -void IsaGenerator::CompileShader(const char* shaderCode, const char* shaderName, HsaMemoryBuffer& rBuf) { - sp3_context* pSp3 = sp3_new(); - sp3_setasic(pSp3, GetAsicName().c_str()); - sp3_parse_string(pSp3, shaderCode); - sp3_shader* pShader = sp3_compile(pSp3, shaderName); - - std::copy(pShader->data, pShader->data + pShader->size, rBuf.As()); - sp3_free_shader(pShader); - - /** Inside this close function, there is an unknown reason of free memory not used by compiler. - * Comment out this as a workaround. System will do the garbage collection after this - * application is closed. - */ - // sp3_close(pSp3); -} diff --git a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator.hpp b/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator.hpp deleted file mode 100644 index 4b9c49ad9e..0000000000 --- a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator.hpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#ifndef _ISAGENERATOR_H_ -#define _ISAGENERATOR_H_ - -#include "KFDTestUtil.hpp" - -/* isa generation class - interface */ -class IsaGenerator { - public: - static IsaGenerator* Create(unsigned int familyId); - - virtual ~IsaGenerator() {} - - virtual void GetNoopIsa(HsaMemoryBuffer& rBuf) = 0; - virtual void GetCopyDwordIsa(HsaMemoryBuffer& rBuf) = 0; - virtual void GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf) = 0; - virtual void GetAtomicIncIsa(HsaMemoryBuffer& rBuf) = 0; - virtual void GetCwsrTrapHandler(HsaMemoryBuffer& rBuf) {} - virtual void GetAwTrapHandler(HsaMemoryBuffer& rBuf); - - void CompileShader(const char* shaderCode, const char* shaderName, HsaMemoryBuffer& rBuf); - - protected: - virtual const std::string& GetAsicName() = 0; - - private: - static const std::string ADDRESS_WATCH_SP3; -}; - -#endif // _ISAGENERATOR_H_ diff --git a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Aldebaran.cpp b/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Aldebaran.cpp deleted file mode 100644 index 2c377f9111..0000000000 --- a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Aldebaran.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (C) 2020 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "IsaGenerator_Aldebaran.hpp" - -#include -#include - -const std::string IsaGenerator_Aldbrn::ASIC_NAME = "ALDEBARAN"; - -/* The binaries are generated from following ISA */ -#if 0 -/* flat_atomic_inc will not support by some PCIE, use flat_atomic_add instead */ -shader atomic_add -asic(ALDEBARAN) -type(CS) - v_mov_b32 v0, s0 - v_mov_b32 v1, s1 - v_mov_b32 v2, 1 - flat_atomic_add v3, v[0:1], v2 slc glc scc - s_waitcnt 0 - s_endpgm -end - -shader copy_dword -asic(ALDEBARAN) -type(CS) -/* copy the parameters from scalar registers to vector registers */ - v_mov_b32 v0, s0 - v_mov_b32 v1, s1 - v_mov_b32 v2, s2 - v_mov_b32 v3, s3 -/* copy a dword between the passed addresses */ - flat_load_dword v4, v[0:1] slc glc - s_waitcnt 0 - flat_store_dword v[2:3], v4 slc glc - s_endpgm -end - -shader main -asic(ALDEBARAN) -type(CS) -loop: - s_branch loop - s_endpgm -end - - -#endif - -const uint32_t IsaGenerator_Aldbrn::NOOP_ISA[] = { - 0xbf810000 -}; - -const uint32_t IsaGenerator_Aldbrn::COPY_DWORD_ISA[] = { - 0x7e000200, 0x7e020201, - 0x7e040202, 0x7e060203, - 0xdc530000, 0x047f0000, - 0xbf8c0000, 0xdc730000, - 0x007f0402, 0xbf810000 -}; - -const uint32_t IsaGenerator_Aldbrn::INFINITE_LOOP_ISA[] = { - 0xbf82ffff, 0xbf810000 -}; - -const uint32_t IsaGenerator_Aldbrn::ATOMIC_ADD_ISA[] = { - 0x7e000200, 0x7e020201, - 0x7e040281, 0xdf0b0000, - 0x037f0200, 0xbf8c0000, - 0xbf810000, 0x00000000 -}; - -void IsaGenerator_Aldbrn::GetNoopIsa(HsaMemoryBuffer& rBuf) { - 
std::copy(NOOP_ISA, NOOP_ISA+ARRAY_SIZE(NOOP_ISA), rBuf.As()); -} - -void IsaGenerator_Aldbrn::GetCopyDwordIsa(HsaMemoryBuffer& rBuf) { - std::copy(COPY_DWORD_ISA, COPY_DWORD_ISA+ARRAY_SIZE(COPY_DWORD_ISA), rBuf.As()); -} - -void IsaGenerator_Aldbrn::GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf) { - std::copy(INFINITE_LOOP_ISA, INFINITE_LOOP_ISA+ARRAY_SIZE(INFINITE_LOOP_ISA), rBuf.As()); -} - -void IsaGenerator_Aldbrn::GetAtomicIncIsa(HsaMemoryBuffer& rBuf) { - std::copy(ATOMIC_ADD_ISA, ATOMIC_ADD_ISA+ARRAY_SIZE(ATOMIC_ADD_ISA), rBuf.As()); -} - -const std::string& IsaGenerator_Aldbrn::GetAsicName() { - return ASIC_NAME; -} - diff --git a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx10.cpp b/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx10.cpp deleted file mode 100644 index d8d33086e5..0000000000 --- a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx10.cpp +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (C) 2019 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "IsaGenerator_Gfx10.hpp" - -#include -#include - -/* The binaries are generated from following ISA */ -const std::string IsaGenerator_Gfx10::ASIC_NAME = "GFX10"; -#if 0 -static const char * atomic_add = \ -"\ -shader atomic_add \n\ -asic(GFX10) \n\ -wave_size(32) \n\ -type(CS) \n\ - v_mov_b32 v0, s0 \n\ - v_mov_b32 v1, s1 \n\ - v_mov_b32 v2, 1 \n\ - flat_atomic_add v3, v[0:1], v2 slc glc \n\ - s_waitcnt 0 \n\ - s_endpgm \n\ -end \n\ -"; - -static const char * copy_dword = \ -"\ -shader copy_dword \n\ -asic(GFX10) \n\ -wave_size(32) \n\ -type(CS) \n\ - v_mov_b32 v0, s0 \n\ - v_mov_b32 v1, s1 \n\ - v_mov_b32 v2, s2 \n\ - v_mov_b32 v3, s3 \n\ - flat_load_dword v4, v[0:1] slc glc \n\ - s_waitcnt 0 \n\ - flat_store_dword v[2:3], v4 slc glc \n\ - s_endpgm \n\ -end \n\ -"; - -static const char * loop= \ -"\ -shader loop \n\ -asic(GFX10) \n\ -type(CS) \n\ -wave_size(32) \n\ -loop: \n\ - s_branch loop \n\ - s_endpgm \n\ -end \n\ -"; - -static const char * noop= \ -"\ -shader noop \n\ -asic(GFX10) \n\ -type(CS) \n\ -wave_size(32) \n\ - s_endpgm \n\ -end \n\ -"; -#endif - -const uint32_t IsaGenerator_Gfx10::NOOP_ISA[] = { -0xb0804004, 0xbf810000, -0xbf9f0000, 0xbf9f0000, -0xbf9f0000, 0xbf9f0000, -0xbf9f0000 -}; - -const uint32_t IsaGenerator_Gfx10::COPY_DWORD_ISA[] = { -0xb0804004, 0x7e000200, -0x7e020201, 0x7e040202, -0x7e060203, 0xdc330000, -0x47d0000, 0xbf8c0000, -0xdc730000, 0x7d0402, -0xbf810000, 0xbf9f0000, -0xbf9f0000, 0xbf9f0000, -0xbf9f0000, 0xbf9f0000 -}; - -const uint32_t IsaGenerator_Gfx10::INFINITE_LOOP_ISA[] = { -0xbf82ffff, 0xb0804004, -0xbf810000, 0xbf9f0000, -0xbf9f0000, 0xbf9f0000, -0xbf9f0000, 0xbf9f0000 -}; - -const uint32_t IsaGenerator_Gfx10::ATOMIC_ADD_ISA[] = { 
-0xb0804004, 0x7e000200, -0x7e020201, 0x7e040281, -0xdccb0000, 0x37d0200, -0xbf8c0000, 0xbf810000, -0xbf9f0000, 0xbf9f0000, -0xbf9f0000, 0xbf9f0000, -0xbf9f0000 -}; - - -void IsaGenerator_Gfx10::GetNoopIsa(HsaMemoryBuffer& rBuf) { - std::copy(NOOP_ISA, NOOP_ISA+ARRAY_SIZE(NOOP_ISA), rBuf.As()); -} - -void IsaGenerator_Gfx10::GetCopyDwordIsa(HsaMemoryBuffer& rBuf) { - std::copy(COPY_DWORD_ISA, COPY_DWORD_ISA+ARRAY_SIZE(COPY_DWORD_ISA), rBuf.As()); -} - -void IsaGenerator_Gfx10::GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf) { - std::copy(INFINITE_LOOP_ISA, INFINITE_LOOP_ISA+ARRAY_SIZE(INFINITE_LOOP_ISA), rBuf.As()); -} - -void IsaGenerator_Gfx10::GetAtomicIncIsa(HsaMemoryBuffer& rBuf) { - std::copy(ATOMIC_ADD_ISA, ATOMIC_ADD_ISA+ARRAY_SIZE(ATOMIC_ADD_ISA), rBuf.As()); -} - -const std::string& IsaGenerator_Gfx10::GetAsicName() { - return ASIC_NAME; -} - diff --git a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx72.cpp b/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx72.cpp deleted file mode 100644 index e0d98fb5c8..0000000000 --- a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx72.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "IsaGenerator_Gfx72.hpp" - -#include -#include - -const std::string IsaGenerator_Gfx72::ASIC_NAME = "CI"; - -const uint32_t IsaGenerator_Gfx72::NOOP_ISA[] = { - 0xbf810000 // S_ENDPGM -}; - -/* The below arrays are filled with hex values in order not to reference - * proprietary header files, but we still leave the code here for future - * reference. - */ -#if 0 -const uint32_t IsaGenerator_Gfx72::COPY_DWORD_ISA[] = { - (63u << SQ_VOP1__ENCODING__SHIFT) | (0 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (0 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v0, s0 (VOP1) - (63u << SQ_VOP1__ENCODING__SHIFT) | (1 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (1 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v1, s1 (VOP1) - (63u << SQ_VOP1__ENCODING__SHIFT) | (2 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (2 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v2, s2 (VOP1) - (63u << SQ_VOP1__ENCODING__SHIFT) | (3 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (3 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v3, s3 (VOP1) - - (55u << SQ_FLAT_0__ENCODING__SHIFT) | (SQ_FLAT_LOAD_DWORD << SQ_FLAT_0__OP__SHIFT) | (1 << SQ_FLAT_0__SLC__SHIFT) | (1 << SQ_FLAT_0__GLC__SHIFT)/*(3 << 16)*/, // SQ_FLAT_0, flat_load_dword, slc = 1, glc = 1 (FLAT_0) - (4u << SQ_FLAT_1__VDST__SHIFT) | (0 << SQ_FLAT_1__ADDR__SHIFT), // ADDR = V0:V1, VDST = V4 (FLAT_1) - - (383u << SQ_SOPP__ENCODING__SHIFT) | (SQ_S_WAITCNT << SQ_SOPP__OP__SHIFT) | (0 << 
SQ_SOPP__SIMM16__SHIFT), // s_waitcnt 0 (SOPP) - - (55u << SQ_FLAT_0__ENCODING__SHIFT) | (SQ_FLAT_STORE_DWORD << SQ_FLAT_0__OP__SHIFT) | (1 << SQ_FLAT_0__SLC__SHIFT) | (1 << SQ_FLAT_0__GLC__SHIFT), // SQ_FLAT_0, flat_store_dword, slc = 1, glc = 1 (FLAT_0) - (4u << SQ_FLAT_1__DATA__SHIFT) | (2 << SQ_FLAT_1__ADDR__SHIFT), // ADDR = V2:V3, DATA = V4 (FLAT_1) - - 0xBF810000u // s_endpgm, note that we rely on the implicit s_waitcnt 0,0,0 -}; - -const uint32_t IsaGenerator_Gfx72::INFINITE_LOOP_ISA[] = { - (0x17F << SQ_SOPP__ENCODING__SHIFT) | (SQ_S_BRANCH << SQ_SOPP__OP__SHIFT) | ( (const uint32_t)-1 << SQ_SOPP__SIMM16__SHIFT), // s_branch -1 (PC <- PC + SIMM*4)+4 - 0xBF810000u // S_ENDPGM -}; - -const uint32_t IsaGenerator_Gfx72::ATOMIC_INC_ISA[] = { - (63u << SQ_VOP1__ENCODING__SHIFT) | (0 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (0 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v0, s0 (VOP1) - (63u << SQ_VOP1__ENCODING__SHIFT) | (1 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (1 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v1, s1 (VOP1) - (63u << SQ_VOP1__ENCODING__SHIFT) | (2 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (0xC1 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 0xFFFFFFFF, s2 (VOP1) - - (55u << SQ_FLAT_0__ENCODING__SHIFT) | (SQ_FLAT_ATOMIC_INC << SQ_FLAT_0__OP__SHIFT) | (1 << SQ_FLAT_0__SLC__SHIFT) | (0 << SQ_FLAT_0__GLC__SHIFT), // SQ_FLAT_0, flat_atomic_inc, slc = 1, glc = 0 (FLAT_0) - (3u << SQ_FLAT_1__VDST__SHIFT) | (2u << SQ_FLAT_1__DATA__SHIFT) | (0 << SQ_FLAT_1__ADDR__SHIFT), // ADDR/dst = V0:V1, VDST/ret = V3, DATA/src=V2 (FLAT_1) - 0xBF810000u // s_endpgm, note that we rely on the implicit s_waitcnt 0,0,0 -}; -#endif - -const uint32_t IsaGenerator_Gfx72::COPY_DWORD_ISA[] = { - 0x7e000200, // v_mov_b32 v0, s0 (VOP1) - 0x7e020201, // v_mov_b32 v1, s1 (VOP1) - 0x7e040202, // v_mov_b32 v2, s2 (VOP1) - 0x7e060203, // v_mov_b32 v3, s3 (VOP1) - - 0xdc330000, // SQ_FLAT_0, flat_load_dword, slc = 1, glc = 1 
(FLAT_0) - 0x04000000, // ADDR = V0:V1, VDST = V4 (FLAT_1) - - 0xbf8c0000, // s_waitcnt 0 (SOPP) - - 0xdc730000, // SQ_FLAT_0, flat_store_dword, slc = 1, glc = 1 (FLAT_0) - 0x00000402, // ADDR = V2:V3, DATA = V4 (FLAT_1) - - 0xbf810000 // s_endpgm, note that we rely on the implicit s_waitcnt 0,0,0 -}; - -const uint32_t IsaGenerator_Gfx72::INFINITE_LOOP_ISA[] = { - 0xbf82ffff, // s_branch -1 (PC <- PC + SIMM*4)+4 - 0xbf810000 // S_ENDPGM -}; - -const uint32_t IsaGenerator_Gfx72::ATOMIC_INC_ISA[] = { - 0x7e000200, // v_mov_b32 v0, s0 (VOP1) - 0x7e020201, // v_mov_b32 v1, s1 (VOP1) - 0x7e0402c1, // v_mov_b32 0xFFFFFFFF, s2 (VOP1) - - 0xdcf20000, // SQ_FLAT_0, flat_atomic_inc, slc = 1, glc = 0 (FLAT_0) - 0x03000200, // ADDR/dst = V0:V1, VDST/ret = V3, DATA/src=V2 (FLAT_1) - 0xbf810000 // s_endpgm, note that we rely on the implicit s_waitcnt 0,0,0 -}; - -void IsaGenerator_Gfx72::GetNoopIsa(HsaMemoryBuffer& rBuf) { - std::copy(NOOP_ISA, NOOP_ISA+ARRAY_SIZE(NOOP_ISA), rBuf.As()); -} - -void IsaGenerator_Gfx72::GetCopyDwordIsa(HsaMemoryBuffer& rBuf) { - std::copy(COPY_DWORD_ISA, COPY_DWORD_ISA+ARRAY_SIZE(COPY_DWORD_ISA), rBuf.As()); -} - -void IsaGenerator_Gfx72::GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf) { - std::copy(INFINITE_LOOP_ISA, INFINITE_LOOP_ISA+ARRAY_SIZE(INFINITE_LOOP_ISA), rBuf.As()); -} - -void IsaGenerator_Gfx72::GetAtomicIncIsa(HsaMemoryBuffer& rBuf) { - std::copy(ATOMIC_INC_ISA, ATOMIC_INC_ISA+ARRAY_SIZE(ATOMIC_INC_ISA), rBuf.As()); -} - -const std::string& IsaGenerator_Gfx72::GetAsicName() { - return ASIC_NAME; -} diff --git a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx72.hpp b/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx72.hpp deleted file mode 100644 index 5c39ffa216..0000000000 --- a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx72.hpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#ifndef _ISAGENERATOR_GFX72_H_ -#define _ISAGENERATOR_GFX72_H_ - -#include -#include "IsaGenerator.hpp" - -class IsaGenerator_Gfx72 : public IsaGenerator { - public: - virtual void GetNoopIsa(HsaMemoryBuffer& rBuf); - virtual void GetCopyDwordIsa(HsaMemoryBuffer& rBuf); - virtual void GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf); - virtual void GetAtomicIncIsa(HsaMemoryBuffer& rBuf); - - protected: - virtual const std::string& GetAsicName(); - - private: - static const std::string ASIC_NAME; - - static const uint32_t NOOP_ISA[]; - static const uint32_t COPY_DWORD_ISA[]; - static const uint32_t INFINITE_LOOP_ISA[]; - static const uint32_t ATOMIC_INC_ISA[]; -}; - -#endif // _ISAGENERATOR_GFX72_H_ diff --git a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx8.cpp b/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx8.cpp deleted file mode 100644 index 65e0df6836..0000000000 --- a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx8.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "IsaGenerator_Gfx8.hpp" - -#include -#include - -const std::string IsaGenerator_Gfx8::ASIC_NAME = "VI"; - -const uint32_t IsaGenerator_Gfx8::NOOP_ISA[] = { - 0xbf810000 // S_ENDPGM -}; - -/** The below arrays are filled with hex values in order not to reference - * proprietary header files, but we still leave the code here for future - * reference. - */ -#if 0 -const uint32_t IsaGenerator_Gfx8::COPY_DWORD_ISA[] = { - (63u << SQ_VOP1__ENCODING__SHIFT) | (0 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (0 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v0, s0 (VOP1) - (63u << SQ_VOP1__ENCODING__SHIFT) | (1 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (1 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v1, s1 (VOP1) - (63u << SQ_VOP1__ENCODING__SHIFT) | (2 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (2 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v2, s2 (VOP1) - (63u << SQ_VOP1__ENCODING__SHIFT) | (3 << SQ_VOP1__VDST__SHIFT) | (SQ_V_MOV_B32 << SQ_VOP1__OP__SHIFT) | (3 << SQ_VOP1__SRC0__SHIFT), // v_mov_b32 v3, s3 (VOP1) - - (55u << SQ_FLAT_0__ENCODING__SHIFT) | (SQ_FLAT_LOAD_DWORD << SQ_FLAT_0__OP__SHIFT) | (1 << SQ_FLAT_0__SLC__SHIFT) | (1 << SQ_FLAT_0__GLC__SHIFT)/*(3 << 16)*/, // SQ_FLAT_0, flat_load_dword, slc = 1, glc = 1 (FLAT_0) - (4u << SQ_FLAT_1__VDST__SHIFT) | (0 << SQ_FLAT_1__ADDR__SHIFT), // ADDR = V0:V1, VDST = V4 (FLAT_1) - - (383u << SQ_SOPP__ENCODING__SHIFT) | (SQ_S_WAITCNT << SQ_SOPP__OP__SHIFT) | (0 << SQ_SOPP__SIMM16__SHIFT), // s_waitcnt 0 (SOPP) - - (55u << SQ_FLAT_0__ENCODING__SHIFT) | (SQ_FLAT_STORE_DWORD << SQ_FLAT_0__OP__SHIFT) | (1 << SQ_FLAT_0__SLC__SHIFT) | (1 << SQ_FLAT_0__GLC__SHIFT), // SQ_FLAT_0, 
flat_store_dword, slc = 1, glc = 1 (FLAT_0) - (4u << SQ_FLAT_1__DATA__SHIFT) | (2 << SQ_FLAT_1__ADDR__SHIFT), // ADDR = V2:V3, DATA = V4 (FLAT_1) - - 0xBF810000u // s_endpgm, note that we rely on the implicit s_waitcnt 0,0,0 -}; - -const uint32_t IsaGenerator_Gfx8::INFINITE_LOOP_ISA[] = { - (0x17F << SQ_SOPP__ENCODING__SHIFT) | (SQ_S_BRANCH << SQ_SOPP__OP__SHIFT) | ( (const uint32_t)-1 << SQ_SOPP__SIMM16__SHIFT), // s_branch -1 (PC <- PC + SIMM*4)+4 - 0xBF810000u // S_ENDPGM -}; -#endif - -const uint32_t IsaGenerator_Gfx8::COPY_DWORD_ISA[] = { - 0x7e000200, // v_mov_b32 v0, s0 (VOP1) - 0x7e020201, // v_mov_b32 v1, s1 (VOP1) - 0x7e040202, // v_mov_b32 v2, s2 (VOP1) - 0x7e060203, // v_mov_b32 v3, s3 (VOP1) - - 0xdc530000, // SQ_FLAT_0, flat_load_dword, slc = 1, glc = 1 (FLAT_0) - 0x04000000, // ADDR = V0:V1, VDST = V4 (FLAT_1) - - 0xbf8c0000, // s_waitcnt 0 (SOPP) - - 0xdc730000, // SQ_FLAT_0, flat_store_dword, slc = 1, glc = 1 (FLAT_0) - 0x00000402, // ADDR = V2:V3, DATA = V4 (FLAT_1) - - 0xbf810000 // s_endpgm, note that we rely on the implicit s_waitcnt 0,0,0 -}; - -const uint32_t IsaGenerator_Gfx8::INFINITE_LOOP_ISA[] = { - 0xbf82ffff, // s_branch -1 (PC <- PC + SIMM*4)+4 - 0xbf810000 // S_ENDPGM -}; - -/** - * The atomic_add_isa binary is generated from following ISA - * The original atomic_inc is not support by some PCIE, so use atomic_add instead - * - */ -/* -shader atomic_add -asic(VI) -type(CS) - v_mov_b32 v0, s0 - v_mov_b32 v1, s1 - v_mov_b32 v2, 1 - flat_atomic_add v3, v[0:1], v2 slc glc - s_waitcnt 0 - s_endpgm -end -*/ - -const uint32_t IsaGenerator_Gfx8::ATOMIC_ADD_ISA[] = { - 0x7e000200, 0x7e020201, - 0x7e040281, 0xdd0b0000, - 0x03000200, 0xbf8c0000, - 0xbf810000, 0x00000000 -}; - -void IsaGenerator_Gfx8::GetNoopIsa(HsaMemoryBuffer& rBuf) { - std::copy(NOOP_ISA, NOOP_ISA+ARRAY_SIZE(NOOP_ISA), rBuf.As()); -} - -void IsaGenerator_Gfx8::GetCopyDwordIsa(HsaMemoryBuffer& rBuf) { - std::copy(COPY_DWORD_ISA, COPY_DWORD_ISA+ARRAY_SIZE(COPY_DWORD_ISA), 
rBuf.As()); -} - -void IsaGenerator_Gfx8::GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf) { - std::copy(INFINITE_LOOP_ISA, INFINITE_LOOP_ISA+ARRAY_SIZE(INFINITE_LOOP_ISA), rBuf.As()); -} - -void IsaGenerator_Gfx8::GetAtomicIncIsa(HsaMemoryBuffer& rBuf) { - std::copy(ATOMIC_ADD_ISA, ATOMIC_ADD_ISA+ARRAY_SIZE(ATOMIC_ADD_ISA), rBuf.As()); -} - -const std::string& IsaGenerator_Gfx8::GetAsicName() { - return ASIC_NAME; -} diff --git a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx8.hpp b/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx8.hpp deleted file mode 100644 index 7e5b9e3c89..0000000000 --- a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx8.hpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#ifndef _ISAGENERATOR_GFX8_H_ -#define _ISAGENERATOR_GFX8_H_ - -#include -#include "IsaGenerator.hpp" - -class IsaGenerator_Gfx8 : public IsaGenerator { - public: - virtual void GetNoopIsa(HsaMemoryBuffer& rBuf); - virtual void GetCopyDwordIsa(HsaMemoryBuffer& rBuf); - virtual void GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf); - virtual void GetAtomicIncIsa(HsaMemoryBuffer& rBuf); - - protected: - virtual const std::string& GetAsicName(); - - private: - static const std::string ASIC_NAME; - - static const uint32_t NOOP_ISA[]; - static const uint32_t COPY_DWORD_ISA[]; - static const uint32_t INFINITE_LOOP_ISA[]; - static const uint32_t ATOMIC_ADD_ISA[]; -}; - -#endif // _ISAGENERATOR_GFX72_H_ diff --git a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx9.cpp b/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx9.cpp deleted file mode 100644 index 8eaab32a5e..0000000000 --- a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx9.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "IsaGenerator_Gfx9.hpp" - -#include -#include - -const std::string IsaGenerator_Gfx9::ASIC_NAME = "GFX9"; - -/* The binaries are generated from following ISA */ -#if 0 -/* flat_atomic_inc will not support by some PCIE, use flat_atomic_add instead */ -shader atomic_add -asic(GFX9) -type(CS) - v_mov_b32 v0, s0 - v_mov_b32 v1, s1 - v_mov_b32 v2, 1 - flat_atomic_add v3, v[0:1], v2 slc glc - s_waitcnt 0 - s_endpgm -end - -shader copy_dword -asic(GFX9) -type(CS) -/* copy the parameters from scalar registers to vector registers */ - v_mov_b32 v0, s0 - v_mov_b32 v1, s1 - v_mov_b32 v2, s2 - v_mov_b32 v3, s3 -/* copy a dword between the passed addresses */ - flat_load_dword v4, v[0:1] slc glc - s_waitcnt 0 - flat_store_dword v[2:3], v4 slc glc - s_endpgm -end - -shader main -asic(GFX9) -type(CS) -loop: - s_branch loop - s_endpgm -end - - -#endif - -const uint32_t IsaGenerator_Gfx9::NOOP_ISA[] = { - 0xbf810000 -}; - -const uint32_t IsaGenerator_Gfx9::COPY_DWORD_ISA[] = { - 0x7e000200, 0x7e020201, - 0x7e040202, 0x7e060203, - 0xdc530000, 0x047f0000, - 0xbf8c0000, 0xdc730000, - 0x007f0402, 0xbf810000 -}; - -const uint32_t IsaGenerator_Gfx9::INFINITE_LOOP_ISA[] = { - 0xbf82ffff, 0xbf810000 -}; - -const uint32_t IsaGenerator_Gfx9::ATOMIC_ADD_ISA[] = { - 0x7e000200, 0x7e020201, - 0x7e040281, 0xdd0b0000, - 0x037f0200, 0xbf8c0000, - 0xbf810000, 0x00000000 -}; - -void IsaGenerator_Gfx9::GetNoopIsa(HsaMemoryBuffer& rBuf) { - std::copy(NOOP_ISA, NOOP_ISA+ARRAY_SIZE(NOOP_ISA), rBuf.As()); -} - -void IsaGenerator_Gfx9::GetCopyDwordIsa(HsaMemoryBuffer& rBuf) { - std::copy(COPY_DWORD_ISA, COPY_DWORD_ISA+ARRAY_SIZE(COPY_DWORD_ISA), rBuf.As()); -} - -void 
IsaGenerator_Gfx9::GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf) { - std::copy(INFINITE_LOOP_ISA, INFINITE_LOOP_ISA+ARRAY_SIZE(INFINITE_LOOP_ISA), rBuf.As()); -} - -void IsaGenerator_Gfx9::GetAtomicIncIsa(HsaMemoryBuffer& rBuf) { - std::copy(ATOMIC_ADD_ISA, ATOMIC_ADD_ISA+ARRAY_SIZE(ATOMIC_ADD_ISA), rBuf.As()); -} - -const std::string& IsaGenerator_Gfx9::GetAsicName() { - return ASIC_NAME; -} - diff --git a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx9.hpp b/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx9.hpp deleted file mode 100644 index 32103c0a15..0000000000 --- a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx9.hpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#ifndef _ISAGENERATOR_GFX9_H_ -#define _ISAGENERATOR_GFX9_H_ - -#include -#include "IsaGenerator.hpp" - -class IsaGenerator_Gfx9 : public IsaGenerator { - public: - virtual void GetNoopIsa(HsaMemoryBuffer& rBuf); - virtual void GetCopyDwordIsa(HsaMemoryBuffer& rBuf); - virtual void GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf); - virtual void GetAtomicIncIsa(HsaMemoryBuffer& rBuf); - - protected: - virtual const std::string& GetAsicName(); - - private: - static const std::string ASIC_NAME; - - static const uint32_t NOOP_ISA[]; - static const uint32_t COPY_DWORD_ISA[]; - static const uint32_t INFINITE_LOOP_ISA[]; - static const uint32_t ATOMIC_ADD_ISA[]; -}; - -#endif // _ISAGENERATOR_GFX9_H_ diff --git a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Aldebaran.hpp b/projects/rocr-runtime/tests/kfdtest/src/KFDASMTest.cpp similarity index 53% rename from projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Aldebaran.hpp rename to projects/rocr-runtime/tests/kfdtest/src/KFDASMTest.cpp index 5571b91c26..4b9f5d69c8 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Aldebaran.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDASMTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (C) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,29 +21,53 @@ * */ -#ifndef _ISAGENERATOR_ALDEBARAN_H_ -#define _ISAGENERATOR_ALDEBARAN_H_ +#include "GoogleTestExtension.hpp" +#include "KFDASMTest.hpp" +#include "ShaderStore.hpp" +#include "Assemble.hpp" -#include -#include "IsaGenerator.hpp" +void KFDASMTest::SetUp() {} +void KFDASMTest::TearDown() {} -class IsaGenerator_Aldbrn : public IsaGenerator { - public: - virtual void GetNoopIsa(HsaMemoryBuffer& rBuf); - virtual void GetCopyDwordIsa(HsaMemoryBuffer& rBuf); - virtual void GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf); - virtual void GetAtomicIncIsa(HsaMemoryBuffer& rBuf); - - protected: - virtual const std::string& GetAsicName(); - - private: - static const std::string ASIC_NAME; - - static const uint32_t NOOP_ISA[]; - static const uint32_t COPY_DWORD_ISA[]; - static const uint32_t INFINITE_LOOP_ISA[]; - static const uint32_t ATOMIC_ADD_ISA[]; +static const std::vector TargetList = { + 0x080001, + 0x080002, + 0x080003, + 0x080005, + 0x080100, + 0x090000, + 0x090002, + 0x090004, + 0x090006, + 0x090008, + 0x090009, + 0x09000a, + 0x09000c, + 0x0a0100, + 0x0a0101, + 0x0a0102, + 0x0a0103, + 0x0a0300, + 0x0a0301, + 0x0a0302, + 0x0a0303, + 0x0a0304, + 0x0a0305, + 0x0a0306, }; -#endif // _ISAGENERATOR_ALDEBARAN_H_ +TEST_F(KFDASMTest, AssembleShaders) { + TEST_START(TESTPROFILE_RUNALL) + + for (auto &t : TargetList) { + Assembler asmblr(t); + + LOG() << "Running ASM test for target " << asmblr.GetTargetAsic() << std::endl; + + for (auto &s : ShaderList) { + EXPECT_SUCCESS(asmblr.RunAssemble(s)); + } + } + + TEST_END +} diff --git a/projects/rocr-runtime/tests/kfdtest/sp3/lib_helper/AMD_opensource_license.txt b/projects/rocr-runtime/tests/kfdtest/src/KFDASMTest.hpp similarity index 75% rename from projects/rocr-runtime/tests/kfdtest/sp3/lib_helper/AMD_opensource_license.txt rename to 
projects/rocr-runtime/tests/kfdtest/src/KFDASMTest.hpp index 673285ddb4..5f601e165a 100644 --- a/projects/rocr-runtime/tests/kfdtest/sp3/lib_helper/AMD_opensource_license.txt +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDASMTest.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (C) 2022 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,3 +21,19 @@ * */ +#ifndef __KFD_ASM_TEST__H__ +#define __KFD_ASM_TEST__H__ + +#include + +class KFDASMTest : public testing::Test { + public: + KFDASMTest() {} + ~KFDASMTest() {} + + protected: + virtual void SetUp(); + virtual void TearDown(); +}; + +#endif // __KFD_ASM_TEST__H__ diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDBaseComponentTest.cpp b/projects/rocr-runtime/tests/kfdtest/src/KFDBaseComponentTest.cpp index f950a7a1f1..5618945505 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDBaseComponentTest.cpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDBaseComponentTest.cpp @@ -68,6 +68,8 @@ void KFDBaseComponentTest::SetUp() { g_baseTest = this; + m_pAsm = new Assembler(GetGfxVersion(nodeProperties)); + ROUTINE_END } @@ -86,6 +88,10 @@ void KFDBaseComponentTest::TearDown() { EXPECT_SUCCESS(hsaKmtCloseKFD()); g_baseTest = NULL; + if (m_pAsm) + delete m_pAsm; + m_pAsm = nullptr; + ROUTINE_END } diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDBaseComponentTest.hpp b/projects/rocr-runtime/tests/kfdtest/src/KFDBaseComponentTest.hpp index e27baf1cc1..cc87465ad5 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDBaseComponentTest.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDBaseComponentTest.hpp @@ -34,6 +34,8 @@ #include "hsakmt.h" #include "OSWrapper.hpp" #include "KFDTestUtil.hpp" +#include "Assemble.hpp" +#include "ShaderStore.hpp" // @class KFDBaseComponentTest class 
KFDBaseComponentTest : public testing::Test { @@ -74,6 +76,7 @@ class KFDBaseComponentTest : public testing::Test { HsaMemFlags m_MemoryFlags; HsaNodeInfo m_NodeInfo; HSAint32 m_xnack; + Assembler* m_pAsm; // @brief Executed before every test that uses KFDBaseComponentTest class and sets all common settings for the tests. virtual void SetUp(); diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDCWSRTest.cpp b/projects/rocr-runtime/tests/kfdtest/src/KFDCWSRTest.cpp index e12e697566..5a4f853ee2 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDCWSRTest.cpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDCWSRTest.cpp @@ -24,90 +24,11 @@ #include "KFDCWSRTest.hpp" #include "Dispatch.hpp" - -/* Initial state: - * s[0:1] - 64 bits iteration number; only the lower 32 bits are useful. - * s[2:3] - result buffer base address - * s4 - workgroup id - * v0 - workitem id, always 0 because - * NUM_THREADS_X(number of threads) in workgroup set to 1 - * Registers: - * v0 - calculated workitem = v0 + s4 * NUM_THREADS_X, which is s4 - * v2 - = s0, 32 bits iteration number - * v[4:5] - corresponding output buf address: s[2:3] + v0 * 4 - * v6 - counter - */ - -static const char* iterate_isa_gfx8 = \ -"\ -shader iterate_isa\n\ -wave_size(32)\n\ -type(CS)\n\ - // copy the parameters from scalar registers to vector registers\n\ - v_mov_b32 v2, s0 // v[2:3] = s[0:1] \n\ - v_mov_b32 v3, s1 // v[2:3] = s[0:1] \n\ - v_mov_b32 v0, s4 // use workgroup id as index \n\ - v_lshlrev_b32 v0, 2, v0 // v0 *= 4 \n\ - v_add_u32 v4, vcc, s2, v0 // v[4:5] = s[2:3] + v0 * 4 \n\ - v_mov_b32 v5, s3 // v[4:5] = s[2:3] + v0 * 4 \n\ - v_add_u32 v5, vcc, v5, vcc_lo // v[4:5] = s[2:3] + v0 * 4 \n\ - v_mov_b32 v6, 0 \n\ -LOOP: \n\ - v_add_u32 v6, vcc, 1, v6 \n\ - // compare the result value (v6) to iteration value (v2), and \n\ - // jump if equal (i.e. 
if VCC is not zero after the comparison) \n\ - v_cmp_lt_u32 vcc, v6, v2 \n\ - s_cbranch_vccnz LOOP \n\ - flat_store_dword v[4:5], v6 \n\ - s_waitcnt vmcnt(0)&lgkmcnt(0) \n\ - s_endpgm \n\ -end \n\ -"; - -//This shader can be used by gfx9 and gfx10 -static const char* iterate_isa_gfx9 = \ -"\ -shader iterate_isa\n\ -wave_size(32)\n\ -type(CS)\n\ - // copy the parameters from scalar registers to vector registers\n\ - v_mov_b32 v2, s0 // v[2:3] = s[0:1] \n\ - v_mov_b32 v3, s1 // v[2:3] = s[0:1] \n\ - v_mov_b32 v0, s4 // use workgroup id as index \n\ - v_lshlrev_b32 v0, 2, v0 // v0 *= 4 \n\ - v_add_co_u32 v4, vcc, s2, v0 // v[4:5] = s[2:3] + v0 * 4 \n\ - v_mov_b32 v5, s3 // v[4:5] = s[2:3] + v0 * 4 \n\ - v_add_co_u32 v5, vcc, v5, vcc_lo // v[4:5] = s[2:3] + v0 * 4 \n\ - v_mov_b32 v6, 0 \n\ -LOOP: \n\ - v_add_co_u32 v6, vcc, 1, v6 \n\ - // compare the result value (v6) to iteration value (v2), and \n\ - // jump if equal (i.e. if VCC is not zero after the comparison) \n\ - v_cmp_lt_u32 vcc, v6, v2 \n\ - s_cbranch_vccnz LOOP \n\ - flat_store_dword v[4:5], v6 \n\ - s_waitcnt vmcnt(0)&lgkmcnt(0) \n\ - s_endpgm \n\ -end \n\ -"; - -static const char* infinite_isa = \ -"\ -shader infinite_isa \n\ -wave_size(32) \n\ -type(CS) \n\ -LOOP: \n\ - s_branch LOOP \n\ -end \n\ -"; - void KFDCWSRTest::SetUp() { ROUTINE_START KFDBaseComponentTest::SetUp(); - m_pIsaGen = IsaGenerator::Create(m_FamilyId); - wave_number = 1; ROUTINE_END @@ -115,9 +36,6 @@ void KFDCWSRTest::SetUp() { void KFDCWSRTest::TearDown() { ROUTINE_START - if (m_pIsaGen) - delete m_pIsaGen; - m_pIsaGen = NULL; KFDBaseComponentTest::TearDown(); @@ -153,16 +71,10 @@ TEST_F(KFDCWSRTest, BasicTest) { int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode(); if ((m_FamilyId >= FAMILY_VI) && (checkCWSREnabled())) { - const char *pIterateIsa; HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); HsaMemoryBuffer resultBuf1(PAGE_SIZE, defaultGPUNode, true, false, false); uint64_t count1 = 
400000000; - if (m_FamilyId < FAMILY_AI) - pIterateIsa = iterate_isa_gfx8; - else - pIterateIsa = iterate_isa_gfx9; - if (isOnEmulator()) { // Divide the iterator times by 10000 so that the test can // finish in a reasonable time. @@ -172,7 +84,7 @@ TEST_F(KFDCWSRTest, BasicTest) { unsigned int* result1 = resultBuf1.As(); - m_pIsaGen->CompileShader(pIterateIsa, "iterate_isa", isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(IterateIsa, isaBuffer.As())); PM4Queue queue1; @@ -236,7 +148,7 @@ TEST_F(KFDCWSRTest, InterruptRestore) { if ((m_FamilyId >= FAMILY_VI) && (checkCWSREnabled())) { HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->CompileShader(infinite_isa, "infinite_isa", isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(InfiniteLoopIsa, isaBuffer.As())); PM4Queue queue1, queue2, queue3; diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDCWSRTest.hpp b/projects/rocr-runtime/tests/kfdtest/src/KFDCWSRTest.hpp index 779180ea3d..53c925b0aa 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDCWSRTest.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDCWSRTest.hpp @@ -27,12 +27,11 @@ #include #include "PM4Queue.hpp" -#include "IsaGenerator.hpp" #include "KFDBaseComponentTest.hpp" class KFDCWSRTest : public KFDBaseComponentTest { public: - KFDCWSRTest() :m_pIsaGen(NULL) {} + KFDCWSRTest() {} ~KFDCWSRTest() {} protected: @@ -41,7 +40,6 @@ class KFDCWSRTest : public KFDBaseComponentTest { protected: // Members unsigned wave_number; - IsaGenerator* m_pIsaGen; }; #endif // __KFD_CWSR_TEST__H__ diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDDBGTest.cpp b/projects/rocr-runtime/tests/kfdtest/src/KFDDBGTest.cpp index b7a38bbd46..f256d8a135 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDDBGTest.cpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDDBGTest.cpp @@ -176,16 +176,11 @@ void KFDDBGTest::SetUp() { KFDBaseComponentTest::SetUp(); - m_pIsaGen = 
IsaGenerator::Create(m_FamilyId); - ROUTINE_END } void KFDDBGTest::TearDown() { ROUTINE_START - if (m_pIsaGen) - delete m_pIsaGen; - m_pIsaGen = NULL; /* Reset the user trap handler */ hsaKmtSetTrapHandler(m_NodeInfo.HsaDefaultGPUNode(), 0, 0, 0, 0); diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDDBGTest.hpp b/projects/rocr-runtime/tests/kfdtest/src/KFDDBGTest.hpp index c4b46b296b..dccedc3fd3 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDDBGTest.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDDBGTest.hpp @@ -26,20 +26,16 @@ #include -#include "IsaGenerator.hpp" #include "KFDBaseComponentTest.hpp" class KFDDBGTest : public KFDBaseComponentTest { public: - KFDDBGTest() :m_pIsaGen(NULL) {} + KFDDBGTest() {} ~KFDDBGTest() {} protected: virtual void SetUp(); virtual void TearDown(); - - protected: // Members - IsaGenerator* m_pIsaGen; }; #endif // __KFD_DBG_TEST__H__ diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDEvictTest.cpp b/projects/rocr-runtime/tests/kfdtest/src/KFDEvictTest.cpp index 7ec86bc8bd..1effa639b0 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDEvictTest.cpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDEvictTest.cpp @@ -41,18 +41,12 @@ void KFDEvictTest::SetUp() { KFDBaseComponentTest::SetUp(); - m_pIsaGen = IsaGenerator::Create(m_FamilyId); - ROUTINE_END } void KFDEvictTest::TearDown() { ROUTINE_START - if (m_pIsaGen) - delete m_pIsaGen; - m_pIsaGen = NULL; - KFDBaseComponentTest::TearDown(); ROUTINE_END @@ -286,136 +280,6 @@ void KFDEvictTest::AmdgpuCommandSubmissionSdmaNop(int rn, amdgpu_bo_handle handl EXPECT_EQ(0, amdgpu_cs_ctx_free(contextHandle)); } -/* Shader to read local buffers using multiple wavefronts in parallel - * until address buffer is filled with specific value 0x5678 by host program, - * then each wavefront fills value 0x5678 at corresponding result buffer and quit - * - * Initial state: - * s[0:1] - address buffer base address - * s[2:3] - result buffer base address - * s4 - workgroup id - 
* v0 - workitem id, always 0 because NUM_THREADS_X(number of threads) in workgroup set to 1 - * Registers: - * v0 - calculated workitem id, v0 = v0 + s4 * NUM_THREADS_X - * v[2:3] - address of corresponding local buf address offset: s[0:1] + v0 * 8 - * v[4:5] - corresponding output buf address: s[2:3] + v0 * 4 - * v[6:7] - local buf address used for read test - * - * This shader can be used by gfx9 and gfx10 - * - */ - -static const char* gfx9_ReadMemory = -"\ - shader ReadMemory\n\ - wave_size(32)\n\ - type(CS)\n\ - \n\ - // compute address of corresponding output buffer\n\ - v_mov_b32 v0, s4 // use workgroup id as index\n\ - v_lshlrev_b32 v0, 2, v0 // v0 *= 4\n\ - v_add_co_u32 v4, vcc, s2, v0 // v[4:5] = s[2:3] + v0 * 4\n\ - v_mov_b32 v5, s3\n\ - v_add_co_u32 v5, vcc, v5, vcc_lo\n\ - \n\ - // compute input buffer offset used to store corresponding local buffer address\n\ - v_lshlrev_b32 v0, 1, v0 // v0 *= 8\n\ - v_add_co_u32 v2, vcc, s0, v0 // v[2:3] = s[0:1] + v0 * 8\n\ - v_mov_b32 v3, s1\n\ - v_add_co_u32 v3, vcc, v3, vcc_lo\n\ - \n\ - // load 64bit local buffer address stored at v[2:3] to v[6:7]\n\ - flat_load_dwordx2 v[6:7], v[2:3] slc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\ - \n\ - v_mov_b32 v8, 0x5678\n\ - s_movk_i32 s8, 0x5678\n\ -L_REPEAT:\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\ - s_cmp_eq_i32 s16, s8\n\ - s_cbranch_scc1 L_QUIT // if notified to quit by host\n\ - // loop read 64M local buffer starting at v[6:7]\n\ - // every 4k page only read once\n\ - v_mov_b32 v9, 0\n\ - v_mov_b32 v10, 0x1000 // 4k page\n\ - v_mov_b32 v11, 0x4000000 // 64M size\n\ - v_mov_b32 v12, v6\n\ - v_mov_b32 v13, v7\n\ -L_LOOP_READ:\n\ - flat_load_dwordx2 v[14:15], v[12:13] slc\n\ - v_add_co_u32 v9, vcc, v9, v10 \n\ - v_add_co_u32 v12, vcc, v12, v10\n\ - v_add_co_u32 v13, vcc, v13, vcc_lo\n\ - v_cmp_lt_u32 vcc, v9, v11\n\ - s_cbranch_vccnz L_LOOP_READ\n\ - s_branch 
L_REPEAT\n\ -L_QUIT:\n\ - flat_store_dword v[4:5], v8\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory writes to finish\n\ - s_endpgm\n\ - end\n\ -"; - -static const char* gfx8_ReadMemory = -"\ - shader ReadMemory\n\ - asic(VI)\n\ - type(CS)\n\ - \n\ - // compute address of corresponding output buffer\n\ - v_mov_b32 v0, s4 // use workgroup id as index\n\ - v_lshlrev_b32 v0, 2, v0 // v0 *= 4\n\ - v_add_u32 v4, vcc, s2, v0 // v[4:5] = s[2:3] + v0 * 4\n\ - v_mov_b32 v5, s3\n\ - v_addc_u32 v5, vcc, v5, 0, vcc\n\ - \n\ - // compute input buffer offset used to store corresponding local buffer address\n\ - v_lshlrev_b32 v0, 1, v0 // v0 *= 8\n\ - v_add_u32 v2, vcc, s0, v0 // v[2:3] = s[0:1] + v0 * 8\n\ - v_mov_b32 v3, s1\n\ - v_addc_u32 v3, vcc, v3, 0, vcc\n\ - \n\ - // load 64bit local buffer address stored at v[2:3] to v[6:7]\n\ - flat_load_dwordx2 v[6:7], v[2:3] slc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\ - \n\ - v_mov_b32 v8, 0x5678\n\ - s_movk_i32 s8, 0x5678\n\ -L_REPEAT:\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\ - s_cmp_eq_i32 s16, s8\n\ - s_cbranch_scc1 L_QUIT // if notified to quit by host\n\ - // loop read 64M local buffer starting at v[6:7]\n\ - // every 4k page only read once\n\ - v_mov_b32 v9, 0\n\ - v_mov_b32 v10, 0x1000 // 4k page\n\ - v_mov_b32 v11, 0x4000000 // 64M size\n\ - v_mov_b32 v12, v6\n\ - v_mov_b32 v13, v7\n\ -L_LOOP_READ:\n\ - flat_load_dwordx2 v[14:15], v[12:13] slc\n\ - v_add_u32 v9, vcc, v9, v10 \n\ - v_add_u32 v12, vcc, v12, v10\n\ - v_addc_u32 v13, vcc, v13, 0, vcc\n\ - v_cmp_lt_u32 vcc, v9, v11\n\ - s_cbranch_vccnz L_LOOP_READ\n\ - s_branch L_REPEAT\n\ -L_QUIT:\n\ - flat_store_dword v[4:5], v8\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory writes to finish\n\ - s_endpgm\n\ - end\n\ -"; - -std::string KFDEvictTest::CreateShader() { - if (m_FamilyId < FAMILY_AI) - return gfx8_ReadMemory; - else - return gfx9_ReadMemory; -} 
- /* Evict and restore procedure basic test * * Use N_PROCESSES processes to allocate vram buf size larger than total vram size @@ -567,7 +431,7 @@ TEST_F(KFDEvictTest, QueueTest) { HsaMemoryBuffer addrBuffer(PAGE_SIZE, defaultGPUNode); HsaMemoryBuffer resultBuffer(PAGE_SIZE, defaultGPUNode); - m_pIsaGen->CompileShader(CreateShader().c_str(), "ReadMemory", isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(ReadMemoryIsa, isaBuffer.As())); PM4Queue pm4Queue; ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode)); diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDEvictTest.hpp b/projects/rocr-runtime/tests/kfdtest/src/KFDEvictTest.hpp index 2b838a5388..30f0a856be 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDEvictTest.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDEvictTest.hpp @@ -27,22 +27,19 @@ #include #include #include "KFDMultiProcessTest.hpp" -#include "IsaGenerator.hpp" #include "PM4Queue.hpp" // @class KFDEvictTest // Test eviction and restore procedure using two processes class KFDEvictTest : public KFDMultiProcessTest { public: - KFDEvictTest(void): m_pIsaGen(NULL) {} - + KFDEvictTest(void) {} ~KFDEvictTest(void) {} protected: virtual void SetUp(); virtual void TearDown(); - std::string CreateShader(); void AllocBuffers(HSAuint32 defaultGPUNode, HSAuint32 count, HSAuint64 vramBufSize, std::vector &pBuffers); void FreeBuffers(std::vector &pBuffers, HSAuint64 vramBufSize); @@ -52,7 +49,6 @@ class KFDEvictTest : public KFDMultiProcessTest { PM4Queue *computeQueue); protected: // Members - IsaGenerator* m_pIsaGen; HsaMemFlags m_Flags; void* m_pBuf; }; diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDExceptionTest.cpp b/projects/rocr-runtime/tests/kfdtest/src/KFDExceptionTest.cpp index 11df6279c9..9fa15e0969 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDExceptionTest.cpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDExceptionTest.cpp @@ -33,18 +33,12 @@ void KFDExceptionTest::SetUp() { KFDBaseComponentTest::SetUp(); - 
m_pIsaGen = IsaGenerator::Create(m_FamilyId); - ROUTINE_END } void KFDExceptionTest::TearDown() { ROUTINE_START - if (m_pIsaGen) - delete m_pIsaGen; - m_pIsaGen = NULL; - KFDBaseComponentTest::TearDown(); // WORKAROUND: This needs to be fixed in the kernel @@ -75,7 +69,8 @@ void KFDExceptionTest::TestMemoryException(int defaultGPUNode, HSAuint64 pSrc, eventDesc.SyncVar.SyncVar.UserData = NULL; eventDesc.SyncVar.SyncVarSize = 0; - m_pIsaGen->GetCopyDwordIsa(isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As())); + m_ChildStatus = queue.Create(defaultGPUNode); if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) { WARN() << "Queue create failed" << std::endl; diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDExceptionTest.hpp b/projects/rocr-runtime/tests/kfdtest/src/KFDExceptionTest.hpp index 00b45fe5db..df57649e2c 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDExceptionTest.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDExceptionTest.hpp @@ -26,12 +26,11 @@ #include -#include "IsaGenerator.hpp" #include "KFDBaseComponentTest.hpp" class KFDExceptionTest : public KFDBaseComponentTest { public: - KFDExceptionTest() :m_pIsaGen(NULL), m_ChildPid(-1) { + KFDExceptionTest() : m_ChildPid(-1) { /* Because there could be early return before m_ChildPid is set * by fork(), we should initialize m_ChildPid to a non-zero value * to avoid possible exit of the main process. 
@@ -59,8 +58,6 @@ class KFDExceptionTest : public KFDBaseComponentTest { protected: // Members pid_t m_ChildPid; HSAKMT_STATUS m_ChildStatus; - - IsaGenerator* m_pIsaGen; }; #endif // __KFD_EXCEPTION_TEST__H__ diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDGWSTest.cpp b/projects/rocr-runtime/tests/kfdtest/src/KFDGWSTest.cpp index 4c8aefc447..99e9248d8f 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDGWSTest.cpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDGWSTest.cpp @@ -26,91 +26,17 @@ #include "PM4Packet.hpp" #include "Dispatch.hpp" -/* Shader to initialize gws counter to 1*/ -const char* gfx9_10_GwsInit = -"\ -shader GwsInit\n\ -type(CS)\n\ -wave_size(32)\n\ - s_mov_b32 m0, 0\n\ - s_nop 0\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_waitcnt 0\n\ - v_mov_b32 v0, s16\n\ - s_waitcnt 0\n\ - ds_gws_init v0 gds:1 offset0:0\n\ - s_waitcnt 0\n\ - s_endpgm\n\ - end\n\ -"; - -/* Atomically increase a value in memory - * This is expected to be executed from - * multiple work groups simultaneously. - * GWS semaphore is used to guarantee - * the operation is atomic. 
- */ -const char* gfx9_AtomicIncrease = -"\ -shader AtomicIncrease\n\ -type(CS)\n\ -/* Assume src address in s0, s1 */\n\ - s_mov_b32 m0, 0\n\ - s_nop 0\n\ - ds_gws_sema_p gds:1 offset0:0\n\ - s_waitcnt 0\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_waitcnt 0\n\ - s_add_u32 s16, s16, 1\n\ - s_store_dword s16, s[0:1], 0x0 glc\n\ - s_waitcnt lgkmcnt(0)\n\ - ds_gws_sema_v gds:1 offset0:0\n\ - s_waitcnt 0\n\ - s_endpgm\n\ - end\n\ -"; - -const char* gfx10_AtomicIncrease = -"\ -shader AtomicIncrease\n\ -asic(GFX10)\n\ -type(CS)\n\ -wave_size(32)\n\ -/* Assume src address in s0, s1 */\n\ - s_mov_b32 m0, 0\n\ - s_mov_b32 exec_lo, 0x1\n\ - v_mov_b32 v0, s0\n\ - v_mov_b32 v1, s1\n\ - ds_gws_sema_p gds:1 offset0:0\n\ - s_waitcnt 0\n\ - flat_load_dword v2, v[0:1] glc:1 dlc:1\n\ - s_waitcnt 0\n\ - v_add_nc_u32 v2, v2, 1\n\ - flat_store_dword v[0:1], v2\n\ - s_waitcnt_vscnt null, 0\n\ - ds_gws_sema_v gds:1 offset0:0\n\ - s_waitcnt 0\n\ - s_endpgm\n\ - end\n\ -"; - void KFDGWSTest::SetUp() { ROUTINE_START KFDBaseComponentTest::SetUp(); - m_pIsaGen = IsaGenerator::Create(m_FamilyId); - ROUTINE_END } void KFDGWSTest::TearDown() { ROUTINE_START - if (m_pIsaGen) - delete m_pIsaGen; - m_pIsaGen = NULL; - KFDBaseComponentTest::TearDown(); ROUTINE_END @@ -160,21 +86,15 @@ TEST_F(KFDGWSTest, Semaphore) { pNodeProperties->NumGws,&firstGWS)); EXPECT_EQ(0, firstGWS); - m_pIsaGen = IsaGenerator::Create(m_FamilyId); - m_pIsaGen->CompileShader(gfx9_10_GwsInit, "GwsInit", isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(GwsInitIsa, isaBuffer.As())); + Dispatch dispatch0(isaBuffer); buffer.Fill(numResources, 0, 4); dispatch0.SetArgs(buffer.As(), NULL); dispatch0.Submit(queue); dispatch0.Sync(); - const char *pAtomicIncrease; - if (m_FamilyId <= FAMILY_AL) - pAtomicIncrease = gfx9_AtomicIncrease; - else - pAtomicIncrease = gfx10_AtomicIncrease; - - m_pIsaGen->CompileShader(pAtomicIncrease, "AtomicIncrease", isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(GwsAtomicIncreaseIsa, 
isaBuffer.As())); Dispatch dispatch(isaBuffer); dispatch.SetArgs(buffer.As(), NULL); diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDGWSTest.hpp b/projects/rocr-runtime/tests/kfdtest/src/KFDGWSTest.hpp index 15e61ee235..8413145982 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDGWSTest.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDGWSTest.hpp @@ -26,20 +26,16 @@ #include -#include "IsaGenerator.hpp" #include "KFDBaseComponentTest.hpp" class KFDGWSTest : public KFDBaseComponentTest { public: - KFDGWSTest() :m_pIsaGen(NULL) {} + KFDGWSTest() {} ~KFDGWSTest() {} protected: virtual void SetUp(); virtual void TearDown(); - - protected: // Members - IsaGenerator* m_pIsaGen; }; #endif // __KFD_GWS_TEST__H__ diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDGraphicsInterop.cpp b/projects/rocr-runtime/tests/kfdtest/src/KFDGraphicsInterop.cpp index bf2a928db1..48224bce0f 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDGraphicsInterop.cpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDGraphicsInterop.cpp @@ -101,7 +101,8 @@ TEST_F(KFDGraphicsInterop, RegisterGraphicsHandle) { // Copy contents to a system memory buffer for comparison HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->GetCopyDwordIsa(isaBuffer); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As())); HsaMemoryBuffer dstBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/); diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDHWSTest.cpp b/projects/rocr-runtime/tests/kfdtest/src/KFDHWSTest.cpp index 66c0b5d8ff..3040b16ac6 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDHWSTest.cpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDHWSTest.cpp @@ -28,18 +28,12 @@ void KFDHWSTest::SetUp() { KFDBaseComponentTest::SetUp(); - m_pIsaGen = IsaGenerator::Create(m_FamilyId); - ROUTINE_END } void KFDHWSTest::TearDown() { ROUTINE_START - if (m_pIsaGen) - delete m_pIsaGen; - m_pIsaGen = NULL; - 
KFDBaseComponentTest::TearDown(); ROUTINE_END @@ -70,7 +64,9 @@ void KFDHWSTest::RunTest(unsigned nProcesses, unsigned nQueues, unsigned nLoops) // Run work on all queues HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->GetNoopIsa(isaBuffer); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(NoopIsa, isaBuffer.As())); + for (l = 0; l < nLoops; l++) { for (q = 0; q < nQueues; q++) { if (dispatch[q]) diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDHWSTest.hpp b/projects/rocr-runtime/tests/kfdtest/src/KFDHWSTest.hpp index e3ea5155c2..cbec52fbed 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDHWSTest.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDHWSTest.hpp @@ -27,14 +27,12 @@ #include #include "PM4Queue.hpp" -#include "IsaGenerator.hpp" #include "KFDMultiProcessTest.hpp" #include "Dispatch.hpp" class KFDHWSTest : public KFDMultiProcessTest { public: - KFDHWSTest():m_pIsaGen(NULL) {} - + KFDHWSTest() {} ~KFDHWSTest() {} protected: @@ -42,9 +40,6 @@ class KFDHWSTest : public KFDMultiProcessTest { virtual void TearDown(); void RunTest(unsigned nProcesses, unsigned nQueues, unsigned nLoops); - - protected: // Members - IsaGenerator* m_pIsaGen; }; #endif // __KFD_QCM_TEST__H__ diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDIPCTest.hpp b/projects/rocr-runtime/tests/kfdtest/src/KFDIPCTest.hpp index 961ecbd9e9..3ce0aa12d8 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDIPCTest.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDIPCTest.hpp @@ -23,7 +23,6 @@ #include "KFDBaseComponentTest.hpp" #include "BaseQueue.hpp" -#include "IsaGenerator.hpp" #ifndef __KFD_MEMORY_TEST__H__ #define __KFD_MEMORY_TEST__H__ diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDLocalMemoryTest.cpp b/projects/rocr-runtime/tests/kfdtest/src/KFDLocalMemoryTest.cpp index a27b502f97..b37528c651 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDLocalMemoryTest.cpp +++ 
b/projects/rocr-runtime/tests/kfdtest/src/KFDLocalMemoryTest.cpp @@ -33,18 +33,12 @@ void KFDLocalMemoryTest::SetUp() { KFDBaseComponentTest::SetUp(); - m_pIsaGen = IsaGenerator::Create(m_FamilyId); - ROUTINE_END } void KFDLocalMemoryTest::TearDown() { ROUTINE_START - if (m_pIsaGen) - delete m_pIsaGen; - m_pIsaGen = NULL; - KFDBaseComponentTest::TearDown(); ROUTINE_END @@ -107,7 +101,7 @@ TEST_F(KFDLocalMemoryTest, BasicTest) { srcSysBuffer.Fill(0x01010101); - m_pIsaGen->GetCopyDwordIsa(isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As())); ASSERT_SUCCESS(hsaKmtMapMemoryToGPUNodes(srcLocalBuffer.As(), srcLocalBuffer.Size(), &AlternateVAGPU, mapFlags, 1, reinterpret_cast(&defaultGPUNode))); @@ -164,7 +158,7 @@ TEST_F(KFDLocalMemoryTest, VerifyContentsAfterUnmapAndMap) { SysBufferA.Fill(0x01010101); - m_pIsaGen->GetCopyDwordIsa(isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As())); ASSERT_SUCCESS(queue.Create(defaultGPUNode)); queue.SetSkipWaitConsump(0); @@ -303,7 +297,8 @@ TEST_F(KFDLocalMemoryTest, Fragmentation) { PM4Queue queue; ASSERT_SUCCESS(queue.Create(defaultGPUNode)); HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode); - m_pIsaGen->GetCopyDwordIsa(isaBuffer); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As())); /* Allocate and test memory using the strategy explained at the top */ HSAKMT_STATUS status; diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDLocalMemoryTest.hpp b/projects/rocr-runtime/tests/kfdtest/src/KFDLocalMemoryTest.hpp index 519081cfc0..b3a9b2add6 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDLocalMemoryTest.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDLocalMemoryTest.hpp @@ -26,20 +26,16 @@ #include -#include "IsaGenerator.hpp" #include "KFDBaseComponentTest.hpp" class KFDLocalMemoryTest : public KFDBaseComponentTest { public: - KFDLocalMemoryTest() :m_pIsaGen(NULL) {} + KFDLocalMemoryTest() {} ~KFDLocalMemoryTest() {} protected: 
virtual void SetUp(); virtual void TearDown(); - - protected: // Members - IsaGenerator* m_pIsaGen; }; #endif // __KFD_LOCALMEMORY_TEST__H__ diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDMemoryTest.cpp b/projects/rocr-runtime/tests/kfdtest/src/KFDMemoryTest.cpp index 8cf24ffb1b..4e9cb5fd19 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDMemoryTest.cpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDMemoryTest.cpp @@ -39,360 +39,17 @@ #include "SDMAPacket.hpp" #include "linux/kfd_ioctl.h" -const char* gfx8_ScratchCopyDword = -"\ -shader ScratchCopyDword\n\ -asic(VI)\n\ -type(CS)\n\ -/*copy the parameters from scalar registers to vector registers*/\n\ - v_mov_b32 v0, s0\n\ - v_mov_b32 v1, s1\n\ - v_mov_b32 v2, s2\n\ - v_mov_b32 v3, s3\n\ -/*set up the scratch parameters. This assumes a single 16-reg block.*/\n\ - s_mov_b32 flat_scratch_lo, 8/*2 dwords of scratch per thread*/\n\ - s_mov_b32 flat_scratch_hi, 0/*offset in units of 256bytes*/\n\ -/*copy a dword between the passed addresses*/\n\ - flat_load_dword v4, v[0:1] slc\n\ - s_waitcnt vmcnt(0)&lgkmcnt(0)\n\ - flat_store_dword v[2:3], v4 slc\n\ - \n\ - s_endpgm\n\ - \n\ -end\n\ -"; - -const char* gfx9_ScratchCopyDword = -"\ -shader ScratchCopyDword\n\ -asic(GFX9)\n\ -type(CS)\n\ -/*copy the parameters from scalar registers to vector registers*/\n\ - v_mov_b32 v0, s0\n\ - v_mov_b32 v1, s1\n\ - v_mov_b32 v2, s2\n\ - v_mov_b32 v3, s3\n\ -/*set up the scratch parameters. 
This assumes a single 16-reg block.*/\n\ - s_mov_b32 flat_scratch_lo, s4\n\ - s_mov_b32 flat_scratch_hi, s5\n\ -/*copy a dword between the passed addresses*/\n\ - flat_load_dword v4, v[0:1] slc\n\ - s_waitcnt vmcnt(0)&lgkmcnt(0)\n\ - flat_store_dword v[2:3], v4 slc\n\ - \n\ - s_endpgm\n\ - \n\ -end\n\ -"; -const char* gfx10_ScratchCopyDword = -"\ -shader ScratchCopyDword\n\ -asic(GFX10)\n\ -type(CS)\n\ -wave_size(32)\n\ -/*copy the parameters from scalar registers to vector registers*/\n\ - v_mov_b32 v0, s0\n\ - v_mov_b32 v1, s1\n\ - v_mov_b32 v2, s2\n\ - v_mov_b32 v3, s3\n\ -/*set up the scratch parameters. This assumes a single 16-reg block.*/\n\ - s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_LO), s4\n\ - s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s5\n\ -/*copy a dword between the passed addresses*/\n\ - flat_load_dword v4, v[0:1] slc\n\ - s_waitcnt vmcnt(0)&lgkmcnt(0)\n\ - flat_store_dword v[2:3], v4 slc\n\ - \n\ - s_endpgm\n\ - \n\ -end\n\ -"; - -const char* aldbrn_ScratchCopyDword = -"\ -shader ScratchCopyDword\n\ -asic(ALDEBARAN)\n\ -type(CS)\n\ -/*copy the parameters from scalar registers to vector registers*/\n\ - v_mov_b32 v0, s0\n\ - v_mov_b32 v1, s1\n\ - v_mov_b32 v2, s2\n\ - v_mov_b32 v3, s3\n\ -/*set up the scratch parameters. 
This assumes a single 16-reg block.*/\n\ - s_mov_b32 flat_scratch_lo, s4\n\ - s_mov_b32 flat_scratch_hi, s5\n\ -/*copy a dword between the passed addresses*/\n\ - flat_load_dword v4, v[0:1] slc\n\ - s_waitcnt vmcnt(0)&lgkmcnt(0)\n\ - flat_store_dword v[2:3], v4 slc\n\ - \n\ - s_endpgm\n\ - \n\ -end\n\ -"; - - - -/* Continuously poll src buffer and check buffer value - * After src buffer is filled with specific value (0x5678, - * by host program), fill dst buffer with specific - * value(0x5678) and quit - */ -const char* gfx9_PollMemory = -"\ -shader ReadMemory\n\ -wave_size(32)\n\ -type(CS)\n\ -/* Assume src address in s0, s1 and dst address in s2, s3*/\n\ - s_movk_i32 s18, 0x5678\n\ - LOOP:\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_cmp_eq_i32 s16, s18\n\ - s_cbranch_scc0 LOOP\n\ - s_store_dword s18, s[2:3], 0x0 glc\n\ - s_endpgm\n\ - end\n\ -"; - -/* Similar to gfx9_PollMemory except that the buffer - * polled can be Non-coherant memory. SCC system-level - * cache coherence is not supported in scalar (smem) path. 
- * Use vmem operations with scc - */ -const char* gfx9_PollNCMemory = -"\ -shader ReadMemory\n\ -asic(ALDEBARAN)\n\ -wave_size(32)\n\ -type(CS)\n\ -/* Assume src address in s0, s1 and dst address in s2, s3*/\n\ - v_mov_b32 v6, 0x5678\n\ - v_mov_b32 v0, s0\n\ - v_mov_b32 v1, s1\n\ - LOOP:\n\ - flat_load_dword v4, v[0:1] scc\n\ - v_cmp_eq_u32 vcc, v4, v6\n\ - s_cbranch_vccz LOOP\n\ - v_mov_b32 v0, s2\n\ - v_mov_b32 v1, s3\n\ - flat_store_dword v[0:1], v6 scc\n\ - s_endpgm\n\ - end\n\ -"; - -const char* gfx10_PollMemory = -"\ -shader ReadMemory\n\ -wave_size(32)\n\ -type(CS)\n\ -/* Assume src address in s0, s1 and dst address in s2, s3*/\n\ - s_movk_i32 s18, 0x5678\n\ - v_mov_b32 v0, s2\n\ - v_mov_b32 v1, s3\n\ - v_mov_b32 v2, 0x5678\n\ - LOOP:\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_cmp_eq_i32 s16, s18\n\ - s_cbranch_scc0 LOOP\n\ - flat_store_dword v[0,1], v2 slc\n\ - s_waitcnt vmcnt(0)&lgkmcnt(0)\n\ - s_endpgm\n\ - end\n\ -"; - -/* Input: A buffer of at least 3 dwords. - * DW0: used as a signal. 0xcafe means it is signaled - * DW1: Input buffer for device to read. - * DW2: Output buffer for device to write. 
- * Once receive signal, device will copy DW1 to DW2 - * This shader continously poll the signal buffer, - * Once signal buffer is signaled, it copies input buffer - * to output buffer - */ -const char* gfx9_CopyOnSignal = -"\ -shader CopyOnSignal\n\ -wave_size(32)\n\ -type(CS)\n\ -/* Assume input buffer in s0, s1 */\n\ - s_mov_b32 s18, 0xcafe\n\ -POLLSIGNAL:\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_cmp_eq_i32 s16, s18\n\ - s_cbranch_scc0 POLLSIGNAL\n\ - s_load_dword s17, s[0:1], 0x4 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0)\n\ - s_store_dword s17, s[0:1], 0x8 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0)\n\ - s_endpgm\n\ - end\n\ -"; - -const char* gfx10_CopyOnSignal = -"\ -shader CopyOnSignal\n\ -wave_size(32)\n\ -type(CS)\n\ -/* Assume input buffer in s0, s1 */\n\ - s_add_u32 s2, s0, 0x8\n\ - s_addc_u32 s3, s1, 0x0\n\ - s_mov_b32 s18, 0xcafe\n\ - v_mov_b32 v0, s0\n\ - v_mov_b32 v1, s1\n\ - v_mov_b32 v4, s2\n\ - v_mov_b32 v5, s3\n\ -POLLSIGNAL:\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_cmp_eq_i32 s16, s18\n\ - s_cbranch_scc0 POLLSIGNAL\n\ - s_load_dword s17, s[0:1], 0x4 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0)\n\ - v_mov_b32 v2, s17\n\ - flat_store_dword v[4,5], v2 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0)\n\ - s_endpgm\n\ - end\n\ -"; - -/* Input0: A buffer of at least 2 dwords. - * DW0: used as a signal. Write 0xcafe to signal - * DW1: Write to this buffer for other device to read. 
- * Input1: mmio base address - */ -const char* gfx9_WriteAndSignal = -"\ -shader WriteAndSignal\n\ -wave_size(32)\n\ -type(CS)\n\ -/* Assume input buffer in s0, s1 */\n\ - s_mov_b32 s18, 0xbeef\n\ - s_store_dword s18, s[0:1], 0x4 glc\n\ - s_mov_b32 s18, 0x1\n\ - s_store_dword s18, s[2:3], 0 glc\n\ - s_mov_b32 s18, 0xcafe\n\ - s_store_dword s18, s[0:1], 0x0 glc\n\ - s_endpgm\n\ - end\n\ -"; - -/* Continuously poll the flag at src buffer - * After the flag of s[0:1] is 1 filled, - * copy the value from s[0:1]+4 to dst buffer - */ -const char* gfx9_PollAndCopy = -"\ -shader CopyMemory\n\ -wave_size(32)\n\ -type(CS)\n\ -/* Assume src buffer in s[0:1] and dst buffer in s[2:3]*/\n\ - s_movk_i32 s18, 0x1\n\ - LOOP:\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_cmp_eq_i32 s16, s18\n\ - s_cbranch_scc0 LOOP\n\ - s_load_dword s17, s[0:1], 0x4 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0)\n\ - s_store_dword s17, s[2:3], 0x0 glc:1\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0)\n\ - s_endpgm\n\ - end\n\ -"; - -const char* gfx9aldbrn_PollAndCopy = -"\ -shader CopyMemory\n\ -wave_size(32)\n\ -type(CS)\n\ -/* Assume src buffer in s[0:1] and dst buffer in s[2:3]*/\n\ - v_mov_b32 v0, s0\n\ - v_mov_b32 v1, s1\n\ - v_mov_b32 v18, 0x1\n\ - LOOP:\n\ - flat_load_dword v16, v[0:1] glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0)\n\ - v_cmp_eq_i32 vcc, v16, v18\n\ - s_cbranch_vccz LOOP\n\ - buffer_invl2\n\ - s_load_dword s17, s[0:1], 0x4 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0)\n\ - s_store_dword s17, s[2:3], 0x0 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0)\n\ - buffer_wbl2\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0)\n\ - s_endpgm\n\ - end\n\ -"; - -/* Input0: A buffer of at least 2 dwords. - * DW0: used as a signal. Write 0x1 to signal - * DW1: Write the value from 2nd input buffer - * for other device to read. - * Input1: A buffer of at least 2 dwords. - * DW0: used as the value to be written. 
- */ -const char* gfx9aldbrn_WriteFlagAndValue = -"\ -shader WriteMemory\n\ -wave_size(32)\n\ -type(CS)\n\ -/* Assume two inputs buffer in s[0:1] and s[2:3]*/\n\ - v_mov_b32 v0, s0\n\ - v_mov_b32 v1, s1\n\ - s_load_dword s18, s[2:3], 0x0 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0)\n\ - s_store_dword s18, s[0:1], 0x4 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0)\n\ - buffer_wbl2\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0)\n\ - v_mov_b32 v16, 0x1\n\ - flat_store_dword v[0:1], v16 glc\n\ - s_endpgm\n\ - end\n\ -"; - -const char* gfx10_WriteAndSignal = -"\ -shader WriteAndSignal\n\ -wave_size(32)\n\ -type(CS)\n\ -/* Assume input buffer in s0, s1 */\n\ - s_add_u32 s4, s0, 0x4\n\ - s_addc_u32 s5, s1, 0x0\n\ - v_mov_b32 v0, s0\n\ - v_mov_b32 v1, s1\n\ - v_mov_b32 v2, s2\n\ - v_mov_b32 v3, s3\n\ - v_mov_b32 v4, s4\n\ - v_mov_b32 v5, s5\n\ - v_mov_b32 v18, 0xbeef\n\ - flat_store_dword v[4:5], v18 glc\n\ - v_mov_b32 v18, 0x1\n\ - flat_store_dword v[2:3], v18 glc\n\ - v_mov_b32 v18, 0xcafe\n\ - flat_store_dword v[0:1], v18 glc\n\ - s_endpgm\n\ - end\n\ -"; - -//These gfx9_PullMemory, gfx9_CopyOnSignal, gfx9_WriteAndSignal shaders can be used by both gfx9 and gfx10 - void KFDMemoryTest::SetUp() { ROUTINE_START KFDBaseComponentTest::SetUp(); - m_pIsaGen = IsaGenerator::Create(m_FamilyId); - ROUTINE_END } void KFDMemoryTest::TearDown() { ROUTINE_START - if (m_pIsaGen) - delete m_pIsaGen; - m_pIsaGen = NULL; - KFDBaseComponentTest::TearDown(); ROUTINE_END @@ -508,16 +165,13 @@ TEST_F(KFDMemoryTest, MapUnmapToNodes) { HsaMemoryBuffer dstBuffer(PAGE_SIZE, defaultGPUNode); const char *pReadMemory; - if (m_FamilyId < FAMILY_NV) - pReadMemory = gfx9_PollMemory; - else - pReadMemory = gfx10_PollMemory; - if (m_NodeInfo.IsNodeXGMItoCPU(defaultGPUNode)) /* On A+A system memory is mapped as NC */ - m_pIsaGen->CompileShader(gfx9_PollNCMemory, "ReadMemory", isaBuffer); + pReadMemory = PollNCMemoryIsa; else - m_pIsaGen->CompileShader(pReadMemory, "ReadMemory", isaBuffer); + pReadMemory = PollMemoryIsa; + + 
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(pReadMemory, isaBuffer.As())); PM4Queue pm4Queue; ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode)); @@ -674,7 +328,8 @@ TEST_F(KFDMemoryTest, MemoryRegister) { ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode)); HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->GetCopyDwordIsa(isaBuffer); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As())); /* First submit just so the queues are not empty, and to get the * TLB populated (in case we need to flush TLBs somewhere after @@ -855,16 +510,7 @@ TEST_F(KFDMemoryTest, FlatScratchAccess) { // Initialize the srcBuffer to some fixed value srcMemBuffer.Fill(0x01010101); - const char *pScratchCopyDword; - if (m_FamilyId < FAMILY_AI) - pScratchCopyDword = gfx8_ScratchCopyDword; - else if (m_FamilyId < FAMILY_AL) - pScratchCopyDword = gfx9_ScratchCopyDword; - else if (m_FamilyId == FAMILY_AL) - pScratchCopyDword = aldbrn_ScratchCopyDword; - else - pScratchCopyDword = gfx10_ScratchCopyDword; - m_pIsaGen->CompileShader(pScratchCopyDword, "ScratchCopyDword", isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(ScratchCopyDwordIsa, isaBuffer.As())); const HsaNodeProperties *pNodeProperties = m_NodeInfo.GetNodeProperties(defaultGPUNode); @@ -1728,17 +1374,8 @@ TEST_F(KFDMemoryTest, PtraceAccessInvisibleVram) { // dstBuffer is cpu accessible gtt memory HsaMemoryBuffer dstBuffer(PAGE_SIZE, defaultGPUNode); - const char *pScratchCopyDword; - if (m_FamilyId < FAMILY_AI) - pScratchCopyDword = gfx8_ScratchCopyDword; - else if (m_FamilyId < FAMILY_AL) - pScratchCopyDword = gfx9_ScratchCopyDword; - else if (m_FamilyId == FAMILY_AL) - pScratchCopyDword = aldbrn_ScratchCopyDword; - else - pScratchCopyDword = gfx10_ScratchCopyDword; + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(ScratchCopyDwordIsa, isaBuffer.As())); - m_pIsaGen->CompileShader(pScratchCopyDword, "ScratchCopyDword", isaBuffer); Dispatch dispatch0(isaBuffer); 
dispatch0.SetArgs(mem0, dstBuffer.As()); dispatch0.Submit(queue); @@ -2109,12 +1746,9 @@ TEST_F(KFDMemoryTest, HostHdpFlush) { PM4Queue queue; ASSERT_SUCCESS(queue.Create(defaultGPUNode)); HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - const char *pCopyOnSignal; - if (m_FamilyId < FAMILY_NV) - pCopyOnSignal = gfx9_CopyOnSignal; - else - pCopyOnSignal = gfx10_CopyOnSignal; - m_pIsaGen->CompileShader(pCopyOnSignal, "CopyOnSignal", isaBuffer); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyOnSignalIsa, isaBuffer.As())); + Dispatch dispatch0(isaBuffer); dispatch0.SetArgs(buffer, NULL); dispatch0.Submit(queue); @@ -2234,12 +1868,9 @@ TEST_F(KFDMemoryTest, DeviceHdpFlush) { PM4Queue queue; ASSERT_SUCCESS(queue.Create(nodes[0])); HsaMemoryBuffer isaBuffer(PAGE_SIZE, nodes[0], true/*zero*/, false/*local*/, true/*exec*/); - const char *pCopyOnSignal; - if (m_FamilyId < FAMILY_NV) - pCopyOnSignal = gfx9_CopyOnSignal; - else - pCopyOnSignal = gfx10_CopyOnSignal; - m_pIsaGen->CompileShader(pCopyOnSignal, "CopyOnSignal", isaBuffer); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyOnSignalIsa, isaBuffer.As())); + Dispatch dispatch(isaBuffer); dispatch.SetArgs(buffer, NULL); dispatch.Submit(queue); @@ -2247,12 +1878,9 @@ TEST_F(KFDMemoryTest, DeviceHdpFlush) { PM4Queue queue0; ASSERT_SUCCESS(queue0.Create(nodes[1])); HsaMemoryBuffer isaBuffer0(PAGE_SIZE, nodes[1], true/*zero*/, false/*local*/, true/*exec*/); - const char *pWriteAndSignal; - if (m_FamilyId < FAMILY_NV) - pWriteAndSignal = gfx9_WriteAndSignal; - else - pWriteAndSignal = gfx10_WriteAndSignal; - m_pIsaGen->CompileShader(pWriteAndSignal, "WriteAndSignal", isaBuffer0); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(WriteAndSignalIsa, isaBuffer0.As())); + Dispatch dispatch0(isaBuffer0); dispatch0.SetArgs(buffer, mmioBase); dispatch0.Submit(queue0); @@ -2304,7 +1932,9 @@ TEST_F(KFDMemoryTest, CacheInvalidateOnSdmaWrite) { PM4Queue queue;
ASSERT_SUCCESS(queue.Create(defaultGPUNode)); HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->CompileShader(gfx9_PollMemory, "ReadMemory", isaBuffer); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(PollMemoryIsa, isaBuffer.As())); + Dispatch dispatch(isaBuffer); dispatch.SetArgs(buffer.As(), buffer.As()+dwLocation); dispatch.Submit(queue); @@ -2357,7 +1987,9 @@ TEST_F(KFDMemoryTest, CacheInvalidateOnCPUWrite) { PM4Queue queue; ASSERT_SUCCESS(queue.Create(defaultGPUNode)); HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->CompileShader(gfx9_PollMemory, "ReadMemory", isaBuffer); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(PollMemoryIsa, isaBuffer.As())); + Dispatch dispatch(isaBuffer); dispatch.SetArgs(buffer, buffer+100); dispatch.Submit(queue); @@ -2419,7 +2051,9 @@ TEST_F(KFDMemoryTest, CacheInvalidateOnRemoteWrite) { PM4Queue queue; ASSERT_SUCCESS(queue.Create(defaultGPUNode)); HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->CompileShader(gfx9_PollMemory, "ReadMemory", isaBuffer); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(PollMemoryIsa, isaBuffer.As())); + Dispatch dispatch(isaBuffer); dispatch.SetArgs(buffer.As(), buffer.As()+dwLocation); dispatch.Submit(queue); @@ -2434,7 +2068,9 @@ TEST_F(KFDMemoryTest, CacheInvalidateOnRemoteWrite) { ASSERT_SUCCESS(queue1.Create(nondefaultNode)); buffer.Fill(0x5678, sdmaQueue, dwLocation1*sizeof(int), 4); HsaMemoryBuffer isaBuffer1(PAGE_SIZE, nondefaultNode, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->GetCopyDwordIsa(isaBuffer1); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer1.As())); + Dispatch dispatch1(isaBuffer1); dispatch1.SetArgs(buffer.As()+dwLocation1, buffer.As()); dispatch1.Submit(queue1); @@ -2500,7 +2136,9 @@ TEST_F(KFDMemoryTest, VramCacheCoherenceWithRemoteGPU) { PM4Queue queue;
ASSERT_SUCCESS(queue.Create(defaultGPUNode)); HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->CompileShader(gfx9aldbrn_PollAndCopy, "CopyMemory", isaBuffer); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(PollAndCopyIsa, isaBuffer.As())); + Dispatch dispatch(isaBuffer); dispatch.SetArgs(buffer.As(), buffer.As()+dwLocation); dispatch.Submit(queue); @@ -2515,7 +2153,9 @@ TEST_F(KFDMemoryTest, VramCacheCoherenceWithRemoteGPU) { PM4Queue queue1; ASSERT_SUCCESS(queue1.Create(nondefaultNode)); HsaMemoryBuffer isaBuffer1(PAGE_SIZE, nondefaultNode, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->CompileShader(gfx9aldbrn_WriteFlagAndValue, "WriteMemory", isaBuffer1); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(WriteFlagAndValueIsa, isaBuffer1.As())); + Dispatch dispatch1(isaBuffer1); dispatch1.SetArgs(buffer.As(), buffer.As()+dwSource); dispatch1.Submit(queue1); @@ -2569,7 +2209,9 @@ TEST_F(KFDMemoryTest, VramCacheCoherenceWithCPU) { PM4Queue queue; ASSERT_SUCCESS(queue.Create(defaultGPUNode)); HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->CompileShader(gfx9aldbrn_PollAndCopy, "CopyMemory", isaBuffer); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(PollAndCopyIsa, isaBuffer.As())); + Dispatch dispatch(isaBuffer); dispatch.SetArgs(buffer, buffer+dwLocation); dispatch.Submit(queue); @@ -2608,12 +2250,17 @@ TEST_F(KFDMemoryTest, SramCacheCoherenceWithGPU) { return; } - unsigned int *fineBuffer = NULL; - unsigned int tmp; - int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode(); const int dwLocation = 0x80; + if (!m_NodeInfo.IsNodeXGMItoCPU(defaultGPUNode)) { + LOG() << "Skipping test: XGMI link to CPU is required." 
<< std::endl; + return; + } + + unsigned int *fineBuffer = NULL; + unsigned int tmp; + ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode /* system */, PAGE_SIZE, m_MemoryFlags, reinterpret_cast(&fineBuffer))); ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(fineBuffer, PAGE_SIZE, NULL)); @@ -2627,10 +2274,7 @@ TEST_F(KFDMemoryTest, SramCacheCoherenceWithGPU) { ASSERT_SUCCESS(queue.Create(defaultGPUNode)); HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - if (m_NodeInfo.IsNodeXGMItoCPU(defaultGPUNode)) - m_pIsaGen->CompileShader(gfx9aldbrn_PollAndCopy, "CopyMemory", isaBuffer); - else - m_pIsaGen->CompileShader(gfx9_PollAndCopy, "CopyMemory", isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(PollAndCopyIsa, isaBuffer.As())); Dispatch dispatch(isaBuffer); dispatch.SetArgs(fineBuffer, fineBuffer+dwLocation); diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDMemoryTest.hpp b/projects/rocr-runtime/tests/kfdtest/src/KFDMemoryTest.hpp index ea93395f71..03149e5639 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDMemoryTest.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDMemoryTest.hpp @@ -22,7 +22,6 @@ */ #include "KFDBaseComponentTest.hpp" -#include "IsaGenerator.hpp" #ifndef __KFD_MEMORY_TEST__H__ #define __KFD_MEMORY_TEST__H__ @@ -33,15 +32,13 @@ */ class KFDMemoryTest : public KFDBaseComponentTest { public: - KFDMemoryTest(void) :m_pIsaGen(NULL) {} + KFDMemoryTest(void) {} ~KFDMemoryTest(void) {} protected: virtual void SetUp(); virtual void TearDown(); protected: - IsaGenerator* m_pIsaGen; - void BinarySearchLargestBuffer(int allocNode, const HsaMemFlags &memFlags, HSAuint64 highMB, int nodeToMap, HSAuint64 *lastSizeMB); diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDQMTest.cpp b/projects/rocr-runtime/tests/kfdtest/src/KFDQMTest.cpp index 9b4003b68e..ffc568ebdf 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDQMTest.cpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDQMTest.cpp @@ -39,18 +39,12 
@@ void KFDQMTest::SetUp() { KFDBaseComponentTest::SetUp(); - m_pIsaGen = IsaGenerator::Create(m_FamilyId); - ROUTINE_END } void KFDQMTest::TearDown() { ROUTINE_START - if (m_pIsaGen) - delete m_pIsaGen; - m_pIsaGen = NULL; - KFDBaseComponentTest::TearDown(); ROUTINE_END @@ -677,111 +671,12 @@ TEST_F(KFDQMTest, OverSubscribeCpQueues) { TEST_END } -/* A simple isa loop program with dense mathematic operations - * s1 controls the number iterations of the loop - * This shader can be used by GFX8, GFX9 and GFX10 - */ -static const char *loop_isa = \ -"\ -shader loop_isa\n\ -wave_size(32)\n\ -type(CS)\n\ - s_movk_i32 s0, 0x0008\n\ - s_movk_i32 s1, 0x00ff\n\ - v_mov_b32 v0, 0\n\ - v_mov_b32 v1, 0\n\ - v_mov_b32 v2, 0\n\ - v_mov_b32 v3, 0\n\ - v_mov_b32 v4, 0\n\ - v_mov_b32 v5, 0\n\ - v_mov_b32 v6, 0\n\ - v_mov_b32 v7, 0\n\ - v_mov_b32 v8, 0\n\ - v_mov_b32 v9, 0\n\ - v_mov_b32 v10, 0\n\ - v_mov_b32 v11, 0\n\ - v_mov_b32 v12, 0\n\ - v_mov_b32 v13, 0\n\ - v_mov_b32 v14, 0\n\ - v_mov_b32 v15, 0\n\ - v_mov_b32 v16, 0\n\ - LOOP:\n\ - s_mov_b32 s8, s4\n\ - s_mov_b32 s9, s1\n\ - s_mov_b32 s10, s6\n\ - s_mov_b32 s11, s7\n\ - s_cmp_le_i32 s1, s0\n\ - s_cbranch_scc1 END_OF_PGM\n\ - s_buffer_load_dwordx8 s[8:15], s[8:11], 0x10\n\ - v_add_f32 v0, 2.0, v0\n\ - v_cvt_f32_i32 v17, s1\n\ -s_waitcnt lgkmcnt(0)\n\ - v_add_f32 v18, s8, v17\n\ - v_add_f32 v19, s9, v17\n\ - v_add_f32 v20, s10, v17\n\ - v_add_f32 v21, s11, v17\n\ - v_add_f32 v22, s12, v17\n\ - v_add_f32 v23, s13, v17\n\ - v_add_f32 v24, s14, v17\n\ - v_add_f32 v17, s15, v17\n\ - v_log_f32 v25, v18\n\ - v_mul_f32 v25, v22, v25\n\ - v_exp_f32 v25, v25\n\ - v_log_f32 v26, v19\n\ - v_mul_f32 v26, v23, v26\n\ - v_exp_f32 v26, v26\n\ - v_log_f32 v27, v20\n\ - v_mul_f32 v27, v24, v27\n\ - v_exp_f32 v27, v27\n\ - v_log_f32 v28, v21\n\ - v_mul_f32 v28, v17, v28\n\ - v_exp_f32 v28, v28\n\ - v_add_f32 v5, v5, v25\n\ - v_add_f32 v6, v6, v26\n\ - v_add_f32 v7, v7, v27\n\ - v_add_f32 v8, v8, v28\n\ - v_mul_f32 v18, 0x3fb8aa3b, v18\n\ - 
v_exp_f32 v18, v18\n\ - v_mul_f32 v19, 0x3fb8aa3b, v19\n\ - v_exp_f32 v19, v19\n\ - v_mul_f32 v20, 0x3fb8aa3b, v20\n\ - v_exp_f32 v20, v20\n\ - v_mul_f32 v21, 0x3fb8aa3b, v21\n\ - v_exp_f32 v21, v21\n\ - v_add_f32 v9, v9, v18\n\ - v_add_f32 v10, v10, v19\n\ - v_add_f32 v11, v11, v20\n\ - v_add_f32 v12, v12, v21\n\ - v_sqrt_f32 v18, v22\n\ - v_sqrt_f32 v19, v23\n\ - v_sqrt_f32 v20, v24\n\ - v_sqrt_f32 v21, v17\n\ - v_add_f32 v13, v13, v18\n\ - v_add_f32 v14, v14, v19\n\ - v_add_f32 v15, v15, v20\n\ - v_add_f32 v16, v16, v21\n\ - v_rsq_f32 v18, v22\n\ - v_rsq_f32 v19, v23\n\ - v_rsq_f32 v20, v24\n\ - v_rsq_f32 v17, v17\n\ - v_add_f32 v1, v1, v18\n\ - v_add_f32 v2, v2, v19\n\ - v_add_f32 v3, v3, v20\n\ - v_add_f32 v4, v4, v17\n\ - s_add_u32 s0, s0, 1\n\ - s_branch LOOP\n\ - END_OF_PGM:\n\ - s_endpgm\n\ - end\n\ -"; - HSAint64 KFDQMTest::TimeConsumedwithCUMask(int node, uint32_t* mask, uint32_t mask_count) { HsaMemoryBuffer isaBuffer(PAGE_SIZE, node, true/*zero*/, false/*local*/, true/*exec*/); HsaMemoryBuffer dstBuffer(PAGE_SIZE, node, true, false, false); HsaMemoryBuffer ctlBuffer(PAGE_SIZE, node, true, false, false); - m_pIsaGen = IsaGenerator::Create(m_FamilyId); - m_pIsaGen->CompileShader(loop_isa, "loop_isa", isaBuffer); + EXPECT_SUCCESS(m_pAsm->RunAssembleBuf(LoopIsa, isaBuffer.As())); Dispatch dispatch(isaBuffer); dispatch.SetDim(1024, 16, 16); @@ -838,7 +733,6 @@ TEST_F(KFDQMTest, BasicCuMaskingLinear) { TEST_START(TESTPROFILE_RUNALL); int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode(); ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node"; - m_pIsaGen = IsaGenerator::Create(m_FamilyId); if (m_FamilyId >= FAMILY_VI) { const HsaNodeProperties *pNodeProperties = m_NodeInfo.GetNodeProperties(defaultGPUNode); @@ -982,7 +876,7 @@ TEST_F(KFDQMTest, QueuePriorityOnDifferentPipe) { HSAint32 *syncBuffer = syncBuf.As(); HsaMemoryBuffer isaBuffer(PAGE_SIZE, node, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->CompileShader(loop_isa, "loop_isa", 
isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(LoopIsa, isaBuffer.As())); Dispatch dispatch[2] = { Dispatch(isaBuffer, true), @@ -1047,7 +941,7 @@ TEST_F(KFDQMTest, QueuePriorityOnSamePipe) { HSAint32 *syncBuffer = syncBuf.As(); HsaMemoryBuffer isaBuffer(PAGE_SIZE, node, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->CompileShader(loop_isa, "loop_isa", isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(LoopIsa, isaBuffer.As())); Dispatch dispatch[2] = { Dispatch(isaBuffer, true), @@ -1140,7 +1034,7 @@ TEST_F(KFDQMTest, EmptyDispatch) { HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->GetNoopIsa(isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(NoopIsa, isaBuffer.As())); SyncDispatch(isaBuffer, NULL, NULL); @@ -1159,7 +1053,7 @@ TEST_F(KFDQMTest, SimpleWriteDispatch) { srcBuffer.Fill(0x01010101); - m_pIsaGen->GetCopyDwordIsa(isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As())); SyncDispatch(isaBuffer, srcBuffer.As(), destBuffer.As()); @@ -1194,7 +1088,7 @@ TEST_F(KFDQMTest, MultipleCpQueuesStressDispatch) { destBuffer.Fill(0xFF); - m_pIsaGen->GetCopyDwordIsa(isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As())); for (i = 0; i < MAX_CP_QUEUES; ++i) ASSERT_SUCCESS(queues[i].Create(defaultGPUNode)) << " QueueId=" << i; @@ -1533,7 +1427,7 @@ TEST_F(KFDQMTest, Atomics) { PM4Queue queue; - m_pIsaGen->GetAtomicIncIsa(isaBuf); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(AtomicIncIsa, isaBuf.As())); Dispatch dispatch(isaBuf); dispatch.SetArgs(destBuf.As(), NULL); @@ -1598,10 +1492,12 @@ TEST_F(KFDQMTest, mGPUShareBO) { srcNodeMem.Fill(0x05050505); - m_pIsaGen->GetCopyDwordIsa(isaBufferSrc); + ASSERT_SUCCESS(m_pAsm->RunAssemble(CopyDwordIsa)); + + m_pAsm->CopyInstrStream(isaBufferSrc.As()); SyncDispatch(isaBufferSrc, srcNodeMem.As(), shared_addr.As(), src_node); - m_pIsaGen->GetCopyDwordIsa(isaBufferDst); + 
m_pAsm->CopyInstrStream(isaBufferDst.As()); SyncDispatch(isaBufferDst, shared_addr.As(), dstNodeMem.As(), dst_node); EXPECT_EQ(dstNodeMem.As()[0], 0x05050505); diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDQMTest.hpp b/projects/rocr-runtime/tests/kfdtest/src/KFDQMTest.hpp index b0d3f66073..dfc36d17d4 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDQMTest.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDQMTest.hpp @@ -27,13 +27,12 @@ #include #include "PM4Queue.hpp" -#include "IsaGenerator.hpp" #include "KFDBaseComponentTest.hpp" #include "Dispatch.hpp" class KFDQMTest : public KFDBaseComponentTest { public: - KFDQMTest():m_pIsaGen(NULL) {} + KFDQMTest() {} ~KFDQMTest() {} @@ -49,7 +48,6 @@ class KFDQMTest : public KFDBaseComponentTest { const double CuVariance = 0.15; const double CuNegVariance = 1.0 - CuVariance; const double CuPosVariance = 1.0 + CuVariance; - IsaGenerator* m_pIsaGen; }; #endif // __KFD_QCM_TEST__H__ diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDSVMEvictTest.cpp b/projects/rocr-runtime/tests/kfdtest/src/KFDSVMEvictTest.cpp index d41aedac74..319b054a64 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDSVMEvictTest.cpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDSVMEvictTest.cpp @@ -234,131 +234,6 @@ TEST_F(KFDSVMEvictTest, BasicTest) { TEST_END } -/* Shader to read local buffers using multiple wavefronts in parallel - * until address buffer is filled with specific value 0x5678 by host program, - * then each wavefront fills value 0x5678 at corresponding result buffer and quit - * - * initial state: - * s[0:1] - address buffer base address - * s[2:3] - result buffer base address - * s4 - workgroup id - * v0 - workitem id, always 0 because NUM_THREADS_X(number of threads) in workgroup set to 1 - * registers: - * v0 - calculated workitem id, v0 = v0 + s4 * NUM_THREADS_X - * v[2:3] - address of corresponding local buf address offset: s[0:1] + v0 * 8 - * v[4:5] - corresponding output buf address: s[2:3] + v0 * 
4 - * v[6:7] - local buf address used for read test - */ -static const char* gfx9_ReadMemory = -"\ - shader ReadMemory\n\ - type(CS)\n\ - \n\ - // compute address of corresponding output buffer\n\ - v_mov_b32 v0, s4 // use workgroup id as index\n\ - v_lshlrev_b32 v0, 2, v0 // v0 *= 4\n\ - v_add_co_u32 v4, vcc, s2, v0 // v[4:5] = s[2:3] + v0 * 4\n\ - v_mov_b32 v5, s3\n\ - v_add_u32 v5, vcc_lo, v5\n\ - \n\ - // compute input buffer offset used to store corresponding local buffer address\n\ - v_lshlrev_b32 v0, 1, v0 // v0 *= 8\n\ - v_add_co_u32 v2, vcc, s0, v0 // v[2:3] = s[0:1] + v0 * 8\n\ - v_mov_b32 v3, s1\n\ - v_add_u32 v3, vcc_lo, v3\n\ - \n\ - // load 64bit local buffer address stored at v[2:3] to v[6:7]\n\ - flat_load_dwordx2 v[6:7], v[2:3] slc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\ - \n\ - v_mov_b32 v8, 0x5678\n\ - s_movk_i32 s8, 0x5678\n\ -L_REPEAT:\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\ - s_cmp_eq_i32 s16, s8\n\ - s_cbranch_scc1 L_QUIT // if notified to quit by host\n\ - // loop read 64M local buffer starting at v[6:7]\n\ - // every 4k page only read once\n\ - v_mov_b32 v9, 0\n\ - v_mov_b32 v10, 0x1000 // 4k page\n\ - v_mov_b32 v11, 0x4000000 // 64M size\n\ - v_mov_b32 v12, v6\n\ - v_mov_b32 v13, v7\n\ -L_LOOP_READ:\n\ - flat_load_dwordx2 v[14:15], v[12:13] slc\n\ - v_add_u32 v9, v9, v10 \n\ - v_add_co_u32 v12, vcc, v12, v10\n\ - v_add_u32 v13, vcc_lo, v13\n\ - v_cmp_lt_u32 vcc, v9, v11\n\ - s_cbranch_vccnz L_LOOP_READ\n\ - s_branch L_REPEAT\n\ -L_QUIT:\n\ - flat_store_dword v[4:5], v8\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory writes to finish\n\ - s_endpgm\n\ - end\n\ -"; - -static const char* gfx8_ReadMemory = -"\ - shader ReadMemory\n\ - asic(VI)\n\ - type(CS)\n\ - \n\ - // compute address of corresponding output buffer\n\ - v_mov_b32 v0, s4 // use workgroup id as index\n\ - v_lshlrev_b32 v0, 2, v0 // v0 *= 4\n\ - v_add_u32 v4, vcc, 
s2, v0 // v[4:5] = s[2:3] + v0 * 4\n\ - v_mov_b32 v5, s3\n\ - v_addc_u32 v5, vcc, v5, 0, vcc\n\ - \n\ - // compute input buffer offset used to store corresponding local buffer address\n\ - v_lshlrev_b32 v0, 1, v0 // v0 *= 8\n\ - v_add_u32 v2, vcc, s0, v0 // v[2:3] = s[0:1] + v0 * 8\n\ - v_mov_b32 v3, s1\n\ - v_addc_u32 v3, vcc, v3, 0, vcc\n\ - \n\ - // load 64bit local buffer address stored at v[2:3] to v[6:7]\n\ - flat_load_dwordx2 v[6:7], v[2:3] slc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\ - \n\ - v_mov_b32 v8, 0x5678\n\ - s_movk_i32 s8, 0x5678\n\ -L_REPEAT:\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\ - s_cmp_eq_i32 s16, s8\n\ - s_cbranch_scc1 L_QUIT // if notified to quit by host\n\ - // loop read 64M local buffer starting at v[6:7]\n\ - // every 4k page only read once\n\ - v_mov_b32 v9, 0\n\ - v_mov_b32 v10, 0x1000 // 4k page\n\ - v_mov_b32 v11, 0x4000000 // 64M size\n\ - v_mov_b32 v12, v6\n\ - v_mov_b32 v13, v7\n\ -L_LOOP_READ:\n\ - flat_load_dwordx2 v[14:15], v[12:13] slc\n\ - v_add_u32 v9, vcc, v9, v10 \n\ - v_add_u32 v12, vcc, v12, v10\n\ - v_addc_u32 v13, vcc, v13, 0, vcc\n\ - v_cmp_lt_u32 vcc, v9, v11\n\ - s_cbranch_vccnz L_LOOP_READ\n\ - s_branch L_REPEAT\n\ -L_QUIT:\n\ - flat_store_dword v[4:5], v8\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory writes to finish\n\ - s_endpgm\n\ - end\n\ -"; - -std::string KFDSVMEvictTest::CreateShader() { - if (m_FamilyId >= FAMILY_AI) - return gfx9_ReadMemory; - else - return gfx8_ReadMemory; -} - /* Evict and restore queue test * * N_PROCESSES processes read all local buffers in parallel while buffers are evicted and restored @@ -434,7 +309,7 @@ TEST_F(KFDSVMEvictTest, QueueTest) { for (i = 0; i < wavefront_num; i++) *(localBufAddr + i) = pBuffers[i]; - m_pIsaGen->CompileShader(CreateShader().c_str(), "ReadMemory", isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(ReadMemoryIsa, isaBuffer.As())); PM4Queue 
pm4Queue; ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode)); diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDSVMEvictTest.hpp b/projects/rocr-runtime/tests/kfdtest/src/KFDSVMEvictTest.hpp index 2b8a1de957..3f26287cc0 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDSVMEvictTest.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDSVMEvictTest.hpp @@ -28,7 +28,6 @@ #include #include "KFDLocalMemoryTest.hpp" #include "KFDBaseComponentTest.hpp" -#include "IsaGenerator.hpp" // @class KFDEvictTest // Test eviction and restore procedure using two processes diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDSVMRangeTest.cpp b/projects/rocr-runtime/tests/kfdtest/src/KFDSVMRangeTest.cpp index 6aad683f91..283a567ff4 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDSVMRangeTest.cpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDSVMRangeTest.cpp @@ -34,8 +34,6 @@ void KFDSVMRangeTest::SetUp() { KFDBaseComponentTest::SetUp(); - m_pIsaGen = IsaGenerator::Create(m_FamilyId); - SVMSetXNACKMode(); ROUTINE_END @@ -44,10 +42,6 @@ void KFDSVMRangeTest::SetUp() { void KFDSVMRangeTest::TearDown() { ROUTINE_START - if (m_pIsaGen) - delete m_pIsaGen; - m_pIsaGen = NULL; - SVMRestoreXNACKMode(); KFDBaseComponentTest::TearDown(); @@ -80,7 +74,7 @@ TEST_F(KFDSVMRangeTest, BasicSystemMemTest) { srcSysBuffer.Fill(0x01010101); - m_pIsaGen->GetCopyDwordIsa(isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As())); ASSERT_SUCCESS(queue.Create(defaultGPUNode)); queue.SetSkipWaitConsump(0); @@ -364,7 +358,8 @@ TEST_F(KFDSVMRangeTest, EvictSystemRangeTest) { ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode)); HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->GetCopyDwordIsa(isaBuffer); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As())); Dispatch dispatch0(isaBuffer); dispatch0.SetArgs(srcBuffer.As(), dstBuffer.As()); @@ -458,7 +453,8 @@ TEST_F(KFDSVMRangeTest, 
PartialUnmapSysMemTest) { munmap(pBuf2, Buf2Size); - m_pIsaGen->GetCopyDwordIsa(isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As())); + ASSERT_SUCCESS(queue.Create(defaultGPUNode)); Dispatch dispatch(isaBuffer); @@ -507,7 +503,7 @@ TEST_F(KFDSVMRangeTest, BasicVramTest) { srcSysBuffer.Fill(0x01010101); - m_pIsaGen->GetCopyDwordIsa(isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As())); ASSERT_SUCCESS(queue.Create(defaultGPUNode)); queue.SetSkipWaitConsump(0); @@ -943,7 +939,9 @@ TEST_F(KFDSVMRangeTest, MigratePolicyTest) { #ifdef USE_PM4_QUEUE_TRIGGER_VM_FAULT HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode); PM4Queue queue; - m_pIsaGen->GetCopyDwordIsa(isaBuffer); + + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As())); + ASSERT_SUCCESS(queue.Create(defaultGPUNode)); for (HSAuint64 i = 0; i < BufferSize / 8; i += 512) { diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDSVMRangeTest.hpp b/projects/rocr-runtime/tests/kfdtest/src/KFDSVMRangeTest.hpp index 88bddd94a8..03a245dba3 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDSVMRangeTest.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDSVMRangeTest.hpp @@ -26,21 +26,17 @@ #include -#include "IsaGenerator.hpp" #include "KFDBaseComponentTest.hpp" class KFDSVMRangeTest : public KFDBaseComponentTest { public: - KFDSVMRangeTest() :m_pIsaGen(NULL) {} + KFDSVMRangeTest() {} ~KFDSVMRangeTest() {} void SplitRangeTest(int defaultGPUNode, int prefetch_location); protected: virtual void SetUp(); virtual void TearDown(); - - protected: // Members - IsaGenerator* m_pIsaGen; }; #endif // __KFD_LOCALMEMORY_TEST__H__ diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDTestUtil.cpp b/projects/rocr-runtime/tests/kfdtest/src/KFDTestUtil.cpp index 476e0bb1ce..2eddc8857b 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDTestUtil.cpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDTestUtil.cpp @@ -231,6 +231,12 @@ bool isTonga(const 
HsaNodeProperties *props) { return false; } +const uint32_t GetGfxVersion(const HsaNodeProperties *props) { + return ((props->EngineId.ui32.Major << 16) | + (props->EngineId.ui32.Minor << 8) | + (props->EngineId.ui32.Stepping)); +} + HSAuint64 GetSystemTickCountInMicroSec() { struct timeval t; gettimeofday(&t, 0); diff --git a/projects/rocr-runtime/tests/kfdtest/src/KFDTestUtil.hpp b/projects/rocr-runtime/tests/kfdtest/src/KFDTestUtil.hpp index 7c2f9c61ce..938ff8bf69 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/KFDTestUtil.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/KFDTestUtil.hpp @@ -52,6 +52,7 @@ bool is_dgpu(); bool isTonga(const HsaNodeProperties *props); bool hasPciAtomicsSupport(int node); unsigned int FamilyIdFromNode(const HsaNodeProperties *props); +const uint32_t GetGfxVersion(const HsaNodeProperties *props); void GetHwQueueInfo(const HsaNodeProperties *props, unsigned int *p_num_cp_queues, diff --git a/projects/rocr-runtime/tests/kfdtest/src/RDMATest.cpp b/projects/rocr-runtime/tests/kfdtest/src/RDMATest.cpp index 973dbdcdc8..d44d69895e 100644 --- a/projects/rocr-runtime/tests/kfdtest/src/RDMATest.cpp +++ b/projects/rocr-runtime/tests/kfdtest/src/RDMATest.cpp @@ -34,16 +34,11 @@ void RDMATest::SetUp() { KFDBaseComponentTest::SetUp(); - m_pIsaGen = IsaGenerator::Create(m_FamilyId); - ROUTINE_END } void RDMATest::TearDown() { ROUTINE_START - if (m_pIsaGen) - delete m_pIsaGen; - m_pIsaGen = NULL; KFDBaseComponentTest::TearDown(); @@ -77,7 +72,8 @@ TEST_F(RDMATest, GPUDirect) { srcSysBuffer.Fill(0xfe); /* Put 'copy dword' command to ISA buffer */ - m_pIsaGen->GetCopyDwordIsa(isaBuffer); + ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As())); + ASSERT_SUCCESS(queue.Create(defaultGPUNode)); Dispatch dispatch(isaBuffer); diff --git a/projects/rocr-runtime/tests/kfdtest/src/RDMATest.hpp b/projects/rocr-runtime/tests/kfdtest/src/RDMATest.hpp index 3e4b2331aa..1d16853838 100644 --- 
a/projects/rocr-runtime/tests/kfdtest/src/RDMATest.hpp +++ b/projects/rocr-runtime/tests/kfdtest/src/RDMATest.hpp @@ -26,20 +26,16 @@ #include -#include "IsaGenerator.hpp" #include "KFDBaseComponentTest.hpp" class RDMATest : public KFDBaseComponentTest { public: - RDMATest():m_pIsaGen(NULL) {} + RDMATest() {} ~RDMATest() {} protected: virtual void SetUp(); virtual void TearDown(); - - protected: // Members - IsaGenerator* m_pIsaGen; }; #endif // __RDMA_TEST__H__ diff --git a/projects/rocr-runtime/tests/kfdtest/src/ShaderStore.cpp b/projects/rocr-runtime/tests/kfdtest/src/ShaderStore.cpp new file mode 100644 index 0000000000..8b40351f04 --- /dev/null +++ b/projects/rocr-runtime/tests/kfdtest/src/ShaderStore.cpp @@ -0,0 +1,609 @@ +/* + * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "ShaderStore.hpp" + +/** + * KFDASMTest List + */ + +const std::vector ShaderList = { + NoopIsa, + CopyDwordIsa, + InfiniteLoopIsa, + AtomicIncIsa, + ScratchCopyDwordIsa, + PollMemoryIsa, + CopyOnSignalIsa, + PollAndCopyIsa, + WriteFlagAndValueIsa, + WriteAndSignalIsa, + LoopIsa, + IterateIsa, + ReadMemoryIsa, + GwsInitIsa, + GwsAtomicIncreaseIsa, +}; + +/** + * Macros + */ + +/* Create macro for portable v_add_co_u32, v_add_co_ci_u32, + * and v_cmp_lt_u32 + */ +#define SHADER_MACROS \ + " .text\n"\ + " .macro V_ADD_CO_U32 vdst, src0, vsrc1\n"\ + " .if (.amdgcn.gfx_generation_number >= 10)\n"\ + " v_add_co_u32 \\vdst, vcc_lo, \\src0, \\vsrc1\n"\ + " .elseif (.amdgcn.gfx_generation_number >= 9)\n"\ + " v_add_co_u32 \\vdst, vcc, \\src0, \\vsrc1\n"\ + " .else\n"\ + " v_add_u32 \\vdst, vcc, \\src0, \\vsrc1\n"\ + " .endif\n"\ + " .endm\n"\ + " .macro V_ADD_CO_CI_U32 vdst, src0, vsrc1\n"\ + " .if (.amdgcn.gfx_generation_number >= 10)\n"\ + " v_add_co_ci_u32 \\vdst, vcc_lo, \\src0, \\vsrc1, vcc_lo\n"\ + " .elseif (.amdgcn.gfx_generation_number >= 9)\n"\ + " v_addc_co_u32 \\vdst, vcc, \\src0, \\vsrc1, vcc\n"\ + " .else\n"\ + " v_addc_u32 \\vdst, vcc, \\src0, \\vsrc1, vcc\n"\ + " .endif\n"\ + " .endm\n"\ + " .macro V_CMP_LT_U32 src0, vsrc1\n"\ + " .if (.amdgcn.gfx_generation_number >= 10)\n"\ + " v_cmp_lt_u32 vcc_lo, \\src0, \\vsrc1\n"\ + " .else\n"\ + " v_cmp_lt_u32 vcc, \\src0, \\vsrc1\n"\ + " .endif\n"\ + " .endm\n" + +/** + * Common + */ + +const char *NoopIsa = R"( + .text + s_endpgm +)"; + +const char *CopyDwordIsa = R"( + .text + v_mov_b32 v0, s0 + v_mov_b32 v1, s1 + v_mov_b32 v2, s2 + v_mov_b32 v3, s3 + flat_load_dword v4, v[0:1] glc slc + s_waitcnt 0 + flat_store_dword v[2:3], v4 glc slc + s_endpgm +)"; + +const char *InfiniteLoopIsa = R"( + .text + LOOP: + s_branch LOOP + s_endpgm +)"; + +const char *AtomicIncIsa = R"( + .text + v_mov_b32 v0, s0 + v_mov_b32 v1, s1 + .if (.amdgcn.gfx_generation_number >= 8) + v_mov_b32 v2, 1 + flat_atomic_add 
v3, v[0:1], v2 glc slc + .else + v_mov_b32 v2, -1 + flat_atomic_inc v3, v[0:1], v2 glc slc + .endif + s_waitcnt 0 + s_endpgm +)"; + +/** + * KFDMemoryTest + */ + +const char *ScratchCopyDwordIsa = R"( + .text + // Copy the parameters from scalar registers to vector registers + .if (.amdgcn.gfx_generation_number >= 9) + v_mov_b32 v0, s0 + v_mov_b32 v1, s1 + v_mov_b32 v2, s2 + v_mov_b32 v3, s3 + .else + v_mov_b32_e32 v0, s0 + v_mov_b32_e32 v1, s1 + v_mov_b32_e32 v2, s2 + v_mov_b32_e32 v3, s3 + .endif + // Setup the scratch parameters. This assumes a single 16-reg block + .if (.amdgcn.gfx_generation_number >= 10) + s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 + s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 + .elseif (.amdgcn.gfx_generation_number == 9) + s_mov_b32 flat_scratch_lo, s4 + s_mov_b32 flat_scratch_hi, s5 + .else + s_mov_b32 flat_scratch_lo, 8 + s_mov_b32 flat_scratch_hi, 0 + .endif + // Copy a dword between the passed addresses + flat_load_dword v4, v[0:1] slc + s_waitcnt vmcnt(0) & lgkmcnt(0) + flat_store_dword v[2:3], v4 slc + s_endpgm +)"; + +/* Continuously poll src buffer and check buffer value + * After src buffer is filled with specific value (0x5678, + * by host program), fill dst buffer with specific + * value(0x5678) and quit + */ +const char *PollMemoryIsa = R"( + .text + // Assume src address in s0, s1, and dst address in s2, s3 + s_movk_i32 s18, 0x5678 + .if (.amdgcn.gfx_generation_number >= 10) + v_mov_b32 v0, s2 + v_mov_b32 v1, s3 + v_mov_b32 v2, 0x5678 + .endif + LOOP: + s_load_dword s16, s[0:1], 0x0 glc + s_cmp_eq_i32 s16, s18 + s_cbranch_scc0 LOOP + .if (.amdgcn.gfx_generation_number >= 10) + flat_store_dword v[0:1], v2 slc + .else + s_store_dword s18, s[2:3], 0x0 glc + .endif + s_endpgm +)"; + +/* Similar to PollMemoryIsa except that the buffer + * polled can be non-coherent memory. SCC system-level + * cache coherence is not supported in scalar (smem) path. 
+ * Use vmem operations with scc + * + * Note: Only works on Aldebaran, and even then the scc modifier + * has been defeatured. This shader is more or less + * deprecated. + */ +const char *PollNCMemoryIsa = R"( + .text + // Assume src address in s0, s1, and dst address in s2, s3 + v_mov_b32 v6, 0x5678 + v_mov_b32 v0, s0 + v_mov_b32 v1, s1 + LOOP: + flat_load_dword v4, v[0:1] scc + v_cmp_eq_u32 vcc, v4, v6 + s_cbranch_vccz LOOP + v_mov_b32 v0, s2 + v_mov_b32 v1, s3 + flat_store_dword v[0:1], v6 scc + s_endpgm +)"; + +/* Input: A buffer of at least 3 dwords. + * DW0: used as a signal. 0xcafe means it is signaled + * DW1: Input buffer for device to read. + * DW2: Output buffer for device to write. + * Once the signal is received, the device copies DW1 to DW2. + * This shader continuously polls the signal buffer. + * Once the signal buffer is signaled, it copies input buffer + * to output buffer + */ +const char *CopyOnSignalIsa = R"( + .text + // Assume input buffer in s0, s1 + .if (.amdgcn.gfx_generation_number >= 10) + s_add_u32 s2, s0, 0x8 + s_addc_u32 s3, s1, 0x0 + s_mov_b32 s18, 0xcafe + v_mov_b32 v0, s0 + v_mov_b32 v1, s1 + v_mov_b32 v4, s2 + v_mov_b32 v5, s3 + .else + s_mov_b32 s18, 0xcafe + .endif + POLLSIGNAL: + s_load_dword s16, s[0:1], 0x0 glc + s_cmp_eq_i32 s16, s18 + s_cbranch_scc0 POLLSIGNAL + s_load_dword s17, s[0:1], 0x4 glc + s_waitcnt vmcnt(0) & lgkmcnt(0) + .if (.amdgcn.gfx_generation_number >= 10) + v_mov_b32 v2, s17 + flat_store_dword v[4:5], v2 glc + .else + s_store_dword s17, s[0:1], 0x8 glc + .endif + s_waitcnt vmcnt(0) & lgkmcnt(0) + s_endpgm +)"; + +/* Continuously poll the flag at src buffer + * After the flag at s[0:1] is set to 1, + * copy the value from s[0:1]+4 to dst buffer + * + * Note: Only works on GFX9 (only used in + * aldebaran tests) + */ +const char *PollAndCopyIsa = R"( + .text + // Assume src buffer in s[0:1] and dst buffer in s[2:3] + .if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_stepping == 10) + // Path for Aldebaran + 
v_mov_b32 v0, s0 + v_mov_b32 v1, s1 + v_mov_b32 v18, 0x1 + LOOP_ALDBRN: + flat_load_dword v16, v[0:1] glc + s_waitcnt vmcnt(0) & lgkmcnt(0) + v_cmp_eq_i32 vcc, v16, v18 + s_cbranch_vccz LOOP_ALDBRN + buffer_invl2 + s_load_dword s17, s[0:1], 0x4 glc + s_waitcnt vmcnt(0) & lgkmcnt(0) + s_store_dword s17, s[2:3], 0x0 glc + s_waitcnt vmcnt(0) & lgkmcnt(0) + buffer_wbl2 + .elseif (.amdgcn.gfx_generation_number == 9) + s_movk_i32 s18, 0x1 + LOOP: + s_load_dword s16, s[0:1], 0x0 glc + s_cmp_eq_i32 s16, s18 + s_cbranch_scc0 LOOP + s_load_dword s17, s[0:1], 0x4 glc + s_waitcnt vmcnt(0) & lgkmcnt(0) + s_store_dword s17, s[2:3], 0x0 glc + .endif + s_waitcnt vmcnt(0) & lgkmcnt(0) + s_endpgm +)"; + +/* Input0: A buffer of at least 2 dwords. + * DW0: used as a signal. Write 0x1 to signal + * DW1: Write the value from 2nd input buffer + * for other device to read. + * Input1: A buffer of at least 2 dwords. + * DW0: used as the value to be written. + * + * Note: Only works on Aldebaran + */ +const char *WriteFlagAndValueIsa = R"( + .text + // Assume two inputs buffer in s[0:1] and s[2:3] + .if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_stepping == 10) + v_mov_b32 v0, s0 + v_mov_b32 v1, s1 + s_load_dword s18, s[2:3], 0x0 glc + s_waitcnt vmcnt(0) & lgkmcnt(0) + s_store_dword s18, s[0:1], 0x4 glc + s_waitcnt vmcnt(0) & lgkmcnt(0) + buffer_wbl2 + s_waitcnt vmcnt(0) & lgkmcnt(0) + v_mov_b32 v16, 0x1 + flat_store_dword v[0:1], v16 glc + .endif + s_endpgm +)"; + +/* Input0: A buffer of at least 2 dwords. + * DW0: used as a signal. Write 0xcafe to signal + * DW1: Write to this buffer for other device to read. 
+ * Input1: mmio base address + */ +const char *WriteAndSignalIsa = R"( + .text + // Assume input buffer in s0, s1 + .if (.amdgcn.gfx_generation_number >= 10) + s_add_u32 s4, s0, 0x4 + s_addc_u32 s5, s1, 0x0 + v_mov_b32 v0, s0 + v_mov_b32 v1, s1 + v_mov_b32 v2, s2 + v_mov_b32 v3, s3 + v_mov_b32 v4, s4 + v_mov_b32 v5, s5 + v_mov_b32 v18, 0xbeef + flat_store_dword v[4:5], v18 glc + v_mov_b32 v18, 0x1 + flat_store_dword v[2:3], v18 glc + v_mov_b32 v18, 0xcafe + flat_store_dword v[0:1], v18 glc + .else + s_mov_b32 s18, 0xbeef + s_store_dword s18, s[0:1], 0x4 glc + s_mov_b32 s18, 0x1 + s_store_dword s18, s[2:3], 0 glc + s_mov_b32 s18, 0xcafe + s_store_dword s18, s[0:1], 0x0 glc + .endif + s_endpgm +)"; + +/** + * KFDQMTest + */ + +/* A simple isa loop program with dense mathematic operations + * s1 controls the number iterations of the loop + * This shader can be used by GFX8, GFX9 and GFX10 + */ +const char *LoopIsa = R"( + .text + s_movk_i32 s0, 0x0008 + s_movk_i32 s1, 0x00ff + v_mov_b32 v0, 0 + v_mov_b32 v1, 0 + v_mov_b32 v2, 0 + v_mov_b32 v3, 0 + v_mov_b32 v4, 0 + v_mov_b32 v5, 0 + v_mov_b32 v6, 0 + v_mov_b32 v7, 0 + v_mov_b32 v8, 0 + v_mov_b32 v9, 0 + v_mov_b32 v10, 0 + v_mov_b32 v11, 0 + v_mov_b32 v12, 0 + v_mov_b32 v13, 0 + v_mov_b32 v14, 0 + v_mov_b32 v15, 0 + v_mov_b32 v16, 0 + LOOP: + s_mov_b32 s8, s4 + s_mov_b32 s9, s1 + s_mov_b32 s10, s6 + s_mov_b32 s11, s7 + s_cmp_le_i32 s1, s0 + s_cbranch_scc1 END_OF_PGM + s_buffer_load_dwordx8 s[8:15], s[8:11], 0x10 + v_add_f32 v0, 2.0, v0 + v_cvt_f32_i32 v17, s1 + s_waitcnt lgkmcnt(0) + v_add_f32 v18, s8, v17 + v_add_f32 v19, s9, v17 + v_add_f32 v20, s10, v17 + v_add_f32 v21, s11, v17 + v_add_f32 v22, s12, v17 + v_add_f32 v23, s13, v17 + v_add_f32 v24, s14, v17 + v_add_f32 v17, s15, v17 + v_log_f32 v25, v18 + v_mul_f32 v25, v22, v25 + v_exp_f32 v25, v25 + v_log_f32 v26, v19 + v_mul_f32 v26, v23, v26 + v_exp_f32 v26, v26 + v_log_f32 v27, v20 + v_mul_f32 v27, v24, v27 + v_exp_f32 v27, v27 + v_log_f32 v28, v21 + v_mul_f32 
v28, v17, v28
+        v_exp_f32 v28, v28
+        v_add_f32 v5, v5, v25
+        v_add_f32 v6, v6, v26
+        v_add_f32 v7, v7, v27
+        v_add_f32 v8, v8, v28
+        v_mul_f32 v18, 0x3fb8aa3b, v18
+        v_exp_f32 v18, v18
+        v_mul_f32 v19, 0x3fb8aa3b, v19
+        v_exp_f32 v19, v19
+        v_mul_f32 v20, 0x3fb8aa3b, v20
+        v_exp_f32 v20, v20
+        v_mul_f32 v21, 0x3fb8aa3b, v21
+        v_exp_f32 v21, v21
+        v_add_f32 v9, v9, v18
+        v_add_f32 v10, v10, v19
+        v_add_f32 v11, v11, v20
+        v_add_f32 v12, v12, v21
+        v_sqrt_f32 v18, v22
+        v_sqrt_f32 v19, v23
+        v_sqrt_f32 v20, v24
+        v_sqrt_f32 v21, v17
+        v_add_f32 v13, v13, v18
+        v_add_f32 v14, v14, v19
+        v_add_f32 v15, v15, v20
+        v_add_f32 v16, v16, v21
+        v_rsq_f32 v18, v22
+        v_rsq_f32 v19, v23
+        v_rsq_f32 v20, v24
+        v_rsq_f32 v17, v17
+        v_add_f32 v1, v1, v18
+        v_add_f32 v2, v2, v19
+        v_add_f32 v3, v3, v20
+        v_add_f32 v4, v4, v17
+        s_add_u32 s0, s0, 1
+        s_branch LOOP
+    END_OF_PGM:
+        s_endpgm
+)";
+
+
+/**
+ * KFDCWSRTest
+ */
+
+/* Initial state:
+ *   s[0:1] - 64 bits iteration number; only the lower 32 bits are useful.
+ *   s[2:3] - result buffer base address
+ *   s4 - workgroup id
+ *   v0 - workitem id, always 0 because
+ *        NUM_THREADS_X(number of threads) in workgroup set to 1
+ * Registers:
+ *   v0 - calculated workitem = v0 + s4 * NUM_THREADS_X, which is s4
+ *   v2 - = s0, 32 bits iteration number
+ *   v[4:5] - corresponding output buf address: s[2:3] + v0 * 4
+ *   v6 - counter
+ */
+const char *IterateIsa = SHADER_MACROS R"(
+    // Copy the parameters from scalar registers to vector registers
+        v_mov_b32 v2, s0 // v[2:3] = s[0:1]
+        v_mov_b32 v3, s1 // v[2:3] = s[0:1]
+        v_mov_b32 v0, s4 // use workgroup id as index
+        v_lshlrev_b32 v0, 2, v0 // v0 *= 4
+        V_ADD_CO_U32 v4, s2, v0 // v[4:5] = s[2:3] + v0 * 4
+        v_mov_b32 v5, s3 // v[4:5] = s[2:3] + v0 * 4
+        V_ADD_CO_CI_U32 v5, v5, 0 // v[4:5] = s[2:3] + v0 * 4
+        v_mov_b32 v6, 0
+    LOOP:
+        V_ADD_CO_U32 v6, 1, v6
+
+        // Compare the result value (v6) to iteration value (v2), and
+        // loop again while v6 is still below it (i.e.
if VCC is not zero after the comparison)
+        V_CMP_LT_U32 v6, v2
+        s_cbranch_vccnz LOOP
+        flat_store_dword v[4:5], v6
+        s_waitcnt vmcnt(0) & lgkmcnt(0)
+        s_endpgm
+)";
+
+/**
+ * KFDEvictTest
+ */
+
+/* Shader to read local buffers using multiple wavefronts in parallel
+ * until address buffer is filled with specific value 0x5678 by host program,
+ * then each wavefront fills value 0x5678 at corresponding result buffer and quits
+ *
+ * Initial state:
+ *   s[0:1] - address buffer base address
+ *   s[2:3] - result buffer base address
+ *   s4 - workgroup id
+ *   v0 - workitem id, always 0 because NUM_THREADS_X(number of threads) in workgroup set to 1
+ * Registers:
+ *   v0 - calculated workitem id, v0 = v0 + s4 * NUM_THREADS_X
+ *   v[2:3] - address of corresponding local buf address offset: s[0:1] + v0 * 8
+ *   v[4:5] - corresponding output buf address: s[2:3] + v0 * 4
+ *   v[6:7] - local buf address used for read test
+ */
+const char *ReadMemoryIsa = SHADER_MACROS R"(
+    // Compute address of corresponding output buffer
+        v_mov_b32 v0, s4 // use workgroup id as index
+        v_lshlrev_b32 v0, 2, v0 // v0 *= 4
+        V_ADD_CO_U32 v4, s2, v0 // v[4:5] = s[2:3] + v0 * 4
+        v_mov_b32 v5, s3 // v[4:5] = s[2:3] + v0 * 4
+        V_ADD_CO_CI_U32 v5, v5, 0 // v[4:5] = s[2:3] + v0 * 4
+
+    // Compute input buffer offset used to store corresponding local buffer address
+        v_lshlrev_b32 v0, 1, v0 // v0 *= 2, i.e. workgroup id * 8 in total
+        V_ADD_CO_U32 v2, s0, v0 // v[2:3] = s[0:1] + v0 * 8
+        v_mov_b32 v3, s1 // v[2:3] = s[0:1] + v0 * 8
+        V_ADD_CO_CI_U32 v3, v3, 0 // v[2:3] = s[0:1] + v0 * 8
+
+    // Load 64bit local buffer address stored at v[2:3] to v[6:7]
+        flat_load_dwordx2 v[6:7], v[2:3] slc
+        s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish
+        v_mov_b32 v8, 0x5678
+        s_movk_i32 s8, 0x5678
+    L_REPEAT:
+        s_load_dword s16, s[0:1], 0x0 glc
+        s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish
+        s_cmp_eq_i32 s16, s8
+        s_cbranch_scc1 L_QUIT // if notified to quit by host
+
+    // Loop read 64M local buffer
starting at v[6:7]
+    // every 4k page only read once
+        v_mov_b32 v9, 0
+        v_mov_b32 v10, 0x1000 // 4k page
+        v_mov_b32 v11, 0x4000000 // 64M size
+        v_mov_b32 v12, v6
+        v_mov_b32 v13, v7
+    L_LOOP_READ:
+        flat_load_dwordx2 v[14:15], v[12:13] slc
+        V_ADD_CO_U32 v9, v9, v10
+        V_ADD_CO_U32 v12, v12, v10
+        V_ADD_CO_CI_U32 v13, v13, 0
+        V_CMP_LT_U32 v9, v11
+        s_cbranch_vccnz L_LOOP_READ
+        s_branch L_REPEAT
+    L_QUIT:
+        flat_store_dword v[4:5], v8
+        s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory writes to finish
+        s_endpgm
+)";
+
+/**
+ * KFDGWSTest
+ */
+
+/* Shader to initialize gws counter from the dword at s[0:1] (presumably 1) */
+const char *GwsInitIsa = R"(
+    .text
+        s_mov_b32 m0, 0
+        s_nop 0
+        s_load_dword s16, s[0:1], 0x0 glc
+        s_waitcnt 0
+        v_mov_b32 v0, s16
+        s_waitcnt 0
+        ds_gws_init v0 offset:0 gds
+        s_waitcnt 0
+        s_endpgm
+)";
+
+/* Atomically increase a value in memory
+ * This is expected to be executed from
+ * multiple work groups simultaneously.
+ * GWS semaphore is used to guarantee
+ * the operation is atomic.
+ */
+const char *GwsAtomicIncreaseIsa = R"(
+    .text
+    // Assume src address in s0, s1
+    .if (.amdgcn.gfx_generation_number >= 10)
+        s_mov_b32 m0, 0
+        s_mov_b32 exec_lo, 0x1
+        v_mov_b32 v0, s0
+        v_mov_b32 v1, s1
+        ds_gws_sema_p offset:0 gds
+        s_waitcnt 0
+        flat_load_dword v2, v[0:1] glc dlc
+        s_waitcnt 0
+        v_add_nc_u32 v2, v2, 1
+        flat_store_dword v[0:1], v2
+        s_waitcnt_vscnt null, 0
+        ds_gws_sema_v offset:0 gds
+    .else
+        s_mov_b32 m0, 0
+        s_nop 0
+        ds_gws_sema_p offset:0 gds
+        s_waitcnt 0
+        s_load_dword s16, s[0:1], 0x0 glc
+        s_waitcnt 0
+        s_add_u32 s16, s16, 1
+        s_store_dword s16, s[0:1], 0x0 glc
+        s_waitcnt lgkmcnt(0)
+        ds_gws_sema_v offset:0 gds
+    .endif
+    s_waitcnt 0
+    s_endpgm
+)";
diff --git a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx10.hpp b/projects/rocr-runtime/tests/kfdtest/src/ShaderStore.hpp
similarity index 55%
rename from projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx10.hpp
rename to projects/rocr-runtime/tests/kfdtest/src/ShaderStore.hpp
index e4a57cda56..e0151a6537 100644
--- a/projects/rocr-runtime/tests/kfdtest/src/IsaGenerator_Gfx10.hpp
+++ b/projects/rocr-runtime/tests/kfdtest/src/ShaderStore.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019 Advanced Micro Devices, Inc. All Rights Reserved.
+ * Copyright (C) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
*
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -21,29 +21,40 @@
  *
  */
 
-#ifndef _ISAGENERATOR_GFX10_H_
-#define _ISAGENERATOR_GFX10_H_
+#ifndef _SHADERSTORE_H_
+#define _SHADERSTORE_H_
 
-#include <string>
-#include "IsaGenerator.hpp"
+#include <vector>
 
-class IsaGenerator_Gfx10 : public IsaGenerator {
- public:
-    virtual void GetNoopIsa(HsaMemoryBuffer& rBuf);
-    virtual void GetCopyDwordIsa(HsaMemoryBuffer& rBuf);
-    virtual void GetInfiniteLoopIsa(HsaMemoryBuffer& rBuf);
-    virtual void GetAtomicIncIsa(HsaMemoryBuffer& rBuf);
+/* KFDASMTest List */
+extern const std::vector<const char*> ShaderList;
 
- protected:
-    virtual const std::string& GetAsicName();
+/* Common */
+extern const char *NoopIsa;
+extern const char *CopyDwordIsa;
+extern const char *InfiniteLoopIsa;
+extern const char *AtomicIncIsa;
 
- private:
-    static const std::string ASIC_NAME;
+/* KFDMemoryTest */
+extern const char *ScratchCopyDwordIsa;
+extern const char *PollMemoryIsa;
+extern const char *PollNCMemoryIsa;
+extern const char *CopyOnSignalIsa;
+extern const char *PollAndCopyIsa;
+extern const char *WriteFlagAndValueIsa;
+extern const char *WriteAndSignalIsa;
 
-    static const uint32_t NOOP_ISA[];
-    static const uint32_t COPY_DWORD_ISA[];
-    static const uint32_t INFINITE_LOOP_ISA[];
-    static const uint32_t ATOMIC_ADD_ISA[];
-};
+/* KFDQMTest */
+extern const char *LoopIsa;
 
-#endif  // _ISAGENERATOR_GFX9_H_
+/* KFDCWSRTest */
+extern const char *IterateIsa;
+
+/* KFDEvictTest */
+extern const char *ReadMemoryIsa;
+
+/* KFDGWSTest */
+extern const char *GwsInitIsa;
+extern const char *GwsAtomicIncreaseIsa;
+
+#endif  // _SHADERSTORE_H_