Merge 'thunk/integrate-into-rocr' into integrate-libhsakmt

This commit is contained in:
Chris Freehill
2024-05-02 21:52:49 -05:00
155 changed files with 100006 additions and 0 deletions
+32
View File
@@ -0,0 +1,32 @@
# Azure DevOps pipeline entry point. The job definition itself is pulled from a
# template that lives in the ROCm/ROCm repository (aliased as pipelines_repo).
resources:
repositories:
- repository: pipelines_repo
type: github
endpoint: ROCm
name: ROCm/ROCm
# Shared variables: one variable group plus a template from pipelines_repo.
variables:
- group: common
- template: /.azuredevops/variables-global.yml@pipelines_repo
# CI trigger: master branch only; '*.md' paths are excluded.
trigger:
batch: true
branches:
include:
- master
paths:
exclude:
- '*.md'
# Pull-request trigger: same branch/path filters as above; drafts are disabled.
pr:
autoCancel: true
branches:
include:
- master
paths:
exclude:
- '*.md'
drafts: false
# The job template path is built from the CI_COMPONENT_PATH variable.
jobs:
- template: ${{ variables.CI_COMPONENT_PATH }}/ROCT-Thunk-Interface.yml@pipelines_repo
+8
View File
@@ -0,0 +1,8 @@
# Default code owners
# Every rule needs a path pattern as its first field; "*" matches all files.
* @kentrussell @fxkamd @dayatsin-amd
*.md @ROCm/rocm-documentation @kentrussell @dayatsin-amd
*.rst @ROCm/rocm-documentation @kentrussell @dayatsin-amd
# Header directory for Doxygen documentation
# NOTE(review): the build installs public headers from include/, not inc/ -
# confirm this pattern still matches the intended directory.
inc/* @ROCm/rocm-documentation @kentrussell @fxkamd @dayatsin-amd
+12
View File
@@ -0,0 +1,12 @@
.*
#
# git files that we don't want to ignore even if they are dot-files
#
!.gitignore
!.mailmap
.github*
patches-*
build/
outgoing/
Makefile
+446
View File
@@ -0,0 +1,446 @@
################################################################################
##
## Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved.
##
## MIT LICENSE:
## Permission is hereby granted, free of charge, to any person obtaining a copy of
## this software and associated documentation files (the "Software"), to deal in
## the Software without restriction, including without limitation the rights to
## use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
## of the Software, and to permit persons to whom the Software is furnished to do
## so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in all
## copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
## SOFTWARE.
##
################################################################################
cmake_minimum_required(VERSION 3.6.3)

# Base names shared by the library target, the packages and the installed files.
set(HSAKMT           "hsakmt")
set(HSAKMT_PACKAGE   "hsakmt-roct")
set(HSAKMT_COMPONENT "lib${HSAKMT}")
set(HSAKMT_TARGET    "${HSAKMT}")

project(${HSAKMT_TARGET} VERSION 1.9.0)

# The thunk is built as a static library unless the caller already chose
# otherwise; the final choice is published as a cache entry.
if(NOT DEFINED BUILD_SHARED_LIBS)
  set(BUILD_SHARED_LIBS "off")
endif()
set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS} CACHE BOOL "Build shared library (.so) or not.")
# Optional ccache support: when ROCM_CCACHE_BUILD is ON and a ccache executable
# can be located, every compile rule is launched through it. A missing ccache
# only produces a warning and the build continues with the plain compiler.
set(ROCM_CCACHE_BUILD OFF CACHE BOOL "Set to ON for a ccache enabled build")
if(ROCM_CCACHE_BUILD)
  find_program(CCACHE_PROGRAM ccache)
  if(NOT CCACHE_PROGRAM)
    message(WARNING "Unable to find ccache. Falling back to real compiler")
  else()
    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PROGRAM})
  endif()
endif()
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules" )

## Include common cmake modules
include ( utils )
include ( GNUInstallDirs )

## Setup the package version.
## get_version() is expected to come from the utils module; it publishes
## VERSION_MAJOR/MINOR/PATCH and VERSION_BUILD, using "1.0.0" as the default.
get_version ( "1.0.0" )

set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} )
set ( BUILD_VERSION_MINOR ${VERSION_MINOR} )
set ( BUILD_VERSION_PATCH ${VERSION_PATCH} )

## Library (soname) version. The patch level can be overridden with
## ROCM_PATCH_VERSION. The variable is tested by name so that its value is not
## dereferenced a second time as if it were another variable's name.
set ( LIB_VERSION_MAJOR 1 )
set ( LIB_VERSION_MINOR 0 )
if ( ROCM_PATCH_VERSION )
  set ( LIB_VERSION_PATCH ${ROCM_PATCH_VERSION} )
else ()
  set ( LIB_VERSION_PATCH 6 )
endif ()
set ( LIB_VERSION_STRING "${LIB_VERSION_MAJOR}.${LIB_VERSION_MINOR}.${LIB_VERSION_PATCH}" )

## Append the build suffix, when there is one, to the packaging patch level.
## The expansion is quoted: an empty VERSION_BUILD would otherwise vanish from
## the argument list and leave a malformed condition behind.
if ( DEFINED VERSION_BUILD AND NOT "${VERSION_BUILD}" STREQUAL "" )
  message ( "VERSION BUILD DEFINED ${VERSION_BUILD}" )
  set ( BUILD_VERSION_PATCH "${BUILD_VERSION_PATCH}-${VERSION_BUILD}" )
endif ()
set ( BUILD_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )
## Compiler flags (kept as a CMake list, one option per element)
set(HSAKMT_C_FLAGS
  -fPIC
  -W
  -Wall
  -Wextra
  -Wno-unused-parameter
  -Wformat-security
  -Wswitch-default
  -Wundef
  -Wshadow
  -Wpointer-arith
  -Wbad-function-cast
  -Wcast-qual
  -Wstrict-prototypes
  -Wmissing-prototypes
  -Wmissing-declarations
  -Wredundant-decls
  -Wunreachable-code
  -std=gnu99
  -fvisibility=hidden)

# GCC-only warning.
if(CMAKE_COMPILER_IS_GNUCC)
  list(APPEND HSAKMT_C_FLAGS -Wlogical-op)
endif()

# Warnings become errors on request.
if(${HSAKMT_WERROR})
  list(APPEND HSAKMT_C_FLAGS -Werror)
endif()

# Release builds are optimized; every other build type carries debug info.
if("${CMAKE_BUILD_TYPE}" STREQUAL Release)
  list(APPEND HSAKMT_C_FLAGS -O2)
else()
  list(APPEND HSAKMT_C_FLAGS -g)
endif()

set(HSAKMT_LINKER_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/src/libhsakmt.ver")

## Linker flags (kept as one space-separated string)
## --enable-new-dtags makes the linker emit DT_RUNPATH.
string(APPEND HSAKMT_LINK_FLAGS " -Wl,--enable-new-dtags -Wl,--version-script=${HSAKMT_LINKER_SCRIPT} -Wl,-soname=${HSAKMT_COMPONENT}.so.${LIB_VERSION_MAJOR} -Wl,-z,nodelete")

## Address sanitizer builds add the sanitizer to both flag sets and select the
## matching sanitizer runtime; all other builds reject undefined symbols.
if(${ADDRESS_SANITIZER})
  list(APPEND HSAKMT_C_FLAGS -fsanitize=address)
  string(APPEND HSAKMT_LINK_FLAGS " -fsanitize=address")
  if(BUILD_SHARED_LIBS)
    string(APPEND HSAKMT_LINK_FLAGS " -shared-libsan")
  else()
    string(APPEND HSAKMT_LINK_FLAGS " -static-libsan")
  endif()
elseif(CMAKE_COMPILER_IS_GNUCC)
  string(APPEND HSAKMT_LINK_FLAGS " -Wl,-no-undefined")
else()
  string(APPEND HSAKMT_LINK_FLAGS " -Wl,-undefined,error")
endif()
## Source files that make up the library
set(HSAKMT_SRC
  "src/debug.c"
  "src/events.c"
  "src/fmm.c"
  "src/globals.c"
  "src/libhsakmt.c"
  "src/memory.c"
  "src/openclose.c"
  "src/perfctr.c"
  "src/pmc_table.c"
  "src/queues.c"
  "src/time.c"
  "src/topology.c"
  "src/rbtree.c"
  "src/spm.c"
  "src/version.c"
  "src/svm.c"
  "src/pc_sampling.c")

## Create the library target, then attach its sources.
add_library(${HSAKMT_TARGET} "")
target_sources(${HSAKMT_TARGET} PRIVATE ${HSAKMT_SRC})

## Public headers are advertised at their build-tree location while building
## and at their installed location afterwards, so the exported target works for
## downstream clients; src/ is only needed to compile the library itself.
target_include_directories(${HSAKMT_TARGET}
  PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src)

## Link flags plus the VERSION and SOVERSION of the library.
set_target_properties(${HSAKMT_TARGET} PROPERTIES
  LINK_FLAGS "${HSAKMT_LINK_FLAGS}"
  VERSION    "${LIB_VERSION_STRING}"
  SOVERSION  "${LIB_VERSION_MAJOR}")
# pkg-config is mandatory: pkg_check_modules() below only exists once the
# PkgConfig module has been loaded, so fail early and clearly if it is missing.
find_package ( PkgConfig REQUIRED )

# get OS-info for OS-specific build dependencies
get_os_info ()

# Check for libraries required for building. The explicit checks keep a missing
# library fatal on every supported CMake release; the REQUIRED keyword of
# find_library() is only understood by CMake 3.18 and newer.
find_library ( LIBC NAMES c )
find_library ( NUMA NAMES numa )
if ( NOT LIBC )
  message ( FATAL_ERROR "Required library 'c' was not found" )
endif ()
if ( NOT NUMA )
  message ( FATAL_ERROR "Required library 'numa' was not found" )
endif ()
message ( STATUS "LIBC:" ${LIBC} )
message ( STATUS "NUMA:" ${NUMA} )

## If DRM_DIR is set, either as a CMake variable or as an environment variable,
## libdrm is looked up under that prefix first. Nothing is added when it is
## unset, so CMAKE_PREFIX_PATH never receives an empty entry.
set ( HSAKMT_DRM_DIR "${DRM_DIR}" )
if ( "${HSAKMT_DRM_DIR}" STREQUAL "" AND DEFINED ENV{DRM_DIR} )
  set ( HSAKMT_DRM_DIR "$ENV{DRM_DIR}" )
endif ()
if ( NOT "${HSAKMT_DRM_DIR}" STREQUAL "" )
  # Same effect as list(PREPEND), which requires CMake 3.15.
  set ( CMAKE_PREFIX_PATH "${HSAKMT_DRM_DIR}" ${CMAKE_PREFIX_PATH} )
endif ()

# The module name passed to pkg_check_modules() is determined by the
# name of file *.pc
pkg_check_modules ( DRM REQUIRED libdrm )
pkg_check_modules ( DRM_AMDGPU REQUIRED libdrm_amdgpu )

# libdrm headers are only needed to compile the library itself, so they are
# attached to the target instead of the whole directory.
target_include_directories ( ${HSAKMT_TARGET}
  PRIVATE ${DRM_AMDGPU_INCLUDE_DIRS} ${DRM_INCLUDE_DIRS} )

target_link_libraries ( ${HSAKMT_TARGET}
  PRIVATE ${DRM_LDFLAGS} ${DRM_AMDGPU_LDFLAGS} pthread rt c numa ${CMAKE_DL_LIBS}
)

target_compile_options ( ${HSAKMT_TARGET} PRIVATE ${DRM_CFLAGS} ${HSAKMT_C_FLAGS} )
## Default install and packaging locations.
## /opt/rocm replaces CMake's built-in default prefix only; a prefix chosen by
## the user is kept. The result is then written back to the cache.
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set(CMAKE_INSTALL_PREFIX "/opt/rocm")
endif()
set(CMAKE_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}
  CACHE STRING "Default installation directory." FORCE)
set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}"
  CACHE STRING "Default packaging prefix.")
set(CPACK_GENERATOR "DEB;RPM"
  CACHE STRING "Default packaging generators.")

## Component selection: a regular build packages the devel component; an ASAN
## packaging build packages only the asan component (libraries and license).
if(NOT ENABLE_ASAN_PACKAGING)
  set(CPACK_COMPONENTS_ALL devel)
else()
  set(CPACK_COMPONENTS_ALL asan)
endif()
# Install the library twice, once per packaging component. The first rule also
# records the target in the ${HSAKMT_TARGET}Targets export set.
install(TARGETS ${HSAKMT_TARGET} EXPORT ${HSAKMT_TARGET}Targets
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT asan
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT asan)
install(TARGETS ${HSAKMT_TARGET}
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT devel)

# Install the public headers, leaving out anything matching "linux".
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/${HSAKMT_TARGET}
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
  COMPONENT devel
  PATTERN "linux" EXCLUDE)
# Opt-in generation of header path migration helpers.
option(INCLUDE_PATH_COMPATIBILITY "Generate backward compatible headers and include paths. Use of these headers will warn when included." OFF)

if(INCLUDE_PATH_COMPATIBILITY)
  # ROCM_HEADER_WRAPPER_WERROR controls whether the wrapper headers use #error.
  # Precedence: an existing CMake value, then the environment variable of the
  # same name, then OFF.
  if(NOT DEFINED ROCM_HEADER_WRAPPER_WERROR)
    set(wrapper_werror_default "OFF")
    if(DEFINED ENV{ROCM_HEADER_WRAPPER_WERROR})
      set(wrapper_werror_default "$ENV{ROCM_HEADER_WRAPPER_WERROR}")
    endif()
    set(ROCM_HEADER_WRAPPER_WERROR "${wrapper_werror_default}"
      CACHE STRING "Header wrapper warnings as errors.")
  endif()

  # Reduce the setting to the 0/1 flag read by the compatibility script.
  if(NOT ROCM_HEADER_WRAPPER_WERROR)
    set(deprecated_error 0)
  else()
    set(deprecated_error 1)
  endif()

  include(hsakmt-backward-compat.cmake)
endif()
# Install the exported target description that find_package() clients load.
install(EXPORT ${HSAKMT_TARGET}Targets
  FILE ${HSAKMT_TARGET}Targets.cmake
  NAMESPACE ${HSAKMT_TARGET}::
  DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${HSAKMT_TARGET}
  COMPONENT devel)

# Offer the namespaced alias (hsakmt::hsakmt) inside this build too. A project
# that pulls this directory in through add_subdirectory() can then link the
# same target name it would receive from find_package().
add_library(${HSAKMT_TARGET}::${HSAKMT_TARGET} ALIAS ${HSAKMT_TARGET})

# Generate and install the package config and version files.
include(CMakePackageConfigHelpers)
configure_package_config_file(
  ${HSAKMT_TARGET}-config.cmake.in
  ${HSAKMT_TARGET}-config.cmake
  INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${HSAKMT_TARGET})
write_basic_package_version_file(
  ${HSAKMT_TARGET}-config-version.cmake
  VERSION ${BUILD_VERSION_STRING}
  COMPATIBILITY AnyNewerVersion)
install(
  FILES
    ${CMAKE_CURRENT_BINARY_DIR}/${HSAKMT_TARGET}-config.cmake
    ${CMAKE_CURRENT_BINARY_DIR}/${HSAKMT_TARGET}-config-version.cmake
  DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${HSAKMT_TARGET}
  COMPONENT devel)

# Registering the build tree in the user's package registry is off by default.
if(NOT DEFINED EXPORT_TO_USER_PACKAGE_REGISTRY)
  set(EXPORT_TO_USER_PACKAGE_REGISTRY "off")
endif()
set(EXPORT_TO_USER_PACKAGE_REGISTRY ${EXPORT_TO_USER_PACKAGE_REGISTRY}
  CACHE BOOL "Add cmake package config location to the user's cmake package registry.")

if(${EXPORT_TO_USER_PACKAGE_REGISTRY})
  # Allow export(PACKAGE) to write to the registry.
  set(CMAKE_EXPORT_PACKAGE_REGISTRY ON)
  # Emit a targets file for the build tree, then register the package.
  export(TARGETS ${HSAKMT_TARGET}
    NAMESPACE ${HSAKMT_TARGET}::
    FILE ${HSAKMT_TARGET}Targets.cmake)
  export(PACKAGE ${HSAKMT_TARGET})
endif()
# CPACK_PACKAGING_INSTALL_PREFIX is needed in libhsakmt.pc.in
# TODO: Add support for relocatable packages.
configure_file ( libhsakmt.pc.in libhsakmt.pc @ONLY )
install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/libhsakmt.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig COMPONENT devel)

###########################
# Packaging directives
###########################
# Use component packaging: one package per component, no grouping.
set(CPACK_COMPONENTS_GROUPING IGNORE)
set(CPACK_DEB_COMPONENT_INSTALL ON)
set(CPACK_RPM_COMPONENT_INSTALL ON)

set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.")
set(CPACK_PACKAGE_VERSION_MAJOR ${VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${VERSION_MINOR})
set(CPACK_PACKAGE_VERSION_PATCH ${VERSION_PATCH})
set(CPACK_PACKAGE_CONTACT "AMD GFX mailing list <amd-gfx@lists.freedesktop.org>")
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.md")
# NOTE(review): CPACK_COMPONENT_DESCRIPTION does not look like a variable CPack
# reads (the documented ones are per component or per generator) - confirm that
# this text actually reaches the packages.
set(CPACK_COMPONENT_DESCRIPTION "HSAKMT development package.\n This package includes the user-mode API interfaces\nused to interact with the ROCk driver.\n This package contains the headers, pkgconfig and\n cmake files for the ROCT package.")

# Consumed by the maintainer-script templates (ENABLE_LDCONFIG placeholder).
set ( ENABLE_LDCONFIG ON CACHE BOOL "Set library links and caches using ldconfig.")

# Install License file
install ( FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR} COMPONENT devel)
if ( ENABLE_ASAN_PACKAGING )
  install ( FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}-asan COMPONENT asan)
endif()

# Make proper version for appending
# Default Value is 99999, setting it first; the ROCM_LIBPATCH_VERSION
# environment variable overrides it when present.
set(ROCM_VERSION_FOR_PACKAGE "99999")
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
  set(ROCM_VERSION_FOR_PACKAGE $ENV{ROCM_LIBPATCH_VERSION})
endif()

# Prepare final version for the CPACK use
set(PACKAGE_VERSION_STR "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.${ROCM_VERSION_FOR_PACKAGE}")
set(CPACK_PACKAGE_VERSION "${PACKAGE_VERSION_STR}")
# Debian package specific variables
set(CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/ROCm/ROCT-Thunk-Interface")
set(CPACK_DEBIAN_PACKAGE_NAME "hsakmt-roct-dev")
# The release field comes from the environment when provided, else "local".
if (DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
  set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
else()
  set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
endif()

## Process the Debian install/remove scripts to update the CPACK variables
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst.in DEBIAN/postinst @ONLY )
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm.in DEBIAN/prerm @ONLY )
# Absolute paths: configure_file() writes the scripts below
# CMAKE_CURRENT_BINARY_DIR, which is not necessarily the directory cpack runs
# in (for instance when this project is a subdirectory of a larger build).
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA
  "${CMAKE_CURRENT_BINARY_DIR}/DEBIAN/postinst;${CMAKE_CURRENT_BINARY_DIR}/DEBIAN/prerm")

# Needed since some packages still say they need hsakmt-roct
set(CPACK_DEBIAN_PACKAGE_REPLACES "hsakmt-roct")
set(CPACK_DEBIAN_PACKAGE_PROVIDES "hsakmt-roct")
set(CPACK_RPM_PACKAGE_OBSOLETES "hsakmt-roct")
set(CPACK_RPM_PACKAGE_PROVIDES "hsakmt-roct")

# Debian devel component specific flags
set(CPACK_DEBIAN_DEVEL_PACKAGE_NAME "hsakmt-roct-dev")
set(CPACK_DEBIAN_DEVEL_PACKAGE_REPLACES "hsakmt-roct")
set(CPACK_DEBIAN_DEVEL_PACKAGE_PROVIDES "hsakmt-roct")
set(CPACK_RPM_DEVEL_PACKAGE_PROVIDES "hsakmt-roct")

# Debian package specific variable for ASAN
set(CPACK_DEBIAN_ASAN_PACKAGE_NAME "hsakmt-roct-asan" )
## Process the Rpm install/remove scripts to update the CPACK variables
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/post.in" RPM/post @ONLY )
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/postun.in" RPM/postun @ONLY )

## RPM package specific variables
## The "post" script registers the library directory and runs ldconfig, so it
## has to run after the package files are in place (post-install), matching the
## Debian postinst script.
set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/post" )
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/postun" )

## Keep the install prefix and its bin directory out of the auto-generated file
## list. The variable is a CMake list, so each path is its own element.
if(DEFINED CPACK_PACKAGING_INSTALL_PREFIX)
  set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION
    "${CPACK_PACKAGING_INSTALL_PREFIX}"
    "${CPACK_PACKAGING_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}")
endif()

set(CPACK_RPM_PACKAGE_NAME "hsakmt-roct-devel")
set(CPACK_RPM_PACKAGE_LICENSE "MIT")

# RPM package specific variable for ASAN
set(CPACK_RPM_ASAN_PACKAGE_NAME "hsakmt-roct-asan" )

# RPM package devel specific variable
set(CPACK_RPM_DEVEL_PACKAGE_NAME "hsakmt-roct-devel")

# Since we changed the package name to match RPM specs, take care of older builds that had -dev installed
# Also cover the fact that this now replaces the old binary package hsakmt-roct
set(CPACK_RPM_PACKAGE_OBSOLETES "hsakmt-roct-dev hsakmt-roct")
set(CPACK_RPM_DEVEL_PACKAGE_OBSOLETES "hsakmt-roct-dev hsakmt-roct")

# The release field comes from the environment when provided, else "local".
if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
  set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE})
else()
  set(CPACK_RPM_PACKAGE_RELEASE "local")
endif()

## Specify a RPM SPEC template
# This template was generated by using "CPACK_RPM_GENERATE_USER_BINARY_SPECFILE_TEMPLATE"
# and then modified to support CPACK_RPM_PACKAGE_RECOMMENDS for EL8+ and SLE
set ( CPACK_RPM_USER_BINARY_SPECFILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/${CPACK_RPM_PACKAGE_NAME}.spec.in" )

# 'dist' breaks manual builds on debian systems due to empty Provides, so the
# dist tag is only appended when rpm is runnable and reports a non-empty value.
execute_process( COMMAND rpm --eval %{?dist}
  RESULT_VARIABLE PROC_RESULT
  OUTPUT_VARIABLE EVAL_RESULT
  OUTPUT_STRIP_TRAILING_WHITESPACE )
message("RESULT_VARIABLE ${PROC_RESULT} OUTPUT_VARIABLE: ${EVAL_RESULT}")

if ( PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "" )
  string ( APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}" )
endif()
# Setting devel package dependent version
# Debian: either libdrm development package satisfies the dependency; rocm-core
# is listed last and may be stripped again further down in this file.
set(CPACK_DEBIAN_PACKAGE_DEPENDS "libdrm-amdgpu-dev | libdrm-dev, rocm-core")
set(CPACK_DEBIAN_PACKAGE_RECOMMENDS "libdrm-amdgpu-dev")
set(CPACK_DEBIAN_PACKAGE_REPLACES "hsakmt-roct")
set(CPACK_DEBIAN_DEVEL_PACKAGE_DEPENDS "libdrm-amdgpu-dev | libdrm-dev, rocm-core")
set(CPACK_DEBIAN_DEVEL_PACKAGE_RECOMMENDS "libdrm-amdgpu-dev")
set(CPACK_DEBIAN_DEVEL_PACKAGE_REPLACES "hsakmt-roct")
# Debian package specific variable for ASAN
set(CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS "libdrm-amdgpu-dev | libdrm-dev, rocm-core-asan")
set(CPACK_DEBIAN_ASAN_PACKAGE_RECOMMENDS "libdrm-amdgpu-dev")
# RPM: the requirement strings start with rocm-core; the libdrm part is
# appended below, so these assignments must stay ahead of the string(APPEND)s.
set(CPACK_RPM_PACKAGE_REQUIRES "rocm-core")
set(CPACK_RPM_DEVEL_PACKAGE_REQUIRES "rocm-core")
# RPM package specific variable for ASAN
set(CPACK_RPM_ASAN_PACKAGE_REQUIRES "rocm-core-asan")
# NOTE: CentOS/RHEL <8 and SUSE <10 do not have support for rpm recommends,
# so HSAKMT_REQUIRES_LIBDRM should be defined when building EL7 to avoid
# producing binaries which do not install on CentOS7 or RHEL7
if(HSAKMT_REQUIRES_LIBDRM)
# Hard requirement on libdrm-amdgpu-devel, no Recommends tag.
string ( APPEND CPACK_RPM_PACKAGE_REQUIRES ", libdrm-amdgpu-devel" )
string ( APPEND CPACK_RPM_DEVEL_PACKAGE_REQUIRES ", libdrm-amdgpu-devel" )
string ( APPEND CPACK_RPM_ASAN_PACKAGE_REQUIRES ", libdrm-amdgpu-devel" )
else()
# Either libdrm development package is accepted; libdrm-amdgpu-devel is recommended.
string ( APPEND CPACK_RPM_PACKAGE_REQUIRES ", (libdrm-amdgpu-devel or libdrm-devel)" )
string ( APPEND CPACK_RPM_DEVEL_PACKAGE_REQUIRES ", (libdrm-amdgpu-devel or libdrm-devel)" )
string ( APPEND CPACK_RPM_ASAN_PACKAGE_REQUIRES ", (libdrm-amdgpu-devel or libdrm-devel)" )
set(CPACK_RPM_PACKAGE_RECOMMENDS "libdrm-amdgpu-devel" )
set(CPACK_RPM_DEVEL_PACKAGE_RECOMMENDS "libdrm-amdgpu-devel" )
set(CPACK_RPM_ASAN_PACKAGE_RECOMMENDS "libdrm-amdgpu-devel" )
endif()
# Set the names now using CPACK utility
set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT")
set(CPACK_RPM_FILE_NAME "RPM-DEFAULT")
# Remove dependency on rocm-core if -DROCM_DEP_ROCMCORE=ON not given to cmake.
# Each pattern has two alternatives so that no stray separator is left behind:
#   "^<name>, ?"  - the entry leads the list; drop it with the comma after it
#   ",? ?<name>"  - the entry appears later; drop it with the comma before it
# The inputs are quoted so that an empty value stays a valid argument.
if(NOT ROCM_DEP_ROCMCORE)
  string(REGEX REPLACE "^rocm-core, ?|,? ?rocm-core" "" CPACK_RPM_PACKAGE_REQUIRES "${CPACK_RPM_PACKAGE_REQUIRES}")
  string(REGEX REPLACE "^rocm-core, ?|,? ?rocm-core" "" CPACK_DEBIAN_PACKAGE_DEPENDS "${CPACK_DEBIAN_PACKAGE_DEPENDS}")
  string(REGEX REPLACE "^rocm-core, ?|,? ?rocm-core" "" CPACK_RPM_DEVEL_PACKAGE_REQUIRES "${CPACK_RPM_DEVEL_PACKAGE_REQUIRES}")
  string(REGEX REPLACE "^rocm-core, ?|,? ?rocm-core" "" CPACK_DEBIAN_DEVEL_PACKAGE_DEPENDS "${CPACK_DEBIAN_DEVEL_PACKAGE_DEPENDS}")
  string(REGEX REPLACE "^rocm-core-asan, ?|,? ?rocm-core-asan" "" CPACK_RPM_ASAN_PACKAGE_REQUIRES "${CPACK_RPM_ASAN_PACKAGE_REQUIRES}")
  string(REGEX REPLACE "^rocm-core-asan, ?|,? ?rocm-core-asan" "" CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS "${CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS}")
endif()
# Load CPack after all CPACK_* variables above have been set, then describe the
# asan component.
include(CPack)
cpack_add_component(asan
DISPLAY_NAME "ASAN"
DESCRIPTION "ASAN libraries for the LIBHSAKMT")
+8
View File
@@ -0,0 +1,8 @@
Package: hsakmt-rocm-dev
Architecture: $arch
Maintainer: Advanced Micro Devices Inc.
Depends:libpci3, libnuma1
Priority: optional
Version: $version
Description: Thunk library for AMD KFD
+23
View File
@@ -0,0 +1,23 @@
#!/bin/bash

set -e

# Writes the library directory into an ld.so.conf.d drop-in and refreshes the
# linker cache. The value compared against "ON" is substituted from
# ENABLE_LDCONFIG (ON/OFF) when the package is built; any other value makes
# this function a no-op.
do_ldconfig() {
    [ "@ENABLE_LDCONFIG@" == "ON" ] || return 0
    echo @CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@ > /@CMAKE_INSTALL_SYSCONFDIR@/ld.so.conf.d/x86_64-libhsakmt.conf
    ldconfig
}

# Dispatch on the maintainer-script action passed as the first argument.
case "$1" in
    configure)
        do_ldconfig
        ;;
    abort-upgrade|abort-remove|abort-deconfigure)
        # Nothing to undo; just report which action was seen.
        echo "$1"
        ;;
    *)
        exit 0
        ;;
esac
+21
View File
@@ -0,0 +1,21 @@
#!/bin/bash

set -e

# Deletes the ld.so.conf.d drop-in and, if that succeeded, refreshes the linker
# cache. The value compared against "ON" is substituted from ENABLE_LDCONFIG
# (ON/OFF) when the package is built; any other value makes this a no-op.
rm_ldconfig() {
    [ "@ENABLE_LDCONFIG@" == "ON" ] || return 0
    rm -f /@CMAKE_INSTALL_SYSCONFDIR@/ld.so.conf.d/x86_64-libhsakmt.conf && ldconfig
}

# Dispatch on the maintainer-script action passed as the first argument.
case "$1" in
    remove|upgrade)
        rm_ldconfig
        ;;
    purge)
        # Intentionally nothing to do.
        ;;
    *)
        exit 0
        ;;
esac
+1
View File
@@ -0,0 +1 @@
/opt/rocm/lib
+50
View File
@@ -0,0 +1,50 @@
ROCT-Thunk Interface LICENSE
Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved.
MIT LICENSE:
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
This product contains software provided by Nginx, Inc. and its contributors.
Copyright (C) 2002-2018 Igor Sysoev
Copyright (C) 2011-2018 Nginx, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
+47
View File
@@ -0,0 +1,47 @@
# ROCt Library
This repository includes the user-mode API interfaces used to interact with the ROCk driver.
Starting with the 1.7 release, ROCt uses the drm render device. This requires the user to belong to the video group. Add the user account to the video group with the "sudo usermod -a -G video _username_" command if the user is not part of the video group yet.
NOTE: Users of Ubuntu 20.04 will need to add the user to the new "render" group, as Ubuntu has changed the owner:group of /dev/kfd to render:render as of that release
## ROCk Driver
The ROCt library is not a standalone product and requires that you have the correct ROCk driver installed, or are using a compatible upstream kernel.
Please refer to <https://rocm.docs.amd.com> under "Getting Started Guide" for a list of supported Operating Systems and kernel versions, as well as supported hardware.
## Building the Thunk
A simple cmake-based system is available for building thunk. To build the thunk from the ROCT-Thunk-Interface directory, execute:
```bash
mkdir -p build
cd build
cmake ..
make
```
If the hsakmt-roct and hsakmt-roct-dev packages are desired:
```bash
mkdir -p build
cd build
cmake ..
make package
```
If you choose not to build and install packages, manual installation of the binaries and header files can be done via:
```bash
make install
```
NOTE: For older versions of the thunk where hsakmt-dev.txt is present, "make package-dev" and "make install-dev" are required to generate/install the developer packages. Currently, these are created via the "make package" and "make install" commands
## Disclaimer
The information contained herein is for informational purposes only, and is subject to change without notice. While every precaution has been taken in the preparation of this document, it may contain technical inaccuracies, omissions and typographical errors, and AMD is under no obligation to update or otherwise correct this information. Advanced Micro Devices, Inc. makes no representations or warranties with respect to the accuracy or completeness of the contents of this document, and assumes no liability of any kind, including the implied warranties of noninfringement, merchantability or fitness for particular purposes, with respect to the operation or use of AMD hardware, software or other products described herein. No license, including implied or arising by estoppel, to any intellectual property rights is granted by this document. Terms and limitations applicable to the purchase or use of AMD's products are as set forth in a signed agreement between the parties or in AMD's Standard Terms and Conditions of Sale.
AMD, the AMD Arrow logo, and combinations thereof are trademarks of Advanced Micro Devices, Inc. Other product names used in this publication are for identification purposes only and may be trademarks of their respective companies.
Copyright (c) 2014-2023 Advanced Micro Devices, Inc. All rights reserved.
+97
View File
@@ -0,0 +1,97 @@
# Restore old style debuginfo creation for rpm >= 4.14.
%undefine _debugsource_packages
%undefine _debuginfo_subpackages
# -*- rpm-spec -*-
# Spec template: the at-sign delimited placeholders below are filled in when
# the package is generated (presumably by CPack's RPM generator - confirm).
BuildRoot: %_topdir/@CPACK_PACKAGE_FILE_NAME@@CPACK_RPM_PACKAGE_COMPONENT_PART_PATH@
Summary: @CPACK_RPM_PACKAGE_SUMMARY@
Name: @CPACK_RPM_PACKAGE_NAME@
Version: @CPACK_RPM_PACKAGE_VERSION@
Release: @CPACK_RPM_PACKAGE_RELEASE@
License: @CPACK_RPM_PACKAGE_LICENSE@
Group: @CPACK_RPM_PACKAGE_GROUP@
Vendor: @CPACK_RPM_PACKAGE_VENDOR@
@TMP_RPM_URL@
@TMP_RPM_REQUIRES@
@TMP_RPM_REQUIRES_PRE@
@TMP_RPM_REQUIRES_POST@
@TMP_RPM_REQUIRES_PREUN@
@TMP_RPM_REQUIRES_POSTUN@
@TMP_RPM_PROVIDES@
@TMP_RPM_OBSOLETES@
@TMP_RPM_CONFLICTS@
@TMP_RPM_SUGGESTS@
@TMP_RPM_AUTOPROV@
@TMP_RPM_AUTOREQ@
@TMP_RPM_AUTOREQPROV@
@TMP_RPM_BUILDARCH@
@TMP_RPM_PREFIXES@
@TMP_RPM_EPOCH@
# Modifications to allow recommends to be used (not implemented in cpack):
# the Recommends tag is only emitted when the substituted value is non-empty.
%if "@CPACK_RPM_PACKAGE_RECOMMENDS@" != ""
Recommends: @CPACK_RPM_PACKAGE_RECOMMENDS@
%endif
# End of modifications
@TMP_RPM_DEBUGINFO@
%define _rpmdir %_topdir/RPMS
%define _srcrpmdir %_topdir/SRPMS
@FILE_NAME_DEFINE@
%define _unpackaged_files_terminate_build 0
@TMP_RPM_SPEC_INSTALL_POST@
@CPACK_RPM_SPEC_MORE_DEFINE@
@CPACK_RPM_COMPRESSION_TYPE_TMP@
%description
@CPACK_RPM_PACKAGE_DESCRIPTION@
# This is a shortened spec file generated by the CMake RPM generator.
# The usual install step is skipped because CPack has already populated the
# install tree. The prep section only moves that tree aside, and the install
# section moves it back into the build root.
%prep
mv $RPM_BUILD_ROOT %_topdir/tmpBBroot
%install
if [ -e $RPM_BUILD_ROOT ];
then
rm -rf $RPM_BUILD_ROOT
fi
mv %_topdir/tmpBBroot $RPM_BUILD_ROOT
@TMP_RPM_DEBUGINFO_INSTALL@
%clean
%post
@RPM_SYMLINK_POSTINSTALL@
@CPACK_RPM_SPEC_POSTINSTALL@
%posttrans
@CPACK_RPM_SPEC_POSTTRANS@
%postun
@CPACK_RPM_SPEC_POSTUNINSTALL@
%pre
@CPACK_RPM_SPEC_PREINSTALL@
%pretrans
@CPACK_RPM_SPEC_PRETRANS@
%preun
@CPACK_RPM_SPEC_PREUNINSTALL@
%files
%defattr(@TMP_DEFAULT_FILE_PERMISSIONS@,@TMP_DEFAULT_USER@,@TMP_DEFAULT_GROUP@,@TMP_DEFAULT_DIR_PERMISSIONS@)
@CPACK_RPM_INSTALL_FILES@
@CPACK_RPM_ABSOLUTE_INSTALL_FILES@
@CPACK_RPM_USER_INSTALL_FILES@
%changelog
@CPACK_RPM_SPEC_CHANGELOG@
@TMP_OTHER_COMPONENTS@
+42
View File
@@ -0,0 +1,42 @@
# Hand-written spec for the hsakmt-rocm-dev package.
# The package version and the pre-built file tree are taken from the
# PACKAGE_VER and PACKAGE_DIR environment variables.
%define name hsakmt-rocm-dev
%define version %{getenv:PACKAGE_VER}
%define packageroot %{getenv:PACKAGE_DIR}
Name: %{name}
Version: %{version}
Release: 1
Summary: Thunk libraries for AMD KFD
Group: System Environment/Libraries
# The License tag names the licence (the project ships an MIT licence text);
# the company name belongs in the Vendor tag.
License: MIT
Vendor: Advanced Micro Devices Inc.
# The NUMA runtime package is named differently on CentOS 6.
%if 0%{?centos} == 6
Requires: numactl
%else
Requires: numactl-libs
%endif
%description
This package includes the libhsakmt (Thunk) libraries
for AMD KFD
%prep
%setup -T -D -c -n %{name}
# Copy the pre-built tree into the build root and record every non-directory
# entry, relative to the build root, as the package file list.
%install
cp -R %packageroot $RPM_BUILD_ROOT
find $RPM_BUILD_ROOT \! -type d | sed "s|$RPM_BUILD_ROOT||"> thunk.list
%post
ldconfig
%postun
ldconfig
%clean
rm -rf $RPM_BUILD_ROOT
%files -f thunk.list
%defattr(-,root,root,-)
+5
View File
@@ -0,0 +1,5 @@
# Registers the library directory with the dynamic linker.
# left-hand term originates from ENABLE_LDCONFIG = ON/OFF at package build
# Written for plain /bin/sh: "=" instead of "==", and printf instead of
# "echo -e", so the path is written verbatim whatever shell runs the scriptlet.
if [ "@ENABLE_LDCONFIG@" = "ON" ]; then
    printf '%s\n' "@CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@" > /@CMAKE_INSTALL_SYSCONFDIR@/ld.so.conf.d/x86_64-libhsakmt.conf
    ldconfig
fi
+6
View File
@@ -0,0 +1,6 @@
# Removes the ld.so.conf.d drop-in and refreshes the linker cache.
# second term originates from ENABLE_LDCONFIG = ON/OFF at package build
# Written for plain /bin/sh: "$1" is quoted and "=" replaces "==".
# NOTE(review): on an upgrade rpm normally runs the old package's postun after
# the new package's scriptlets, which would delete the file the new package
# just wrote - confirm that handling $1=1 here is intended.
if [ "$1" -le 1 ] && [ "@ENABLE_LDCONFIG@" = "ON" ]; then
    # perform the below actions for rpm remove($1=0) or upgrade($1=1) operations
    rm -f /@CMAKE_INSTALL_SYSCONFDIR@/ld.so.conf.d/x86_64-libhsakmt.conf
    ldconfig
fi
+139
View File
@@ -0,0 +1,139 @@
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
## AMD Research and AMD HSA Software Development
##
## Advanced Micro Devices, Inc.
##
## www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and#or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
## - Redistributions of source code must retain the above copyright notice,
## this list of conditions and the following disclaimers.
## - Redistributions in binary form must reproduce the above copyright
## notice, this list of conditions and the following disclaimers in
## the documentation and#or other materials provided with the distribution.
## - Neither the names of Advanced Micro Devices, Inc,
## nor the names of its contributors may be used to endorse or promote
## products derived from this Software without specific prior written
## permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################
## Parses VERSION_STRING and publishes the result in the caller's scope:
##   VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH
##       first, second and third run of decimal digits found in the string;
##       each one is only set when that many numbers are present
##   VERSION_BUILD
##       everything after the first "-" of the string; when the string has no
##       "-", a VERSION_BUILD already visible to this function is passed on
##   VERSION_STRING
##       "major[.minor[.patch]]" rebuilt from the numbers that were found
function( parse_version VERSION_STRING )
  # Start from a known value so that a TEMP_VERSION_STRING inherited from the
  # calling scope can never leak into the result.
  set ( TEMP_VERSION_STRING "" )

  # Expansions of the input are quoted so that an empty string (or one holding
  # a semicolon) remains a single, valid argument.
  string ( FIND "${VERSION_STRING}" "-" STRING_INDEX )
  if ( STRING_INDEX GREATER -1 )
    math ( EXPR STRING_INDEX "${STRING_INDEX} + 1" )
    string ( SUBSTRING "${VERSION_STRING}" ${STRING_INDEX} -1 VERSION_BUILD )
  endif ()

  string ( REGEX MATCHALL "[0123456789]+" VERSIONS "${VERSION_STRING}" )
  list ( LENGTH VERSIONS VERSION_COUNT )

  if ( VERSION_COUNT GREATER 0 )
    list ( GET VERSIONS 0 MAJOR )
    set ( VERSION_MAJOR ${MAJOR} PARENT_SCOPE )
    set ( TEMP_VERSION_STRING "${MAJOR}" )
  endif ()

  if ( VERSION_COUNT GREATER 1 )
    list ( GET VERSIONS 1 MINOR )
    set ( VERSION_MINOR ${MINOR} PARENT_SCOPE )
    set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${MINOR}" )
  endif ()

  if ( VERSION_COUNT GREATER 2 )
    list ( GET VERSIONS 2 PATCH )
    set ( VERSION_PATCH ${PATCH} PARENT_SCOPE )
    set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${PATCH}" )
  endif ()

  if ( DEFINED VERSION_BUILD )
    set ( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE )
  endif ()

  set ( VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE )
endfunction ()
## Determines the version of the source tree.
## DEFAULT_VERSION_STRING is parsed first; when git is available and
## "git describe --tags --dirty --long" succeeds in the current source
## directory, its output is parsed as well and replaces those values.
## Publishes VERSION_STRING, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH and
## VERSION_BUILD in the caller's scope.
function ( get_version DEFAULT_VERSION_STRING )
  parse_version ( "${DEFAULT_VERSION_STRING}" )

  find_program ( GIT NAMES git )
  if ( GIT )
    # Run the executable that was actually located. stderr is silenced because
    # a failing describe (no repository, no tags) is an expected case that
    # simply keeps the default version.
    execute_process ( COMMAND "${GIT}" describe --tags --dirty --long
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
      OUTPUT_VARIABLE GIT_TAG_STRING
      OUTPUT_STRIP_TRAILING_WHITESPACE
      ERROR_QUIET
      RESULT_VARIABLE RESULT )
    # RESULT holds text rather than a number when the process could not be
    # started, so compare it as a quoted string.
    if ( "${RESULT}" STREQUAL "0" AND NOT "${GIT_TAG_STRING}" STREQUAL "" )
      parse_version ( "${GIT_TAG_STRING}" )
    endif ()
  endif ()

  set( VERSION_STRING "${VERSION_STRING}" PARENT_SCOPE )
  set( VERSION_MAJOR "${VERSION_MAJOR}" PARENT_SCOPE )
  set( VERSION_MINOR "${VERSION_MINOR}" PARENT_SCOPE )
  set( VERSION_PATCH "${VERSION_PATCH}" PARENT_SCOPE )
  set( VERSION_BUILD "${VERSION_BUILD}" PARENT_SCOPE )
endfunction()
# Detects the Linux distribution of the configuring host.
#
# Results (PARENT_SCOPE): DISTRO_ID and DISTRO_RELEASE.
#   - /etc/os-release: the ID= and VERSION_ID= lines with the key removed; any
#     quoting present in the file is kept as-is.
#   - /etc/centos-release (fallback): lower-cased leading word and the first
#     run of digits of the "release" line.
#   - neither file: a status message is printed and the values visible from
#     the calling scope are handed back unchanged.
function(get_os_info)
  set(os_release_path "/etc/os-release")
  set(centos_release_path "/etc/centos-release")

  if(EXISTS "${os_release_path}")
    file(STRINGS "${os_release_path}" DISTRO_ID REGEX "^ID=")
    string(REPLACE "ID=" "" DISTRO_ID "${DISTRO_ID}")

    file(STRINGS "${os_release_path}" DISTRO_RELEASE REGEX "^VERSION_ID=")
    string(REPLACE "VERSION_ID=" "" DISTRO_RELEASE "${DISTRO_RELEASE}")

    message(STATUS "Detected distribution: ${DISTRO_ID}:${DISTRO_RELEASE}")
  elseif(EXISTS "${centos_release_path}")
    # Example: CentOS release 6.10 (Final)
    file(STRINGS "${centos_release_path}" DISTRO_FULL_STR REGEX "release")

    string(REGEX MATCH "^[a-zA-Z]+" DISTRO_ID "${DISTRO_FULL_STR}")
    string(TOLOWER "${DISTRO_ID}" DISTRO_ID)
    string(REGEX MATCH "[0-9]+" DISTRO_RELEASE "${DISTRO_FULL_STR}")

    message(STATUS "Detected distribution: ${DISTRO_ID}:${DISTRO_RELEASE}")
  else()
    message(STATUS "Not able to detect OS")
  endif()

  set(DISTRO_ID      "${DISTRO_ID}"      PARENT_SCOPE)
  set(DISTRO_RELEASE "${DISTRO_RELEASE}" PARENT_SCOPE)
endfunction()
+91
View File
@@ -0,0 +1,91 @@
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# Build-tree locations for the generated backward-compatibility wrapper
# headers: the working directory and its include/ subdirectory.
set(HSAKMT_WRAPPER_DIR "${CMAKE_CURRENT_BINARY_DIR}/wrapper_dir")
set(HSAKMT_WRAPPER_INC_DIR "${HSAKMT_WRAPPER_DIR}/include")
# Writes the wrapper-header template to ${HSAKMT_WRAPPER_DIR}/header.hpp.in.
#
# The template is a C/C++ header with @...@ placeholders: include_guard,
# deprecated_error, file_name, include_statements, CMAKE_INSTALL_PREFIX and
# CMAKE_INSTALL_INCLUDEDIR. Depending on ROCM_HEADER_WRAPPER_WERROR the
# generated header raises an #error or a warning telling the user that the
# header moved under hsakmt/, and then emits the @include_statements@ text.
# Takes no arguments and sets no variables.
function(create_header_template)
# Everything between the quotes below is written to the file as-is;
# \" and \\ are CMake escapes for a literal quote and backslash.
file(WRITE ${HSAKMT_WRAPPER_DIR}/header.hpp.in "/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the \"Software\"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef @include_guard@
#define @include_guard@
#ifndef ROCM_HEADER_WRAPPER_WERROR
#define ROCM_HEADER_WRAPPER_WERROR @deprecated_error@
#endif
#if ROCM_HEADER_WRAPPER_WERROR /* ROCM_HEADER_WRAPPER_WERROR 1 */
#error \"@file_name@ has moved to @CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_INCLUDEDIR@/hsakmt and package include paths have changed.\\nInclude as \\\"hsakmt/@file_name@\\\" when using cmake packages.\"
#else /* ROCM_HEADER_WRAPPER_WERROR 0 */
#if defined(__GNUC__)
#warning \"@file_name@ has moved to @CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_INCLUDEDIR@/hsakmt and package include paths have changed.\\nInclude as \\\"hsakmt/@file_name@\\\" when using cmake packages.\"
#else
#pragma message(\"@file_name@ has moved to @CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_INCLUDEDIR@/hsakmt and package include paths have changed.\\nInclude as \\\"hsakmt/@file_name@\\\" when using cmake packages.\")
#endif
#endif /* ROCM_HEADER_WRAPPER_WERROR */
@include_statements@
#endif")
endfunction()
# Generates one wrapper header per public header.
#
# For every ${CMAKE_CURRENT_SOURCE_DIR}/include/*.h a file with the same name
# is produced in ${HSAKMT_WRAPPER_INC_DIR} by expanding
# ${HSAKMT_WRAPPER_DIR}/header.hpp.in with:
#   include_guard      - HSAKMT_WRAPPER_INCLUDE_<UPPER-CASE BASE NAME>_H
#   file_name          - the header's file name
#   include_statements - an #include of "hsakmt/<file_name>"
# The template also references deprecated_error, which is not set here;
# presumably the including project defines it - TODO confirm.
function(generate_wrapper_header)
  file(MAKE_DIRECTORY "${HSAKMT_WRAPPER_INC_DIR}")

  # Collect the public headers that need a wrapper.
  file(GLOB include_files "${CMAKE_CURRENT_SOURCE_DIR}/include/*.h")

  foreach(header_file IN LISTS include_files)
    # Both template variables are assigned outright, so a value of the same
    # name in the calling scope can never end up inside a generated header.
    get_filename_component(INC_GUARD_NAME "${header_file}" NAME_WE)
    string(TOUPPER "${INC_GUARD_NAME}" INC_GUARD_NAME)
    set(include_guard "HSAKMT_WRAPPER_INCLUDE_${INC_GUARD_NAME}_H")

    get_filename_component(file_name "${header_file}" NAME)
    set(include_statements "#include \"hsakmt/${file_name}\"\n")

    configure_file("${HSAKMT_WRAPPER_DIR}/header.hpp.in"
                   "${HSAKMT_WRAPPER_INC_DIR}/${file_name}")
  endforeach()
endfunction()
# Write the wrapper template, then expand it once per public header.
create_header_template()
generate_wrapper_header()

# Install the generated include directory into the root of the install
# prefix as part of the "devel" component, skipping anything named "linux".
install(
  DIRECTORY "${HSAKMT_WRAPPER_INC_DIR}"
  DESTINATION .
  COMPONENT devel
  PATTERN "linux" EXCLUDE
)
+13
View File
@@ -0,0 +1,13 @@
@PACKAGE_INIT@
include( CMakeFindDependencyMacro )
# Locate dependent packages here. Finding them propagates usage requirements,
# if any, to our clients and ensures that their target names are in scope for
# the build. hsakmt has no CMake project dependencies, so there is nothing to
# find. If we switch to find_package for library dependencies external to
# ROCm (e.g. libnuma), those packages should be located here with
# find_dependency, as in the example below.
#find_dependency(Bar 2.0)
include( "${CMAKE_CURRENT_LIST_DIR}/@HSAKMT_TARGET@Targets.cmake" )
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+11
View File
@@ -0,0 +1,11 @@
# pkg-config description of libhsakmt (template expanded by the build).
# The prefix is derived from the location of this file, two directories up.
# NOTE(review): this is only right when the file is installed two levels
# below the prefix (such as lib/pkgconfig); confirm for multi-level libdirs.
prefix=${pcfiledir}/../..
exec_prefix=${prefix}
libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@
includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
Name: libhsakmt
Description: HSA Kernel Mode Thunk library for AMD KFD support
Version: @LIB_VERSION_STRING@
Libs: -L${libdir} -lhsakmt
Cflags: -I${includedir}
+552
View File
@@ -0,0 +1,552 @@
/*
* Copyright © 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "libhsakmt.h"
#include "hsakmt/linux/kfd_ioctl.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* Per-node flag array; allocated by init_device_debugging_memory() and
 * released by destroy_device_debugging_memory(). NULL while unallocated.
 */
static bool *is_device_debugged;
/* Capabilities mask stored by the last successful hsaKmtRuntimeEnable(). */
static uint32_t runtime_capabilities_mask = 0;
/* Allocate the per-node flag array with one entry per node, all cleared.
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when the allocation fails and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS init_device_debugging_memory(unsigned int NumNodes)
{
	const size_t flags_bytes = NumNodes * sizeof(bool);

	is_device_debugged = malloc(flags_bytes);
	if (is_device_debugged == NULL)
		return HSAKMT_STATUS_NO_MEMORY;

	/* false is all-bits-zero, so a byte-wise clear resets every flag */
	memset(is_device_debugged, 0, flags_bytes);

	return HSAKMT_STATUS_SUCCESS;
}
/* Release the per-node flag array and mark it as unallocated. */
void destroy_device_debugging_memory(void)
{
	/* free(NULL) is a no-op, so no separate NULL test is required */
	free(is_device_debugged);
	is_device_debugged = NULL;
}
/* Return the flag stored for node_id.
 *
 * Reports false when the flag array has not been allocated (before
 * init_device_debugging_memory() or after destroy_device_debugging_memory())
 * instead of dereferencing a NULL pointer.
 *
 * NOTE(review): node_id cannot be range-checked here because the array length
 * is not recorded; callers must pass an id below the NumNodes given at init.
 */
bool debug_get_reg_status(uint32_t node_id)
{
	if (!is_device_debugged)
		return false;

	return is_device_debugged[node_id];
}
/* Issue the deprecated debug-register ioctl for the GPU behind NodeId.
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when the flag array is not allocated, the
 * status from validate_nodeid() when the node is rejected,
 * HSAKMT_STATUS_ERROR when the ioctl fails and HSAKMT_STATUS_SUCCESS
 * otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgRegister(HSAuint32 NodeId)
{
	struct kfd_ioctl_dbg_register_args reg_args = {0};
	HSAKMT_STATUS status;
	uint32_t gpu_id;

	CHECK_KFD_OPEN();

	if (is_device_debugged == NULL)
		return HSAKMT_STATUS_NO_MEMORY;

	status = validate_nodeid(NodeId, &gpu_id);
	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	reg_args.gpu_id = gpu_id;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_REGISTER_DEPRECATED, &reg_args) != 0)
		return HSAKMT_STATUS_ERROR;

	return HSAKMT_STATUS_SUCCESS;
}
/* Issue the deprecated debug-unregister ioctl for the GPU behind NodeId.
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when the flag array is not allocated, the
 * status from validate_nodeid() when the node is rejected,
 * HSAKMT_STATUS_ERROR when the ioctl fails and HSAKMT_STATUS_SUCCESS
 * otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgUnregister(HSAuint32 NodeId)
{
	struct kfd_ioctl_dbg_unregister_args unreg_args = {0};
	HSAKMT_STATUS status;
	uint32_t gpu_id;

	CHECK_KFD_OPEN();

	if (is_device_debugged == NULL)
		return HSAKMT_STATUS_NO_MEMORY;

	status = validate_nodeid(NodeId, &gpu_id);
	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	unreg_args.gpu_id = gpu_id;

	return kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED, &unreg_args) ?
		HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}
/* Send a wavefront-control request through the deprecated debug ioctl.
 *
 * The ioctl buffer is the fixed kfd_ioctl_dbg_wave_control_args header
 * followed by a packed payload: Operand, Mode, TrapId, the wave message and
 * the message ring's MemoryVA, in that order.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when DbgWaveMsgRing is NULL, the
 * status from validate_nodeid() when the node is rejected,
 * HSAKMT_STATUS_ERROR when the buffer cannot be allocated or the ioctl fails,
 * and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgWavefrontControl(HSAuint32 NodeId,
						  HSA_DBG_WAVEOP Operand,
						  HSA_DBG_WAVEMODE Mode,
						  HSAuint32 TrapId,
						  HsaDbgWaveMessage *DbgWaveMsgRing)
{
	HSAKMT_STATUS result;
	uint32_t gpu_id;
	struct kfd_ioctl_dbg_wave_control_args *args;

	CHECK_KFD_OPEN();

	/* The payload is built from this structure, so it must be present. */
	if (!DbgWaveMsgRing)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	result = validate_nodeid(NodeId, &gpu_id);
	if (result != HSAKMT_STATUS_SUCCESS)
		return result;

	/* Determine size of the ioctl buffer: fixed header + packed payload */
	uint32_t buff_size = sizeof(Operand) + sizeof(Mode) + sizeof(TrapId) +
			     sizeof(DbgWaveMsgRing->DbgWaveMsg) +
			     sizeof(DbgWaveMsgRing->MemoryVA) + sizeof(*args);

	args = calloc(1, buff_size);
	if (!args)
		return HSAKMT_STATUS_ERROR;

	args->gpu_id = gpu_id;
	args->buf_size_in_bytes = buff_size;

	/* the variable part starts right behind the fixed header */
	unsigned char *run_ptr = (unsigned char *)args + sizeof(*args);

	/* save variable content pointer for kfd */
	args->content_ptr = (uint64_t)run_ptr;

	/* Append the items with memcpy: the packed layout gives no alignment
	 * guarantee for the wider members, so typed stores through a cast
	 * pointer would be undefined behaviour.
	 */
	memcpy(run_ptr, &Operand, sizeof(Operand));
	run_ptr += sizeof(Operand);

	memcpy(run_ptr, &Mode, sizeof(Mode));
	run_ptr += sizeof(Mode);

	memcpy(run_ptr, &TrapId, sizeof(TrapId));
	run_ptr += sizeof(TrapId);

	memcpy(run_ptr, &DbgWaveMsgRing->DbgWaveMsg,
	       sizeof(DbgWaveMsgRing->DbgWaveMsg));
	run_ptr += sizeof(DbgWaveMsgRing->DbgWaveMsg);

	memcpy(run_ptr, &DbgWaveMsgRing->MemoryVA,
	       sizeof(DbgWaveMsgRing->MemoryVA));

	/* send to kernel */
	long err = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED, args);

	free(args);

	if (err)
		return HSAKMT_STATUS_ERROR;

	return HSAKMT_STATUS_SUCCESS;
}
/* Program address watch points through the deprecated debug ioctl.
 *
 * The ioctl buffer is the fixed kfd_ioctl_dbg_address_watch_args header
 * followed by a packed payload: NumWatchPoints, the watch modes, the watch
 * addresses, the watch masks and (optionally) the watch event pointers.
 *
 * WatchMask must always point to at least one element: when its first
 * element is zero only that single element is sent, otherwise one mask per
 * watch point. WatchEvent may be NULL, in which case no events are attached.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for too many watch points or
 * missing required arrays, the status from validate_nodeid() when the node is
 * rejected, HSAKMT_STATUS_ERROR when the buffer cannot be allocated or the
 * ioctl fails, and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgAddressWatch(HSAuint32 NodeId,
					      HSAuint32 NumWatchPoints,
					      HSA_DBG_WATCH_MODE WatchMode[],
					      void *WatchAddress[],
					      HSAuint64 WatchMask[],
					      HsaEvent *WatchEvent[])
{
	HSAKMT_STATUS result;
	uint32_t gpu_id;
	uint32_t watch_mask_items;
	uint32_t watch_event_items;
	struct kfd_ioctl_dbg_address_watch_args *args;
	HSAuint32 i = 0;

	CHECK_KFD_OPEN();

	result = validate_nodeid(NodeId, &gpu_id);
	if (result != HSAKMT_STATUS_SUCCESS)
		return result;

	if (NumWatchPoints > MAX_ALLOWED_NUM_POINTS)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* WatchMask[0] is always read; the mode and address arrays are read
	 * once per watch point. Reject NULL instead of crashing.
	 */
	if (!WatchMask || (NumWatchPoints && (!WatchMode || !WatchAddress)))
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* determine the number of watch mask and event items to attach */
	watch_mask_items = WatchMask[0] > 0 ? NumWatchPoints : 1;
	watch_event_items = WatchEvent != NULL ? NumWatchPoints : 0;

	/* Size and structure of the ioctl buffer is dynamic in this case
	 * Here we calculate the buff size.
	 */
	uint32_t buff_size = sizeof(NumWatchPoints) +
		(sizeof(WatchMode[0]) + sizeof(WatchAddress[0])) *
			NumWatchPoints +
		watch_mask_items * sizeof(HSAuint64) +
		watch_event_items * sizeof(HsaEvent *) + sizeof(*args);

	args = calloc(1, buff_size);
	if (!args)
		return HSAKMT_STATUS_ERROR;

	args->gpu_id = gpu_id;
	args->buf_size_in_bytes = buff_size;

	/* the variable part starts right behind the fixed header */
	unsigned char *run_ptr = (unsigned char *)args + sizeof(*args);

	/* save variable content pointer for kfd */
	args->content_ptr = (uint64_t)run_ptr;

	/* Append the items with memcpy: the packed layout gives no alignment
	 * guarantee for the 8-byte members, so typed stores through a cast
	 * pointer would be undefined behaviour.
	 */
	memcpy(run_ptr, &NumWatchPoints, sizeof(NumWatchPoints));
	run_ptr += sizeof(NumWatchPoints);

	for (i = 0; i < NumWatchPoints; i++) {
		memcpy(run_ptr, &WatchMode[i], sizeof(WatchMode[i]));
		run_ptr += sizeof(WatchMode[i]);
	}

	for (i = 0; i < NumWatchPoints; i++) {
		memcpy(run_ptr, &WatchAddress[i], sizeof(WatchAddress[i]));
		run_ptr += sizeof(WatchAddress[i]);
	}

	for (i = 0; i < watch_mask_items; i++) {
		memcpy(run_ptr, &WatchMask[i], sizeof(WatchMask[i]));
		run_ptr += sizeof(WatchMask[i]);
	}

	for (i = 0; i < watch_event_items; i++) {
		memcpy(run_ptr, &WatchEvent[i], sizeof(WatchEvent[i]));
		run_ptr += sizeof(WatchEvent[i]);
	}

	/* send to kernel */
	long err = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED, args);

	free(args);

	if (err)
		return HSAKMT_STATUS_ERROR;
	return HSAKMT_STATUS_SUCCESS;
}
/* Kernel interface version thresholds used by the check below: a version
 * older than MAX_MAJOR.MIN_MINOR is reported as not supported.
 */
#define HSA_RUNTIME_ENABLE_MAX_MAJOR   1
#define HSA_RUNTIME_ENABLE_MIN_MINOR   13

/* Check whether runtime debug enablement can be used.
 *
 * Returns HSAKMT_STATUS_ERROR when system or node properties cannot be read,
 * HSAKMT_STATUS_NOT_SUPPORTED when a node without CPU cores lacks
 * DebugSupportedFirmware, when the version query fails or when the kernel
 * interface version is too old, and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS hsaKmtCheckRuntimeDebugSupport(void) {
	HsaSystemProperties sys_props;
	HsaNodeProperties node_props;
	HsaVersionInfo kfd_version = {0};
	uint32_t node_idx;

	memset(&sys_props, 0x00, sizeof(sys_props));
	memset(&node_props, 0x00, sizeof(node_props));

	if (hsaKmtAcquireSystemProperties(&sys_props))
		return HSAKMT_STATUS_ERROR;

	/* Every node without CPU cores must report debugger-capable firmware. */
	for (node_idx = 0; node_idx < sys_props.NumNodes; node_idx++) {
		if (hsaKmtGetNodeProperties(node_idx, &node_props))
			return HSAKMT_STATUS_ERROR;

		if (!node_props.NumCPUCores &&
		    !node_props.Capability.ui32.DebugSupportedFirmware)
			return HSAKMT_STATUS_NOT_SUPPORTED;
	}

	if (hsaKmtGetVersion(&kfd_version))
		return HSAKMT_STATUS_NOT_SUPPORTED;

	if (kfd_version.KernelInterfaceMajorVersion < HSA_RUNTIME_ENABLE_MAX_MAJOR)
		return HSAKMT_STATUS_NOT_SUPPORTED;

	if (kfd_version.KernelInterfaceMajorVersion == HSA_RUNTIME_ENABLE_MAX_MAJOR &&
	    (int)kfd_version.KernelInterfaceMinorVersion < HSA_RUNTIME_ENABLE_MIN_MINOR)
		return HSAKMT_STATUS_NOT_SUPPORTED;

	return HSAKMT_STATUS_SUCCESS;
}
/* Enable runtime debug support via AMDKFD_IOC_RUNTIME_ENABLE.
 *
 * rDebug is passed to the kernel as r_debug; setupTtmp additionally requests
 * the TTMP-save mode. On success the capabilities mask returned by the kernel
 * is stored for hsaKmtGetRuntimeCapabilities().
 *
 * Returns the status of hsaKmtCheckRuntimeDebugSupport() when that check
 * fails, HSAKMT_STATUS_UNAVAILABLE when the ioctl fails with EBUSY,
 * HSAKMT_STATUS_ERROR for any other ioctl failure and HSAKMT_STATUS_SUCCESS
 * otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeEnable(void *rDebug,
					    bool setupTtmp)
{
	struct kfd_ioctl_runtime_enable_args enable_args;
	HSAKMT_STATUS status = hsaKmtCheckRuntimeDebugSupport();

	if (status)
		return status;

	memset(&enable_args, 0x00, sizeof(enable_args));
	enable_args.r_debug = (HSAuint64)rDebug;
	enable_args.mode_mask = KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK;
	if (setupTtmp)
		enable_args.mode_mask |= KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_RUNTIME_ENABLE, &enable_args))
		return errno == EBUSY ? HSAKMT_STATUS_UNAVAILABLE :
					HSAKMT_STATUS_ERROR;

	runtime_capabilities_mask = enable_args.capabilities_mask;

	return HSAKMT_STATUS_SUCCESS;
}
/* Disable runtime debug support via AMDKFD_IOC_RUNTIME_ENABLE.
 *
 * Returns the status of hsaKmtCheckRuntimeDebugSupport() when that check
 * fails, HSAKMT_STATUS_ERROR when the ioctl fails and HSAKMT_STATUS_SUCCESS
 * otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRuntimeDisable(void)
{
	struct kfd_ioctl_runtime_enable_args disable_args;
	HSAKMT_STATUS status = hsaKmtCheckRuntimeDebugSupport();

	if (status)
		return status;

	/* an all-zero request leaves mode_mask at 0, which means "disable" */
	memset(&disable_args, 0x00, sizeof(disable_args));

	return kmtIoctl(kfd_fd, AMDKFD_IOC_RUNTIME_ENABLE, &disable_args) ?
		HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}
/* Report the capabilities mask stored by the last successful
 * hsaKmtRuntimeEnable() call (0 when none has succeeded yet).
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when caps_mask is NULL and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetRuntimeCapabilities(HSAuint32 *caps_mask)
{
	if (!caps_mask)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	*caps_mask = runtime_capabilities_mask;
	return HSAKMT_STATUS_SUCCESS;
}
static HSAKMT_STATUS dbg_trap_get_device_data(void *data,
uint32_t *n_entries,
uint32_t entry_size)
{
struct kfd_ioctl_dbg_trap_args args = {0};
args.device_snapshot.snapshot_buf_ptr = (uint64_t) data;
args.device_snapshot.num_devices = *n_entries;
args.device_snapshot.entry_size = entry_size;
args.op = KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT;
args.pid = getpid();
if (kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args))
return HSAKMT_STATUS_ERROR;
*n_entries = args.device_snapshot.num_devices;
return HSAKMT_STATUS_SUCCESS;
}
/* Request a queue snapshot for the calling process through the debug-trap
 * ioctl.
 *
 * data       - buffer handed to the kernel as snapshot_buf_ptr (may be NULL
 *              together with *n_entries == 0 to only query the queue count)
 * n_entries  - in: number of entries data can hold; out: number of queues
 *              reported by the kernel, which can be larger than the capacity
 *              passed in
 * entry_size - size of one snapshot entry
 * queue_ids  - optional array with room for the input *n_entries ids; filled
 *              from the snapshot entries that fit into data
 *
 * Returns HSAKMT_STATUS_ERROR when the ioctl fails, HSAKMT_STATUS_SUCCESS
 * otherwise.
 */
static HSAKMT_STATUS dbg_trap_get_queue_data(void *data,
					     uint32_t *n_entries,
					     uint32_t entry_size,
					     uint32_t *queue_ids)
{
	struct kfd_ioctl_dbg_trap_args args = {0};
	/* remember how many entries the caller's buffers can hold */
	const uint32_t capacity = *n_entries;

	args.queue_snapshot.num_queues = capacity;
	args.queue_snapshot.entry_size = entry_size;
	args.queue_snapshot.exception_mask = KFD_EC_MASK(EC_QUEUE_NEW);
	args.op = KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT;
	args.queue_snapshot.snapshot_buf_ptr = (uint64_t) data;
	args.pid = getpid();

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args))
		return HSAKMT_STATUS_ERROR;

	*n_entries = args.queue_snapshot.num_queues;

	if (queue_ids && data) {
		struct kfd_queue_snapshot_entry *queue_entry =
			(struct kfd_queue_snapshot_entry *) data;
		/* Never walk past the caller's buffers: the reported count may
		 * exceed the capacity that was passed in.
		 */
		const uint32_t filled =
			*n_entries < capacity ? *n_entries : capacity;

		for (uint32_t i = 0; i < filled; i++)
			queue_ids[i] = queue_entry[i].queue_id;
	}

	return HSAKMT_STATUS_SUCCESS;
}
static HSAKMT_STATUS dbg_trap_suspend_queues(uint32_t *queue_ids,
uint32_t num_queues)
{
struct kfd_ioctl_dbg_trap_args args = {0};
int r;
args.suspend_queues.queue_array_ptr = (uint64_t) queue_ids;
args.suspend_queues.num_queues = num_queues;
args.suspend_queues.exception_mask = KFD_EC_MASK(EC_QUEUE_NEW);
args.op = KFD_IOC_DBG_TRAP_SUSPEND_QUEUES;
args.pid = getpid();
r = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args);
if (r < 0)
return HSAKMT_STATUS_ERROR;
return HSAKMT_STATUS_SUCCESS;
}
/* Enable debug trapping for the calling process.
 *
 * runtime_info - out: newly allocated buffer of sizeof(struct
 *                kfd_runtime_info) bytes handed to the kernel as rinfo_ptr;
 *                owned by the caller on success, NULL on failure
 * data_size    - out: size of that buffer in bytes
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when the buffer cannot be allocated,
 * HSAKMT_STATUS_ERROR when the ioctl fails and HSAKMT_STATUS_SUCCESS
 * otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgEnable(void **runtime_info,
					HSAuint32 *data_size)
{
	struct kfd_ioctl_dbg_trap_args args = {0};

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(KFD_IOCTL_MINOR_VERSION);

	*data_size = sizeof(struct kfd_runtime_info);
	args.enable.rinfo_size = *data_size;
	args.enable.dbg_fd = kfd_fd;
	*runtime_info = malloc(args.enable.rinfo_size);
	if (!*runtime_info)
		return HSAKMT_STATUS_NO_MEMORY;
	args.enable.rinfo_ptr = (uint64_t) *runtime_info;
	args.op = KFD_IOC_DBG_TRAP_ENABLE;
	args.pid = getpid();

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args)) {
		free(*runtime_info);
		/* do not hand a freed pointer back to the caller */
		*runtime_info = NULL;
		return HSAKMT_STATUS_ERROR;
	}

	return HSAKMT_STATUS_SUCCESS;
}
/* Disable debug trapping for the calling process.
 *
 * Returns HSAKMT_STATUS_ERROR when the ioctl fails and HSAKMT_STATUS_SUCCESS
 * otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgDisable(void)
{
	struct kfd_ioctl_dbg_trap_args trap_args = {0};

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(KFD_IOCTL_MINOR_VERSION);

	trap_args.pid = getpid();
	trap_args.op = KFD_IOC_DBG_TRAP_DISABLE;
	trap_args.enable.dbg_fd = kfd_fd;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &trap_args) != 0)
		return HSAKMT_STATUS_ERROR;

	return HSAKMT_STATUS_SUCCESS;
}
/* Fetch the debug device snapshot.
 *
 * data       - out: newly allocated snapshot buffer, owned by the caller on
 *              success and NULL on failure
 * n_entries  - out: number of devices reported by the kernel
 * entry_size - out: sizeof(struct kfd_dbg_device_info_entry)
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when the buffer cannot be allocated, the
 * status of the snapshot request otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetDeviceData(void **data,
					       HSAuint32 *n_entries,
					       HSAuint32 *entry_size)
{
	HSAKMT_STATUS ret = HSAKMT_STATUS_NO_MEMORY;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(KFD_IOCTL_MINOR_VERSION);

	*n_entries = UINT32_MAX;
	*entry_size = sizeof(struct kfd_dbg_device_info_entry);
	/* NOTE(review): if HSAuint32 is a 32-bit type this product is computed
	 * modulo 2^32, so the buffer is smaller than the UINT32_MAX entries
	 * announced to the kernel. Left unchanged here; confirm the intended
	 * sizing against the kernel interface before altering it.
	 */
	*data = malloc(*entry_size * *n_entries);
	if (!*data)
		return ret;

	ret = dbg_trap_get_device_data(*data, n_entries, *entry_size);
	if (ret) {
		free(*data);
		/* do not hand a freed pointer back to the caller */
		*data = NULL;
	}

	return ret;
}
/* Fetch the debug queue snapshot, optionally suspending the queues first.
 *
 * data           - out: newly allocated snapshot buffer, owned by the caller
 *                  on success and NULL on failure
 * n_entries      - out: number of snapshot entries available in *data
 * entry_size     - out: sizeof(struct kfd_queue_snapshot_entry)
 * suspend_queues - when true and queues exist, the queues found by the
 *                  snapshot are suspended and the snapshot is taken again
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when the snapshot buffer cannot be
 * allocated, HSAKMT_STATUS_ERROR when any other step fails and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDbgGetQueueData(void **data,
					      HSAuint32 *n_entries,
					      HSAuint32 *entry_size,
					      bool suspend_queues)
{
	uint32_t *queue_ids = NULL;
	HSAuint32 capacity;
	size_t data_bytes;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(KFD_IOCTL_MINOR_VERSION);

	*entry_size = sizeof(struct kfd_queue_snapshot_entry);
	*n_entries = 0;
	*data = NULL;

	/* first pass: no buffer, only learn how many queues exist */
	if (dbg_trap_get_queue_data(NULL, n_entries, *entry_size, NULL))
		return HSAKMT_STATUS_ERROR;

	capacity = *n_entries;
	data_bytes = (size_t)capacity * *entry_size;

	/* request at least one byte so that an empty snapshot is not
	 * mistaken for an allocation failure
	 */
	*data = malloc(data_bytes ? data_bytes : 1);
	if (!*data)
		return HSAKMT_STATUS_NO_MEMORY;

	/* The id array is only needed when queues are to be suspended; its
	 * absence is an error only when it was actually requested.
	 */
	if (suspend_queues && capacity) {
		queue_ids = (uint32_t *)malloc(sizeof(uint32_t) * capacity);
		if (!queue_ids)
			goto free_data;
	}

	if (dbg_trap_get_queue_data(*data, n_entries, *entry_size, queue_ids))
		goto free_data;

	/* queues may have been created since the first pass; only the
	 * entries that fit into the buffer are valid
	 */
	if (*n_entries > capacity)
		*n_entries = capacity;

	if (queue_ids) {
		if (dbg_trap_suspend_queues(queue_ids, *n_entries) ||
		    dbg_trap_get_queue_data(*data, n_entries, *entry_size, NULL))
			goto free_data;

		if (*n_entries > capacity)
			*n_entries = capacity;

		free(queue_ids);
	}

	return HSAKMT_STATUS_SUCCESS;

free_data:
	free(*data);
	/* do not hand a freed pointer back to the caller */
	*data = NULL;
	free(queue_ids);

	return HSAKMT_STATUS_ERROR;
}
/* Forward a caller-prepared debug-trap request to the kernel.
 *
 * args        - request structure passed to the ioctl unchanged
 * Queues      - optional queue handles; when given, their converted ids are
 *               copied into the queue array referenced by args (the suspend
 *               array for a suspend request, the resume array otherwise)
 * DebugReturn - optional; receives the raw ioctl return value
 *
 * For suspend and resume requests any return value between 0 and the
 * requested number of queues counts as success; for every other request only
 * 0 does. Returns HSAKMT_STATUS_SUCCESS or HSAKMT_STATUS_ERROR accordingly.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDebugTrapIoctl(struct kfd_ioctl_dbg_trap_args *args,
					     HSA_QUEUEID *Queues,
					     HSAuint64 *DebugReturn)
{
	long ioctl_ret;

	CHECK_KFD_OPEN();

	if (Queues) {
		int queue_count;
		void *id_array;

		if (args->op == KFD_IOC_DBG_TRAP_SUSPEND_QUEUES) {
			queue_count = args->suspend_queues.num_queues;
			id_array = (void *)args->suspend_queues.queue_array_ptr;
		} else {
			queue_count = args->resume_queues.num_queues;
			id_array = (void *)args->resume_queues.queue_array_ptr;
		}

		/* NOTE(review): the convert_queue_ids() result is used without
		 * a NULL check and is never released here - confirm its
		 * ownership contract.
		 */
		memcpy(id_array, convert_queue_ids(queue_count, Queues),
		       queue_count * sizeof(uint32_t));
	}

	ioctl_ret = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, args);
	if (DebugReturn)
		*DebugReturn = ioctl_ret;

	if (args->op == KFD_IOC_DBG_TRAP_SUSPEND_QUEUES &&
	    ioctl_ret >= 0 && ioctl_ret <= args->suspend_queues.num_queues)
		return HSAKMT_STATUS_SUCCESS;

	if (args->op == KFD_IOC_DBG_TRAP_RESUME_QUEUES &&
	    ioctl_ret >= 0 && ioctl_ret <= args->resume_queues.num_queues)
		return HSAKMT_STATUS_SUCCESS;

	return ioctl_ret == 0 ? HSAKMT_STATUS_SUCCESS : HSAKMT_STATUS_ERROR;
}
+482
View File
@@ -0,0 +1,482 @@
/*
* Copyright © 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "libhsakmt.h"
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <errno.h>
#include <unistd.h>
#include <sys/mman.h>
#include <stdio.h>
#include "hsakmt/linux/kfd_ioctl.h"
#include "fmm.h"
/* Event slot page shared by all events; set up lazily by hsaKmtCreateEvent()
 * and NULL until then (static storage is zero-initialised).
 */
static HSAuint64 *events_page;

/* Forget the event slot page. The page itself is not released here. */
void clear_events_page(void)
{
	events_page = NULL;
}
/* Tell whether an event type is system-defined, i.e. anything other than a
 * signal event or a debug event (debug events behave as signal events).
 */
static bool IsSystemEventType(HSA_EVENTTYPE type)
{
	if (type == HSA_EVENTTYPE_SIGNAL)
		return false;
	if (type == HSA_EVENTTYPE_DEBUG_EVENT)
		return false;

	return true;
}
/* Create a KFD event and return a newly allocated HsaEvent describing it.
 *
 * EventDesc   - event type, node id and sync variable of the new event
 * ManualReset - when false the event is created with auto_reset set
 * IsSignaled  - when true and the type is not system-defined, the event is
 *               set right after creation
 * Event       - out: the new event on success
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for an event type at or beyond
 * HSA_EVENTTYPE_MAXID, HSAKMT_STATUS_ERROR when an allocation, the create
 * ioctl or the event page mapping fails, HSAKMT_STATUS_SUCCESS otherwise.
 *
 * NOTE(review): *Event is written only on success and on create-ioctl
 * failure; the other error returns leave it untouched.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
bool ManualReset, bool IsSignaled,
HsaEvent **Event)
{
unsigned int event_limit = KFD_SIGNAL_EVENT_LIMIT;
CHECK_KFD_OPEN();
if (EventDesc->EventType >= HSA_EVENTTYPE_MAXID)
return HSAKMT_STATUS_INVALID_PARAMETER;
HsaEvent *e = malloc(sizeof(HsaEvent));
if (!e)
return HSAKMT_STATUS_ERROR;
memset(e, 0, sizeof(*e));
struct kfd_ioctl_create_event_args args = {0};
args.event_type = EventDesc->EventType;
args.node_id = EventDesc->NodeId;
args.auto_reset = !ManualReset;
/* dGPU code */
/* The shared events_page is created and published under hsakmt_mutex. */
pthread_mutex_lock(&hsakmt_mutex);
if (is_dgpu && !events_page) {
/* presumably 8 bytes (one HSAuint64 slot) per event - TODO confirm */
events_page = allocate_exec_aligned_memory_gpu(
KFD_SIGNAL_EVENT_LIMIT * 8, PAGE_SIZE, 0, 0, true, false, true);
if (!events_page) {
free(e);
pthread_mutex_unlock(&hsakmt_mutex);
return HSAKMT_STATUS_ERROR;
}
/* pass the handle of the freshly allocated page to the kernel */
fmm_get_handle(events_page, (uint64_t *)&args.event_page_offset);
}
/* NOTE(review): if this ioctl fails right after events_page was allocated
 * above, the page stays set and later calls no longer send its handle -
 * confirm this is intended.
 */
if (kmtIoctl(kfd_fd, AMDKFD_IOC_CREATE_EVENT, &args) != 0) {
free(e);
*Event = NULL;
pthread_mutex_unlock(&hsakmt_mutex);
return HSAKMT_STATUS_ERROR;
}
e->EventId = args.event_id;
/* No page yet but the kernel returned an offset: map the page from kfd_fd. */
if (!events_page && args.event_page_offset > 0) {
events_page = mmap(NULL, event_limit * 8, PROT_WRITE | PROT_READ,
MAP_SHARED, kfd_fd, args.event_page_offset);
if (events_page == MAP_FAILED) {
/* old kernels only support 256 events */
event_limit = 256;
events_page = mmap(NULL, PAGE_SIZE, PROT_WRITE | PROT_READ,
MAP_SHARED, kfd_fd, args.event_page_offset);
}
if (events_page == MAP_FAILED) {
events_page = NULL;
pthread_mutex_unlock(&hsakmt_mutex);
/* destroys the kernel event; e is freed only if that ioctl succeeds */
hsaKmtDestroyEvent(e);
return HSAKMT_STATUS_ERROR;
}
}
pthread_mutex_unlock(&hsakmt_mutex);
/* HWData2 holds the address of this event's slot in the events page. */
if (args.event_page_offset > 0 && args.event_slot_index < event_limit)
e->EventData.HWData2 = (HSAuint64)&events_page[args.event_slot_index];
e->EventData.EventType = EventDesc->EventType;
e->EventData.HWData1 = args.event_id;
e->EventData.HWData3 = args.event_trigger_data;
e->EventData.EventData.SyncVar.SyncVar.UserData =
EventDesc->SyncVar.SyncVar.UserData;
e->EventData.EventData.SyncVar.SyncVarSize =
EventDesc->SyncVar.SyncVarSize;
/* Initially-signaled, non-system events are set now; the result of the
 * set ioctl is not checked.
 */
if (IsSignaled && !IsSystemEventType(e->EventData.EventType)) {
struct kfd_ioctl_set_event_args set_args = {0};
set_args.event_id = args.event_id;
kmtIoctl(kfd_fd, AMDKFD_IOC_SET_EVENT, &set_args);
}
*Event = e;
return HSAKMT_STATUS_SUCCESS;
}
/* Destroy a KFD event and free its HsaEvent.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL event and
 * HSAKMT_STATUS_ERROR when the destroy ioctl fails (the HsaEvent is not
 * freed in that case); HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyEvent(HsaEvent *Event)
{
	struct kfd_ioctl_destroy_event_args destroy_args = {0};

	CHECK_KFD_OPEN();

	if (Event == NULL)
		return HSAKMT_STATUS_INVALID_HANDLE;

	destroy_args.event_id = Event->EventId;
	if (kmtIoctl(kfd_fd, AMDKFD_IOC_DESTROY_EVENT, &destroy_args) != 0)
		return HSAKMT_STATUS_ERROR;

	free(Event);

	return HSAKMT_STATUS_SUCCESS;
}
/* Signal an event.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL event,
 * HSAKMT_STATUS_ERROR for a system-defined event type or when the ioctl
 * returns -1, and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetEvent(HsaEvent *Event)
{
	struct kfd_ioctl_set_event_args set_args = {0};

	CHECK_KFD_OPEN();

	if (Event == NULL)
		return HSAKMT_STATUS_INVALID_HANDLE;

	/* The spec does not say so explicitly, but system-defined events
	 * must not be signaled by the user.
	 */
	if (IsSystemEventType(Event->EventData.EventType))
		return HSAKMT_STATUS_ERROR;

	set_args.event_id = Event->EventId;

	return kmtIoctl(kfd_fd, AMDKFD_IOC_SET_EVENT, &set_args) == -1 ?
		HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}
/* Reset an event.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL event,
 * HSAKMT_STATUS_ERROR for a system-defined event type or when the ioctl
 * returns -1, and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtResetEvent(HsaEvent *Event)
{
	struct kfd_ioctl_reset_event_args reset_args = {0};

	CHECK_KFD_OPEN();

	if (Event == NULL)
		return HSAKMT_STATUS_INVALID_HANDLE;

	/* The spec does not say so explicitly, but system-defined events
	 * must not be reset by the user.
	 */
	if (IsSystemEventType(Event->EventData.EventType))
		return HSAKMT_STATUS_ERROR;

	reset_args.event_id = Event->EventId;

	return kmtIoctl(kfd_fd, AMDKFD_IOC_RESET_EVENT, &reset_args) == -1 ?
		HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
}
/* Validate an event handle. No event state is actually queried: the result
 * is HSAKMT_STATUS_INVALID_HANDLE for a NULL event and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryEventState(HsaEvent *Event)
{
	CHECK_KFD_OPEN();

	return Event ? HSAKMT_STATUS_SUCCESS : HSAKMT_STATUS_INVALID_HANDLE;
}
/* Wait on a single event without event-age tracking; forwards to
 * hsaKmtWaitOnEvent_Ext() with a NULL event_age.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent(HsaEvent *Event,
					  HSAuint32 Milliseconds)
{
	uint64_t *const no_event_age = NULL;

	return hsaKmtWaitOnEvent_Ext(Event, Milliseconds, no_event_age);
}
/* Wait on a single event; forwards to hsaKmtWaitOnMultipleEvents_Ext() with
 * a one-element event list and wait-on-all set.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL event, otherwise the status
 * of the forwarded call.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent_Ext(HsaEvent *Event,
					      HSAuint32 Milliseconds, uint64_t *event_age)
{
	const HSAuint32 single_event = 1;

	if (Event == NULL)
		return HSAKMT_STATUS_INVALID_HANDLE;

	return hsaKmtWaitOnMultipleEvents_Ext(&Event, single_event, true,
					      Milliseconds, event_age);
}
/* Query the SVM attributes of the page at address and log them.
 *
 * Asks the kernel (KFD_IOCTL_SVM_OP_GET_ATTR) for the preferred location,
 * prefetch location, access of gpu_id and the flags of a PAGE_SIZE range
 * starting at address, then prints one line per returned attribute.
 *
 * Returns HSAKMT_STATUS_ERROR when the ioctl fails or an unknown attribute
 * type comes back, HSAKMT_STATUS_SUCCESS otherwise.
 */
static HSAKMT_STATUS get_mem_info_svm_api(uint64_t address, uint32_t gpu_id)
{
	HSA_SVM_ATTRIBUTE query[] = {
					{HSA_SVM_ATTR_PREFERRED_LOC, 0},
					{HSA_SVM_ATTR_PREFETCH_LOC, 0},
					{HSA_SVM_ATTR_ACCESS, gpu_id},
					{HSA_SVM_ATTR_SET_FLAGS, 0},
				    };
	struct kfd_ioctl_svm_args *svm_args;
	HSAuint32 query_bytes;
	uint32_t node_id;
	HSAuint32 idx;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(5);

	query_bytes = sizeof(query);

	/* fixed header followed by the attribute array, on the stack */
	svm_args = alloca(sizeof(*svm_args) + query_bytes);

	svm_args->op = KFD_IOCTL_SVM_OP_GET_ATTR;
	svm_args->start_addr = address;
	svm_args->size = PAGE_SIZE;
	svm_args->nattr = query_bytes / sizeof(*query);
	memcpy(svm_args->attrs, query, query_bytes);

	/* the size of the attribute array is folded into the ioctl number */
	if (kmtIoctl(kfd_fd, AMDKFD_IOC_SVM + (query_bytes << _IOC_SIZESHIFT), svm_args)) {
		pr_debug("op get range attrs failed %s\n", strerror(errno));
		return HSAKMT_STATUS_ERROR;
	}

	pr_err("GPU address 0x%lx, is Unified memory\n", address);

	for (idx = 0; idx < svm_args->nattr; idx++) {
		/* The two special location values are reported as they are;
		 * every other value is translated from a GPU id to a node id.
		 */
		if (svm_args->attrs[idx].value != KFD_IOCTL_SVM_LOCATION_SYSMEM &&
		    svm_args->attrs[idx].value != KFD_IOCTL_SVM_LOCATION_UNDEFINED)
			gpuid_to_nodeid(svm_args->attrs[idx].value, &node_id);
		else
			node_id = svm_args->attrs[idx].value;

		switch (svm_args->attrs[idx].type) {
		case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
			pr_err("Preferred location for address 0x%lx is Node id %d\n",
				address, node_id);
			break;
		case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
			pr_err("Prefetch location for address 0x%lx is Node id %d\n",
				address, node_id);
			break;
		case KFD_IOCTL_SVM_ATTR_ACCESS:
			pr_err("Node id %d has access to address 0x%lx\n",
				node_id, address);
			break;
		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
			pr_err("Node id %d has access in place to address 0x%lx\n",
				node_id, address);
			break;
		case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
			pr_err("Node id %d has no access to address 0x%lx\n",
				node_id, address);
			break;
		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
			/* one line per flag bit that is set */
			if (svm_args->attrs[idx].value & KFD_IOCTL_SVM_FLAG_COHERENT)
				pr_err("Fine grained coherency between devices\n");
			if (svm_args->attrs[idx].value & KFD_IOCTL_SVM_FLAG_GPU_RO)
				pr_err("Read only\n");
			if (svm_args->attrs[idx].value & KFD_IOCTL_SVM_FLAG_GPU_EXEC)
				pr_err("GPU exec allowed\n");
			if (svm_args->attrs[idx].value & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)
				pr_err("GPU always mapped\n");
			if (svm_args->attrs[idx].value & KFD_IOCTL_SVM_FLAG_EXT_COHERENT)
				pr_err("Extended-scope fine grained coherency between devices\n");
			break;
		default:
			pr_debug("get invalid attr type 0x%x\n", svm_args->attrs[idx].type);
			return HSAKMT_STATUS_ERROR;
		}
	}

	return HSAKMT_STATUS_SUCCESS;
}
/* Analyse memory exception data and log what is known about the faulting
 * address: the failure kind, then either the buffer it belongs to (type,
 * addresses, registered and mapped nodes) or, failing that, its SVM
 * attributes.
 */
static void analysis_memory_exception(struct kfd_hsa_memory_exception_data *
						memory_exception_data)
{
	const uint64_t fault_va = memory_exception_data->va;
	HsaPointerInfo ptr_info;
	uint32_t node_id = 0;
	unsigned int idx;

	gpuid_to_nodeid(memory_exception_data->gpu_id, &node_id);
	pr_err("Memory exception on virtual address 0x%lx, ", fault_va);
	pr_err("node id %d : ", node_id);

	/* only the first matching failure kind is reported */
	if (memory_exception_data->failure.NotPresent)
		pr_err("Page not present\n");
	else if (memory_exception_data->failure.ReadOnly)
		pr_err("Writing to readonly page\n");
	else if (memory_exception_data->failure.NoExecute)
		pr_err("Execute to none-executable page\n");

	if (fmm_get_mem_info((const void *)fault_va, &ptr_info) != HSAKMT_STATUS_SUCCESS) {
		/* not a known buffer: fall back to the SVM attribute query */
		if (get_mem_info_svm_api(fault_va, memory_exception_data->gpu_id) !=
		    HSAKMT_STATUS_SUCCESS)
			pr_err("Address does not belong to a known buffer\n");
		return;
	}

	pr_err("GPU address 0x%lx, node id %d, size in byte 0x%lx\n",
			ptr_info.GPUAddress, ptr_info.Node, ptr_info.SizeInBytes);

	switch (ptr_info.Type) {
	case HSA_POINTER_REGISTERED_SHARED:
		pr_err("Memory is registered shared buffer (IPC)\n");
		break;
	case HSA_POINTER_REGISTERED_GRAPHICS:
		pr_err("Memory is registered graphics buffer\n");
		break;
	case HSA_POINTER_REGISTERED_USER:
		pr_err("Memory is registered user pointer\n");
		pr_err("CPU address of the memory is %p\n", ptr_info.CPUAddress);
		break;
	case HSA_POINTER_ALLOCATED:
		pr_err("Memory is allocated using hsaKmtAllocMemory\n");
		pr_err("CPU address of the memory is %p\n", ptr_info.CPUAddress);
		break;
	case HSA_POINTER_RESERVED_ADDR:
		pr_err("Memory is allocated by OnlyAddress mode\n");
		break;
	default:
		pr_err("Invalid memory type %d\n", ptr_info.Type);
		break;
	}

	if (ptr_info.RegisteredNodes) {
		pr_err("Memory is registered to node id: ");
		for (idx = 0; idx < ptr_info.NRegisteredNodes; idx++)
			pr_err("%d ", ptr_info.RegisteredNodes[idx]);
		pr_err("\n");
	}

	if (ptr_info.MappedNodes) {
		pr_err("Memory is mapped to node id: ");
		for (idx = 0; idx < ptr_info.NMappedNodes; idx++)
			pr_err("%d ", ptr_info.MappedNodes[idx]);
		pr_err("\n");
	}
}
/* Wait on several events without event-age tracking; forwards to
 * hsaKmtWaitOnMultipleEvents_Ext() with a NULL event_age.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents(HsaEvent *Events[],
						   HSAuint32 NumEvents,
						   bool WaitOnAll,
						   HSAuint32 Milliseconds)
{
	uint64_t *const no_event_age = NULL;

	return hsaKmtWaitOnMultipleEvents_Ext(Events, NumEvents, WaitOnAll,
					      Milliseconds, no_event_age);
}
/*
 * Wait on a set of events, optionally exchanging per-event ages with KFD.
 *
 * Events        Array of NumEvents event pointers; must not be NULL.
 * NumEvents     Number of entries in Events (and in event_age, when given).
 * WaitOnAll     Forwarded to the kernel as wait_for_all.
 * Milliseconds  Forwarded to the kernel as the timeout.
 * event_age     Optional. For entries of type HSA_EVENTTYPE_SIGNAL,
 *               event_age[i] is handed to the kernel as last_event_age and is
 *               overwritten on the way out with the value found in the ioctl
 *               argument block (also on the error paths after the ioctl).
 *
 * Returns:
 *   HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED  from CHECK_KFD_OPEN()
 *   HSAKMT_STATUS_INVALID_HANDLE                Events is NULL
 *   HSAKMT_STATUS_NO_MEMORY                     the argument array could not be allocated
 *   HSAKMT_STATUS_ERROR                         the wait ioctl returned -1
 *   HSAKMT_STATUS_WAIT_TIMEOUT                  the kernel reported a timeout
 *   status of gpuid_to_nodeid()                 a reported gpu_id could not be translated
 *   HSAKMT_STATUS_SUCCESS                       otherwise
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_Ext(HsaEvent *Events[],
						       HSAuint32 NumEvents,
						       bool WaitOnAll,
						       HSAuint32 Milliseconds,
						       uint64_t *event_age)
{
	CHECK_KFD_OPEN();

	if (!Events)
		return HSAKMT_STATUS_INVALID_HANDLE;

	struct kfd_event_data *event_data = calloc(NumEvents, sizeof(struct kfd_event_data));

	/* calloc(0, ...) is allowed to return NULL, so only a failed non-empty
	 * request is treated as out-of-memory. Without this check the fill
	 * loop below would write through a NULL pointer.
	 */
	if (!event_data && NumEvents)
		return HSAKMT_STATUS_NO_MEMORY;

	for (HSAuint32 i = 0; i < NumEvents; i++) {
		event_data[i].event_id = Events[i]->EventId;
		event_data[i].kfd_event_data_ext = (uint64_t)(uintptr_t)NULL;
		if (event_age && Events[i]->EventData.EventType == HSA_EVENTTYPE_SIGNAL)
			event_data[i].signal_event_data.last_event_age = event_age[i];
	}

	struct kfd_ioctl_wait_events_args args = {0};

	args.wait_for_all = WaitOnAll;
	args.timeout = Milliseconds;
	args.num_events = NumEvents;
	args.events_ptr = (uint64_t)(uintptr_t)event_data;

	HSAKMT_STATUS result;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &args) == -1)
		result = HSAKMT_STATUS_ERROR;
	else if (args.wait_result == KFD_IOC_WAIT_RESULT_TIMEOUT)
		result = HSAKMT_STATUS_WAIT_TIMEOUT;
	else {
		result = HSAKMT_STATUS_SUCCESS;
		for (HSAuint32 i = 0; i < NumEvents; i++) {
			/* A non-zero gpu_id marks an entry the kernel filled in. */
			if (Events[i]->EventData.EventType == HSA_EVENTTYPE_MEMORY &&
			    event_data[i].memory_exception_data.gpu_id) {
				Events[i]->EventData.EventData.MemoryAccessFault.VirtualAddress = event_data[i].memory_exception_data.va;
				result = gpuid_to_nodeid(event_data[i].memory_exception_data.gpu_id, &Events[i]->EventData.EventData.MemoryAccessFault.NodeId);
				if (result != HSAKMT_STATUS_SUCCESS)
					goto out;
				Events[i]->EventData.EventData.MemoryAccessFault.Failure.NotPresent = event_data[i].memory_exception_data.failure.NotPresent;
				Events[i]->EventData.EventData.MemoryAccessFault.Failure.ReadOnly = event_data[i].memory_exception_data.failure.ReadOnly;
				Events[i]->EventData.EventData.MemoryAccessFault.Failure.NoExecute = event_data[i].memory_exception_data.failure.NoExecute;
				Events[i]->EventData.EventData.MemoryAccessFault.Failure.Imprecise = event_data[i].memory_exception_data.failure.imprecise;
				Events[i]->EventData.EventData.MemoryAccessFault.Failure.ErrorType = event_data[i].memory_exception_data.ErrorType;
				/* ErrorType values 1 and 2 are reported as ECC failures. */
				Events[i]->EventData.EventData.MemoryAccessFault.Failure.ECC =
					((event_data[i].memory_exception_data.ErrorType == 1) || (event_data[i].memory_exception_data.ErrorType == 2)) ? 1 : 0;
				Events[i]->EventData.EventData.MemoryAccessFault.Flags = HSA_EVENTID_MEMORY_FATAL_PROCESS;
				analysis_memory_exception(&event_data[i].memory_exception_data);
			} else if (Events[i]->EventData.EventType == HSA_EVENTTYPE_HW_EXCEPTION &&
				   event_data[i].hw_exception_data.gpu_id) {
				result = gpuid_to_nodeid(event_data[i].hw_exception_data.gpu_id, &Events[i]->EventData.EventData.HwException.NodeId);
				if (result != HSAKMT_STATUS_SUCCESS)
					goto out;
				Events[i]->EventData.EventData.HwException.ResetType = event_data[i].hw_exception_data.reset_type;
				Events[i]->EventData.EventData.HwException.ResetCause = event_data[i].hw_exception_data.reset_cause;
				Events[i]->EventData.EventData.HwException.MemoryLost = event_data[i].hw_exception_data.memory_lost;
			}
		}
	}
out:
	/* Copy the signal ages back on every path that reached the ioctl. */
	for (HSAuint32 i = 0; i < NumEvents; i++) {
		if (event_age && Events[i]->EventData.EventType == HSA_EVENTTYPE_SIGNAL)
			event_age[i] = event_data[i].signal_event_data.last_event_age;
	}

	free(event_data);

	return result;
}
/*
 * Open an SMI event file descriptor for a node.
 *
 * NodeId  Node to open the descriptor for; translated to a gpu id with
 *         validate_nodeid().
 * fd      Output; receives the anon_fd filled in by the
 *         AMDKFD_IOC_SMI_EVENTS ioctl. Must not be NULL.
 *
 * Returns:
 *   HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED  from CHECK_KFD_OPEN()
 *   status of validate_nodeid()                 NodeId is not valid
 *   HSAKMT_STATUS_INVALID_PARAMETER             fd is NULL
 *   HSAKMT_STATUS_ERROR                         the ioctl failed
 *   HSAKMT_STATUS_SUCCESS                       otherwise
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenSMI(HSAuint32 NodeId, int *fd)
{
	/* Zero the whole block so no uninitialised stack bytes reach the kernel. */
	struct kfd_ioctl_smi_events_args args = {0};
	HSAKMT_STATUS result;
	uint32_t gpuid;

	CHECK_KFD_OPEN();

	pr_debug("[%s] node %d\n", __func__, NodeId);

	result = validate_nodeid(NodeId, &gpuid);
	if (result != HSAKMT_STATUS_SUCCESS) {
		pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
		return result;
	}

	/* Reject a NULL output pointer before the kernel creates a descriptor
	 * that could then neither be returned nor closed.
	 */
	if (!fd)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	args.gpuid = gpuid;

	/* kmtIoctl() returns an int, so test it directly instead of storing
	 * it in an HSAKMT_STATUS.
	 */
	if (kmtIoctl(kfd_fd, AMDKFD_IOC_SMI_EVENTS, &args)) {
		pr_debug("open SMI event fd failed %s\n", strerror(errno));
		return HSAKMT_STATUS_ERROR;
	}

	*fd = args.anon_fd;
	return HSAKMT_STATUS_SUCCESS;
}
+4277
View File
File diff suppressed because it is too large Load Diff
+107
View File
@@ -0,0 +1,107 @@
/*
* Copyright © 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef FMM_H_
#define FMM_H_
#include "hsakmt/hsakmttypes.h"
#include <stddef.h>
/*
 * Aperture type selector. Stored in aperture_properties_t.app_type and passed
 * to fmm_get_aperture_base_and_limit().
 *
 * FMM_FIRST_APERTURE_TYPE is 0 and FMM_GPUVM is an alias for it; the remaining
 * enumerators take consecutive values. FMM_LAST_APERTURE_TYPE is the final
 * enumerator and names no aperture of its own -- presumably an exclusive
 * upper bound for iteration (confirm against callers).
 */
typedef enum {
FMM_FIRST_APERTURE_TYPE = 0,
FMM_GPUVM = FMM_FIRST_APERTURE_TYPE,
FMM_LDS,
FMM_SCRATCH,
FMM_SVM,
FMM_MMIO,
FMM_LAST_APERTURE_TYPE
} aperture_type_e;
/* Describes a single aperture: its type, its size and where it starts. */
typedef struct {
/* Which kind of aperture this entry describes. */
aperture_type_e app_type;
/* Size of the aperture -- presumably in bytes (confirm against users). */
uint64_t size;
/* Address at which the aperture begins. */
void *start_address;
} aperture_properties_t;
HSAKMT_STATUS fmm_get_amdgpu_device_handle(uint32_t node_id, HsaAMDGPUDeviceHandle *DeviceHandle);
HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes);
void fmm_destroy_process_apertures(void);
/* Memory interface */
void *fmm_allocate_scratch(uint32_t gpu_id, void *address, uint64_t MemorySizeInBytes);
void *fmm_allocate_device(uint32_t gpu_id, uint32_t node_id, void *address,
uint64_t MemorySizeInBytes, HsaMemFlags flags);
void *fmm_allocate_doorbell(uint32_t gpu_id, uint64_t MemorySizeInBytes, uint64_t doorbell_offset);
void *fmm_allocate_host(uint32_t gpu_id, uint32_t node_id, void *address, uint64_t MemorySizeInBytes,
HsaMemFlags flags);
void fmm_print(uint32_t node);
HSAKMT_STATUS fmm_release(void *address);
HSAKMT_STATUS fmm_map_to_gpu(void *address, uint64_t size, uint64_t *gpuvm_address);
int fmm_unmap_from_gpu(void *address);
bool fmm_get_handle(void *address, uint64_t *handle);
HSAKMT_STATUS fmm_get_mem_info(const void *address, HsaPointerInfo *info);
HSAKMT_STATUS fmm_set_mem_user_data(const void *mem, void *usr_data);
#ifdef SANITIZER_AMDGPU
HSAKMT_STATUS fmm_replace_asan_header_page(void* address);
HSAKMT_STATUS fmm_return_asan_header_page(void* address);
#endif
/* Topology interface*/
HSAKMT_STATUS fmm_node_added(HSAuint32 gpu_id);
HSAKMT_STATUS fmm_node_removed(HSAuint32 gpu_id);
HSAKMT_STATUS fmm_get_aperture_base_and_limit(aperture_type_e aperture_type, HSAuint32 gpu_id,
HSAuint64 *aperture_base, HSAuint64 *aperture_limit);
HSAKMT_STATUS fmm_register_memory(void *address, uint64_t size_in_bytes,
uint32_t *gpu_id_array,
uint32_t gpu_id_array_size,
bool coarse_grain,
bool ext_coherent);
HSAKMT_STATUS fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle,
HsaGraphicsResourceInfo *GraphicsResourceInfo,
uint32_t *gpu_id_array,
uint32_t gpu_id_array_size);
HSAKMT_STATUS fmm_deregister_memory(void *address);
HSAKMT_STATUS fmm_export_dma_buf_fd(void *MemoryAddress,
HSAuint64 MemorySizeInBytes,
int *DMABufFd,
HSAuint64 *Offset);
HSAKMT_STATUS fmm_share_memory(void *MemoryAddress,
HSAuint64 SizeInBytes,
HsaSharedMemoryHandle *SharedMemoryHandle);
HSAKMT_STATUS fmm_register_shared_memory(const HsaSharedMemoryHandle *SharedMemoryHandle,
HSAuint64 *SizeInBytes,
void **MemoryAddress,
uint32_t *gpu_id_array,
uint32_t gpu_id_array_size);
HSAKMT_STATUS fmm_map_to_gpu_nodes(void *address, uint64_t size,
uint32_t *nodes_to_map, uint64_t num_of_nodes, uint64_t *gpuvm_address);
int open_drm_render_device(int minor);
void *mmap_allocate_aligned(int prot, int flags, uint64_t size, uint64_t align,
uint64_t guard_size, void *aper_base, void *aper_limit);
extern int (*fn_amdgpu_device_get_fd)(HsaAMDGPUDeviceHandle device_handle);
#endif /* FMM_H_ */
+45
View File
@@ -0,0 +1,45 @@
/*
* Copyright © 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "libhsakmt.h"
// HSAKMT global data
/* Descriptor handed to kmtIoctl() for KFD requests; initialised to -1, which is not a valid descriptor. */
int kfd_fd = -1;
/* CHECK_KFD_OPEN() fails the calling API while this count is zero. */
unsigned long kfd_open_count;
/* NOTE(review): presumably a count of outstanding system-properties acquisitions -- confirm against the topology code. */
unsigned long system_properties_count;
/* Library-wide mutex, statically initialised. */
pthread_mutex_t hsakmt_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Consulted by the memory registration entry points to choose between the dGPU path and the non-dGPU fallback. */
bool is_dgpu;
/* Defined as a variable only when the platform headers do not already provide a PAGE_SIZE macro. */
#ifndef PAGE_SIZE
int PAGE_SIZE;
#endif
/* NOTE(review): presumably log2 of the page size -- confirm where it is assigned. */
int PAGE_SHIFT;
/* whether to check all dGPUs in the topology support SVM API */
bool is_svm_api_supported;
/* zfb is mainly used during emulation */
int zfb_support;
+25
View File
@@ -0,0 +1,25 @@
#include <stdio.h>
#include <errno.h>
#include <sys/ioctl.h>
#include "libhsakmt.h"
/*
 * ioctl() wrapper that transparently retries interrupted calls.
 *
 * The request is re-issued for as long as ioctl() fails with EINTR or EAGAIN.
 * Any other outcome ends the loop and the last ioctl() return value is handed
 * back to the caller, with errno left as ioctl() set it.
 */
int kmtIoctl(int fd, unsigned long request, void *arg)
{
	int rc;

	for (;;) {
		rc = ioctl(fd, request, arg);
		if (rc != -1)
			return rc;
		if (errno != EINTR && errno != EAGAIN)
			break;
	}

	/* Only a failed call with a non-retryable errno gets here. */
	if (errno == EBADF) {
		/* In case pthread_atfork didn't catch it, this will
		 * make any subsequent hsaKmt calls fail in CHECK_KFD_OPEN.
		 */
		pr_err("KFD file descriptor not valid in this process\n");
		is_forked_child();
	}

	return rc;
}
+241
View File
@@ -0,0 +1,241 @@
/*
* Copyright © 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef LIBHSAKMT_H_INCLUDED
#define LIBHSAKMT_H_INCLUDED
#include "hsakmt/linux/kfd_ioctl.h"
#include "hsakmt/hsakmt.h"
#include <pthread.h>
#include <stdint.h>
#include <limits.h>
extern int kfd_fd;
extern unsigned long kfd_open_count;
extern bool hsakmt_forked;
extern pthread_mutex_t hsakmt_mutex;
extern bool is_dgpu;
extern bool is_svm_api_supported;
extern int zfb_support;
extern HsaVersionInfo kfd_version_info;
#undef HSAKMTAPI
#define HSAKMTAPI __attribute__((visibility ("default")))
#if defined(__clang__)
#if __has_feature(address_sanitizer)
#define SANITIZER_AMDGPU 1
#endif
#endif
/*Avoid pointer-to-int-cast warning*/
#define PORT_VPTR_TO_UINT64(vptr) ((uint64_t)(unsigned long)(vptr))
/*Avoid int-to-pointer-cast warning*/
#define PORT_UINT64_TO_VPTR(v) ((void*)(unsigned long)(v))
/*
 * Guard for API entry points: makes the *calling function* return
 * HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED when kfd_open_count is zero or
 * hsakmt_forked is set. Because it expands to a `return`, it can only be used
 * in functions that return HSAKMT_STATUS.
 */
#define CHECK_KFD_OPEN() \
do { if (kfd_open_count == 0 || hsakmt_forked) return HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED; } while (0)
/*
 * Makes the calling function return HSAKMT_STATUS_NOT_SUPPORTED when `minor`
 * is greater than kfd_version_info.KernelInterfaceMinorVersion.
 */
#define CHECK_KFD_MINOR_VERSION(minor) \
do { if ((minor) > kfd_version_info.KernelInterfaceMinorVersion)\
return HSAKMT_STATUS_NOT_SUPPORTED; } while (0)
/* Might be defined in limits.h on platforms where it is constant (used by musl) */
/* See also: https://pubs.opengroup.org/onlinepubs/7908799/xsh/limits.h.html */
#ifndef PAGE_SIZE
extern int PAGE_SIZE;
#endif
extern int PAGE_SHIFT;
/* VI HW bug requires this virtual address alignment */
#define TONGA_PAGE_SIZE 0x8000
/* 64KB BigK fragment size for TLB efficiency */
#define GPU_BIGK_PAGE_SIZE (1 << 16)
/* 2MB huge page size for 4-level page tables on Vega10 and later GPUs */
#define GPU_HUGE_PAGE_SIZE (2 << 20)
/*
 * Makes the calling function return HSAKMT_STATUS_INVALID_PARAMETER when x
 * (a pointer or an integer) is not a multiple of PAGE_SIZE.
 */
#define CHECK_PAGE_MULTIPLE(x) \
do { if ((uint64_t)PORT_VPTR_TO_UINT64(x) % PAGE_SIZE) return HSAKMT_STATUS_INVALID_PARAMETER; } while(0)
/*
 * Round x up to the next multiple of align, computed in 64 bits. The mask
 * arithmetic is only correct when align is a power of two.
 */
#define ALIGN_UP(x,align) (((uint64_t)(x) + (align) - 1) & ~(uint64_t)((align)-1))
/* Same as ALIGN_UP, but x and the mask are truncated to 32 bits. */
#define ALIGN_UP_32(x,align) (((uint32_t)(x) + (align) - 1) & ~(uint32_t)((align)-1))
/* Round x up to a multiple of PAGE_SIZE. */
#define PAGE_ALIGN_UP(x) ALIGN_UP(x,PAGE_SIZE)
/*
 * Value with the low n bits set. n == 0 is special-cased to 0 so the
 * expression never shifts a 64-bit value by 64.
 */
#define BITMASK(n) ((n) ? (UINT64_MAX >> (sizeof(UINT64_MAX) * CHAR_BIT - (n))) : 0)
/* Number of elements in a true array (not valid for a pointer). */
#define ARRAY_LEN(array) (sizeof(array) / sizeof(array[0]))
/* HSA Thunk logging usage */
extern int hsakmt_debug_level;
/*
 * Print a printf-style message to stderr when `level` is less than or equal to
 * hsakmt_debug_level. `level` is parenthesised so that an expression argument
 * (for example a conditional expression) is compared as a whole; the variadic
 * arguments are only evaluated when the message is actually printed.
 */
#define hsakmt_print(level, fmt, ...) \
	do { if ((level) <= hsakmt_debug_level) fprintf(stderr, fmt, ##__VA_ARGS__); } while (0)
#define HSAKMT_DEBUG_LEVEL_DEFAULT -1
#define HSAKMT_DEBUG_LEVEL_ERR 3
#define HSAKMT_DEBUG_LEVEL_WARNING 4
#define HSAKMT_DEBUG_LEVEL_INFO 6
#define HSAKMT_DEBUG_LEVEL_DEBUG 7
#define pr_err(fmt, ...) \
hsakmt_print(HSAKMT_DEBUG_LEVEL_ERR, fmt, ##__VA_ARGS__)
#define pr_warn(fmt, ...) \
hsakmt_print(HSAKMT_DEBUG_LEVEL_WARNING, fmt, ##__VA_ARGS__)
#define pr_info(fmt, ...) \
hsakmt_print(HSAKMT_DEBUG_LEVEL_INFO, fmt, ##__VA_ARGS__)
#define pr_debug(fmt, ...) \
hsakmt_print(HSAKMT_DEBUG_LEVEL_DEBUG, fmt, ##__VA_ARGS__)
/*
 * Like pr_err(), but each expansion site emits its message at most once: a
 * function-local static flag is set on the first pass and checked afterwards.
 * Implemented as a GNU statement expression. The flag is set even when the
 * current debug level suppresses the message.
 */
#define pr_err_once(fmt, ...) \
({ \
static bool __print_once; \
if (!__print_once) { \
__print_once = true; \
pr_err(fmt, ##__VA_ARGS__); \
} \
})
/* Same once-per-site behaviour as pr_err_once(), at warning level. */
#define pr_warn_once(fmt, ...) \
({ \
static bool __print_once; \
if (!__print_once) { \
__print_once = true; \
pr_warn(fmt, ##__VA_ARGS__); \
} \
})
/*
 * Expects gfxv (full) in decimal, laid out as MMmmss (major, minor, stepping,
 * two decimal digits each), e.g. 90010 -> major 9, minor 0, stepping 10.
 */
#define HSA_GET_GFX_VERSION_MAJOR(gfxv) (((gfxv) / 10000) % 100)
#define HSA_GET_GFX_VERSION_MINOR(gfxv) (((gfxv) / 100) % 100)
#define HSA_GET_GFX_VERSION_STEP(gfxv) ((gfxv) % 100)

/*
 * Expects HSA_ENGINE_ID.ui32, returns gfxv (full) in hex:
 * (Major << 16) | (Minor << 8) | Stepping.
 * The argument is parenthesised before member access so that expressions such
 * as a dereferenced pointer (*p) can be passed.
 */
#define HSA_GET_GFX_VERSION_FULL(ui32) \
	((((ui32).Major) << 16) | (((ui32).Minor) << 8) | ((ui32).Stepping))
/*
 * Full GFX IP versions in the hex layout produced by
 * HSA_GET_GFX_VERSION_FULL(): (major << 16) | (minor << 8) | stepping.
 *
 * Several enumerators intentionally share one value (FIJI, POLARIS10/11/12 and
 * VEGAM are all 0x080003), so these constants cannot distinguish those parts.
 * The list is not strictly sorted by value: AQUA_VANJARAM (0x090400) precedes
 * RENOIR (0x09000C).
 */
enum full_gfx_versions {
GFX_VERSION_KAVERI = 0x070000,
GFX_VERSION_HAWAII = 0x070001,
GFX_VERSION_CARRIZO = 0x080001,
GFX_VERSION_TONGA = 0x080002,
GFX_VERSION_FIJI = 0x080003,
GFX_VERSION_POLARIS10 = 0x080003,
GFX_VERSION_POLARIS11 = 0x080003,
GFX_VERSION_POLARIS12 = 0x080003,
GFX_VERSION_VEGAM = 0x080003,
GFX_VERSION_VEGA10 = 0x090000,
GFX_VERSION_RAVEN = 0x090002,
GFX_VERSION_VEGA12 = 0x090004,
GFX_VERSION_VEGA20 = 0x090006,
GFX_VERSION_ARCTURUS = 0x090008,
GFX_VERSION_ALDEBARAN = 0x09000A,
GFX_VERSION_AQUA_VANJARAM = 0x090400,
GFX_VERSION_RENOIR = 0x09000C,
GFX_VERSION_NAVI10 = 0x0A0100,
GFX_VERSION_NAVI12 = 0x0A0101,
GFX_VERSION_NAVI14 = 0x0A0102,
GFX_VERSION_CYAN_SKILLFISH = 0x0A0103,
GFX_VERSION_SIENNA_CICHLID = 0x0A0300,
GFX_VERSION_NAVY_FLOUNDER = 0x0A0301,
GFX_VERSION_DIMGREY_CAVEFISH = 0x0A0302,
GFX_VERSION_VANGOGH = 0x0A0303,
GFX_VERSION_BEIGE_GOBY = 0x0A0304,
GFX_VERSION_YELLOW_CARP = 0x0A0305,
GFX_VERSION_PLUM_BONITO = 0x0B0000,
GFX_VERSION_WHEAT_NAS = 0x0B0001,
};
/*
 * One row of a device-ID lookup table: associates a 16-bit device ID with its
 * GFXIP major/minor/stepping version and a device name string.
 */
struct hsa_gfxip_table {
uint16_t device_id; // Device ID
unsigned char major; // GFXIP Major engine version
unsigned char minor; // GFXIP Minor engine version
unsigned char stepping; // GFXIP Stepping info
const char *amd_name; // CALName of the device
};
HSAKMT_STATUS init_kfd_version(void);
#define IS_SOC15(gfxv) ((gfxv) >= GFX_VERSION_VEGA10)
HSAKMT_STATUS validate_nodeid(uint32_t nodeid, uint32_t *gpu_id);
HSAKMT_STATUS gpuid_to_nodeid(uint32_t gpu_id, uint32_t* node_id);
uint32_t get_gfxv_by_node_id(HSAuint32 node_id);
bool prefer_ats(HSAuint32 node_id);
uint16_t get_device_id_by_node_id(HSAuint32 node_id);
uint16_t get_device_id_by_gpu_id(HSAuint32 gpu_id);
uint32_t get_direct_link_cpu(uint32_t gpu_node);
int get_drm_render_fd_by_gpu_id(HSAuint32 gpu_id);
HSAKMT_STATUS validate_nodeid_array(uint32_t **gpu_id_array,
uint32_t NumberOfNodes, uint32_t *NodeArray);
HSAKMT_STATUS topology_sysfs_get_system_props(HsaSystemProperties *props);
HSAKMT_STATUS topology_get_node_props(HSAuint32 NodeId,
HsaNodeProperties *NodeProperties);
HSAKMT_STATUS topology_get_iolink_props(HSAuint32 NodeId,
HSAuint32 NumIoLinks,
HsaIoLinkProperties *IoLinkProperties);
void topology_setup_is_dgpu_param(HsaNodeProperties *props);
bool topology_is_svm_needed(HSA_ENGINE_ID EngineId);
HSAuint32 PageSizeFromFlags(unsigned int pageSizeFlags);
void* allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align,
uint32_t gpu_id,
uint32_t NodeId, bool NonPaged,
bool DeviceLocal, bool Uncached);
void free_exec_aligned_memory_gpu(void *addr, uint32_t size, uint32_t align);
HSAKMT_STATUS init_process_doorbells(unsigned int NumNodes);
void destroy_process_doorbells(void);
HSAKMT_STATUS init_device_debugging_memory(unsigned int NumNodes);
void destroy_device_debugging_memory(void);
bool debug_get_reg_status(uint32_t node_id);
HSAKMT_STATUS init_counter_props(unsigned int NumNodes);
void destroy_counter_props(void);
uint32_t *convert_queue_ids(HSAuint32 NumQueues, HSA_QUEUEID *Queues);
extern int kmtIoctl(int fd, unsigned long request, void *arg);
/*
 * Void pointer arithmetic (or remove -Wpointer-arith to allow void pointers
 * arithmetic).
 *
 * VOID_PTR_ADD32 advances by n 32-bit words; VOID_PTR_ADD and VOID_PTR_SUB move
 * by n bytes; VOID_PTRS_SUB yields the byte distance ptr1 - ptr2 as a
 * uint64_t. The offset argument and each whole expansion are parenthesised so
 * that compound arguments such as `a + b` are applied as a unit (previously
 * VOID_PTR_SUB(p, a + b) evaluated as p - a + b).
 */
#define VOID_PTR_ADD32(ptr,n) ((void*)((uint32_t*)(ptr) + (n)))/*ptr + offset*/
#define VOID_PTR_ADD(ptr,n) ((void*)((uint8_t*)(ptr) + (n)))/*ptr + offset*/
#define VOID_PTR_SUB(ptr,n) ((void*)((uint8_t*)(ptr) - (n)))/*ptr - offset*/
#define VOID_PTRS_SUB(ptr1,ptr2) ((uint64_t)((uint8_t*)(ptr1) - (uint8_t*)(ptr2))) /*ptr1 - ptr2*/
/*
 * Minimum of a and b, evaluating each argument exactly once (GNU statement
 * expression). Both temporaries are declared with typeof(a), so b is converted
 * to a's type before the comparison and the result has a's type.
 */
#define MIN(a, b) ({ \
typeof(a) tmp1 = (a), tmp2 = (b); \
tmp1 < tmp2 ? tmp1 : tmp2; })
/*
 * Maximum of a and b, with the same single-evaluation and typeof(a)
 * conversion behaviour as MIN.
 */
#define MAX(a, b) ({ \
typeof(a) tmp1 = (a), tmp2 = (b); \
tmp1 > tmp2 ? tmp1 : tmp2; })
void clear_events_page(void);
void fmm_clear_all_mem(void);
void clear_process_doorbells(void);
uint32_t get_num_sysfs_nodes(void);
bool is_forked_child(void);
/* Calculate VGPR and SGPR register file size per CU */
uint32_t get_vgpr_size_per_cu(uint32_t gfxv);
#define SGPR_SIZE_PER_CU 0x4000
#endif
+93
View File
@@ -0,0 +1,93 @@
HSAKMT_1
{
global:
hsaKmtOpenKFD;
hsaKmtCloseKFD;
hsaKmtGetVersion;
hsaKmtAcquireSystemProperties;
hsaKmtReleaseSystemProperties;
hsaKmtGetNodeProperties;
hsaKmtGetNodeMemoryProperties;
hsaKmtGetNodeCacheProperties;
hsaKmtGetNodeIoLinkProperties;
hsaKmtCreateEvent;
hsaKmtDestroyEvent;
hsaKmtSetEvent;
hsaKmtResetEvent;
hsaKmtQueryEventState;
hsaKmtWaitOnEvent;
hsaKmtWaitOnMultipleEvents;
hsaKmtCreateQueue;
hsaKmtUpdateQueue;
hsaKmtDestroyQueue;
hsaKmtSetQueueCUMask;
hsaKmtSetMemoryPolicy;
hsaKmtAllocMemory;
hsaKmtFreeMemory;
hsaKmtAvailableMemory;
hsaKmtRegisterMemory;
hsaKmtRegisterMemoryToNodes;
hsaKmtRegisterMemoryWithFlags;
hsaKmtRegisterGraphicsHandleToNodes;
hsaKmtShareMemory;
hsaKmtRegisterSharedHandle;
hsaKmtRegisterSharedHandleToNodes;
hsaKmtProcessVMRead;
hsaKmtProcessVMWrite;
hsaKmtDeregisterMemory;
hsaKmtMapMemoryToGPU;
hsaKmtMapMemoryToGPUNodes;
hsaKmtUnmapMemoryToGPU;
hsaKmtDbgRegister;
hsaKmtDbgUnregister;
hsaKmtDbgWavefrontControl;
hsaKmtDbgAddressWatch;
hsaKmtDbgEnable;
hsaKmtDbgDisable;
hsaKmtDbgGetDeviceData;
hsaKmtDbgGetQueueData;
hsaKmtGetClockCounters;
hsaKmtPmcGetCounterProperties;
hsaKmtPmcRegisterTrace;
hsaKmtPmcUnregisterTrace;
hsaKmtPmcAcquireTraceAccess;
hsaKmtPmcReleaseTraceAccess;
hsaKmtPmcStartTrace;
hsaKmtPmcQueryTrace;
hsaKmtPmcStopTrace;
hsaKmtMapGraphicHandle;
hsaKmtUnmapGraphicHandle;
hsaKmtSetTrapHandler;
hsaKmtGetTileConfig;
hsaKmtQueryPointerInfo;
hsaKmtSetMemoryUserData;
hsaKmtGetQueueInfo;
hsaKmtAllocQueueGWS;
hsaKmtRuntimeEnable;
hsaKmtRuntimeDisable;
hsaKmtCheckRuntimeDebugSupport;
hsaKmtGetRuntimeCapabilities;
hsaKmtDebugTrapIoctl;
hsaKmtSPMAcquire;
hsaKmtSPMRelease;
hsaKmtSPMSetDestBuffer;
hsaKmtSVMSetAttr;
hsaKmtSVMGetAttr;
hsaKmtSetXNACKMode;
hsaKmtGetXNACKMode;
hsaKmtOpenSMI;
hsaKmtExportDMABufHandle;
hsaKmtWaitOnEvent_Ext;
hsaKmtWaitOnMultipleEvents_Ext;
hsaKmtReplaceAsanHeaderPage;
hsaKmtReturnAsanHeaderPage;
hsaKmtGetAMDGPUDeviceHandle;
hsaKmtPcSamplingQueryCapabilities;
hsaKmtPcSamplingCreate;
hsaKmtPcSamplingDestroy;
hsaKmtPcSamplingStart;
hsaKmtPcSamplingStop;
local: *;
};
+637
View File
@@ -0,0 +1,637 @@
/*
* Copyright © 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "libhsakmt.h"
#include "hsakmt/linux/kfd_ioctl.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include "fmm.h"
/*
 * Set the default and alternate cache policy for a node.
 *
 * Node                    Node whose policy is set.
 * DefaultPolicy           HSA_CACHING_CACHED or HSA_CACHING_NONCACHED.
 * AlternatePolicy         HSA_CACHING_CACHED or HSA_CACHING_NONCACHED.
 * MemoryAddressAlternate  Base of the alternate aperture; must be page aligned.
 * MemorySizeInBytes       Size of the alternate aperture; must be a multiple
 *                         of the page size.
 *
 * Returns HSAKMT_STATUS_NOT_IMPLEMENTED for every node that is not Kaveri,
 * HSAKMT_STATUS_INVALID_PARAMETER for an unknown policy or a misaligned
 * address/size, HSAKMT_STATUS_ERROR when the ioctl fails, and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicy(HSAuint32 Node,
					      HSAuint32 DefaultPolicy,
					      HSAuint32 AlternatePolicy,
					      void *MemoryAddressAlternate,
					      HSAuint64 MemorySizeInBytes)
{
	struct kfd_ioctl_set_memory_policy_args policy_args = {0};
	HSAKMT_STATUS status;
	uint32_t target_gpu;
	bool default_is_legal;
	bool alternate_is_legal;

	CHECK_KFD_OPEN();

	pr_debug("[%s] node %d; default %d; alternate %d\n",
		 __func__, Node, DefaultPolicy, AlternatePolicy);

	status = validate_nodeid(Node, &target_gpu);
	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	/* Legacy API, meaningful on Kaveri only. On dGPU the alternate
	 * aperture is set up and used automatically for coherent
	 * allocations, so applications must not override it.
	 */
	if (get_gfxv_by_node_id(Node) != GFX_VERSION_KAVERI)
		return HSAKMT_STATUS_NOT_IMPLEMENTED;

	/* Any legal policy and alternate address location is accepted;
	 * you get CC everywhere anyway.
	 */
	default_is_legal = DefaultPolicy == HSA_CACHING_CACHED ||
			   DefaultPolicy == HSA_CACHING_NONCACHED;
	alternate_is_legal = AlternatePolicy == HSA_CACHING_CACHED ||
			     AlternatePolicy == HSA_CACHING_NONCACHED;
	if (!default_is_legal || !alternate_is_legal)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	CHECK_PAGE_MULTIPLE(MemoryAddressAlternate);
	CHECK_PAGE_MULTIPLE(MemorySizeInBytes);

	policy_args.gpu_id = target_gpu;

	if (DefaultPolicy == HSA_CACHING_CACHED)
		policy_args.default_policy = KFD_IOC_CACHE_POLICY_COHERENT;
	else
		policy_args.default_policy = KFD_IOC_CACHE_POLICY_NONCOHERENT;

	if (AlternatePolicy == HSA_CACHING_CACHED)
		policy_args.alternate_policy = KFD_IOC_CACHE_POLICY_COHERENT;
	else
		policy_args.alternate_policy = KFD_IOC_CACHE_POLICY_NONCOHERENT;

	policy_args.alternate_aperture_base = (uintptr_t) MemoryAddressAlternate;
	policy_args.alternate_aperture_size = MemorySizeInBytes;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_SET_MEMORY_POLICY, &policy_args) == -1)
		return HSAKMT_STATUS_ERROR;

	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Translate an HSA_PAGE_SIZE_* selector into a page size in bytes.
 *
 * An unrecognised selector trips assert(false); in builds where assertions
 * are compiled out the function falls back to 4 KiB.
 */
HSAuint32 PageSizeFromFlags(unsigned int pageSizeFlags)
{
	if (pageSizeFlags == HSA_PAGE_SIZE_4KB)
		return 4*1024;
	if (pageSizeFlags == HSA_PAGE_SIZE_64KB)
		return 64*1024;
	if (pageSizeFlags == HSA_PAGE_SIZE_2MB)
		return 2*1024*1024;
	if (pageSizeFlags == HSA_PAGE_SIZE_1GB)
		return 1024*1024*1024;

	assert(false);
	return 4*1024;
}
/*
 * Allocate memory on behalf of a node.
 *
 * PreferredNode  Node to allocate for; translated to a gpu id by
 *                validate_nodeid().
 * SizeInBytes    Must be non-zero and a multiple of the page size selected by
 *                MemFlags.ui32.PageSize.
 * MemFlags       Selects the allocation path and attributes (see below).
 * MemoryAddress  In/out. Must not be NULL. With FixedAddress set, the caller
 *                supplies a non-NULL address in *MemoryAddress; otherwise
 *                *MemoryAddress is reset to NULL before allocating. On success
 *                it holds the allocated address.
 *
 * Allocation path, checked in this order:
 *   1. Scratch set                         -> fmm_allocate_scratch()
 *   2. gpu id is 0, NonPaged clear,
 *      zfb_support set, or GTTAccess set   -> fmm_allocate_host()
 *   3. everything else                     -> fmm_allocate_device()
 *
 * Returns:
 *   HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED  from CHECK_KFD_OPEN()
 *   HSAKMT_STATUS_NOT_SUPPORTED                 Contiguous requested on KFD minor < 17
 *   status of validate_nodeid()                 invalid PreferredNode
 *   HSAKMT_STATUS_INVALID_PARAMETER             bad pointer, size or flag combination
 *   HSAKMT_STATUS_NO_MEMORY                     scratch or device allocation failed
 *   HSAKMT_STATUS_ERROR                         host allocation failed
 *   HSAKMT_STATUS_SUCCESS                       otherwise
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemory(HSAuint32 PreferredNode,
HSAuint64 SizeInBytes,
HsaMemFlags MemFlags,
void **MemoryAddress)
{
HSAKMT_STATUS result;
uint32_t gpu_id;
HSAuint64 page_size;
CHECK_KFD_OPEN();
/* The Contiguous flag is only accepted from KFD interface minor version 17 on. */
if (MemFlags.ui32.Contiguous)
CHECK_KFD_MINOR_VERSION(17);
pr_debug("[%s] node %d\n", __func__, PreferredNode);
result = validate_nodeid(PreferredNode, &gpu_id);
if (result != HSAKMT_STATUS_SUCCESS) {
pr_err("[%s] invalid node ID: %d\n", __func__, PreferredNode);
return result;
}
page_size = PageSizeFromFlags(MemFlags.ui32.PageSize);
/* The mask test below relies on page_size being a power of two. */
if (!MemoryAddress || !SizeInBytes || (SizeInBytes & (page_size-1)))
return HSAKMT_STATUS_INVALID_PARAMETER;
if (MemFlags.ui32.FixedAddress) {
if (*MemoryAddress == NULL)
return HSAKMT_STATUS_INVALID_PARAMETER;
} else
*MemoryAddress = NULL;
/* CoarseGrain, ExtendedCoherent and Uncached are mutually exclusive: at most one may be set. */
if ((MemFlags.ui32.CoarseGrain && MemFlags.ui32.ExtendedCoherent) ||
(MemFlags.ui32.CoarseGrain && MemFlags.ui32.Uncached) ||
(MemFlags.ui32.ExtendedCoherent && MemFlags.ui32.Uncached))
return HSAKMT_STATUS_INVALID_PARAMETER;
if (MemFlags.ui32.Scratch) {
*MemoryAddress = fmm_allocate_scratch(gpu_id, *MemoryAddress, SizeInBytes);
if (!(*MemoryAddress)) {
pr_err("[%s] failed to allocate %lu bytes from scratch\n",
__func__, SizeInBytes);
return HSAKMT_STATUS_NO_MEMORY;
}
return HSAKMT_STATUS_SUCCESS;
}
/* GPU allocated system memory */
if (!gpu_id || !MemFlags.ui32.NonPaged || zfb_support || MemFlags.ui32.GTTAccess) {
/* Backwards compatibility hack: Allocate system memory if app
* asks for paged memory from a GPU node.
*/
/* If allocate VRAM under ZFB mode */
if (zfb_support && gpu_id && MemFlags.ui32.NonPaged == 1)
MemFlags.ui32.CoarseGrain = 1;
/* With GTTAccess set, node 0 is passed instead of PreferredNode. */
*MemoryAddress = fmm_allocate_host(gpu_id, MemFlags.ui32.GTTAccess ? 0 : PreferredNode,
*MemoryAddress, SizeInBytes, MemFlags);
if (!(*MemoryAddress)) {
pr_err("[%s] failed to allocate %lu bytes from host\n",
__func__, SizeInBytes);
return HSAKMT_STATUS_ERROR;
}
return HSAKMT_STATUS_SUCCESS;
}
/* GPU allocated VRAM */
/* sanity check cannot do OnlyAddress and NoAddress alloc at same time */
if (MemFlags.ui32.OnlyAddress && MemFlags.ui32.NoAddress) {
pr_err("[%s] allocate addr-only and memory-only at same time\n",
__func__);
return HSAKMT_STATUS_INVALID_PARAMETER;
}
*MemoryAddress = fmm_allocate_device(gpu_id, PreferredNode, *MemoryAddress,
SizeInBytes, MemFlags);
if (!(*MemoryAddress)) {
pr_err("[%s] failed to allocate %lu bytes from device\n",
__func__, SizeInBytes);
return HSAKMT_STATUS_NO_MEMORY;
}
return HSAKMT_STATUS_SUCCESS;
}
/*
 * Release memory previously obtained from this library.
 *
 * MemoryAddress  Address to release; NULL is rejected with
 *                HSAKMT_STATUS_ERROR.
 * SizeInBytes    Not used by this function.
 *
 * Returns the status of fmm_release() for a non-NULL address.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtFreeMemory(void *MemoryAddress,
					 HSAuint64 SizeInBytes)
{
	CHECK_KFD_OPEN();

	pr_debug("[%s] address %p\n", __func__, MemoryAddress);

	if (MemoryAddress != NULL)
		return fmm_release(MemoryAddress);

	pr_err("FIXME: freeing NULL pointer\n");
	return HSAKMT_STATUS_ERROR;
}
/*
 * Query the amount of memory currently available on a node.
 *
 * Node            Node to query.
 * AvailableBytes  Output; receives the `available` field filled in by the
 *                 AMDKFD_IOC_AVAILABLE_MEMORY ioctl. Must not be NULL.
 *
 * Returns:
 *   HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED  from CHECK_KFD_OPEN()
 *   HSAKMT_STATUS_NOT_SUPPORTED                 KFD interface minor version < 9
 *   status of validate_nodeid()                 invalid Node
 *   HSAKMT_STATUS_INVALID_PARAMETER             AvailableBytes is NULL
 *   HSAKMT_STATUS_ERROR                         the ioctl failed
 *   HSAKMT_STATUS_SUCCESS                       otherwise
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtAvailableMemory(HSAuint32 Node,
					      HSAuint64 *AvailableBytes)
{
	struct kfd_ioctl_get_available_memory_args args = {};
	HSAKMT_STATUS result;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(9);

	pr_debug("[%s] node %d\n", __func__, Node);

	result = validate_nodeid(Node, &args.gpu_id);
	if (result != HSAKMT_STATUS_SUCCESS) {
		pr_err("[%s] invalid node ID: %d\n", __func__, Node);
		return result;
	}

	/* Reject a NULL output pointer up front rather than faulting on the
	 * store after a successful ioctl.
	 */
	if (!AvailableBytes)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args))
		return HSAKMT_STATUS_ERROR;

	*AvailableBytes = args.available;
	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Register a memory range with the default settings.
 *
 * When is_dgpu is set, the range is passed to fmm_register_memory() with no
 * node list, coarse_grain = true and ext_coherent = false. Otherwise nothing
 * is registered and success is reported.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemory(void *MemoryAddress,
					     HSAuint64 MemorySizeInBytes)
{
	CHECK_KFD_OPEN();

	pr_debug("[%s] address %p\n", __func__, MemoryAddress);

	if (is_dgpu)
		return fmm_register_memory(MemoryAddress, MemorySizeInBytes,
					   NULL, 0, true, false);

	/* TODO: support mixed APU and dGPU configurations */
	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Register a memory range for use by an explicit list of nodes.
 *
 * NodeArray/NumberOfNodes are converted to a gpu id array by
 * validate_nodeid_array(); that array and its size in bytes are then passed to
 * fmm_register_memory() with coarse_grain = true and ext_coherent = false.
 *
 * The gpu id array is freed here only when registration fails; on success it
 * is not released by this function (presumably retained by the fmm layer --
 * confirm against fmm_register_memory).
 *
 * Returns HSAKMT_STATUS_NOT_SUPPORTED when is_dgpu is not set, otherwise the
 * status of the validation or registration step.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress,
						    HSAuint64 MemorySizeInBytes,
						    HSAuint64 NumberOfNodes,
						    HSAuint32 *NodeArray)
{
	CHECK_KFD_OPEN();

	uint32_t *gpu_ids;
	HSAKMT_STATUS status;

	pr_debug("[%s] address %p number of nodes %lu\n",
		 __func__, MemoryAddress, NumberOfNodes);

	/* TODO: support mixed APU and dGPU configurations */
	if (!is_dgpu)
		return HSAKMT_STATUS_NOT_SUPPORTED;

	status = validate_nodeid_array(&gpu_ids, NumberOfNodes, NodeArray);
	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	status = fmm_register_memory(MemoryAddress, MemorySizeInBytes,
				     gpu_ids,
				     NumberOfNodes*sizeof(uint32_t),
				     true, false);
	if (status != HSAKMT_STATUS_SUCCESS)
		free(gpu_ids);

	return status;
}
/*
 * Register a memory range, taking the coarse-grain and extended-coherent
 * attributes from MemFlags.
 *
 * Returns:
 *   HSAKMT_STATUS_INVALID_PARAMETER  ExtendedCoherent and CoarseGrain both set
 *   HSAKMT_STATUS_NOT_SUPPORTED      HostAccess is not 1, NonPaged is 1, or
 *                                    is_dgpu is not set
 *   status of fmm_register_memory()  otherwise
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryWithFlags(void *MemoryAddress,
						      HSAuint64 MemorySizeInBytes,
						      HsaMemFlags MemFlags)
{
	CHECK_KFD_OPEN();

	pr_debug("[%s] address %p\n",
		 __func__, MemoryAddress);

	if (MemFlags.ui32.ExtendedCoherent && MemFlags.ui32.CoarseGrain)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/* Registered memory should be ordinary paged host memory, and only
	 * dGPU configurations are handled.
	 * TODO: support mixed APU and dGPU configurations
	 */
	if (MemFlags.ui32.HostAccess != 1 ||
	    MemFlags.ui32.NonPaged == 1 ||
	    !is_dgpu)
		return HSAKMT_STATUS_NOT_SUPPORTED;

	return fmm_register_memory(MemoryAddress, MemorySizeInBytes,
				   NULL, 0,
				   MemFlags.ui32.CoarseGrain,
				   MemFlags.ui32.ExtendedCoherent);
}
/*
 * Register a graphics resource handle, optionally for an explicit node list.
 *
 * The node list is validated only when NodeArray is non-NULL or NumberOfNodes
 * is non-zero; otherwise a NULL gpu id array is passed on. The gpu id array is
 * freed here only when fmm_register_graphics_handle() fails.
 *
 * Returns the status of the validation step if it fails, otherwise the status
 * of fmm_register_graphics_handle().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodes(HSAuint64 GraphicsResourceHandle,
							    HsaGraphicsResourceInfo *GraphicsResourceInfo,
							    HSAuint64 NumberOfNodes,
							    HSAuint32 *NodeArray)
{
	CHECK_KFD_OPEN();

	uint32_t *gpu_ids = NULL;
	HSAKMT_STATUS status;
	const bool no_node_list = (NodeArray == NULL && NumberOfNodes == 0);

	pr_debug("[%s] number of nodes %lu\n", __func__, NumberOfNodes);

	if (!no_node_list) {
		status = validate_nodeid_array(&gpu_ids,
					       NumberOfNodes, NodeArray);
		if (status != HSAKMT_STATUS_SUCCESS)
			return status;
	}

	status = fmm_register_graphics_handle(GraphicsResourceHandle,
					      GraphicsResourceInfo,
					      gpu_ids,
					      NumberOfNodes * sizeof(uint32_t));
	if (status != HSAKMT_STATUS_SUCCESS)
		free(gpu_ids);

	return status;
}
/*
 * Export a memory range as a DMA-buf file descriptor.
 *
 * Requires KFD interface minor version 12 or later
 * (HSAKMT_STATUS_NOT_SUPPORTED otherwise). All arguments are forwarded to
 * fmm_export_dma_buf_fd(), whose status is returned.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtExportDMABufHandle(void *MemoryAddress,
						 HSAuint64 MemorySizeInBytes,
						 int *DMABufFd,
						 HSAuint64 *Offset)
{
	HSAKMT_STATUS status;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(12);

	pr_debug("[%s] address %p\n", __func__, MemoryAddress);

	status = fmm_export_dma_buf_fd(MemoryAddress, MemorySizeInBytes,
				       DMABufFd, Offset);
	return status;
}
/*
 * Produce a shared-memory handle for a memory range.
 *
 * SharedMemoryHandle is the output and must not be NULL
 * (HSAKMT_STATUS_INVALID_PARAMETER otherwise). The remaining work is done by
 * fmm_share_memory(), whose status is returned.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtShareMemory(void *MemoryAddress,
					  HSAuint64 SizeInBytes,
					  HsaSharedMemoryHandle *SharedMemoryHandle)
{
	CHECK_KFD_OPEN();

	pr_debug("[%s] address %p\n", __func__, MemoryAddress);

	if (SharedMemoryHandle != NULL)
		return fmm_share_memory(MemoryAddress, SizeInBytes,
					SharedMemoryHandle);

	return HSAKMT_STATUS_INVALID_PARAMETER;
}
/*
 * Register a shared-memory handle without an explicit node list.
 *
 * Equivalent to calling hsaKmtRegisterSharedHandleToNodes() with zero nodes
 * and a NULL node array; its status is returned unchanged.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandle(const HsaSharedMemoryHandle *SharedMemoryHandle,
						   void **MemoryAddress,
						   HSAuint64 *SizeInBytes)
{
	const HSAuint64 node_count = 0;
	HSAuint32 *const node_list = NULL;

	CHECK_KFD_OPEN();

	pr_debug("[%s] handle %p\n", __func__, SharedMemoryHandle);

	return hsaKmtRegisterSharedHandleToNodes(SharedMemoryHandle,
						 MemoryAddress,
						 SizeInBytes,
						 node_count,
						 node_list);
}
/*
 * Register a shared-memory handle, optionally for an explicit node list.
 *
 * SharedMemoryHandle  Handle to register; must not be NULL.
 * MemoryAddress       Forwarded to fmm_register_shared_memory().
 * SizeInBytes         Forwarded to fmm_register_shared_memory().
 * NumberOfNodes       Number of entries in NodeArray.
 * NodeArray           Optional node list; validated only when non-NULL.
 *
 * The gpu id array built from NodeArray is freed here whenever the function
 * fails; on success it is not released by this function.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for a NULL handle, otherwise the
 * status of the first step that fails, or of fmm_register_shared_memory().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleToNodes(const HsaSharedMemoryHandle *SharedMemoryHandle,
							  void **MemoryAddress,
							  HSAuint64 *SizeInBytes,
							  HSAuint64 NumberOfNodes,
							  HSAuint32 *NodeArray)
{
	CHECK_KFD_OPEN();

	uint32_t *gpu_ids = NULL;
	HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;

	pr_debug("[%s] handle %p number of nodes %lu\n",
		 __func__, SharedMemoryHandle, NumberOfNodes);

	if (SharedMemoryHandle == NULL)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (NodeArray != NULL)
		status = validate_nodeid_array(&gpu_ids, NumberOfNodes, NodeArray);

	if (status == HSAKMT_STATUS_SUCCESS)
		status = fmm_register_shared_memory(SharedMemoryHandle,
						    SizeInBytes,
						    MemoryAddress,
						    gpu_ids,
						    NumberOfNodes*sizeof(uint32_t));

	/* free(NULL) is a no-op, so no separate NULL test is needed. */
	if (status != HSAKMT_STATUS_SUCCESS)
		free(gpu_ids);

	return status;
}
/*
 * Deprecated entry point kept for ABI compatibility.
 *
 * Ignores all arguments, logs a deprecation error and always returns
 * HSAKMT_STATUS_NOT_IMPLEMENTED.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtProcessVMRead(HSAuint32 Pid,
					    HsaMemoryRange *LocalMemoryArray,
					    HSAuint64 LocalMemoryArrayCount,
					    HsaMemoryRange *RemoteMemoryArray,
					    HSAuint64 RemoteMemoryArrayCount,
					    HSAuint64 *SizeCopied)
{
	/* None of the parameters are consulted. */
	(void)Pid;
	(void)LocalMemoryArray;
	(void)LocalMemoryArrayCount;
	(void)RemoteMemoryArray;
	(void)RemoteMemoryArrayCount;
	(void)SizeCopied;

	pr_err("[%s] Deprecated\n", __func__);

	return HSAKMT_STATUS_NOT_IMPLEMENTED;
}
/*
 * Deprecated entry point kept for ABI compatibility.
 *
 * Ignores all arguments, logs a deprecation error and always returns
 * HSAKMT_STATUS_NOT_IMPLEMENTED.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtProcessVMWrite(HSAuint32 Pid,
					     HsaMemoryRange *LocalMemoryArray,
					     HSAuint64 LocalMemoryArrayCount,
					     HsaMemoryRange *RemoteMemoryArray,
					     HSAuint64 RemoteMemoryArrayCount,
					     HSAuint64 *SizeCopied)
{
	/* None of the parameters are consulted. */
	(void)Pid;
	(void)LocalMemoryArray;
	(void)LocalMemoryArrayCount;
	(void)RemoteMemoryArray;
	(void)RemoteMemoryArrayCount;
	(void)SizeCopied;

	pr_err("[%s] Deprecated\n", __func__);

	return HSAKMT_STATUS_NOT_IMPLEMENTED;
}
/*
 * Deregister a previously registered memory address.
 *
 * Thin wrapper: requires an open KFD, then returns the result of
 * fmm_deregister_memory().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDeregisterMemory(void *MemoryAddress)
{
	HSAKMT_STATUS status;

	CHECK_KFD_OPEN();

	pr_debug("[%s] address %p\n", __func__, MemoryAddress);

	status = fmm_deregister_memory(MemoryAddress);
	return status;
}
/*
 * Map a memory range for GPU access.
 *
 * A NULL MemoryAddress is rejected with HSAKMT_STATUS_ERROR. When
 * AlternateVAGPU is supplied it is cleared to 0 before the request is
 * handed to fmm_map_to_gpu(), whose status is returned.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPU(void *MemoryAddress,
					     HSAuint64 MemorySizeInBytes,
					     HSAuint64 *AlternateVAGPU)
{
	CHECK_KFD_OPEN();

	pr_debug("[%s] address %p\n", __func__, MemoryAddress);

	if (MemoryAddress == NULL) {
		pr_err("FIXME: mapping NULL pointer\n");
		return HSAKMT_STATUS_ERROR;
	}

	if (AlternateVAGPU != NULL)
		*AlternateVAGPU = 0;

	return fmm_map_to_gpu(MemoryAddress, MemorySizeInBytes,
			      AlternateVAGPU);
}
/*
 * Map a memory range for access by a specific set of GPU nodes.
 *
 * MemoryAddress/MemorySizeInBytes: range to map; a NULL address is rejected
 *   with HSAKMT_STATUS_ERROR.
 * AlternateVAGPU: forwarded to the FMM mapping routines.
 * MemMapFlags: accepted for API compatibility; not used by this function.
 * NumberOfNodes/NodeArray: node ids, translated by validate_nodeid_array().
 *
 * On a non-dGPU system with exactly one node the request is forwarded to
 * hsaKmtMapMemoryToGPU(). Otherwise the translated id array is passed to
 * fmm_map_to_gpu_nodes() and released afterwards.
 *
 * Like the other mapping entry points, this returns early through
 * CHECK_KFD_OPEN() when KFD has not been opened.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodes(void *MemoryAddress,
						  HSAuint64 MemorySizeInBytes,
						  HSAuint64 *AlternateVAGPU,
						  HsaMemMapFlags MemMapFlags,
						  HSAuint64 NumberOfNodes,
						  HSAuint32 *NodeArray)
{
	/* Consistent with hsaKmtMapMemoryToGPU/hsaKmtUnmapMemoryToGPU. */
	CHECK_KFD_OPEN();

	/* Initialized so the free() below is safe no matter what
	 * validate_nodeid_array() stores on success.
	 */
	uint32_t *gpu_id_array = NULL;
	HSAKMT_STATUS ret;

	pr_debug("[%s] address %p number of nodes %lu\n",
		 __func__, MemoryAddress, NumberOfNodes);

	if (!MemoryAddress) {
		pr_err("FIXME: mapping NULL pointer\n");
		return HSAKMT_STATUS_ERROR;
	}

	if (!is_dgpu && NumberOfNodes == 1)
		return hsaKmtMapMemoryToGPU(MemoryAddress,
					    MemorySizeInBytes,
					    AlternateVAGPU);

	ret = validate_nodeid_array(&gpu_id_array,
				    NumberOfNodes, NodeArray);
	if (ret != HSAKMT_STATUS_SUCCESS)
		return ret;

	ret = fmm_map_to_gpu_nodes(MemoryAddress, MemorySizeInBytes,
				   gpu_id_array, NumberOfNodes, AlternateVAGPU);

	/* The id array is only needed for the duration of the call. */
	free(gpu_id_array);

	return ret;
}
/*
 * Undo a GPU mapping.
 *
 * A NULL address is logged and reported as success (workaround for a
 * runtime bug). Otherwise a zero return from fmm_unmap_from_gpu() maps to
 * HSAKMT_STATUS_SUCCESS and any non-zero value to HSAKMT_STATUS_ERROR.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapMemoryToGPU(void *MemoryAddress)
{
	CHECK_KFD_OPEN();

	pr_debug("[%s] address %p\n", __func__, MemoryAddress);

	if (MemoryAddress == NULL) {
		/* Workaround for runtime bug */
		pr_err("FIXME: Unmapping NULL pointer\n");
		return HSAKMT_STATUS_SUCCESS;
	}

	return fmm_unmap_from_gpu(MemoryAddress) ? HSAKMT_STATUS_ERROR
						 : HSAKMT_STATUS_SUCCESS;
}
/*
 * Unimplemented legacy API; always returns HSAKMT_STATUS_NOT_IMPLEMENTED.
 *
 * This API was only ever implemented in KFD for Kaveri and was never
 * upstreamed. There are no open-source users of this interface. It has
 * been superseded by RegisterGraphicsHandleToNodes.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtMapGraphicHandle(HSAuint32 NodeId,
					       HSAuint64 GraphicDeviceHandle,
					       HSAuint64 GraphicResourceHandle,
					       HSAuint64 GraphicResourceOffset,
					       HSAuint64 GraphicResourceSize,
					       HSAuint64 *FlatMemoryAddress)
{
	/* No argument is consulted. */
	(void)NodeId;
	(void)GraphicDeviceHandle;
	(void)GraphicResourceHandle;
	(void)GraphicResourceOffset;
	(void)GraphicResourceSize;
	(void)FlatMemoryAddress;

	return HSAKMT_STATUS_NOT_IMPLEMENTED;
}
/*
 * Unmap a graphics handle mapping.
 *
 * NodeId and SizeInBytes are ignored; the flat address is converted to a
 * pointer and passed to hsaKmtUnmapMemoryToGPU().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapGraphicHandle(HSAuint32 NodeId,
						 HSAuint64 FlatMemoryAddress,
						 HSAuint64 SizeInBytes)
{
	void *flat_ptr = PORT_UINT64_TO_VPTR(FlatMemoryAddress);

	(void)NodeId;
	(void)SizeInBytes;

	return hsaKmtUnmapMemoryToGPU(flat_ptr);
}
/*
 * Query the tiling configuration of a node via AMDKFD_IOC_GET_TILE_CONFIG.
 *
 * NodeId: node to query; translated with validate_nodeid().
 * config: in/out. On input TileConfig/MacroTileConfig point to caller
 *   buffers and NumTileConfigs/NumMacroTileConfigs give their element
 *   counts. On success the counts, GbAddrConfig, NumBanks and NumRanks are
 *   updated from the ioctl result.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when config is NULL, the
 * validate_nodeid() status when the node is invalid, HSAKMT_STATUS_ERROR
 * when the ioctl fails, and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfig(HSAuint32 NodeId, HsaGpuTileConfig *config)
{
	struct kfd_ioctl_get_tile_config_args args = {0};
	uint32_t gpu_id;
	HSAKMT_STATUS result;

	pr_debug("[%s] node %d\n", __func__, NodeId);

	/* config is dereferenced below; reject NULL instead of crashing. */
	if (!config)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	result = validate_nodeid(NodeId, &gpu_id);
	if (result != HSAKMT_STATUS_SUCCESS)
		return result;

	/* Avoid Valgrind warnings about uninitialized data. Valgrind doesn't
	 * know that KFD writes this. Skip empty or absent buffers so memset
	 * is never handed a NULL pointer.
	 */
	if (config->TileConfig && config->NumTileConfigs)
		memset(config->TileConfig, 0,
		       sizeof(*config->TileConfig) * config->NumTileConfigs);
	if (config->MacroTileConfig && config->NumMacroTileConfigs)
		memset(config->MacroTileConfig, 0,
		       sizeof(*config->MacroTileConfig) * config->NumMacroTileConfigs);

	args.gpu_id = gpu_id;
	args.tile_config_ptr = (uint64_t)config->TileConfig;
	args.macro_tile_config_ptr = (uint64_t)config->MacroTileConfig;
	args.num_tile_configs = config->NumTileConfigs;
	args.num_macro_tile_configs = config->NumMacroTileConfigs;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_TILE_CONFIG, &args) != 0)
		return HSAKMT_STATUS_ERROR;

	/* Copy back what the ioctl reported. */
	config->NumTileConfigs = args.num_tile_configs;
	config->NumMacroTileConfigs = args.num_macro_tile_configs;

	config->GbAddrConfig = args.gb_addr_config;

	config->NumBanks = args.num_banks;
	config->NumRanks = args.num_ranks;

	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Look up information about a pointer.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when PointerInfo is NULL,
 * otherwise the status of fmm_get_mem_info().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtQueryPointerInfo(const void *Pointer,
					       HsaPointerInfo *PointerInfo)
{
	pr_debug("[%s] pointer %p\n", __func__, Pointer);

	if (PointerInfo == NULL)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	return fmm_get_mem_info(Pointer, PointerInfo);
}
/*
 * Attach caller-defined user data to a pointer.
 *
 * Thin wrapper returning the status of fmm_set_mem_user_data().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryUserData(const void *Pointer,
						void *UserData)
{
	HSAKMT_STATUS status;

	pr_debug("[%s] pointer %p\n", __func__, Pointer);

	status = fmm_set_mem_user_data(Pointer, UserData);
	return status;
}
/*
 * Replace the ASAN header page at addr.
 *
 * Only functional when built with SANITIZER_AMDGPU, where the request is
 * forwarded to fmm_replace_asan_header_page(); all other builds return
 * HSAKMT_STATUS_NOT_SUPPORTED.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtReplaceAsanHeaderPage(void *addr)
{
#ifndef SANITIZER_AMDGPU
	(void)addr;
	return HSAKMT_STATUS_NOT_SUPPORTED;
#else
	pr_debug("[%s] address %p\n", __func__, addr);
	CHECK_KFD_OPEN();

	return fmm_replace_asan_header_page(addr);
#endif
}
/*
 * Return the ASAN header page at addr.
 *
 * Only functional when built with SANITIZER_AMDGPU, where the request is
 * forwarded to fmm_return_asan_header_page(); all other builds return
 * HSAKMT_STATUS_NOT_SUPPORTED.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtReturnAsanHeaderPage(void *addr)
{
#ifndef SANITIZER_AMDGPU
	(void)addr;
	return HSAKMT_STATUS_NOT_SUPPORTED;
#else
	pr_debug("[%s] address %p\n", __func__, addr);
	CHECK_KFD_OPEN();

	return fmm_return_asan_header_page(addr);
#endif
}
/*
 * Fetch the amdgpu device handle associated with a node.
 *
 * Requires an open KFD; the lookup itself is done by
 * fmm_get_amdgpu_device_handle(), whose status is returned.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetAMDGPUDeviceHandle(HSAuint32 NodeId,
						    HsaAMDGPUDeviceHandle *DeviceHandle)
{
	HSAKMT_STATUS status;

	CHECK_KFD_OPEN();

	status = fmm_get_amdgpu_device_handle(NodeId, DeviceHandle);
	return status;
}
+256
View File
@@ -0,0 +1,256 @@
/*
* Copyright © 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/* glibc macro that enables access to some nonstandard GNU/Linux extensions,
 * such as RTLD_DEFAULT used by dlsym.
 */
#define _GNU_SOURCE
#include "libhsakmt.h"
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <strings.h>
#include "fmm.h"
#include <dlfcn.h>
#include <string.h>
/* Optional hook resolved with dlsym() in hsaKmtOpenKFD(). */
int (*fn_amdgpu_device_get_fd)(HsaAMDGPUDeviceHandle device_handle);
/* Device node opened by hsaKmtOpenKFD(). */
static const char kfd_device_name[] = "/dev/kfd";
/* PID recorded by is_forked_child(); -1 means nothing recorded yet. */
static pid_t parent_pid = -1;
/* Logging verbosity; set from HSAKMT_DEBUG_LEVEL in init_vars_from_env(). */
int hsakmt_debug_level;
/* Set once a fork has been detected; cleared again by clear_after_fork(). */
bool hsakmt_forked;
/* is_forked_child reports whether the process has forked since the PID
 * was last recorded. pthread_atfork alone is not sufficient because a
 * process can fork without going through libc's fork (using clone or
 * calling the system call directly).
 *
 * The first call (parent_pid == -1) records the current PID and returns
 * false. A later call from a process with a different PID sets
 * hsakmt_forked, and every call after that returns true until the flag is
 * cleared elsewhere.
 */
bool is_forked_child(void)
{
	if (!hsakmt_forked) {
		const pid_t self = getpid();

		if (parent_pid == -1)
			parent_pid = self;	/* first call: just remember */
		else if (parent_pid != self)
			hsakmt_forked = true;	/* PID changed: we are a child */
	}

	return hsakmt_forked;
}
/* Callbacks from pthread_atfork */
/* pthread_atfork "prepare" hook: take hsakmt_mutex before the fork. */
static void prepare_fork_handler(void)
{
	(void)pthread_mutex_lock(&hsakmt_mutex);
}
/* pthread_atfork "parent" hook: release hsakmt_mutex taken by the prepare hook. */
static void parent_fork_handler(void)
{
	(void)pthread_mutex_unlock(&hsakmt_mutex);
}
/* pthread_atfork "child" hook: re-initialize hsakmt_mutex in the child and
 * flag the process as forked.
 */
static void child_fork_handler(void)
{
	(void)pthread_mutex_init(&hsakmt_mutex, NULL);
	hsakmt_forked = true;
}
/* Call this from the child process after fork. It clears all data that was
 * duplicated from the parent process and is not valid in the child.
 * The topology information duplicated from the parent remains valid in the
 * child process, so it is not cleared.
 *
 * Resets doorbells, the events page, FMM memory tracking and the device
 * debugging memory, closes the inherited KFD descriptor, and resets the
 * open count and fork-detection state.
 */
static void clear_after_fork(void)
{
	clear_process_doorbells();
	clear_events_page();
	fmm_clear_all_mem();
	destroy_device_debugging_memory();

	/* -1 is the "not open" sentinel (hsaKmtOpenKFD tests kfd_fd < 0), so
	 * test for a valid descriptor rather than for non-zero: the old
	 * "if (kfd_fd)" called close(-1) and would have skipped descriptor 0.
	 */
	if (kfd_fd >= 0) {
		close(kfd_fd);
		kfd_fd = -1;
	}

	kfd_open_count = 0;
	parent_pid = -1;
	hsakmt_forked = false;
}
/* Initialize PAGE_SHIFT from PAGE_SIZE. When PAGE_SIZE is not a
 * compile-time macro it is first read from sysconf(_SC_PAGESIZE).
 */
static inline void init_page_size(void)
{
	int lowest_set_bit;

#ifndef PAGE_SIZE
	PAGE_SIZE = sysconf(_SC_PAGESIZE);
#endif

	/* ffs() is 1-based, so the shift is one less. */
	lowest_set_bit = ffs(PAGE_SIZE);
	PAGE_SHIFT = lowest_set_bit - 1;
}
/* Load runtime settings from the environment.
 *
 * HSAKMT_DEBUG_LEVEL: libraries normally stay silent; when this variable
 *   holds a value within [HSAKMT_DEBUG_LEVEL_ERR, HSAKMT_DEBUG_LEVEL_DEBUG]
 *   it replaces the default debug level. Out-of-range values are ignored.
 * HSA_ZFB: when set, its integer value is stored in zfb_support (zero
 *   frame buffer support).
 *
 * Always returns HSAKMT_STATUS_SUCCESS.
 */
static HSAKMT_STATUS init_vars_from_env(void)
{
	const char *value;

	hsakmt_debug_level = HSAKMT_DEBUG_LEVEL_DEFAULT;

	value = getenv("HSAKMT_DEBUG_LEVEL");
	if (value != NULL) {
		const int requested = atoi(value);
		const bool out_of_range =
			requested < HSAKMT_DEBUG_LEVEL_ERR ||
			requested > HSAKMT_DEBUG_LEVEL_DEBUG;

		if (!out_of_range)
			hsakmt_debug_level = requested;
	}

	/* Check whether to support Zero frame buffer */
	value = getenv("HSA_ZFB");
	if (value != NULL)
		zfb_support = atoi(value);

	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Open the connection to KFD (reference counted).
 *
 * First successful call: resolves the optional amdgpu_device_get_fd hook,
 * reads the environment, opens /dev/kfd if no descriptor is cached,
 * initializes the page size, KFD version, topology, debugging memory and
 * counter properties, and installs the atfork handlers once.
 * Later calls only bump the reference count and return
 * HSAKMT_STATUS_KERNEL_ALREADY_OPENED.
 *
 * Returns HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED when /dev/kfd cannot
 * be opened, or the failing status of init_vars_from_env(),
 * init_kfd_version() or topology_sysfs_get_system_props().
 *
 * On failure, a descriptor opened by this call is closed and kfd_fd is
 * reset to -1, so that a later retry opens the device again instead of
 * reusing a closed descriptor.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
{
	HSAKMT_STATUS result;
	int fd = -1;
	HsaSystemProperties sys_props;
	char *error;
	char *useSvmStr;

	pthread_mutex_lock(&hsakmt_mutex);

	/* If the process has forked, the child process must re-initialize
	 * its connection to KFD. Any references tracked by kfd_open_count
	 * belong to the parent
	 */
	if (is_forked_child())
		clear_after_fork();

	if (kfd_open_count == 0) {
		static bool atfork_installed = false;

		/* Clear any stale error so the dlerror() check below only
		 * reflects this dlsym() call.
		 */
		dlerror();
		fn_amdgpu_device_get_fd = dlsym(RTLD_DEFAULT, "amdgpu_device_get_fd");
		if ((error = dlerror()) != NULL)
			pr_err("amdgpu_device_get_fd is not available: %s\n", error);
		else
			pr_info("amdgpu_device_get_fd is available %p\n", fn_amdgpu_device_get_fd);

		result = init_vars_from_env();
		if (result != HSAKMT_STATUS_SUCCESS)
			goto open_failed;

		/* Reuse a descriptor left open by a previous open/close cycle. */
		if (kfd_fd < 0) {
			fd = open(kfd_device_name, O_RDWR | O_CLOEXEC);

			if (fd == -1) {
				result = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED;
				goto open_failed;
			}

			kfd_fd = fd;
		}

		init_page_size();

		result = init_kfd_version();
		if (result != HSAKMT_STATUS_SUCCESS)
			goto kfd_version_failed;

		/* SVM API stays enabled unless HSA_USE_SVM is exactly "0". */
		useSvmStr = getenv("HSA_USE_SVM");
		is_svm_api_supported = !(useSvmStr && !strcmp(useSvmStr, "0"));

		result = topology_sysfs_get_system_props(&sys_props);
		if (result != HSAKMT_STATUS_SUCCESS)
			goto topology_sysfs_failed;

		kfd_open_count = 1;

		if (init_device_debugging_memory(sys_props.NumNodes) != HSAKMT_STATUS_SUCCESS)
			pr_warn("Insufficient Memory. Debugging unavailable\n");

		init_counter_props(sys_props.NumNodes);

		if (!atfork_installed) {
			/* Atfork handlers cannot be uninstalled and
			 * must be installed only once. Otherwise
			 * prepare will deadlock when trying to take
			 * the same lock multiple times.
			 */
			pthread_atfork(prepare_fork_handler,
				       parent_fork_handler,
				       child_fork_handler);
			atfork_installed = true;
		}
	} else {
		kfd_open_count++;
		result = HSAKMT_STATUS_KERNEL_ALREADY_OPENED;
	}

	pthread_mutex_unlock(&hsakmt_mutex);
	return result;

topology_sysfs_failed:
kfd_version_failed:
	/* Only undo an open performed by this call, and never leave kfd_fd
	 * referring to a descriptor that has been closed.
	 */
	if (fd >= 0) {
		close(fd);
		kfd_fd = -1;
	}
open_failed:
	pthread_mutex_unlock(&hsakmt_mutex);

	return result;
}
/*
 * Drop one reference on the KFD connection.
 *
 * When the last reference goes away the counter properties and device
 * debugging memory are destroyed. Returns
 * HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED if there was no open
 * reference, HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFD(void)
{
	HSAKMT_STATUS result = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED;

	pthread_mutex_lock(&hsakmt_mutex);

	if (kfd_open_count > 0) {
		kfd_open_count--;
		if (kfd_open_count == 0) {
			/* Last user is gone: tear down per-open state. */
			destroy_counter_props();
			destroy_device_debugging_memory();
		}
		result = HSAKMT_STATUS_SUCCESS;
	}

	pthread_mutex_unlock(&hsakmt_mutex);

	return result;
}
+210
View File
@@ -0,0 +1,210 @@
/*
* Copyright © 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "libhsakmt.h"
#include "hsakmt/linux/kfd_ioctl.h"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <errno.h>
/* Trace id value treated as "no trace": rejected by Destroy/Start/Stop and
 * stored in *traceId by hsaKmtPcSamplingCreate() before the ioctl runs.
 */
#define INVALID_TRACE_ID 0x0
/*
 * Report whether PC sampling can be used.
 *
 * Succeeds only when KFD is open and passes the minor-version-16 check;
 * otherwise the CHECK_* macros return early.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingSupport(void)
{
	const HSAKMT_STATUS supported = HSAKMT_STATUS_SUCCESS;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(16);

	return supported;
}
/*
 * Query the PC sampling capabilities of a node.
 *
 * NodeId: node to query.
 * sample_info / sample_info_sz: caller buffer and its entry count, passed
 *   to the kernel as sample_info_ptr / num_sample_info.
 * size: out; receives args.num_sample_info after a successful ioctl.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for a NULL size, the
 * validate_nodeid() status for a bad node, and otherwise a status derived
 * from the kmtIoctl() return value (see the switch below).
 * NOTE(review): the mapping assumes kmtIoctl() returns negative errno
 * values - confirm against its implementation.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingQueryCapabilities(HSAuint32 NodeId, void *sample_info,
							  HSAuint32 sample_info_sz, HSAuint32 *size)
{
	struct kfd_ioctl_pc_sample_args args = {0};
	uint32_t gpu_id;
	int rc;

	if (!size)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(16);

	rc = validate_nodeid(NodeId, &gpu_id);
	if (rc != HSAKMT_STATUS_SUCCESS) {
		pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
		return rc;
	}

	/* The caller's buffer is handed to the kernel as-is. */
	assert(sizeof(HsaPcSamplingInfo) == sizeof(struct kfd_pc_sample_info));

	args.op = KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES;
	args.gpu_id = gpu_id;
	args.sample_info_ptr = (uint64_t)sample_info;
	args.num_sample_info = sample_info_sz;
	args.flags = 0;

	rc = kmtIoctl(kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);
	switch (rc) {
	case 0:
		break;
	case -ENOSPC:
		return HSAKMT_STATUS_BUFFER_TOO_SMALL;
	case -EINVAL:
		return HSAKMT_STATUS_INVALID_PARAMETER;
	case -EOPNOTSUPP:
		return HSAKMT_STATUS_NOT_SUPPORTED;
	case -EBUSY:
		return HSAKMT_STATUS_UNAVAILABLE;
	default:
		return HSAKMT_STATUS_ERROR;
	}

	*size = args.num_sample_info;

	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Create a PC sampling session on a node.
 *
 * sample_info: single configuration entry passed to the kernel.
 * traceId: out; set to INVALID_TRACE_ID up front and to the kernel-assigned
 *   id on success.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for NULL arguments, the
 * validate_nodeid() status for a bad node, and otherwise a status derived
 * from the kmtIoctl() return value (see the switch below).
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingCreate(HSAuint32 NodeId, HsaPcSamplingInfo *sample_info,
					       HsaPcSamplingTraceId *traceId)
{
	struct kfd_ioctl_pc_sample_args args = {0};
	uint32_t gpu_id;
	int rc;

	if (!sample_info || !traceId)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	CHECK_KFD_OPEN();

	/* Make sure the caller never sees a stale id on failure. */
	*traceId = INVALID_TRACE_ID;

	rc = validate_nodeid(NodeId, &gpu_id);
	if (rc != HSAKMT_STATUS_SUCCESS) {
		pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
		return rc;
	}

	args.op = KFD_IOCTL_PCS_OP_CREATE;
	args.gpu_id = gpu_id;
	args.sample_info_ptr = (uint64_t)sample_info;
	args.num_sample_info = 1;
	args.trace_id = INVALID_TRACE_ID;

	rc = kmtIoctl(kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);
	switch (rc) {
	case 0:
		break;
	case -EINVAL:
		return HSAKMT_STATUS_INVALID_PARAMETER;
	case -ENOMEM:
		return HSAKMT_STATUS_NO_MEMORY;
	case -EBUSY:
		return HSAKMT_STATUS_UNAVAILABLE;
	default:
		return HSAKMT_STATUS_ERROR;
	}

	*traceId = args.trace_id;

	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Destroy a PC sampling session.
 *
 * The session is stopped first (the stop result is ignored), then the
 * destroy operation is issued.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE for INVALID_TRACE_ID, the
 * validate_nodeid() status for a bad node, HSAKMT_STATUS_INVALID_PARAMETER
 * for -EINVAL from kmtIoctl(), HSAKMT_STATUS_ERROR for any other non-zero
 * result, and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingDestroy(HSAuint32 NodeId, HsaPcSamplingTraceId traceId)
{
	struct kfd_ioctl_pc_sample_args args = {0};
	uint32_t gpu_id;
	int rc;

	if (traceId == INVALID_TRACE_ID)
		return HSAKMT_STATUS_INVALID_HANDLE;

	CHECK_KFD_OPEN();

	rc = validate_nodeid(NodeId, &gpu_id);
	if (rc != HSAKMT_STATUS_SUCCESS) {
		pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
		return rc;
	}

	/* Best effort: the outcome of the stop is deliberately not checked. */
	hsaKmtPcSamplingStop(NodeId, traceId);

	args.op = KFD_IOCTL_PCS_OP_DESTROY;
	args.gpu_id = gpu_id;
	args.trace_id = traceId;

	rc = kmtIoctl(kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);
	if (rc == 0)
		return HSAKMT_STATUS_SUCCESS;
	if (rc == -EINVAL)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	return HSAKMT_STATUS_ERROR;
}
/*
 * Start a PC sampling session.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE for INVALID_TRACE_ID, the
 * validate_nodeid() status for a bad node, and otherwise a status derived
 * from the kmtIoctl() return value (see the switch below).
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingStart(HSAuint32 NodeId, HsaPcSamplingTraceId traceId)
{
	struct kfd_ioctl_pc_sample_args args = {0};
	uint32_t gpu_id;
	int rc;

	if (traceId == INVALID_TRACE_ID)
		return HSAKMT_STATUS_INVALID_HANDLE;

	CHECK_KFD_OPEN();

	rc = validate_nodeid(NodeId, &gpu_id);
	if (rc != HSAKMT_STATUS_SUCCESS) {
		pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
		return rc;
	}

	args.op = KFD_IOCTL_PCS_OP_START;
	args.gpu_id = gpu_id;
	args.trace_id = traceId;

	rc = kmtIoctl(kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);
	switch (rc) {
	case 0:
		return HSAKMT_STATUS_SUCCESS;
	case -EINVAL:
		return HSAKMT_STATUS_INVALID_PARAMETER;
	case -ENOMEM:
		return HSAKMT_STATUS_OUT_OF_RESOURCES;
	case -EBUSY:
		return HSAKMT_STATUS_UNAVAILABLE;
	case -EALREADY:
		return HSAKMT_STATUS_KERNEL_ALREADY_OPENED;
	default:
		return HSAKMT_STATUS_ERROR;
	}
}
/*
 * Stop a PC sampling session.
 *
 * Returns HSAKMT_STATUS_INVALID_HANDLE for INVALID_TRACE_ID, the
 * validate_nodeid() status for a bad node, and otherwise a status derived
 * from the kmtIoctl() return value (see the switch below).
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPcSamplingStop(HSAuint32 NodeId, HsaPcSamplingTraceId traceId)
{
	struct kfd_ioctl_pc_sample_args args = {0};
	uint32_t gpu_id;
	int rc;

	if (traceId == INVALID_TRACE_ID)
		return HSAKMT_STATUS_INVALID_HANDLE;

	CHECK_KFD_OPEN();

	rc = validate_nodeid(NodeId, &gpu_id);
	if (rc != HSAKMT_STATUS_SUCCESS) {
		pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
		return rc;
	}

	args.op = KFD_IOCTL_PCS_OP_STOP;
	args.gpu_id = gpu_id;
	args.trace_id = traceId;

	rc = kmtIoctl(kfd_fd, AMDKFD_IOC_PC_SAMPLE, &args);
	switch (rc) {
	case 0:
		return HSAKMT_STATUS_SUCCESS;
	case -EINVAL:
		return HSAKMT_STATUS_INVALID_PARAMETER;
	case -EALREADY:
		return HSAKMT_STATUS_KERNEL_ALREADY_OPENED;
	default:
		return HSAKMT_STATUS_ERROR;
	}
}
+659
View File
@@ -0,0 +1,659 @@
/*
* Copyright © 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include "libhsakmt.h"
#include "pmc_table.h"
#include "hsakmt/linux/kfd_ioctl.h"
#include <unistd.h>
#include <sys/ioctl.h>
#include <errno.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <semaphore.h>
/* Divisor used to turn CounterSizeInBits into bytes.
 * NOTE(review): CHAR_BIT comes from <limits.h>, which this file does not
 * include directly - presumably pulled in indirectly; confirm.
 */
#define BITS_PER_BYTE CHAR_BIT
/* Tag stored in perf_trace.magic4cc and checked before a TraceId is used. */
#define HSA_PERF_MAGIC4CC 0x54415348
/* Run state of a perf_trace. A new trace starts out STOPPED
 * (hsaKmtPmcRegisterTrace) and becomes STARTED in hsaKmtPmcStartTrace.
 */
enum perf_trace_state {
PERF_TRACE_STATE__STOPPED = 0,
PERF_TRACE_STATE__STARTED
};
/* Per-block slice of a trace: the counters requested for one hardware
 * block. counter_id and perf_event_fd point into the same allocation as
 * the owning perf_trace (see hsaKmtPmcRegisterTrace) and each hold
 * num_counters entries.
 */
struct perf_trace_block {
enum perf_block_id block_id;
uint32_t num_counters;
uint64_t *counter_id;
int *perf_event_fd;
};
/* A registered trace. Its address, converted to a 64-bit integer, is the
 * TraceId handed to callers; magic4cc is checked to reject foreign handles.
 */
struct perf_trace {
/* Set to HSA_PERF_MAGIC4CC at registration. */
uint32_t magic4cc;
uint32_t gpu_id;
enum perf_trace_state state;
/* Number of entries in blocks[]. */
uint32_t num_blocks;
/* Caller's trace buffer and its size, recorded by hsaKmtPmcStartTrace. */
void *buf;
uint64_t buf_size;
/* Trailing array; storage is allocated together with this struct. */
struct perf_trace_block blocks[0];
};
/* Buffer read from a perf event fd by query_trace(), which reports only
 * `val`. The three values can be accessed by name or through values[].
 * NOTE(review): ena/run presumably hold the perf time-enabled and
 * time-running figures - confirm against the read_format in use.
 */
struct perf_counts_values {
union {
struct {
uint64_t val;
uint64_t ena;
uint64_t run;
};
uint64_t values[3];
};
};
/* Per-node table of counter properties, indexed by node id. Allocated by
 * init_counter_props(); entries are filled in on demand by
 * hsaKmtPmcGetCounterProperties().
 */
static HsaCounterProperties **counter_props;
/* Number of entries in counter_props. */
static unsigned int counter_props_count;
/* Read up to n bytes from fd into buf, retrying short reads and EINTR.
 *
 * Returns n when everything was read, the number of bytes read so far when
 * EOF is reached early, or -errno when read() fails with anything other
 * than EINTR.
 */
static ssize_t readn(int fd, void *buf, size_t n)
{
	size_t done = 0;

	while (done < n) {
		ssize_t got = read(fd, buf, n - done);

		if (got > 0) {
			done += got;
			buf = VOID_PTR_ADD(buf, got);
			continue;
		}

		if (got == 0)		/* reach EOF */
			break;

		if (errno != EINTR)	/* anything but an interrupted read */
			return -errno;
	}

	return done;
}
HSAKMT_STATUS init_counter_props(unsigned int NumNodes)
{
counter_props = calloc(NumNodes, sizeof(struct HsaCounterProperties *));
if (!counter_props) {
pr_warn("Profiling is not available.\n");
return HSAKMT_STATUS_NO_MEMORY;
}
counter_props_count = NumNodes;
return HSAKMT_STATUS_SUCCESS;
}
/* Release the per-node counter property table and every entry in it.
 *
 * Does nothing when the table was never allocated. After the call
 * counter_props is NULL and counter_props_count is 0, so later
 * hsaKmtPmc* calls see "no table" instead of a dangling pointer and a
 * repeated call is harmless.
 */
void destroy_counter_props(void)
{
	unsigned int i;

	if (!counter_props)
		return;

	/* free(NULL) is a no-op, so unused slots need no special casing. */
	for (i = 0; i < counter_props_count; i++) {
		free(counter_props[i]);
		counter_props[i] = NULL;
	}

	free(counter_props);

	/* Do not leave the globals describing freed memory. */
	counter_props = NULL;
	counter_props_count = 0;
}
/* Translate a perf block id into its HSA profile-block UUID.
 *
 * Stores the UUID in *uuid and returns 0 for a known block id. Returns -1
 * and leaves *uuid untouched for an unknown id.
 */
static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
{
	switch (block_id) {
	case PERFCOUNTER_BLOCKID__CB:
		*uuid = HSA_PROFILEBLOCK_AMD_CB;
		return 0;
	case PERFCOUNTER_BLOCKID__CPF:
		*uuid = HSA_PROFILEBLOCK_AMD_CPF;
		return 0;
	case PERFCOUNTER_BLOCKID__CPG:
		*uuid = HSA_PROFILEBLOCK_AMD_CPG;
		return 0;
	case PERFCOUNTER_BLOCKID__DB:
		*uuid = HSA_PROFILEBLOCK_AMD_DB;
		return 0;
	case PERFCOUNTER_BLOCKID__GDS:
		*uuid = HSA_PROFILEBLOCK_AMD_GDS;
		return 0;
	case PERFCOUNTER_BLOCKID__GRBM:
		*uuid = HSA_PROFILEBLOCK_AMD_GRBM;
		return 0;
	case PERFCOUNTER_BLOCKID__GRBMSE:
		*uuid = HSA_PROFILEBLOCK_AMD_GRBMSE;
		return 0;
	case PERFCOUNTER_BLOCKID__IA:
		*uuid = HSA_PROFILEBLOCK_AMD_IA;
		return 0;
	case PERFCOUNTER_BLOCKID__MC:
		*uuid = HSA_PROFILEBLOCK_AMD_MC;
		return 0;
	case PERFCOUNTER_BLOCKID__PASC:
		*uuid = HSA_PROFILEBLOCK_AMD_PASC;
		return 0;
	case PERFCOUNTER_BLOCKID__PASU:
		*uuid = HSA_PROFILEBLOCK_AMD_PASU;
		return 0;
	case PERFCOUNTER_BLOCKID__SPI:
		*uuid = HSA_PROFILEBLOCK_AMD_SPI;
		return 0;
	case PERFCOUNTER_BLOCKID__SRBM:
		*uuid = HSA_PROFILEBLOCK_AMD_SRBM;
		return 0;
	case PERFCOUNTER_BLOCKID__SQ:
		*uuid = HSA_PROFILEBLOCK_AMD_SQ;
		return 0;
	case PERFCOUNTER_BLOCKID__SX:
		*uuid = HSA_PROFILEBLOCK_AMD_SX;
		return 0;
	case PERFCOUNTER_BLOCKID__TA:
		*uuid = HSA_PROFILEBLOCK_AMD_TA;
		return 0;
	case PERFCOUNTER_BLOCKID__TCA:
		*uuid = HSA_PROFILEBLOCK_AMD_TCA;
		return 0;
	case PERFCOUNTER_BLOCKID__TCC:
		*uuid = HSA_PROFILEBLOCK_AMD_TCC;
		return 0;
	case PERFCOUNTER_BLOCKID__TCP:
		*uuid = HSA_PROFILEBLOCK_AMD_TCP;
		return 0;
	case PERFCOUNTER_BLOCKID__TCS:
		*uuid = HSA_PROFILEBLOCK_AMD_TCS;
		return 0;
	case PERFCOUNTER_BLOCKID__TD:
		*uuid = HSA_PROFILEBLOCK_AMD_TD;
		return 0;
	case PERFCOUNTER_BLOCKID__VGT:
		*uuid = HSA_PROFILEBLOCK_AMD_VGT;
		return 0;
	case PERFCOUNTER_BLOCKID__WD:
		*uuid = HSA_PROFILEBLOCK_AMD_WD;
		return 0;
	default:
		/* If we reach this point, it's a bug */
		return -1;
	}
}
/* Look up how many counters of a block can be used concurrently on a node.
 *
 * node_id: index into the counter_props table.
 * block_id: block index as stored in HsaCounter.BlockIndex.
 *
 * Returns the block's NumConcurrent, or 0 when the block is not present on
 * the node. 0 is also returned when the node's properties have not been
 * built yet (hsaKmtPmcGetCounterProperties() was never called for it) or
 * node_id is outside the table.
 */
static HSAuint32 get_block_concurrent_limit(uint32_t node_id,
					    HSAuint32 block_id)
{
	uint32_t i;
	HsaCounterProperties *props;
	HsaCounterBlockProperties *block;

	/* The table entry is filled in lazily; do not dereference a missing
	 * one.
	 */
	if (!counter_props || node_id >= counter_props_count)
		return 0;

	props = counter_props[node_id];
	if (!props)
		return 0;

	/* Blocks are variable-length records packed back to back. Only
	 * NumBlocks of them were written, so walking further would read past
	 * the allocation.
	 */
	block = &props->Blocks[0];
	for (i = 0; i < props->NumBlocks; i++) {
		/* A block without counters has no Counters[0] of its own. */
		if (block->NumCounters &&
		    block->Counters[0].BlockIndex == block_id)
			return block->NumConcurrent;

		/* The next block starts right after this block's counters. */
		block = (HsaCounterBlockProperties *)&block->Counters[block->NumCounters];
	}

	return 0;
}
/* Issue the same perf-event ioctl on every counter fd of a block.
 *
 * Stops at the first problem: HSAKMT_STATUS_UNAVAILABLE when an fd is
 * negative, HSAKMT_STATUS_ERROR when ioctl() returns non-zero. Counters
 * already handled before the failure are not rolled back here.
 */
static HSAKMT_STATUS perf_trace_ioctl(struct perf_trace_block *block,
				      uint32_t cmd)
{
	uint32_t idx = 0;

	while (idx < block->num_counters) {
		const int event_fd = block->perf_event_fd[idx];

		if (event_fd < 0)
			return HSAKMT_STATUS_UNAVAILABLE;

		if (ioctl(event_fd, cmd, NULL) != 0)
			return HSAKMT_STATUS_ERROR;

		idx++;
	}

	return HSAKMT_STATUS_SUCCESS;
}
static HSAKMT_STATUS query_trace(int fd, uint64_t *buf)
{
struct perf_counts_values content;
if (fd < 0)
return HSAKMT_STATUS_ERROR;
if (readn(fd, &content, sizeof(content)) != sizeof(content))
return HSAKMT_STATUS_ERROR;
*buf = content.val;
return HSAKMT_STATUS_SUCCESS;
}
/*
 * Return the counter properties of a node, building and caching them on
 * first use.
 *
 * NodeId: node to describe; also the index into counter_props.
 * CounterProperties: out; receives a pointer to the cached table entry
 *   (owned by this file, released by destroy_counter_props()).
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when the table was never allocated or the
 * entry cannot be allocated, HSAKMT_STATUS_INVALID_PARAMETER for a NULL out
 * pointer, HSAKMT_STATUS_INVALID_NODE_UNIT for a bad node, or the failing
 * status of get_block_properties().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcGetCounterProperties(HSAuint32 NodeId,
HsaCounterProperties **CounterProperties)
{
HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
uint32_t gpu_id, i, block_id;
uint32_t counter_props_size = 0;
uint32_t total_counters = 0;
uint32_t total_concurrent = 0;
struct perf_counter_block block = {0};
uint32_t total_blocks = 0;
HsaCounterBlockProperties *block_prop;
if (!counter_props)
return HSAKMT_STATUS_NO_MEMORY;
if (!CounterProperties)
return HSAKMT_STATUS_INVALID_PARAMETER;
if (validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
return HSAKMT_STATUS_INVALID_NODE_UNIT;
/* Already built for this node: hand out the cached copy. */
if (counter_props[NodeId]) {
*CounterProperties = counter_props[NodeId];
return HSAKMT_STATUS_SUCCESS;
}
/* Pass 1: count blocks, counters and slots to size the allocation. */
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
rc = get_block_properties(NodeId, i, &block);
if (rc != HSAKMT_STATUS_SUCCESS)
return rc;
total_concurrent += block.num_of_slots;
total_counters += block.num_of_counters;
/* If num_of_slots=0, this block doesn't exist */
if (block.num_of_slots)
total_blocks++;
}
/* The "- 1" and "- total_blocks" terms presumably account for one block
 * inside HsaCounterProperties and one counter inside each
 * HsaCounterBlockProperties - confirm against the struct definitions.
 * NOTE(review): with total_blocks == 0 the unsigned "total_blocks - 1"
 * wraps around - confirm that at least one block always exists.
 */
counter_props_size = sizeof(HsaCounterProperties) +
sizeof(HsaCounterBlockProperties) * (total_blocks - 1) +
sizeof(HsaCounter) * (total_counters - total_blocks);
counter_props[NodeId] = malloc(counter_props_size);
if (!counter_props[NodeId])
return HSAKMT_STATUS_NO_MEMORY;
counter_props[NodeId]->NumBlocks = total_blocks;
counter_props[NodeId]->NumConcurrent = total_concurrent;
/* Pass 2: fill in the blocks, packed back to back. */
block_prop = &counter_props[NodeId]->Blocks[0];
for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++) {
rc = get_block_properties(NodeId, block_id, &block);
if (rc != HSAKMT_STATUS_SUCCESS) {
/* Drop the half-built entry so a later call starts over. */
free(counter_props[NodeId]);
counter_props[NodeId] = NULL;
return rc;
}
if (!block.num_of_slots) /* not a valid block */
continue;
/* The return value of blockid2uuid() is not checked here. */
blockid2uuid(block_id, &block_prop->BlockId);
block_prop->NumCounters = block.num_of_counters;
block_prop->NumConcurrent = block.num_of_slots;
for (i = 0; i < block.num_of_counters; i++) {
block_prop->Counters[i].BlockIndex = block_id;
block_prop->Counters[i].CounterId = block.counter_ids[i];
block_prop->Counters[i].CounterSizeInBits = block.counter_size_in_bits;
block_prop->Counters[i].CounterMask = block.counter_mask;
block_prop->Counters[i].Flags.ui32.Global = 1;
block_prop->Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
}
/* The next block record starts right after this block's counters. */
block_prop = (HsaCounterBlockProperties *)&block_prop->Counters[block_prop->NumCounters];
}
*CounterProperties = counter_props[NodeId];
return HSAKMT_STATUS_SUCCESS;
}
/* Registers a set of (HW) counters to be used for tracing/profiling.
 *
 * NodeId: node the counters belong to.
 * NumberOfCounters / Counters: requested counters; at most MAX_COUNTERS
 *   (512) entries, each with BlockIndex below PERFCOUNTER_BLOCKID__MAX.
 * TraceRoot: out; receives NumberOfPasses (always 1), the page-aligned
 *   minimum trace buffer size and the TraceId (the address of the
 *   allocated perf_trace converted to a 64-bit integer).
 *
 * Returns HSAKMT_STATUS_NO_MEMORY when the counter table is missing, too
 * many counters are requested or allocation fails;
 * HSAKMT_STATUS_INVALID_PARAMETER for bad arguments, unknown blocks,
 * per-block slot limits being exceeded or no block left to trace;
 * HSAKMT_STATUS_INVALID_NODE_UNIT for a bad node.
 *
 * The trace is released by hsaKmtPmcUnregisterTrace().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcRegisterTrace(HSAuint32 NodeId,
HSAuint32 NumberOfCounters,
HsaCounter *Counters,
HsaPmcTraceRoot *TraceRoot)
{
uint32_t gpu_id, i, j;
uint64_t min_buf_size = 0;
struct perf_trace *trace = NULL;
uint32_t concurrent_limit;
const uint32_t MAX_COUNTERS = 512;
/* Scratch table on the stack: counter ids grouped by block. */
uint64_t counter_id[PERFCOUNTER_BLOCKID__MAX][MAX_COUNTERS];
uint32_t num_counters[PERFCOUNTER_BLOCKID__MAX] = {0};
uint32_t block, num_blocks = 0, total_counters = 0;
uint64_t *counter_id_ptr;
int *fd_ptr;
pr_debug("[%s] Number of counters %d\n", __func__, NumberOfCounters);
if (!counter_props)
return HSAKMT_STATUS_NO_MEMORY;
if (!Counters || !TraceRoot || NumberOfCounters == 0)
return HSAKMT_STATUS_INVALID_PARAMETER;
if (validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
return HSAKMT_STATUS_INVALID_NODE_UNIT;
if (NumberOfCounters > MAX_COUNTERS) {
pr_err("MAX_COUNTERS is too small for %d.\n",
NumberOfCounters);
return HSAKMT_STATUS_NO_MEMORY;
}
/* Calculating the minimum buffer size */
for (i = 0; i < NumberOfCounters; i++) {
if (Counters[i].BlockIndex >= PERFCOUNTER_BLOCKID__MAX)
return HSAKMT_STATUS_INVALID_PARAMETER;
/* Only privileged counters need to register */
if (Counters[i].Type > HSA_PROFILE_TYPE_PRIVILEGED_STREAMING)
continue;
min_buf_size += Counters[i].CounterSizeInBits/BITS_PER_BYTE;
/* j: the first blank entry in the block to record counter_id */
j = num_counters[Counters[i].BlockIndex];
counter_id[Counters[i].BlockIndex][j] = Counters[i].CounterId;
num_counters[Counters[i].BlockIndex]++;
total_counters++;
}
/* Verify that the number of counters per block is not larger than the
 * number of slots.
 */
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
if (!num_counters[i])
continue;
/* NOTE(review): this reads counter_props[NodeId], which is only
 * filled in by hsaKmtPmcGetCounterProperties() - confirm callers
 * always query the properties first.
 */
concurrent_limit = get_block_concurrent_limit(NodeId, i);
if (!concurrent_limit) {
pr_err("Invalid block ID: %d\n", i);
return HSAKMT_STATUS_INVALID_PARAMETER;
}
if (num_counters[i] > concurrent_limit) {
pr_err("Counters exceed the limit.\n");
return HSAKMT_STATUS_INVALID_PARAMETER;
}
num_blocks++;
}
if (!num_blocks)
return HSAKMT_STATUS_INVALID_PARAMETER;
/* Now we have sorted blocks/counters information in
 * num_counters[block_id] and counter_id[block_id][]. Allocate trace
 * and record the information.
 */
trace = (struct perf_trace *)calloc(sizeof(struct perf_trace)
+ sizeof(struct perf_trace_block) * num_blocks
+ sizeof(uint64_t) * total_counters
+ sizeof(int) * total_counters,
1);
if (!trace)
return HSAKMT_STATUS_NO_MEMORY;
/* Allocated area is partitioned as:
 * +---------------------------------+ trace
 * | perf_trace |
 * |---------------------------------| trace->blocks[0]
 * | perf_trace_block 0 |
 * | .... |
 * | perf_trace_block N-1 | trace->blocks[N-1]
 * |---------------------------------| <-- counter_id_ptr starts here
 * | block 0's counter IDs(uint64_t) |
 * | ...... |
 * | block N-1's counter IDs |
 * |---------------------------------| <-- perf_event_fd starts here
 * | block 0's perf_event_fds(int) |
 * | ...... |
 * | block N-1's perf_event_fds |
 * +---------------------------------+
 */
block = 0;
counter_id_ptr = (uint64_t *)((char *)
trace + sizeof(struct perf_trace)
+ sizeof(struct perf_trace_block) * num_blocks);
fd_ptr = (int *)(counter_id_ptr + total_counters);
/* Fill in each block's information to the TraceId */
for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
if (!num_counters[i]) /* not a block to trace */
continue;
/* Following perf_trace + perf_trace_block x N are those
 * counter_id arrays. Assign the counter_id array belonging to
 * this block.
 */
trace->blocks[block].counter_id = counter_id_ptr;
/* Fill in counter IDs to the counter_id array. */
for (j = 0; j < num_counters[i]; j++)
trace->blocks[block].counter_id[j] = counter_id[i][j];
/* The fd slots are only assigned here, not opened: they keep the
 * zero value from calloc().
 */
trace->blocks[block].perf_event_fd = fd_ptr;
/* how many counters to trace */
trace->blocks[block].num_counters = num_counters[i];
/* block index in "enum perf_block_id" */
trace->blocks[block].block_id = i;
block++; /* move to next */
counter_id_ptr += num_counters[i];
fd_ptr += num_counters[i];
}
trace->magic4cc = HSA_PERF_MAGIC4CC;
trace->gpu_id = gpu_id;
trace->state = PERF_TRACE_STATE__STOPPED;
trace->num_blocks = num_blocks;
TraceRoot->NumberOfPasses = 1;
TraceRoot->TraceBufferMinSizeBytes = PAGE_ALIGN_UP(min_buf_size);
TraceRoot->TraceId = PORT_VPTR_TO_UINT64(trace);
return HSAKMT_STATUS_SUCCESS;
}
/* Unregisters a set of (HW) counters used for tracing/profiling and frees
 * the trace.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for a zero TraceId,
 * HSAKMT_STATUS_INVALID_NODE_UNIT when the node is invalid or does not own
 * the trace, HSAKMT_STATUS_INVALID_HANDLE when the magic tag does not
 * match, or the failing status of hsaKmtPmcStopTrace() when a running
 * trace cannot be stopped (the trace is then left allocated).
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcUnregisterTrace(HSAuint32 NodeId,
						 HSATraceId TraceId)
{
	struct perf_trace *handle;
	uint32_t node_gpu_id;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (!TraceId)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (validate_nodeid(NodeId, &node_gpu_id) != HSAKMT_STATUS_SUCCESS)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	if (handle->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	if (handle->gpu_id != node_gpu_id)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	/* If the trace is in the running state, stop it */
	if (handle->state == PERF_TRACE_STATE__STARTED) {
		const HSAKMT_STATUS stop_status = hsaKmtPmcStopTrace(TraceId);

		if (stop_status != HSAKMT_STATUS_SUCCESS)
			return stop_status;
	}

	free(handle);

	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Validate a trace handle and node; no other action is taken.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for a zero TraceId,
 * HSAKMT_STATUS_INVALID_HANDLE when the magic tag does not match,
 * HSAKMT_STATUS_INVALID_NODE_UNIT for a bad node, and
 * HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcAcquireTraceAccess(HSAuint32 NodeId,
						    HSATraceId TraceId)
{
	const struct perf_trace *handle;
	uint32_t node_gpu_id;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (!TraceId)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
	if (handle->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	if (validate_nodeid(NodeId, &node_gpu_id) != HSAKMT_STATUS_SUCCESS)
		return HSAKMT_STATUS_INVALID_NODE_UNIT;

	return HSAKMT_STATUS_SUCCESS;
}
/* Counterpart of hsaKmtPmcAcquireTraceAccess().
 *
 * Only verifies that TraceId is non-zero and carries the expected magic
 * value; NodeId is not inspected.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcReleaseTraceAccess(HSAuint32 NodeId,
						    HSATraceId TraceId)
{
	struct perf_trace *handle;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (!TraceId)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	return (handle->magic4cc == HSA_PERF_MAGIC4CC)
		? HSAKMT_STATUS_SUCCESS
		: HSAKMT_STATUS_INVALID_HANDLE;
}
/* Starts tracing on a previously established set of performance counters.
 *
 * Every block of the trace is enabled in index order. If one of them fails,
 * the blocks enabled so far are disabled again (highest index first) and the
 * failing status is returned. On success the caller's buffer and its size
 * are recorded in the trace and the state becomes STARTED.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcStartTrace(HSATraceId TraceId,
					    void *TraceBuffer,
					    HSAuint64 TraceBufferSizeBytes)
{
	struct perf_trace *handle;
	HSAKMT_STATUS status;
	uint32_t idx;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (!TraceId || !TraceBuffer || !TraceBufferSizeBytes)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	if (handle->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	for (idx = 0; idx < handle->num_blocks; idx++) {
		status = perf_trace_ioctl(&handle->blocks[idx],
					  PERF_EVENT_IOC_ENABLE);
		if (status == HSAKMT_STATUS_SUCCESS)
			continue;

		/* Roll back: disable blocks idx-1 .. 0 before reporting. */
		while (idx > 0) {
			idx--;
			perf_trace_ioctl(&handle->blocks[idx],
					 PERF_EVENT_IOC_DISABLE);
		}
		return status;
	}

	handle->state = PERF_TRACE_STATE__STARTED;
	handle->buf = TraceBuffer;
	handle->buf_size = TraceBufferSizeBytes;

	return HSAKMT_STATUS_SUCCESS;
}
/* Forces an update of all the counters registered with a trace.
 *
 * Counter values are written as consecutive uint64_t entries into the buffer
 * stored in the trace, block by block and counter by counter. Returns
 * HSAKMT_STATUS_NO_MEMORY as soon as the next value would not fit within
 * buf_size, or the first failing status reported by query_trace().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcQueryTrace(HSATraceId TraceId)
{
	struct perf_trace *handle;
	HSAKMT_STATUS status;
	uint32_t blk, ctr;
	uint64_t *cursor;
	uint64_t bytes_needed = 0;

	if (!TraceId)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	if (handle->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	cursor = (uint64_t *)handle->buf;
	pr_debug("[%s] Trace buffer(%p): ", __func__, cursor);

	for (blk = 0; blk < handle->num_blocks; blk++) {
		for (ctr = 0; ctr < handle->blocks[blk].num_counters;
		     ctr++, cursor++) {
			/* Check capacity before touching the next slot. */
			bytes_needed += sizeof(uint64_t);
			if (bytes_needed > handle->buf_size)
				return HSAKMT_STATUS_NO_MEMORY;

			status = query_trace(handle->blocks[blk].perf_event_fd[ctr],
					     cursor);
			if (status != HSAKMT_STATUS_SUCCESS)
				return status;

			pr_debug("%lu_", *cursor);
		}
	}
	pr_debug("\n");

	return HSAKMT_STATUS_SUCCESS;
}
/* Stops tracing on a previously established set of performance counters.
 *
 * Blocks are disabled in index order. The first failure is returned at once
 * and the trace state is left untouched; only when every block was disabled
 * does the state become STOPPED.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtPmcStopTrace(HSATraceId TraceId)
{
	struct perf_trace *handle;
	uint32_t idx = 0;

	pr_debug("[%s] Trace ID 0x%lx\n", __func__, TraceId);

	if (!TraceId)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);

	if (handle->magic4cc != HSA_PERF_MAGIC4CC)
		return HSAKMT_STATUS_INVALID_HANDLE;

	while (idx < handle->num_blocks) {
		HSAKMT_STATUS status = perf_trace_ioctl(&handle->blocks[idx],
							PERF_EVENT_IOC_DISABLE);

		if (status != HSAKMT_STATUS_SUCCESS)
			return status;
		idx++;
	}

	handle->state = PERF_TRACE_STATE__STOPPED;

	return HSAKMT_STATUS_SUCCESS;
}
File diff suppressed because it is too large Load Diff
+74
View File
@@ -0,0 +1,74 @@
/*
* Copyright © 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef PMC_TABLE_H
#define PMC_TABLE_H
#include "libhsakmt.h"
/* Identifiers of the hardware blocks that expose performance counters.
 *
 * Values are contiguous starting at PERFCOUNTER_BLOCKID__FIRST (0), so
 * PERFCOUNTER_BLOCKID__MAX equals the number of block IDs and can be used
 * as a loop bound or array size.
 */
enum perf_block_id {
PERFCOUNTER_BLOCKID__FIRST = 0,
/* non-privileged */
PERFCOUNTER_BLOCKID__CB = PERFCOUNTER_BLOCKID__FIRST,
PERFCOUNTER_BLOCKID__CPC,
PERFCOUNTER_BLOCKID__CPF,
PERFCOUNTER_BLOCKID__CPG,
PERFCOUNTER_BLOCKID__DB,
PERFCOUNTER_BLOCKID__GDS,
PERFCOUNTER_BLOCKID__GRBM,
PERFCOUNTER_BLOCKID__GRBMSE,
PERFCOUNTER_BLOCKID__IA,
PERFCOUNTER_BLOCKID__MC,
PERFCOUNTER_BLOCKID__PASC,
PERFCOUNTER_BLOCKID__PASU,
PERFCOUNTER_BLOCKID__SPI,
PERFCOUNTER_BLOCKID__SRBM,
PERFCOUNTER_BLOCKID__SQ,
PERFCOUNTER_BLOCKID__SX,
PERFCOUNTER_BLOCKID__TA,
PERFCOUNTER_BLOCKID__TCA,
PERFCOUNTER_BLOCKID__TCC,
PERFCOUNTER_BLOCKID__TCP,
PERFCOUNTER_BLOCKID__TCS,
PERFCOUNTER_BLOCKID__TD,
PERFCOUNTER_BLOCKID__VGT,
PERFCOUNTER_BLOCKID__WD,
/* privileged */
/* (no privileged block IDs are listed at present) */
PERFCOUNTER_BLOCKID__MAX
};
/* Properties of one counter block, as passed to get_block_properties().
 * NOTE(review): the field descriptions below are inferred from the field
 * names only; confirm against the implementation that fills this struct.
 */
struct perf_counter_block {
uint32_t num_of_slots; /* presumably how many counters can be active at once */
uint32_t num_of_counters; /* presumably the number of entries in counter_ids */
uint32_t *counter_ids; /* array of counter IDs; ownership not visible here */
uint32_t counter_size_in_bits; /* width of one counter value, in bits */
uint64_t counter_mask; /* presumably the mask of valid counter bits - TODO confirm */
};
/* Retrieves the counter-block description for block_id on node node_id
 * through *block. The implementation is not part of this header.
 * NOTE(review): whether block->counter_ids points at shared or caller-owned
 * storage is not visible from here - confirm before freeing it.
 */
HSAKMT_STATUS get_block_properties(uint32_t node_id,
enum perf_block_id block_id,
struct perf_counter_block *block);
#endif // PMC_TABLE_H
+920
View File
@@ -0,0 +1,920 @@
/*
* Copyright © 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "libhsakmt.h"
#include "fmm.h"
#include "hsakmt/linux/kfd_ioctl.h"
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <math.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
/* 1024 doorbells, 4 or 8 bytes each doorbell depending on ASIC generation */
/* Doorbell width in bytes: 8 when gfxv >= 0x90000, otherwise 4. */
#define DOORBELL_SIZE(gfxv) (((gfxv) >= 0x90000) ? 8 : 4)
/* Size in bytes of a doorbell page holding 1024 doorbells of ds bytes. */
#define DOORBELLS_PAGE_SIZE(ds) (1024 * (ds))
/* Workgroup context data per CU: sum of the VGPR, SGPR, LDS and HW register
 * save sizes. SGPR_SIZE_PER_CU is not defined in this block and must be
 * provided by an included header.
 */
#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv) \
(get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU + \
LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU)
/* Control stack bytes per wave: 12 from GFX_VERSION_NAVI10 on, 8 before. */
#define CNTL_STACK_BYTES_PER_WAVE(gfxv) \
((gfxv) >= GFX_VERSION_NAVI10 ? 12 : 8)
/* Per-CU byte sizes used by WG_CONTEXT_DATA_SIZE_PER_CU. */
#define LDS_SIZE_PER_CU 0x10000
#define HWREG_SIZE_PER_CU 0x1000
/* Debugger memory: bytes reserved per wave and the alignment applied to the
 * per-queue total (see update_ctx_save_restore_size()).
 */
#define DEBUGGER_BYTES_ALIGN 64
#define DEBUGGER_BYTES_PER_WAVE 32
/* Per-queue bookkeeping kept by this library. The address of this structure
 * is what hsaKmtCreateQueue() returns to the caller as the queue handle.
 */
struct queue {
uint32_t queue_id; /* ID returned by the AMDKFD_IOC_CREATE_QUEUE ioctl */
uint64_t wptr; /* write pointer storage used for non-AQL queues */
uint64_t rptr; /* read pointer storage used for non-AQL queues */
void *eop_buffer; /* EOP buffer, NULL when none was allocated */
void *ctx_save_restore; /* context save/restore area, NULL when none */
uint32_t ctx_save_restore_size; /* control stack + workgroup data, page aligned */
uint32_t ctl_stack_size; /* control stack size including the save-area header */
uint32_t debug_memory_size; /* debugger memory size derived from the wave count */
uint32_t eop_buffer_size; /* 0 when the queue needs no EOP buffer */
uint32_t total_mem_alloc_size; /* (ctx_save_restore_size + debug_memory_size) * NumXcc */
uint32_t gfxv; /* gfx version of the node, from get_gfxv_by_node_id() */
bool use_ats; /* selects CPU mmap vs. GPU allocator for queue memory */
bool unified_ctx_save_restore; /* true when ctx_save_restore is an SVM-registered mmap */
/* This queue structure is allocated from GPU with page aligned size
 * but only small bytes are used. We use the extra space in the end for
 * cu_mask bits array.
 */
uint32_t cu_mask_count; /* in bits */
uint32_t cu_mask[0];
};
/* Doorbell mapping state for one topology node. */
struct process_doorbells {
bool use_gpuvm; /* true: mapping came from fmm_allocate_doorbell(); false: plain mmap of kfd_fd */
uint32_t size; /* mapping size in bytes; 0 means "not mapped yet" */
void *mapping; /* base address of the doorbell mapping */
pthread_mutex_t mutex; /* serializes the lazy mapping done in map_doorbell() */
};
/* Number of entries in doorbells[]; set by init_process_doorbells(). */
static unsigned int num_doorbells;
/* Per-node doorbell state, indexed by topology NodeId; NULL until
 * init_process_doorbells() succeeds and again after destroy/clear.
 */
static struct process_doorbells *doorbells;
/* Returns the VGPR save size per CU, in bytes, for the given gfx version.
 *
 * 0x80000 for the AQUA_VANJARAM family (low byte of gfxv ignored), ALDEBARAN
 * and ARCTURUS; 0x60000 for PLUM_BONITO and WHEAT_NAS; 0x40000 otherwise.
 */
uint32_t get_vgpr_size_per_cu(uint32_t gfxv)
{
	if ((gfxv & ~(0xff)) == GFX_VERSION_AQUA_VANJARAM)
		return 0x80000;

	if (gfxv == GFX_VERSION_ALDEBARAN || gfxv == GFX_VERSION_ARCTURUS)
		return 0x80000;

	if (gfxv == GFX_VERSION_PLUM_BONITO || gfxv == GFX_VERSION_WHEAT_NAS)
		return 0x60000;

	return 0x40000;
}
HSAKMT_STATUS init_process_doorbells(unsigned int NumNodes)
{
unsigned int i;
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
/* doorbells[] is accessed using Topology NodeId. This means doorbells[0],
* which corresponds to CPU only Node, might not be used
*/
doorbells = malloc(NumNodes * sizeof(struct process_doorbells));
if (!doorbells)
return HSAKMT_STATUS_NO_MEMORY;
for (i = 0; i < NumNodes; i++) {
doorbells[i].use_gpuvm = false;
doorbells[i].size = 0;
doorbells[i].mapping = NULL;
pthread_mutex_init(&doorbells[i].mutex, NULL);
}
num_doorbells = NumNodes;
return ret;
}
/* Fills in how the doorbell page of node_id should be mapped.
 *
 * use_gpuvm is enabled on dGPUs except Tonga: GPUVM doorbells on Tonga need
 * a workaround for a VM TLB ACTIVE bit lookup bug, and the ASIC check can go
 * once amdgpu implements that workaround. The size is the doorbell page size
 * for the node's gfx version, but never less than PAGE_SIZE.
 */
static void get_doorbell_map_info(uint32_t node_id,
				  struct process_doorbells *doorbell)
{
	const uint32_t gfxv = get_gfxv_by_node_id(node_id);
	uint32_t map_size = DOORBELLS_PAGE_SIZE(DOORBELL_SIZE(gfxv));

	if (map_size < (uint32_t) PAGE_SIZE)
		map_size = PAGE_SIZE;

	doorbell->use_gpuvm = (is_dgpu && gfxv != GFX_VERSION_TONGA);
	doorbell->size = map_size;
}
void destroy_process_doorbells(void)
{
unsigned int i;
if (!doorbells)
return;
for (i = 0; i < num_doorbells; i++) {
if (!doorbells[i].size)
continue;
if (doorbells[i].use_gpuvm) {
fmm_unmap_from_gpu(doorbells[i].mapping);
fmm_release(doorbells[i].mapping);
} else
munmap(doorbells[i].mapping, doorbells[i].size);
}
free(doorbells);
doorbells = NULL;
num_doorbells = 0;
}
/* This is a special funcion that should be called only from the child process
* after a fork(). This will clear doorbells duplicated from the parent.
*/
void clear_process_doorbells(void)
{
unsigned int i;
if (!doorbells)
return;
for (i = 0; i < num_doorbells; i++) {
if (!doorbells[i].size)
continue;
if (!doorbells[i].use_gpuvm)
munmap(doorbells[i].mapping, doorbells[i].size);
}
free(doorbells);
doorbells = NULL;
num_doorbells = 0;
}
/* Maps the doorbell page of NodeId with a shared read/write mmap of kfd_fd at
 * doorbell_mmap_offset and records the address in doorbells[NodeId].
 * gpu_id is not used on this path. Returns HSAKMT_STATUS_ERROR if mmap fails.
 */
static HSAKMT_STATUS map_doorbell_apu(HSAuint32 NodeId, HSAuint32 gpu_id,
				      HSAuint64 doorbell_mmap_offset)
{
	void *mapping = mmap(0, doorbells[NodeId].size, PROT_READ|PROT_WRITE,
			     MAP_SHARED, kfd_fd, doorbell_mmap_offset);

	if (mapping != MAP_FAILED) {
		doorbells[NodeId].mapping = mapping;
		return HSAKMT_STATUS_SUCCESS;
	}

	return HSAKMT_STATUS_ERROR;
}
/* Maps the doorbell page of NodeId through the fmm allocator and makes it
 * accessible to the GPU. On success the address is stored in
 * doorbells[NodeId].mapping; if the GPU mapping step fails the allocation is
 * released again and HSAKMT_STATUS_ERROR is returned.
 */
static HSAKMT_STATUS map_doorbell_dgpu(HSAuint32 NodeId, HSAuint32 gpu_id,
				       HSAuint64 doorbell_mmap_offset)
{
	void *mapping = fmm_allocate_doorbell(gpu_id, doorbells[NodeId].size,
					      doorbell_mmap_offset);

	if (!mapping)
		return HSAKMT_STATUS_ERROR;

	/* The doorbell page also has to be mapped for GPU access. */
	if (fmm_map_to_gpu(mapping, doorbells[NodeId].size, NULL) != 0) {
		fmm_release(mapping);
		return HSAKMT_STATUS_ERROR;
	}

	doorbells[NodeId].mapping = mapping;

	return HSAKMT_STATUS_SUCCESS;
}
/* Lazily maps the doorbell page of NodeId, under the node's mutex.
 *
 * A non-zero size means the node is already mapped and nothing is done.
 * Otherwise the GPUVM path is tried first when selected; if it fails the
 * code falls back to the plain mmap path (KFD may not support doorbells in
 * GPUVM). When mapping fails altogether the size is reset to 0 so a later
 * call retries.
 */
static HSAKMT_STATUS map_doorbell(HSAuint32 NodeId, HSAuint32 gpu_id,
				  HSAuint64 doorbell_mmap_offset)
{
	HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;

	pthread_mutex_lock(&doorbells[NodeId].mutex);

	if (doorbells[NodeId].size)
		goto unlock;

	get_doorbell_map_info(NodeId, &doorbells[NodeId]);

	if (doorbells[NodeId].use_gpuvm) {
		status = map_doorbell_dgpu(NodeId, gpu_id, doorbell_mmap_offset);
		/* Remember the fallback so teardown uses munmap(). */
		if (status != HSAKMT_STATUS_SUCCESS)
			doorbells[NodeId].use_gpuvm = false;
	}

	if (!doorbells[NodeId].use_gpuvm)
		status = map_doorbell_apu(NodeId, gpu_id, doorbell_mmap_offset);

	if (status != HSAKMT_STATUS_SUCCESS)
		doorbells[NodeId].size = 0;

unlock:
	pthread_mutex_unlock(&doorbells[NodeId].mutex);

	return status;
}
/* Allocates size bytes of anonymous, private, read/write/execute memory.
 *
 * The address returned by mmap is aligned to sysconf(_SC_PAGESIZE) and
 * MAP_ANONYMOUS memory is zero-initialized. Returns NULL on failure.
 */
static void *allocate_exec_aligned_memory_cpu(uint32_t size)
{
	void *region = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
			    MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

	return (region == MAP_FAILED) ? NULL : region;
}
/* Computes the context save/restore sizes of a queue.
 *
 * Returns true when the queue needs a context-save-restore area, in which
 * case q->ctl_stack_size, q->debug_memory_size and q->ctx_save_restore_size
 * have been filled in. Returns false (leaving q untouched) for gfx versions
 * older than Carrizo, when the node properties cannot be read, or when the
 * node reports no compute cores / SIMDs per CU.
 */
static bool update_ctx_save_restore_size(uint32_t nodeid, struct queue *q)
{
	HsaNodeProperties props;
	uint32_t cu_count, wave_count;
	uint32_t stack_bytes, wg_bytes;

	if (q->gfxv < GFX_VERSION_CARRIZO)
		return false;

	if (hsaKmtGetNodeProperties(nodeid, &props))
		return false;

	if (!props.NumFComputeCores || !props.NumSIMDPerCU)
		return false;

	cu_count = props.NumFComputeCores / props.NumSIMDPerCU / props.NumXcc;

	if (q->gfxv < GFX_VERSION_NAVI10)
		wave_count = MIN(cu_count * 40,
				 props.NumShaderBanks / props.NumArrays * 512);
	else
		wave_count = cu_count * 32;

	stack_bytes = wave_count * CNTL_STACK_BYTES_PER_WAVE(q->gfxv) + 8;
	wg_bytes = cu_count * WG_CONTEXT_DATA_SIZE_PER_CU(q->gfxv);

	q->ctl_stack_size = PAGE_ALIGN_UP(sizeof(HsaUserContextSaveAreaHeader)
					  + stack_bytes);
	if ((q->gfxv & 0x3f0000) == 0xA0000) {
		/* HW design limits control stack size to 0x7000.
		 * This is insufficient for theoretical PM4 cases
		 * but sufficient for AQL, limited by SPI events.
		 */
		q->ctl_stack_size = MIN(q->ctl_stack_size, 0x7000);
	}

	q->debug_memory_size =
		ALIGN_UP(wave_count * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN);

	q->ctx_save_restore_size = q->ctl_stack_size + PAGE_ALIGN_UP(wg_bytes);

	return true;
}
/* Allocates executable memory through the fmm allocator and maps it to the
 * GPU.
 *
 * size        requested size in bytes; rounded up to a multiple of align
 * align       alignment applied to size
 * gpu_id      KFD GPU id the memory is allocated for
 * NodeId      topology node; when non-zero the memory is also registered
 *             to that node
 * nonPaged    forwarded as the NonPaged allocation flag; when false the
 *             allocation is bound to the CPU node directly linked to NodeId
 * DeviceLocal allocate device (VRAM) memory unless zfb_support is set, in
 *             which case the host path is used
 * Uncached    forwarded as the Uncached allocation flag
 *
 * Returns the allocated address, or NULL on any failure (memory allocated so
 * far is freed again).
 */
void *allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align, uint32_t gpu_id,
				       uint32_t NodeId, bool nonPaged,
				       bool DeviceLocal,
				       bool Uncached)
{
	void *mem = NULL;
	HSAuint64 gpu_va;
	HsaMemFlags flags;
	HSAuint32 cpu_id = 0;

	flags.Value = 0;
	flags.ui32.HostAccess = !DeviceLocal;
	flags.ui32.ExecuteAccess = 1;
	flags.ui32.NonPaged = nonPaged;
	flags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
	flags.ui32.CoarseGrain = DeviceLocal;
	flags.ui32.Uncached = Uncached;

	size = ALIGN_UP(size, align);

	if (DeviceLocal && !zfb_support)
		mem = fmm_allocate_device(gpu_id, NodeId, mem, size, flags);
	else {
		/* VRAM under ZFB mode should be supported here without any
		 * additional code
		 */
		/* Get the closest cpu_id to GPU NodeId for system memory allocation
		 * nonPaged=0 system memory allocation uses GTT path
		 */
		if (!nonPaged) {
			cpu_id = get_direct_link_cpu(NodeId);
			if (cpu_id == INVALID_NODEID) {
				/* No directly linked CPU: skip NUMA binding. */
				flags.ui32.NoNUMABind = 1;
				cpu_id = 0;
			}
		}
		mem = fmm_allocate_host(gpu_id, cpu_id, mem, size, flags);
	}

	if (!mem) {
		/* size is unsigned; %u keeps large sizes from printing negative */
		pr_err("Alloc %s memory failed size %u\n",
		       DeviceLocal ? "VRAM" : "GTT", size);
		return NULL;
	}

	if (NodeId != 0) {
		uint32_t nodes_array[1] = {NodeId};

		if (hsaKmtRegisterMemoryToNodes(mem, size, 1, nodes_array) != HSAKMT_STATUS_SUCCESS) {
			hsaKmtFreeMemory(mem, size);
			return NULL;
		}
	}

	if (hsaKmtMapMemoryToGPU(mem, size, &gpu_va) != HSAKMT_STATUS_SUCCESS) {
		hsaKmtFreeMemory(mem, size);
		return NULL;
	}

	return mem;
}
/* Releases memory obtained from allocate_exec_aligned_memory_gpu().
 *
 * size is rounded up to align the same way the allocation did. The memory is
 * freed only when unmapping it from the GPU succeeded.
 */
void free_exec_aligned_memory_gpu(void *addr, uint32_t size, uint32_t align)
{
	const uint32_t aligned_size = ALIGN_UP(size, align);

	if (hsaKmtUnmapMemoryToGPU(addr) != HSAKMT_STATUS_SUCCESS)
		return;

	hsaKmtFreeMemory(addr, aligned_size);
}
/*
 * Allocates executable memory aligned to sysconf(_SC_PAGESIZE).
 *
 * With use_ats the memory comes from an anonymous CPU mmap and the remaining
 * arguments are ignored; otherwise it is allocated through the GPU path with
 * PAGE_SIZE alignment.
 */
static void *allocate_exec_aligned_memory(uint32_t size,
					  bool use_ats,
					  uint32_t gpu_id,
					  uint32_t NodeId,
					  bool nonPaged,
					  bool DeviceLocal,
					  bool Uncached)
{
	if (use_ats)
		return allocate_exec_aligned_memory_cpu(size);

	return allocate_exec_aligned_memory_gpu(size, PAGE_SIZE, gpu_id, NodeId,
						nonPaged, DeviceLocal,
						Uncached);
}
/* Frees memory obtained from allocate_exec_aligned_memory(); use_ats must
 * match the value used for the allocation because it selects between
 * munmap() and the GPU free path.
 */
static void free_exec_aligned_memory(void *addr, uint32_t size, uint32_t align,
				     bool use_ats)
{
	if (use_ats) {
		munmap(addr, size);
		return;
	}

	free_exec_aligned_memory_gpu(addr, size, align);
}
/* Applies the SVM attributes this file uses for queue memory to [mem, mem+size).
 *
 * Six attributes are set in one hsaKmtSVMSetAttr() call: prefetch location,
 * preferred location, flags to clear (everything except the wanted ones),
 * flags to set (host access + GPU exec, plus "always mapped" on request),
 * access for gpuNode, and granularity 0xFF. Requesting alwaysMapped goes
 * through CHECK_KFD_MINOR_VERSION(11) first.
 */
static HSAKMT_STATUS register_svm_range(void *mem, uint32_t size,
					uint32_t gpuNode, uint32_t prefetchNode,
					uint32_t preferredNode, bool alwaysMapped)
{
	HSA_SVM_ATTRIBUTE attr_list[6];
	const HSAuint32 attr_count = sizeof(attr_list) / sizeof(attr_list[0]);
	HSAuint32 wanted_flags = HSA_SVM_FLAG_HOST_ACCESS | HSA_SVM_FLAG_GPU_EXEC;

	if (alwaysMapped) {
		CHECK_KFD_MINOR_VERSION(11);
		wanted_flags |= HSA_SVM_FLAG_GPU_ALWAYS_MAPPED;
	}

	attr_list[0].type = HSA_SVM_ATTR_PREFETCH_LOC;
	attr_list[0].value = prefetchNode;

	attr_list[1].type = HSA_SVM_ATTR_PREFERRED_LOC;
	attr_list[1].value = preferredNode;

	attr_list[2].type = HSA_SVM_ATTR_CLR_FLAGS;
	attr_list[2].value = ~wanted_flags;

	attr_list[3].type = HSA_SVM_ATTR_SET_FLAGS;
	attr_list[3].value = wanted_flags;

	attr_list[4].type = HSA_SVM_ATTR_ACCESS;
	attr_list[4].value = gpuNode;

	attr_list[5].type = HSA_SVM_ATTR_GRANULARITY;
	attr_list[5].value = 0xFF;

	return hsaKmtSVMSetAttr(mem, size, attr_count, attr_list);
}
/* Releases everything owned by a queue object, then the object itself.
 *
 * The EOP buffer is freed when present. The context save/restore area is
 * munmap'ed when it is the unified (SVM-registered mmap) variant, otherwise
 * freed through the regular allocator when present.
 */
static void free_queue(struct queue *q)
{
	const bool ats = q->use_ats;

	if (q->eop_buffer)
		free_exec_aligned_memory(q->eop_buffer,
					 q->eop_buffer_size,
					 PAGE_SIZE, ats);

	if (q->unified_ctx_save_restore) {
		munmap(q->ctx_save_restore, q->total_mem_alloc_size);
	} else if (q->ctx_save_restore) {
		free_exec_aligned_memory(q->ctx_save_restore,
					 q->total_mem_alloc_size,
					 PAGE_SIZE, ats);
	}

	free_exec_aligned_memory((void *)q, sizeof(*q), PAGE_SIZE, ats);
}
/* Writes one HsaUserContextSaveAreaHeader per XCC into the save area at addr.
 *
 * Header i lives at addr + i * q->ctx_save_restore_size. Each header gets the
 * error event id (0 when no Event is given), the error payload pointer, a
 * debug offset of (NumXcc - i) * ctx_save_restore_size and a debug size of
 * debug_memory_size * NumXcc.
 */
static inline void fill_cwsr_header(struct queue *q, void *addr,
		HsaEvent *Event, volatile HSAint64 *ErrPayload, HSAuint32 NumXcc)
{
	uint32_t xcc;

	for (xcc = 0; xcc < NumXcc; xcc++) {
		HsaUserContextSaveAreaHeader *hdr =
			(HsaUserContextSaveAreaHeader *)
			((uintptr_t)addr + (xcc * q->ctx_save_restore_size));

		if (Event)
			hdr->ErrorEventId = Event->EventId;
		else
			hdr->ErrorEventId = 0;

		hdr->ErrorReason = ErrPayload;
		hdr->DebugOffset = (NumXcc - xcc) * q->ctx_save_restore_size;
		hdr->DebugSize = q->debug_memory_size * NumXcc;
	}
}
/* Allocates the per-queue buffers (EOP buffer and context save/restore area)
 * and stores their addresses/sizes in the create-queue ioctl arguments.
 *
 * SDMA and SDMA_XGMI queues need neither buffer and return immediately.
 * Returns an HSAKMT_STATUS value (as int). On failure, buffers already
 * recorded in q are NOT released here; the caller is expected to clean up
 * (hsaKmtCreateQueue() does so through free_queue()).
 */
static int handle_concrete_asic(struct queue *q,
struct kfd_ioctl_create_queue_args *args,
uint32_t gpu_id,
uint32_t NodeId,
HsaEvent *Event,
volatile HSAint64 *ErrPayload)
{
bool ret;
if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA ||
args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
return HSAKMT_STATUS_SUCCESS;
/* eop_buffer_size was chosen by the caller from the gfx version. */
if (q->eop_buffer_size > 0) {
pr_info("Allocating VRAM for EOP\n");
q->eop_buffer = allocate_exec_aligned_memory(q->eop_buffer_size,
q->use_ats, gpu_id,
NodeId, true, true, /* Unused for VRAM */false);
if (!q->eop_buffer)
return HSAKMT_STATUS_NO_MEMORY;
args->eop_buffer_address = (uintptr_t)q->eop_buffer;
args->eop_buffer_size = q->eop_buffer_size;
}
/* true means the queue needs a context save/restore (CWSR) area and the
 * size fields of q have been filled in.
 */
ret = update_ctx_save_restore_size(NodeId, q);
if (ret) {
HsaNodeProperties node;
if (hsaKmtGetNodeProperties(NodeId, &node))
return HSAKMT_STATUS_ERROR;
args->ctx_save_restore_size = q->ctx_save_restore_size;
args->ctl_stack_size = q->ctl_stack_size;
/* Total memory to be allocated is =
 * (Control Stack size + WG size +
 * Debug memory area size) * num_xcc
 */
q->total_mem_alloc_size = (q->ctx_save_restore_size +
q->debug_memory_size) * node.NumXcc;
/* Allocate unified memory for context save restore
 * area on dGPU.
 */
if (!q->use_ats && is_svm_api_supported) {
uint32_t size = PAGE_ALIGN_UP(q->total_mem_alloc_size);
void *addr;
HSAKMT_STATUS r = HSAKMT_STATUS_ERROR;
pr_info("Allocating GTT for CWSR\n");
addr = mmap_allocate_aligned(PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE,
size, GPU_HUGE_PAGE_SIZE, 0,
0, (void *)LONG_MAX);
if (!addr) {
pr_err("mmap failed to alloc ctx area size 0x%x: %s\n",
size, strerror(errno));
} else {
/*
 * To avoid fork child process COW MMU notifier
 * callback evict parent process queues.
 */
if (madvise(addr, size, MADV_DONTFORK))
pr_err("madvise failed -%d\n", errno);
fill_cwsr_header(q, addr, Event, ErrPayload, node.NumXcc);
r = register_svm_range(addr, size,
NodeId, NodeId, 0, true);
if (r == HSAKMT_STATUS_SUCCESS) {
q->ctx_save_restore = addr;
q->unified_ctx_save_restore = true;
} else {
/* SVM registration failed: drop the mapping and use the fallback below. */
munmap(addr, size);
}
}
}
/* Fallback (or only) path: allocate the area through the regular
 * executable-memory allocator.
 */
if (!q->unified_ctx_save_restore) {
q->ctx_save_restore = allocate_exec_aligned_memory(
q->total_mem_alloc_size,
q->use_ats, gpu_id, NodeId,
false, false, false);
if (!q->ctx_save_restore)
return HSAKMT_STATUS_NO_MEMORY;
fill_cwsr_header(q, q->ctx_save_restore, Event, ErrPayload, node.NumXcc);
}
args->ctx_save_restore_address = (uintptr_t)q->ctx_save_restore;
}
return HSAKMT_STATUS_SUCCESS;
}
/* A map to translate thunk queue priority (-3 to +3)
 * to KFD queue priority (0 to 15)
 * Indexed by thunk_queue_priority+3
 *
 * The table has exactly 7 entries, so callers must range-check the thunk
 * priority before indexing.
 */
static uint32_t priority_map[] = {0, 3, 5, 7, 9, 11, 15};
/* Creates a hardware queue on NodeId and returns its resources.
 *
 * NodeId           topology node the queue is created on
 * Type             queue type; anything other than COMPUTE, SDMA, SDMA_XGMI
 *                  or COMPUTE_AQL is rejected
 * QueuePercentage  forwarded to KFD unchanged
 * Priority         thunk priority, must lie within
 *                  [HSA_QUEUE_PRIORITY_MINIMUM, HSA_QUEUE_PRIORITY_MAXIMUM]
 * QueueAddress     ring buffer base address
 * QueueSizeInBytes ring buffer size
 * Event            optional event whose id is stored in the CWSR header
 * QueueResource    in: read/write pointer addresses for AQL queues and the
 *                  error reason pointer; out: queue handle, doorbell address
 *                  and (for non-AQL queues) read/write pointer addresses
 *
 * Returns HSAKMT_STATUS_SUCCESS or an error status. On every error path the
 * queue object and the buffers attached to it are released.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(HSAuint32 NodeId,
					  HSA_QUEUE_TYPE Type,
					  HSAuint32 QueuePercentage,
					  HSA_QUEUE_PRIORITY Priority,
					  void *QueueAddress,
					  HSAuint64 QueueSizeInBytes,
					  HsaEvent *Event,
					  HsaQueueResource *QueueResource)
{
	HSAKMT_STATUS result;
	uint32_t gpu_id;
	uint64_t doorbell_mmap_offset;
	unsigned int doorbell_offset;
	int err;
	HsaNodeProperties props;
	uint32_t cu_num, i;

	CHECK_KFD_OPEN();

	if (Priority < HSA_QUEUE_PRIORITY_MINIMUM ||
	    Priority > HSA_QUEUE_PRIORITY_MAXIMUM)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	result = validate_nodeid(NodeId, &gpu_id);
	if (result != HSAKMT_STATUS_SUCCESS)
		return result;

	struct queue *q = allocate_exec_aligned_memory(sizeof(*q),
			false, gpu_id, NodeId, true, false, true);
	if (!q)
		return HSAKMT_STATUS_NO_MEMORY;

	memset(q, 0, sizeof(*q));

	q->gfxv = get_gfxv_by_node_id(NodeId);
	q->use_ats = false;

	if (q->gfxv == GFX_VERSION_TONGA)
		q->eop_buffer_size = TONGA_PAGE_SIZE;
	else if ((q->gfxv & ~(0xff)) == GFX_VERSION_AQUA_VANJARAM)
		q->eop_buffer_size = ((Type == HSA_QUEUE_COMPUTE) ? 4096 : 0);
	else if (q->gfxv >= 0x80000)
		q->eop_buffer_size = 4096;

	/* By default, CUs are all turned on. Initialize cu_mask to '1
	 * for all CU bits. A node that reports no SIMDs per CU gets an
	 * empty mask instead of a division by zero.
	 */
	if (hsaKmtGetNodeProperties(NodeId, &props) || !props.NumSIMDPerCU)
		q->cu_mask_count = 0;
	else {
		cu_num = props.NumFComputeCores / props.NumSIMDPerCU;
		/* cu_mask_count counts bits. It must be multiple of 32 */
		q->cu_mask_count = ALIGN_UP_32(cu_num, 32);
		/* Unsigned constant: shifting a signed 1 into bit 31 is undefined. */
		for (i = 0; i < cu_num; i++)
			q->cu_mask[i/32] |= (1U << (i % 32));
	}

	struct kfd_ioctl_create_queue_args args = {0};

	args.gpu_id = gpu_id;

	switch (Type) {
	case HSA_QUEUE_COMPUTE:
		args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE;
		break;
	case HSA_QUEUE_SDMA:
		args.queue_type = KFD_IOC_QUEUE_TYPE_SDMA;
		break;
	case HSA_QUEUE_SDMA_XGMI:
		args.queue_type = KFD_IOC_QUEUE_TYPE_SDMA_XGMI;
		break;
	case HSA_QUEUE_COMPUTE_AQL:
		args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE_AQL;
		break;
	default:
		/* q was already allocated above; do not leak it. */
		free_queue(q);
		return HSAKMT_STATUS_INVALID_PARAMETER;
	}

	/* Non-AQL queues use the read/write pointers stored in the queue
	 * object; AQL queues use the addresses supplied by the caller.
	 */
	if (Type != HSA_QUEUE_COMPUTE_AQL) {
		QueueResource->QueueRptrValue = (uintptr_t)&q->rptr;
		QueueResource->QueueWptrValue = (uintptr_t)&q->wptr;
	}

	err = handle_concrete_asic(q, &args, gpu_id, NodeId, Event, QueueResource->ErrorReason);
	if (err != HSAKMT_STATUS_SUCCESS) {
		free_queue(q);
		return err;
	}

	args.read_pointer_address = QueueResource->QueueRptrValue;
	args.write_pointer_address = QueueResource->QueueWptrValue;
	args.ring_base_address = (uintptr_t)QueueAddress;
	args.ring_size = QueueSizeInBytes;
	args.queue_percentage = QueuePercentage;
	args.queue_priority = priority_map[Priority+3];

	err = kmtIoctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args);
	if (err == -1) {
		free_queue(q);
		return HSAKMT_STATUS_ERROR;
	}

	q->queue_id = args.queue_id;

	if (IS_SOC15(q->gfxv)) {
		HSAuint64 mask = DOORBELLS_PAGE_SIZE(DOORBELL_SIZE(q->gfxv)) - 1;

		/* On SOC15 chips, the doorbell offset within the
		 * doorbell page is included in the doorbell offset
		 * returned by KFD. This allows CP queue doorbells to be
		 * allocated dynamically (while SDMA queue doorbells fixed)
		 * rather than based on the its process queue ID.
		 */
		doorbell_mmap_offset = args.doorbell_offset & ~mask;
		doorbell_offset = args.doorbell_offset & mask;
	} else {
		/* On older chips, the doorbell offset within the
		 * doorbell page is based on the queue ID.
		 */
		doorbell_mmap_offset = args.doorbell_offset;
		doorbell_offset = q->queue_id * DOORBELL_SIZE(q->gfxv);
	}

	err = map_doorbell(NodeId, gpu_id, doorbell_mmap_offset);
	if (err != HSAKMT_STATUS_SUCCESS) {
		/* hsaKmtDestroyQueue() expects the queue handle (the address
		 * of q), not the numeric KFD queue id.
		 */
		hsaKmtDestroyQueue(PORT_VPTR_TO_UINT64(q));
		return HSAKMT_STATUS_ERROR;
	}

	QueueResource->QueueId = PORT_VPTR_TO_UINT64(q);
	QueueResource->Queue_DoorBell = VOID_PTR_ADD(doorbells[NodeId].mapping,
						     doorbell_offset);

	return HSAKMT_STATUS_SUCCESS;
}
/* Updates ring buffer, percentage and priority of an existing queue through
 * the AMDKFD_IOC_UPDATE_QUEUE ioctl.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for an out-of-range Priority or a
 * zero QueueId, HSAKMT_STATUS_ERROR when the ioctl fails. Event is not used.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(HSA_QUEUEID QueueId,
					  HSAuint32 QueuePercentage,
					  HSA_QUEUE_PRIORITY Priority,
					  void *QueueAddress,
					  HSAuint64 QueueSize,
					  HsaEvent *Event)
{
	struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
	struct kfd_ioctl_update_queue_args update = {0};

	CHECK_KFD_OPEN();

	if (Priority < HSA_QUEUE_PRIORITY_MINIMUM ||
	    Priority > HSA_QUEUE_PRIORITY_MAXIMUM)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (!q)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	update.queue_priority = priority_map[Priority+3];
	update.queue_percentage = QueuePercentage;
	update.ring_size = QueueSize;
	update.ring_base_address = (uintptr_t)QueueAddress;
	update.queue_id = (HSAuint32)q->queue_id;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_UPDATE_QUEUE, &update) == -1)
		return HSAKMT_STATUS_ERROR;

	return HSAKMT_STATUS_SUCCESS;
}
/* Destroys a queue in KFD and releases the queue object and its buffers.
 *
 * When the destroy ioctl fails the error is logged and nothing is freed, so
 * the handle stays valid for the caller.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyQueue(HSA_QUEUEID QueueId)
{
	CHECK_KFD_OPEN();

	struct kfd_ioctl_destroy_queue_args destroy = {0};
	struct queue *q = PORT_UINT64_TO_VPTR(QueueId);

	if (!q)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	destroy.queue_id = q->queue_id;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_DESTROY_QUEUE, &destroy) == -1) {
		pr_err("Failed to destroy queue: %s\n", strerror(errno));
		return HSAKMT_STATUS_ERROR;
	}

	free_queue(q);

	return HSAKMT_STATUS_SUCCESS;
}
/* Sets the CU mask of a queue and caches it in the queue object.
 *
 * QueueId     queue handle returned by hsaKmtCreateQueue()
 * CUMaskCount number of mask bits; must be non-zero and a multiple of 32
 * QueueCUMask mask words, CUMaskCount / 32 entries
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for a zero handle or a bad mask
 * description, HSAKMT_STATUS_ERROR when the ioctl fails.
 *
 * NOTE(review): the cached copy lives in the spare space behind struct queue
 * (see the comment on cu_mask); CUMaskCount is not checked against that
 * capacity here - confirm the upper bound callers can pass.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetQueueCUMask(HSA_QUEUEID QueueId,
					     HSAuint32 CUMaskCount,
					     HSAuint32 *QueueCUMask)
{
	struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
	struct kfd_ioctl_set_cu_mask_args args = {0};

	CHECK_KFD_OPEN();

	/* Reject a zero handle like the other queue entry points do, instead
	 * of dereferencing it below.
	 */
	if (!q)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (CUMaskCount == 0 || !QueueCUMask || ((CUMaskCount % 32) != 0))
		return HSAKMT_STATUS_INVALID_PARAMETER;

	args.queue_id = q->queue_id;
	args.num_cu_mask = CUMaskCount;
	args.cu_mask_ptr = (uintptr_t)QueueCUMask;

	int err = kmtIoctl(kfd_fd, AMDKFD_IOC_SET_CU_MASK, &args);

	if (err == -1)
		return HSAKMT_STATUS_ERROR;

	/* CUMaskCount is in bits; copy the corresponding number of bytes. */
	memcpy(q->cu_mask, QueueCUMask, CUMaskCount / 8);
	q->cu_mask_count = CUMaskCount;

	return HSAKMT_STATUS_SUCCESS;
}
/* Reports the wave state and CU mask of a queue.
 *
 * The queue must own a context save/restore area; its address is handed to
 * the AMDKFD_IOC_GET_QUEUE_WAVE_STATE ioctl as the control stack address.
 * The control stack top and the user context save area are then derived
 * from that address, the queue's control stack size and the sizes reported
 * by the ioctl.
 */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtGetQueueInfo(
	HSA_QUEUEID QueueId,
	HsaQueueInfo *QueueInfo
)
{
	struct kfd_ioctl_get_queue_wave_state_args wave_state = {0};
	struct queue *q = PORT_UINT64_TO_VPTR(QueueId);

	CHECK_KFD_OPEN();

	if (!QueueInfo || !q)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	if (!q->ctx_save_restore)
		return HSAKMT_STATUS_ERROR;

	wave_state.queue_id = q->queue_id;
	wave_state.ctl_stack_address = (uintptr_t)q->ctx_save_restore;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_QUEUE_WAVE_STATE, &wave_state) < 0)
		return HSAKMT_STATUS_ERROR;

	/* Addresses derived from the control stack region. */
	QueueInfo->ControlStackTop = (void *)(wave_state.ctl_stack_address +
				q->ctl_stack_size - wave_state.ctl_stack_used_size);
	QueueInfo->UserContextSaveArea = (void *)
				(wave_state.ctl_stack_address + q->ctl_stack_size);

	/* Sizes reported by the ioctl. */
	QueueInfo->SaveAreaSizeInBytes = wave_state.save_area_used_size;
	QueueInfo->ControlStackUsedInBytes = wave_state.ctl_stack_used_size;

	/* Values cached in the queue object. */
	QueueInfo->NumCUAssigned = q->cu_mask_count;
	QueueInfo->CUMaskInfo = q->cu_mask;
	QueueInfo->SaveAreaHeader = q->ctx_save_restore;

	QueueInfo->QueueDetailError = 0;
	QueueInfo->QueueTypeExtended = 0;

	return HSAKMT_STATUS_SUCCESS;
}
/* Installs the trap handler (TBA) and trap buffer (TMA) addresses for the
 * GPU behind Node through the AMDKFD_IOC_SET_TRAP_HANDLER ioctl.
 *
 * The two size arguments are accepted but not passed on to the ioctl.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSetTrapHandler(HSAuint32 Node,
					     void *TrapHandlerBaseAddress,
					     HSAuint64 TrapHandlerSizeInBytes,
					     void *TrapBufferBaseAddress,
					     HSAuint64 TrapBufferSizeInBytes)
{
	struct kfd_ioctl_set_trap_handler_args trap = {0};
	HSAKMT_STATUS status;
	uint32_t gpu_id;

	CHECK_KFD_OPEN();

	status = validate_nodeid(Node, &gpu_id);
	if (status != HSAKMT_STATUS_SUCCESS)
		return status;

	trap.gpu_id = gpu_id;
	trap.tba_addr = (uintptr_t)TrapHandlerBaseAddress;
	trap.tma_addr = (uintptr_t)TrapBufferBaseAddress;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_SET_TRAP_HANDLER, &trap) == -1)
		return HSAKMT_STATUS_ERROR;

	return HSAKMT_STATUS_SUCCESS;
}
/* Translates an array of queue handles into a newly malloc'ed array of KFD
 * queue ids. Returns NULL when the allocation fails; otherwise the caller
 * owns the returned array. Every handle is dereferenced without validation.
 */
uint32_t *convert_queue_ids(HSAuint32 NumQueues, HSA_QUEUEID *Queues)
{
	uint32_t *ids = malloc(NumQueues * sizeof(uint32_t));
	unsigned int idx = 0;

	if (!ids)
		return NULL;

	while (idx < NumQueues) {
		const struct queue *q = PORT_UINT64_TO_VPTR(Queues[idx]);

		ids[idx] = q->queue_id;
		idx++;
	}

	return ids;
}
/* Allocates nGWS global wave sync resources for a queue.
 *
 * QueueId  queue handle returned by hsaKmtCreateQueue()
 * nGWS     number of GWS entries requested
 * firstGWS optional; receives the first GWS index on success
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER for a zero handle. An ioctl
 * failure is translated from errno: EINVAL -> INVALID_PARAMETER,
 * EBUSY -> OUT_OF_RESOURCES, ENODEV -> NOT_SUPPORTED, anything else -> ERROR.
 */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtAllocQueueGWS(
	HSA_QUEUEID QueueId,
	HSAuint32 nGWS,
	HSAuint32 *firstGWS)
{
	struct kfd_ioctl_alloc_queue_gws_args args = {0};
	struct queue *q = PORT_UINT64_TO_VPTR(QueueId);

	CHECK_KFD_OPEN();

	/* Same zero-handle check as the other queue entry points; without it
	 * the dereference below would fault.
	 */
	if (!q)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	args.queue_id = (HSAuint32)q->queue_id;
	args.num_gws = nGWS;

	int err = kmtIoctl(kfd_fd, AMDKFD_IOC_ALLOC_QUEUE_GWS, &args);

	if (!err) {
		if (firstGWS)
			*firstGWS = args.first_gws;
		return HSAKMT_STATUS_SUCCESS;
	}

	if (errno == EINVAL)
		return HSAKMT_STATUS_INVALID_PARAMETER;
	if (errno == EBUSY)
		return HSAKMT_STATUS_OUT_OF_RESOURCES;
	if (errno == ENODEV)
		return HSAKMT_STATUS_NOT_SUPPORTED;

	return HSAKMT_STATUS_ERROR;
}
+402
View File
@@ -0,0 +1,402 @@
/*
* Copyright (C) 2002-2018 Igor Sysoev
* Copyright (C) 2011-2018 Nginx, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "rbtree.h"
/* Forward declarations of the rotation helpers used by the insert and delete
 * rebalancing code below. Both take the address of the tree's root pointer,
 * the tree's sentinel node and the node to rotate around.
 */
static inline void rbtree_left_rotate(rbtree_node_t **root,
rbtree_node_t *sentinel, rbtree_node_t *node);
static inline void rbtree_right_rotate(rbtree_node_t **root,
rbtree_node_t *sentinel, rbtree_node_t *node);
/* Plain binary-search-tree insertion of node below temp.
 *
 * Walks down from temp, going left when node's key compares less than the
 * current node's key and right otherwise, until a sentinel link is found.
 * node is attached there as a red leaf whose children are the sentinel.
 */
static void
rbtree_insert_value(rbtree_node_t *temp, rbtree_node_t *node,
	rbtree_node_t *sentinel)
{
	rbtree_node_t *parent;
	rbtree_node_t *next = temp;
	rbtree_node_t **slot;

	do {
		parent = next;

		if (rbtree_key_compare(LKP_ALL, &node->key, &parent->key) < 0)
			slot = &parent->left;
		else
			slot = &parent->right;

		next = *slot;
	} while (next != sentinel);

	*slot = node;
	node->parent = parent;
	node->left = sentinel;
	node->right = sentinel;
	rbt_red(node);
}
/* Inserts node into tree and restores the red-black colouring.
 *
 * An empty tree (root is the sentinel) simply gets node as its black root.
 * Otherwise node is inserted as a red leaf by rbtree_insert_value() and the
 * loop below repairs any red node with a red parent, walking upwards.
 */
void
rbtree_insert(rbtree_t *tree, rbtree_node_t *node)
{
rbtree_node_t **root, *temp, *sentinel;
/* a binary tree insert */
root = &tree->root;
sentinel = &tree->sentinel;
if (*root == sentinel) {
node->parent = NULL;
node->left = sentinel;
node->right = sentinel;
rbt_black(node);
*root = node;
return;
}
rbtree_insert_value(*root, node, sentinel);
/* re-balance tree */
while (node != *root && rbt_is_red(node->parent)) {
/* parent is a left child: temp is the "uncle" on the right */
if (node->parent == node->parent->parent->left) {
temp = node->parent->parent->right;
if (rbt_is_red(temp)) {
/* red uncle: recolour and continue from the grandparent */
rbt_black(node->parent);
rbt_black(temp);
rbt_red(node->parent->parent);
node = node->parent->parent;
} else {
/* black uncle: rotate node into the outer position if needed,
 * then recolour and rotate the grandparent
 */
if (node == node->parent->right) {
node = node->parent;
rbtree_left_rotate(root, sentinel, node);
}
rbt_black(node->parent);
rbt_red(node->parent->parent);
rbtree_right_rotate(root, sentinel, node->parent->parent);
}
} else {
/* mirror image: parent is a right child, uncle is on the left */
temp = node->parent->parent->left;
if (rbt_is_red(temp)) {
rbt_black(node->parent);
rbt_black(temp);
rbt_red(node->parent->parent);
node = node->parent->parent;
} else {
if (node == node->parent->left) {
node = node->parent;
rbtree_right_rotate(root, sentinel, node);
}
rbt_black(node->parent);
rbt_red(node->parent->parent);
rbtree_left_rotate(root, sentinel, node->parent->parent);
}
}
}
/* the root is always black */
rbt_black(*root);
}
/*
 * Unlink 'node' from 'tree' and restore the red-black colouring.
 *
 * Local roles:
 *   subst - the node that is physically removed from its position: 'node'
 *           itself when it has at most one real child, otherwise the minimum
 *           of node's right subtree, which then takes node's place.
 *   temp  - the child (possibly the sentinel) that moves into subst's slot.
 *   w     - sibling of 'temp' during the fix-up loop.
 *
 * Note that temp->parent is written even when 'temp' is the sentinel; the
 * fix-up loop relies on that to find temp's parent.
 * The fields of 'node' itself are left untouched after it is unlinked.
 */
void
rbtree_delete(rbtree_t *tree, rbtree_node_t *node)
{
unsigned int red;
rbtree_node_t **root, *sentinel, *subst, *temp, *w;
/* a binary tree delete */
root = &tree->root;
sentinel = &tree->sentinel;
/* choose the node to unlink (subst) and the child that replaces it (temp) */
if (node->left == sentinel) {
temp = node->right;
subst = node;
} else if (node->right == sentinel) {
temp = node->left;
subst = node;
} else {
subst = rbtree_min(node->right, sentinel);
if (subst->left != sentinel) {
temp = subst->left;
} else {
temp = subst->right;
}
}
/* removing the root with at most one child: the child becomes the black
 * root; temp->parent is not updated on this path */
if (subst == *root) {
*root = temp;
rbt_black(temp);
return;
}
/* remember subst's colour before it may inherit node's colour below */
red = rbt_is_red(subst);
/* detach subst from its parent, putting temp in its slot */
if (subst == subst->parent->left) {
subst->parent->left = temp;
} else {
subst->parent->right = temp;
}
if (subst == node) {
temp->parent = subst->parent;
} else {
/* subst is node's in-order successor: fix temp's parent, then move subst
 * into node's position with node's links and colour */
if (subst->parent == node) {
/* subst is node's direct right child and keeps temp as its child */
temp->parent = subst;
} else {
temp->parent = subst->parent;
}
subst->left = node->left;
subst->right = node->right;
subst->parent = node->parent;
rbt_copy_color(subst, node);
if (node == *root) {
*root = subst;
} else {
if (node == node->parent->left) {
node->parent->left = subst;
} else {
node->parent->right = subst;
}
}
/* re-parent the inherited children; the sentinel's parent is left alone */
if (subst->left != sentinel) {
subst->left->parent = subst;
}
if (subst->right != sentinel) {
subst->right->parent = subst;
}
}
/* unlinking a red node cannot unbalance the black heights */
if (red) {
return;
}
/* a delete fixup */
/* 'temp' carries a missing black; push it up the tree or resolve it with
 * rotations until temp is the root or red */
while (temp != *root && rbt_is_black(temp)) {
if (temp == temp->parent->left) {
w = temp->parent->right;
/* red sibling: rotate so that temp gets a black sibling */
if (rbt_is_red(w)) {
rbt_black(w);
rbt_red(temp->parent);
rbtree_left_rotate(root, sentinel, temp->parent);
w = temp->parent->right;
}
/* black sibling with two black children: recolour and move up */
if (rbt_is_black(w->left) && rbt_is_black(w->right)) {
rbt_red(w);
temp = temp->parent;
} else {
/* make sure the sibling's far (right) child is the red one */
if (rbt_is_black(w->right)) {
rbt_black(w->left);
rbt_red(w);
rbtree_right_rotate(root, sentinel, w);
w = temp->parent->right;
}
/* final rotation; setting temp to the root ends the loop */
rbt_copy_color(w, temp->parent);
rbt_black(temp->parent);
rbt_black(w->right);
rbtree_left_rotate(root, sentinel, temp->parent);
temp = *root;
}
} else {
/* mirror image of the branch above: temp is a right child */
w = temp->parent->left;
if (rbt_is_red(w)) {
rbt_black(w);
rbt_red(temp->parent);
rbtree_right_rotate(root, sentinel, temp->parent);
w = temp->parent->left;
}
if (rbt_is_black(w->left) && rbt_is_black(w->right)) {
rbt_red(w);
temp = temp->parent;
} else {
if (rbt_is_black(w->left)) {
rbt_black(w->right);
rbt_red(w);
rbtree_left_rotate(root, sentinel, w);
w = temp->parent->left;
}
rbt_copy_color(w, temp->parent);
rbt_black(temp->parent);
rbt_black(w->left);
rbtree_right_rotate(root, sentinel, temp->parent);
temp = *root;
}
}
}
rbt_black(temp);
}
/*
 * Rotate left around 'node'.
 *
 * node's right child takes node's position (becoming *root when node was the
 * root) and node becomes that child's left child. The child's former left
 * subtree is re-attached as node's right subtree; the sentinel's parent
 * pointer is never written.
 */
static inline void
rbtree_left_rotate(rbtree_node_t **root, rbtree_node_t *sentinel,
                   rbtree_node_t *node)
{
    rbtree_node_t *pivot = node->right;
    rbtree_node_t *inner = pivot->left;

    /* hand the pivot's inner subtree over to node */
    node->right = inner;
    if (inner != sentinel)
        inner->parent = node;

    /* put the pivot where node used to be */
    pivot->parent = node->parent;
    if (node == *root)
        *root = pivot;
    else if (node == node->parent->left)
        node->parent->left = pivot;
    else
        node->parent->right = pivot;

    /* node drops below the pivot */
    pivot->left = node;
    node->parent = pivot;
}
/*
 * Rotate right around 'node' (mirror image of rbtree_left_rotate()).
 *
 * node's left child takes node's position (becoming *root when node was the
 * root) and node becomes that child's right child. The child's former right
 * subtree is re-attached as node's left subtree; the sentinel's parent
 * pointer is never written.
 */
static inline void
rbtree_right_rotate(rbtree_node_t **root, rbtree_node_t *sentinel,
                    rbtree_node_t *node)
{
    rbtree_node_t *pivot = node->left;
    rbtree_node_t *inner = pivot->right;

    /* hand the pivot's inner subtree over to node */
    node->left = inner;
    if (inner != sentinel)
        inner->parent = node;

    /* put the pivot where node used to be */
    pivot->parent = node->parent;
    if (node == *root)
        *root = pivot;
    else if (node == node->parent->right)
        node->parent->right = pivot;
    else
        node->parent->left = pivot;

    /* node drops below the pivot */
    pivot->right = node;
    node->parent = pivot;
}
/*
 * Return the in-order successor of 'node', or NULL when 'node' is the last
 * node of 'tree'.
 *
 * With a right subtree the successor is that subtree's minimum. Otherwise
 * climb towards the root until a node is reached that is its parent's left
 * child; that parent is the successor. Reaching the root means there is none.
 */
rbtree_node_t *
rbtree_next(rbtree_t *tree, rbtree_node_t *node)
{
    rbtree_node_t *sentinel = &tree->sentinel;
    rbtree_node_t *root;

    if (node->right != sentinel)
        return rbtree_min(node->right, sentinel);

    root = tree->root;
    while (node != root) {
        rbtree_node_t *parent = node->parent;

        if (node == parent->left)
            return parent;

        node = parent;
    }

    return NULL;
}
/*
 * Return the in-order predecessor of 'node', or NULL when 'node' is the
 * first node of 'tree'.
 *
 * With a left subtree the predecessor is that subtree's maximum. Otherwise
 * climb towards the root until a node is reached that is its parent's right
 * child; that parent is the predecessor. Reaching the root means there is
 * none.
 */
rbtree_node_t *
rbtree_prev(rbtree_t *tree, rbtree_node_t *node)
{
    rbtree_node_t *sentinel = &tree->sentinel;
    rbtree_node_t *root;

    if (node->left != sentinel)
        return rbtree_max(node->left, sentinel);

    root = tree->root;
    while (node != root) {
        rbtree_node_t *parent = node->parent;

        if (node == parent->right)
            return parent;

        node = parent;
    }

    return NULL;
}
+94
View File
@@ -0,0 +1,94 @@
/*
* Copyright (C) 2002-2018 Igor Sysoev
* Copyright (C) 2011-2018 Nginx, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _RBTREE_H_
#define _RBTREE_H_
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <errno.h>
#include "rbtree_amd.h"
/*
 * A single tree node. rbtree_insert() overwrites left/right/parent/color,
 * so only 'key' has to be filled in before insertion.
 */
typedef struct rbtree_node_s rbtree_node_t;
struct rbtree_node_s {
/* ordering key, compared with rbtree_key_compare() */
rbtree_key_t key;
/* children; the owning tree's sentinel marks "no child" */
rbtree_node_t *left;
rbtree_node_t *right;
/* parent link; rbtree_insert() sets it to NULL for the first (root) node */
rbtree_node_t *parent;
/* 1 = red, 0 = black (see rbt_red() / rbt_black()) */
unsigned char color;
/* not touched by the tree code visible here - presumably caller payload;
 * TODO confirm against users of this field */
unsigned char data;
};
/*
 * Tree handle: 'root' points at the sentinel while the tree is empty, and
 * 'sentinel' is the per-tree black leaf that stands in for every absent
 * child.
 */
typedef struct rbtree_s rbtree_t;
struct rbtree_s {
rbtree_node_t *root;
rbtree_node_t sentinel;
};
/*
 * Initialise 'tree' as an empty tree: colour the sentinel black and point
 * the root at it.
 *
 * The two statements are wrapped in do { } while (0) so the macro expands to
 * exactly one statement; invoke it as "rbtree_init(t);". Without the wrapper
 * only the first statement would be governed by an unbraced "if", and the
 * stray trailing ';' would break an "if (...) rbtree_init(t); else ...".
 * Note that 'tree' is evaluated twice.
 */
#define rbtree_init(tree)                                                     \
    do {                                                                      \
        rbtree_sentinel_init(&(tree)->sentinel);                              \
        (tree)->root = &(tree)->sentinel;                                     \
    } while (0)
void rbtree_insert(rbtree_t *tree, rbtree_node_t *node);
void rbtree_delete(rbtree_t *tree, rbtree_node_t *node);
rbtree_node_t *rbtree_prev(rbtree_t *tree,
rbtree_node_t *node);
rbtree_node_t *rbtree_next(rbtree_t *tree,
rbtree_node_t *node);
/*
 * Node colour helpers. 'color' is 1 for red and 0 for black; every macro
 * takes pointer expressions and evaluates each argument once.
 */
#define rbt_red(node)               ((node)->color = 1)
#define rbt_black(node)             ((node)->color = 0)
#define rbt_is_red(node)            ((node)->color)
#define rbt_is_black(node)          (!rbt_is_red(node))
/* copy the colour of n2 into n1; arguments are parenthesised so that
 * expressions such as "&a" or "p + 1" expand correctly */
#define rbt_copy_color(n1, n2)      ((n1)->color = (n2)->color)
/* a sentinel must be black */
#define rbtree_sentinel_init(node)  rbt_black(node)
/*
 * Return the left-most node of the subtree rooted at 'node'.
 * 'node' itself must not be the sentinel.
 */
static inline rbtree_node_t *
rbtree_min(rbtree_node_t *node, rbtree_node_t *sentinel)
{
    for (; node->left != sentinel; node = node->left) {
        /* keep following left links */
    }

    return node;
}
#include "rbtree_amd.h"
#endif
+155
View File
@@ -0,0 +1,155 @@
/*
* Copyright © 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _RBTREE_AMD_H_
#define _RBTREE_AMD_H_
/*
 * Key stored in every tree node: an (addr, size) pair. rbtree_key_compare()
 * orders keys by addr first and size second, restricted to the fields
 * selected by its 'type' bit mask.
 */
typedef struct rbtree_key_s rbtree_key_t;
struct rbtree_key_s {
/* bit positions used in the 'type' mask: one bit per key field */
#define ADDR_BIT 0
#define SIZE_BIT 1
unsigned long addr;
unsigned long size;
};
/* single-bit mask helper (signed int arithmetic, so keep x below 31) */
#define BIT(x) (1<<(x))
/* compare both fields; this is what rbtree_insert_value() uses */
#define LKP_ALL (BIT(ADDR_BIT) | BIT(SIZE_BIT))
/* compare the address only */
#define LKP_ADDR (BIT(ADDR_BIT))
/* same value as LKP_ALL */
#define LKP_ADDR_SIZE (BIT(ADDR_BIT) | BIT(SIZE_BIT))
static inline rbtree_key_t
rbtree_key(unsigned long addr, unsigned long size)
{
return (rbtree_key_t){addr, size};
}
/*
 * Three-way comparison of two keys.
 *
 * 'type' is a bit mask (ADDR_BIT / SIZE_BIT) selecting the fields to look
 * at. Selected fields are checked in the order addr, size; the first one
 * that differs decides the result.
 *
 * Returns -1 when key1 sorts before key2, 1 when it sorts after, and 0 when
 * all selected fields are equal (always 0 for an empty mask).
 */
static inline int
rbtree_key_compare(unsigned int type, rbtree_key_t *key1, rbtree_key_t *key2)
{
	const unsigned int by_addr = type & (1 << ADDR_BIT);
	const unsigned int by_size = type & (1 << SIZE_BIT);

	if (by_addr && key1->addr != key2->addr)
		return (key1->addr < key2->addr) ? -1 : 1;

	if (by_size && key1->size != key2->size)
		return (key1->size < key2->size) ? -1 : 1;

	return 0;
}
#endif /*_RBTREE_AMD_H_*/
/* Include this file a second time, when RBTREE_HELPER is already defined, to get the helpers below. */
#ifndef RBTREE_HELPER
#define RBTREE_HELPER
#else
#ifndef _RBTREE_AMD_H_HELPER_
#define _RBTREE_AMD_H_HELPER_
/*
 * Return the right-most node of the subtree rooted at 'node'.
 * 'node' itself must not be the sentinel.
 */
static inline rbtree_node_t *
rbtree_max(rbtree_node_t *node, rbtree_node_t *sentinel)
{
	for (; node->right != sentinel; node = node->right) {
		/* keep following right links */
	}

	return node;
}
/* Direction selectors shared by the lookup helpers below. */
#define LEFT 0
#define RIGHT 1
#define MID 2

/*
 * Return the smallest (lr == LEFT) or largest (lr == RIGHT) node of 'tree'.
 * Any other value of 'lr' yields the root node. Returns NULL for an empty
 * tree.
 */
static inline rbtree_node_t *
rbtree_min_max(rbtree_t *tree, int lr)
{
	rbtree_node_t *nil = &tree->sentinel;
	rbtree_node_t *top = tree->root;

	if (top == nil)
		return NULL;

	switch (lr) {
	case LEFT:
		return rbtree_min(top, nil);
	case RIGHT:
		return rbtree_max(top, nil);
	default:
		return top;
	}
}
/*
 * Pick one node of 'tree': the root for lmr == MID, otherwise whatever
 * rbtree_min_max() returns for 'lmr' (minimum for LEFT, maximum for RIGHT).
 * Returns NULL for an empty tree.
 */
static inline rbtree_node_t *
rbtree_node_any(rbtree_t *tree, int lmr)
{
	rbtree_node_t *top = tree->root;

	if (top == &tree->sentinel)
		return NULL;

	return (lmr == MID) ? top : rbtree_min_max(tree, lmr);
}
/*
 * Search 'rbtree' for 'key', comparing only the fields selected by 'type'.
 *
 * An exact match is returned as soon as it is found. Without one, the
 * result depends on 'lr':
 *   RIGHT - the last node from which the search descended left, i.e. the
 *           closest node whose key is greater than 'key';
 *   LEFT  - the last node from which the search descended right, i.e. the
 *           closest node whose key is lower than 'key';
 *   other - NULL.
 * NULL is also returned when no such neighbour exists.
 */
static inline rbtree_node_t *
rbtree_lookup_nearest(rbtree_t *rbtree, rbtree_key_t *key,
		      unsigned int type, int lr)
{
	rbtree_node_t *nil = &rbtree->sentinel;
	rbtree_node_t *cur = rbtree->root;
	rbtree_node_t *best = NULL;

	while (cur != nil) {
		int order = rbtree_key_compare(type, key, &cur->key);

		if (order == 0)
			return cur;

		if (order < 0) {
			if (lr == RIGHT)
				best = cur;
			cur = cur->left;
		} else {
			if (lr == LEFT)
				best = cur;
			cur = cur->right;
		}
	}

	return best;
}
/*
 * Exact-match lookup: return the node whose key equals 'key' on the fields
 * selected by 'type', or NULL when there is none.
 */
static inline rbtree_node_t *
rbtree_lookup(rbtree_t *rbtree, rbtree_key_t *key,
	      unsigned int type)
{
	/* -1 is neither LEFT nor RIGHT, so no neighbour is ever remembered */
	const int exact_only = -1;

	return rbtree_lookup_nearest(rbtree, key, type, exact_only);
}
#endif /*_RBTREE_AMD_H_HELPER_*/
#endif /*RBTREE_HELPER*/
+103
View File
@@ -0,0 +1,103 @@
/*
* Copyright © 2020 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "libhsakmt.h"
#include "hsakmt/linux/kfd_ioctl.h"
#include <stdlib.h>
#include <stdio.h>
/*
 * Issue the KFD_IOCTL_SPM_OP_ACQUIRE operation for the GPU behind
 * 'PreferredNode'.
 *
 * Returns the validate_nodeid() status when the node ID is rejected,
 * otherwise the value returned by kmtIoctl().
 * NOTE(review): the raw kmtIoctl() result is handed back as HSAKMT_STATUS
 * without being mapped to a status code - confirm that callers only test it
 * against HSAKMT_STATUS_SUCCESS.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSPMAcquire(HSAuint32 PreferredNode)
{
	int ret;
	struct kfd_ioctl_spm_args args = {0};
	uint32_t gpu_id;

	ret = validate_nodeid(PreferredNode, &gpu_id);
	if (ret != HSAKMT_STATUS_SUCCESS) {
		/* node IDs are unsigned, so print them with %u */
		pr_err("[%s] invalid node ID: %u\n", __func__, PreferredNode);
		return ret;
	}

	args.op = KFD_IOCTL_SPM_OP_ACQUIRE;
	args.gpu_id = gpu_id;

	ret = kmtIoctl(kfd_fd, AMDKFD_IOC_RLC_SPM, &args);

	return ret;
}
/*
 * Issue the KFD_IOCTL_SPM_OP_SET_DEST_BUF operation for the GPU behind
 * 'PreferredNode'.
 *
 * PreferredNode     - node ID, translated with validate_nodeid()
 * SizeInBytes       - passed to the driver as args.buf_size
 * timeout           - in/out: passed as args.timeout and overwritten with
 *                     the value the driver leaves there
 * SizeCopied        - out: args.bytes_copied after the ioctl
 * DestMemoryAddress - passed to the driver as args.dest_buf
 * isSPMDataLoss     - out: args.has_data_loss after the ioctl
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when an in/out pointer is NULL,
 * the validate_nodeid() status when the node ID is rejected, otherwise the
 * value returned by kmtIoctl(). The three outputs are written back whatever
 * the ioctl result is.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSPMSetDestBuffer(HSAuint32 PreferredNode,
						HSAuint32 SizeInBytes,
						HSAuint32 * timeout,
						HSAuint32 * SizeCopied,
						void *DestMemoryAddress,
						bool *isSPMDataLoss)
{
	int ret;
	struct kfd_ioctl_spm_args args = {0};
	uint32_t gpu_id;

	/* all three are dereferenced unconditionally below */
	if (!timeout || !SizeCopied || !isSPMDataLoss)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	/*
	 * Bail out on an invalid node, like hsaKmtSPMAcquire()/Release() do;
	 * otherwise an uninitialised gpu_id would be sent to the driver.
	 */
	ret = validate_nodeid(PreferredNode, &gpu_id);
	if (ret != HSAKMT_STATUS_SUCCESS) {
		pr_err("[%s] invalid node ID: %u\n", __func__, PreferredNode);
		return ret;
	}

	args.timeout = *timeout;
	args.dest_buf = (uint64_t)DestMemoryAddress;
	args.buf_size = SizeInBytes;
	args.op = KFD_IOCTL_SPM_OP_SET_DEST_BUF;
	args.gpu_id = gpu_id;

	ret = kmtIoctl(kfd_fd, AMDKFD_IOC_RLC_SPM, &args);

	*SizeCopied = args.bytes_copied;
	*isSPMDataLoss = args.has_data_loss;
	*timeout = args.timeout;

	return ret;
}
/*
 * Issue the KFD_IOCTL_SPM_OP_RELEASE operation for the GPU behind
 * 'PreferredNode'.
 *
 * Returns the validate_nodeid() status when the node ID is rejected,
 * otherwise the value returned by kmtIoctl().
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtSPMRelease(HSAuint32 PreferredNode)
{
	int ret = HSAKMT_STATUS_SUCCESS;
	struct kfd_ioctl_spm_args args = {0};
	uint32_t gpu_id;

	ret = validate_nodeid(PreferredNode, &gpu_id);
	if (ret != HSAKMT_STATUS_SUCCESS) {
		/* node IDs are unsigned, so print them with %u */
		pr_err("[%s] invalid node ID: %u\n", __func__, PreferredNode);
		return ret;
	}

	args.op = KFD_IOCTL_SPM_OP_RELEASE;
	args.gpu_id = gpu_id;

	ret = kmtIoctl(kfd_fd, AMDKFD_IOC_RLC_SPM, &args);

	return ret;
}
+227
View File
@@ -0,0 +1,227 @@
/*
* Copyright © 2020 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "libhsakmt.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <errno.h>
/* Helper functions for calling KFD SVM ioctl */
/*
 * Apply 'nattr' SVM attributes to the range [start_addr, start_addr + size)
 * through the KFD_IOCTL_SVM_OP_SET_ATTR operation.
 *
 * start_addr and size must be non-zero and multiples of PAGE_SIZE, otherwise
 * HSAKMT_STATUS_INVALID_PARAMETER is returned. 'attrs' itself is not
 * modified: the attributes are copied into a stack buffer and node IDs are
 * translated there.
 *
 * Returns HSAKMT_STATUS_SUCCESS, the validate_nodeid() status for a bad node
 * ID, HSAKMT_STATUS_INVALID_NODE_UNIT when an access attribute names a node
 * whose translated ID is 0, or HSAKMT_STATUS_ERROR when the ioctl fails.
 */
HSAKMT_STATUS HSAKMTAPI
hsaKmtSVMSetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
HSA_SVM_ATTRIBUTE *attrs)
{
struct kfd_ioctl_svm_args *args;
HSAuint64 s_attr;
HSAKMT_STATUS r;
HSAuint32 i;
/* presumably both macros return early with an error status - confirm
 * against their definitions */
CHECK_KFD_OPEN();
CHECK_KFD_MINOR_VERSION(5);
pr_debug("%s: address 0x%p size 0x%lx\n", __func__, start_addr, size);
if (!start_addr || !size)
return HSAKMT_STATUS_INVALID_PARAMETER;
if ((uint64_t)start_addr & (PAGE_SIZE - 1))
return HSAKMT_STATUS_INVALID_PARAMETER;
if (size & (PAGE_SIZE - 1))
return HSAKMT_STATUS_INVALID_PARAMETER;
/* ioctl argument = fixed header followed by the attribute array, built on
 * the stack */
s_attr = sizeof(*attrs) * nattr;
args = alloca(sizeof(*args) + s_attr);
args->start_addr = (uint64_t)start_addr;
args->size = size;
args->op = KFD_IOCTL_SVM_OP_SET_ATTR;
args->nattr = nattr;
memcpy(args->attrs, attrs, s_attr);
/* rewrite the value of every attribute that carries a node ID */
for (i = 0; i < nattr; i++) {
/* other attribute types are passed through unchanged */
if (attrs[i].type != KFD_IOCTL_SVM_ATTR_PREFERRED_LOC &&
attrs[i].type != KFD_IOCTL_SVM_ATTR_PREFETCH_LOC &&
attrs[i].type != KFD_IOCTL_SVM_ATTR_ACCESS &&
attrs[i].type != KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE &&
attrs[i].type != KFD_IOCTL_SVM_ATTR_NO_ACCESS)
continue;
/* INVALID_NODEID as preferred location means "no preferred location" */
if (attrs[i].type == KFD_IOCTL_SVM_ATTR_PREFERRED_LOC &&
attrs[i].value == INVALID_NODEID) {
args->attrs[i].value = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
continue;
}
/* translate the node ID into the ID the driver expects, in place in the
 * ioctl buffer */
r = validate_nodeid(attrs[i].value, &args->attrs[i].value);
if (r != HSAKMT_STATUS_SUCCESS) {
pr_debug("invalid node ID: %d\n", attrs[i].value);
return r;
} else if (!args->attrs[i].value &&
(attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS ||
attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE ||
attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS)) {
/* a translated ID of 0 is treated as the CPU node, which cannot be
 * the target of an access attribute */
pr_debug("CPU node invalid for access attribute\n");
return HSAKMT_STATUS_INVALID_NODE_UNIT;
}
}
/* Driver does one copy_from_user, with extra attrs size */
/* the size field of the ioctl number is enlarged by s_attr accordingly */
r = kmtIoctl(kfd_fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args);
if (r) {
pr_debug("op set range attrs failed %s\n", strerror(errno));
return HSAKMT_STATUS_ERROR;
}
return HSAKMT_STATUS_SUCCESS;
}
/*
 * Query 'nattr' SVM attributes of the range [start_addr, start_addr + size)
 * through the KFD_IOCTL_SVM_OP_GET_ATTR operation.
 *
 * 'attrs' is both input and output: each entry's type (and, for the access
 * attributes, the node ID in 'value') selects what to query, and on success
 * the entries are overwritten with the driver's answer, with location and
 * access values converted back to node IDs.
 *
 * start_addr and size must be non-zero and multiples of PAGE_SIZE, otherwise
 * HSAKMT_STATUS_INVALID_PARAMETER is returned. Other failures return the
 * validate_nodeid()/gpuid_to_nodeid() status, HSAKMT_STATUS_INVALID_NODE_UNIT
 * or HSAKMT_STATUS_ERROR (ioctl failure).
 */
HSAKMT_STATUS HSAKMTAPI
hsaKmtSVMGetAttr(void *start_addr, HSAuint64 size, unsigned int nattr,
HSA_SVM_ATTRIBUTE *attrs)
{
struct kfd_ioctl_svm_args *args;
HSAuint64 s_attr;
HSAKMT_STATUS r;
HSAuint32 i;
/* presumably both macros return early with an error status - confirm
 * against their definitions */
CHECK_KFD_OPEN();
CHECK_KFD_MINOR_VERSION(5);
pr_debug("%s: address 0x%p size 0x%lx\n", __func__, start_addr, size);
if (!start_addr || !size)
return HSAKMT_STATUS_INVALID_PARAMETER;
if ((uint64_t)start_addr & (PAGE_SIZE - 1))
return HSAKMT_STATUS_INVALID_PARAMETER;
if (size & (PAGE_SIZE - 1))
return HSAKMT_STATUS_INVALID_PARAMETER;
/* ioctl argument = fixed header followed by the attribute array, built on
 * the stack */
s_attr = sizeof(*attrs) * nattr;
args = alloca(sizeof(*args) + s_attr);
args->start_addr = (uint64_t)start_addr;
args->size = size;
args->op = KFD_IOCTL_SVM_OP_GET_ATTR;
args->nattr = nattr;
memcpy(args->attrs, attrs, s_attr);
/* on input only the access attributes carry a node ID to translate */
for (i = 0; i < nattr; i++) {
if (attrs[i].type != KFD_IOCTL_SVM_ATTR_ACCESS &&
attrs[i].type != KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE &&
attrs[i].type != KFD_IOCTL_SVM_ATTR_NO_ACCESS)
continue;
r = validate_nodeid(attrs[i].value, &args->attrs[i].value);
if (r != HSAKMT_STATUS_SUCCESS) {
pr_debug("invalid node ID: %d\n", attrs[i].value);
return r;
} else if (!args->attrs[i].value) {
/* a translated ID of 0 is treated as the CPU node */
pr_debug("CPU node invalid for access attribute\n");
return HSAKMT_STATUS_INVALID_NODE_UNIT;
}
}
/* Driver does one copy_from_user, with extra attrs size */
/* the size field of the ioctl number is enlarged by s_attr accordingly */
r = kmtIoctl(kfd_fd, AMDKFD_IOC_SVM + (s_attr << _IOC_SIZESHIFT), args);
if (r) {
pr_debug("op get range attrs failed %s\n", strerror(errno));
return HSAKMT_STATUS_ERROR;
}
/* hand the driver's answer back to the caller ... */
memcpy(attrs, args->attrs, s_attr);
/* ... and convert every location/access value back into a node ID */
for (i = 0; i < nattr; i++) {
if (attrs[i].type != KFD_IOCTL_SVM_ATTR_PREFERRED_LOC &&
attrs[i].type != KFD_IOCTL_SVM_ATTR_PREFETCH_LOC &&
attrs[i].type != KFD_IOCTL_SVM_ATTR_ACCESS &&
attrs[i].type != KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE &&
attrs[i].type != KFD_IOCTL_SVM_ATTR_NO_ACCESS)
continue;
switch (attrs[i].value) {
case KFD_IOCTL_SVM_LOCATION_SYSMEM:
/* system memory is reported as node 0 */
attrs[i].value = 0;
break;
case KFD_IOCTL_SVM_LOCATION_UNDEFINED:
attrs[i].value = INVALID_NODEID;
break;
default:
/* anything else is taken to be a GPU ID and converted in place */
r = gpuid_to_nodeid(attrs[i].value, &attrs[i].value);
if (r != HSAKMT_STATUS_SUCCESS) {
pr_debug("invalid GPU ID: %d\n",
attrs[i].value);
return r;
}
}
}
return HSAKMT_STATUS_SUCCESS;
}
/*
 * Common worker for the XNACK mode setter and getter.
 *
 * '*enable' is sent to the driver as args.xnack_enabled through
 * AMDKFD_IOC_SET_XNACK_MODE and, on success, overwritten with the value the
 * driver leaves in that field.
 *
 * Returns HSAKMT_STATUS_SUCCESS, HSAKMT_STATUS_NOT_SUPPORTED when the ioctl
 * fails with EPERM, or HSAKMT_STATUS_ERROR for any other ioctl failure
 * (EBUSY is only logged differently).
 */
static HSAKMT_STATUS
hsaKmtSetGetXNACKMode(HSAint32 * enable)
{
	struct kfd_ioctl_set_xnack_mode_args args;
	int rc;

	CHECK_KFD_OPEN();
	CHECK_KFD_MINOR_VERSION(5);

	args.xnack_enabled = *enable;

	rc = kmtIoctl(kfd_fd, AMDKFD_IOC_SET_XNACK_MODE, &args);
	if (!rc) {
		*enable = args.xnack_enabled;
		return HSAKMT_STATUS_SUCCESS;
	}

	switch (errno) {
	case EPERM:
		pr_debug("set mode not supported %s\n",
			 strerror(errno));
		return HSAKMT_STATUS_NOT_SUPPORTED;
	case EBUSY:
		pr_debug("kmtIoctl queues not empty %s\n",
			 strerror(errno));
		break;
	default:
		break;
	}

	return HSAKMT_STATUS_ERROR;
}
/*
 * Request XNACK mode 'enable'. The value is passed by address to the common
 * worker; whatever the driver writes back is discarded with the local copy.
 */
HSAKMT_STATUS HSAKMTAPI
hsaKmtSetXNACKMode(HSAint32 enable)
{
	HSAKMT_STATUS status = hsaKmtSetGetXNACKMode(&enable);

	return status;
}
/*
 * Query the current XNACK mode into '*enable'.
 *
 * '*enable' is preset to -1 and handed to the common worker, which replaces
 * it with the driver's answer on success; on failure it stays at -1.
 * NOTE(review): -1 presumably tells the driver "query only" - confirm
 * against the kernel interface.
 */
HSAKMT_STATUS HSAKMTAPI
hsaKmtGetXNACKMode(HSAint32 * enable)
{
	HSAKMT_STATUS status;

	*enable = -1;
	status = hsaKmtSetGetXNACKMode(enable);

	return status;
}
+57
View File
@@ -0,0 +1,57 @@
/*
* Copyright © 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "libhsakmt.h"
#include "hsakmt/linux/kfd_ioctl.h"
/*
 * Read the clock counters of node 'NodeId' through
 * AMDKFD_IOC_GET_CLOCK_COUNTERS and store them in '*Counters'.
 *
 * Returns the validate_nodeid() status for a bad node ID,
 * HSAKMT_STATUS_ERROR when the ioctl returns a negative value (in which case
 * '*Counters' is left untouched), and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(HSAuint32 NodeId,
					       HsaClockCounters *Counters)
{
	struct kfd_ioctl_get_clock_counters_args args = {0};
	uint32_t gpu_id;
	HSAKMT_STATUS result;

	CHECK_KFD_OPEN();

	result = validate_nodeid(NodeId, &gpu_id);
	if (result != HSAKMT_STATUS_SUCCESS)
		return result;

	args.gpu_id = gpu_id;

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &args) < 0)
		return HSAKMT_STATUS_ERROR;

	Counters->GPUClockCounter = args.gpu_clock_counter;
	Counters->CPUClockCounter = args.cpu_clock_counter;
	Counters->SystemClockCounter = args.system_clock_counter;
	Counters->SystemClockFrequencyHz = args.system_clock_freq;

	/* still HSAKMT_STATUS_SUCCESS from the node validation above */
	return result;
}
File diff suppressed because it is too large Load Diff
+56
View File
@@ -0,0 +1,56 @@
/*
* Copyright © 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including
* the next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "libhsakmt.h"
#include <stdlib.h>
#include <string.h>
#include "hsakmt/linux/kfd_ioctl.h"
/* Kernel interface version, filled in by init_kfd_version(). */
HsaVersionInfo kfd_version_info;

/*
 * Copy the cached kernel interface version into '*VersionInfo'.
 *
 * Returns HSAKMT_STATUS_INVALID_PARAMETER when 'VersionInfo' is NULL (it
 * would otherwise be dereferenced), HSAKMT_STATUS_SUCCESS otherwise. The
 * CHECK_KFD_OPEN() check runs first, so its outcome takes precedence.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtGetVersion(HsaVersionInfo *VersionInfo)
{
	CHECK_KFD_OPEN();

	if (!VersionInfo)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	*VersionInfo = kfd_version_info;

	return HSAKMT_STATUS_SUCCESS;
}
/*
 * Query the kernel interface version with AMDKFD_IOC_GET_VERSION and cache
 * it in kfd_version_info.
 *
 * Returns HSAKMT_STATUS_ERROR when the ioctl returns -1 (the cache is then
 * left untouched), HSAKMT_STATUS_DRIVER_MISMATCH when the major version is
 * not 1 (the cache is still updated), and HSAKMT_STATUS_SUCCESS otherwise.
 */
HSAKMT_STATUS init_kfd_version(void)
{
	struct kfd_ioctl_get_version_args args = {0};
	int rc = kmtIoctl(kfd_fd, AMDKFD_IOC_GET_VERSION, &args);

	if (rc == -1)
		return HSAKMT_STATUS_ERROR;

	kfd_version_info.KernelInterfaceMajorVersion = args.major_version;
	kfd_version_info.KernelInterfaceMinorVersion = args.minor_version;

	return (args.major_version == 1) ? HSAKMT_STATUS_SUCCESS
					 : HSAKMT_STATUS_DRIVER_MISMATCH;
}
View File
+261
View File
@@ -0,0 +1,261 @@
#
# Copyright (C) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
#
# If environment variable DRM_DIR or LIBHSAKMT_PATH is set, the script
# will pick up the corresponding libraries from those paths.
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
project(KFDTest)

# ---------------------------------------------------------------------------
# CPack metadata shared by the DEB and RPM generators
# ---------------------------------------------------------------------------
set(CPACK_PACKAGE_NAME "kfdtest")
set(CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc.")
set(CPACK_PACKAGE_DESCRIPTION "This package includes kfdtest, the list of excluded tests for each ASIC, and a convenience script to run the test suite")
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Test suite for ROCK/KFD")

# Fourth component of the package version: taken from the environment
# variable ROCM_LIBPATCH_VERSION when it is defined, 99999 otherwise.
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
  set(ROCM_VERSION_FOR_PACKAGE $ENV{ROCM_LIBPATCH_VERSION})
else()
  set(ROCM_VERSION_FOR_PACKAGE "99999")
endif()

set(CPACK_PACKAGE_VERSION_MAJOR "1")
set(CPACK_PACKAGE_VERSION_MINOR "0")
set(CPACK_PACKAGE_VERSION_PATCH "0")
set(CPACK_PACKAGE_HOMEPAGE_URL "https://github.com/ROCm/ROCT-Thunk-Interface")

# Let CPack pick the distribution's default package file names.
set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT")
set(CPACK_RPM_FILE_NAME "RPM-DEFAULT")

# Package release field: "local" unless the build environment provides one.
if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
  set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
else()
  set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
endif()

if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
  set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE})
else()
  set(CPACK_RPM_PACKAGE_RELEASE "local")
endif()

# "rpm --eval %{?dist}" prints nothing on Debian-based systems, so the dist
# tag is appended to the RPM release only where rpm reports one. For Debian
# packages the dist tag comes from the build environment instead.
execute_process(
  COMMAND rpm --eval %{?dist}
  RESULT_VARIABLE PROC_RESULT
  OUTPUT_VARIABLE EVAL_RESULT
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
message("RESULT_VARIABLE ${PROC_RESULT} OUTPUT_VARIABLE: ${EVAL_RESULT}")

if(PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "")
  string(APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}")
endif()

# Full version string: <major>.<minor>.<patch>.<rocm libpatch version>
set(PACKAGE_VERSION_STR "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.${ROCM_VERSION_FOR_PACKAGE}")
set(CPACK_PACKAGE_VERSION "${PACKAGE_VERSION_STR}")
## Define default variable and variables for the optional build target hsakmt-dev
set ( SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} CACHE STRING "Location of hsakmt source code." )

# project() has already stored CMAKE_INSTALL_PREFIX in the cache, so a plain
# set(... CACHE ...) here is silently ignored and the prefix would stay at
# CMake's built-in default. Override it only when the user did not choose a
# prefix; an explicit -DCMAKE_INSTALL_PREFIX=... is left alone.
if ( CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT )
  set ( CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Default installation directory." FORCE )
endif()
set ( CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}" CACHE STRING "Default packaging prefix." )
set ( CPACK_GENERATOR "DEB;RPM" CACHE STRING "Default packaging generators." )

# Debian package specific variables
set ( CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/ROCm/ROCT-Thunk-Interface" )
set ( CPACK_DEBIAN_PACKAGE_DEPENDS "rocm-core" )

# RPM package specific variables
set ( CPACK_RPM_PACKAGE_REQUIRES "rocm-core" )
#set ( CMAKE_VERBOSE_MAKEFILE on )
# pkg_check_modules() below cannot work without pkg-config, so fail early
# with a clear message when it is missing.
find_package(PkgConfig REQUIRED)

# Optional libdrm location: -DDRM_DIR=<prefix> takes precedence, the
# environment variable DRM_DIR is used as a fallback. Nothing is added to the
# search path when neither is set. set() is used instead of list(PREPEND)
# because the latter needs CMake 3.15 while this project declares 3.5.
set(kfdtest_drm_dir "${DRM_DIR}")
if(NOT kfdtest_drm_dir AND DEFINED ENV{DRM_DIR})
  set(kfdtest_drm_dir "$ENV{DRM_DIR}")
endif()
if(kfdtest_drm_dir)
  set(CMAKE_PREFIX_PATH "${kfdtest_drm_dir}" ${CMAKE_PREFIX_PATH})
endif()

# The module name passed to pkg_check_modules() is determined by the
# name of file *.pc
pkg_check_modules(DRM REQUIRED libdrm)
pkg_check_modules(DRM_AMDGPU REQUIRED libdrm_amdgpu)
include_directories(${DRM_AMDGPU_INCLUDE_DIRS})
# Locate libhsakmt, in this order:
#   1. environment variable LIBHSAKMT_PATH (expects <path>/lib/libhsakmt*)
#   2. pkg-config, searching <ROCM_INSTALL_PATH>/share/pkgconfig
#      (/opt/rocm/share/pkgconfig when ROCM_INSTALL_PATH is not given)
#   3. environment variable OUT_DIR as a last resort
if( DEFINED ENV{LIBHSAKMT_PATH} )
  set ( LIBHSAKMT_PATH $ENV{LIBHSAKMT_PATH} )
  message ( "LIBHSAKMT_PATH environment variable is set" )
else()
  # Test the variable by name: "if ( ${ROCM_INSTALL_PATH} )" would test the
  # expanded path, which is never a true constant, so the user's path was
  # always ignored.
  if ( ROCM_INSTALL_PATH )
    set ( ENV{PKG_CONFIG_PATH} ${ROCM_INSTALL_PATH}/share/pkgconfig )
  else()
    set ( ENV{PKG_CONFIG_PATH} /opt/rocm/share/pkgconfig )
  endif()

  pkg_check_modules(HSAKMT libhsakmt)
  if( NOT HSAKMT_FOUND )
    set ( LIBHSAKMT_PATH $ENV{OUT_DIR} )
  endif()
endif()

# An explicit path overrides whatever pkg-config may have reported.
if( DEFINED LIBHSAKMT_PATH )
  set ( HSAKMT_LIBRARY_DIRS ${LIBHSAKMT_PATH}/lib )
  set ( HSAKMT_LIBRARIES hsakmt )
endif()
message ( "Find libhsakmt at ${HSAKMT_LIBRARY_DIRS}" )
# Let find_package() honour <PackageName>_ROOT variables where supported.
if(POLICY CMP0074)
  cmake_policy(SET CMP0074 NEW)
endif()

# Prefer the Lightning (LLVM) build found under $OUT_DIR; otherwise
# find_package() falls back to LLVM_DIR or a system-wide LLVM install.
find_path(LIGHTNING_CMAKE_DIR
  NAMES LLVMConfig.cmake
  PATHS $ENV{OUT_DIR}/llvm/lib/cmake/llvm
  NO_CACHE
  NO_DEFAULT_PATH
)

if(DEFINED LIGHTNING_CMAKE_DIR AND EXISTS ${LIGHTNING_CMAKE_DIR})
  set(LLVM_DIR ${LIGHTNING_CMAKE_DIR})
else()
  message(STATUS "Couldn't find Lightning build in compute directory. "
    "Searching LLVM_DIR then defaulting to system LLVM install if still not found...")
endif()

find_package(LLVM REQUIRED CONFIG)

# LLVM older than 7.0 is rejected; anything older than 14.0 only warns.
if(${LLVM_PACKAGE_VERSION} VERSION_LESS "7.0")
  message(FATAL_ERROR "Requires LLVM 7.0 or greater "
    "(found ${LLVM_PACKAGE_VERSION})")
elseif(${LLVM_PACKAGE_VERSION} VERSION_LESS "14.0")
  message(WARNING "Not using latest LLVM version. "
    "Some ASIC targets may not work!")
endif()

message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")

# Headers, preprocessor definitions and libraries required by the shader
# assembler code.
include_directories(${LLVM_INCLUDE_DIRS})
separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
add_definitions(${LLVM_DEFINITIONS_LIST})
llvm_map_components_to_libnames(llvm_libs AMDGPUAsmParser Core Support)
# Header search path: bundled gtest, kfdtest's own headers, the thunk's
# public and private headers, then libdrm.
include_directories(
  ${PROJECT_SOURCE_DIR}/gtest-1.6.0
  ${PROJECT_SOURCE_DIR}/include
  ${PROJECT_SOURCE_DIR}/../../include
  ${PROJECT_SOURCE_DIR}/../../libhsakmt/include
  ${DRM_INCLUDE_DIRS}
)

# Every translation unit of the kfdtest executable, one per line.
set(SRC_FILES
  gtest-1.6.0/gtest-all.cpp
  src/AqlQueue.cpp
  src/BasePacket.cpp
  src/BaseDebug.cpp
  src/BaseQueue.cpp
  src/Dispatch.cpp
  src/GoogleTestExtension.cpp
  src/IndirectBuffer.cpp
  src/Assemble.cpp
  src/ShaderStore.cpp
  src/LinuxOSWrapper.cpp
  src/PM4Packet.cpp
  src/PM4Queue.cpp
  src/RDMAUtil.cpp
  src/SDMAPacket.cpp
  src/SDMAQueue.cpp
  src/KFDBaseComponentTest.cpp
  src/KFDMultiProcessTest.cpp
  src/KFDTestMain.cpp
  src/KFDTestUtil.cpp
  src/KFDTestUtilQueue.cpp
  src/KFDOpenCloseKFDTest.cpp
  src/KFDTopologyTest.cpp
  src/KFDMemoryTest.cpp
  src/KFDLocalMemoryTest.cpp
  src/KFDEventTest.cpp
  src/KFDQMTest.cpp
  src/KFDCWSRTest.cpp
  src/KFDExceptionTest.cpp
  src/KFDGraphicsInterop.cpp
  src/KFDPerfCounters.cpp
  src/KFDDBGTest.cpp
  src/KFDGWSTest.cpp
  src/KFDIPCTest.cpp
  src/KFDASMTest.cpp
  src/KFDEvictTest.cpp
  src/KFDHWSTest.cpp
  src/KFDPerformanceTest.cpp
  src/KFDPMTest.cpp
  src/KFDSVMRangeTest.cpp
  src/KFDSVMEvictTest.cpp
  src/KFDRASTest.cpp
  src/KFDPCSamplingTest.cpp
  src/RDMATest.cpp
)

message(STATUS "PROJECT_SOURCE_DIR:" ${PROJECT_SOURCE_DIR})

# Uncomment to print the source list while debugging the build:
#message( STATUS "SRC_FILES: ")
#foreach(file ${SRC_FILES})
# message(STATUS "${file}")
#endforeach()
#add_definitions(-Wall -std=c++11)
# Compare versions numerically: with STRGREATER, "10.2.0" sorts before
# "4.8.0", so every compiler from version 10 on silently lost these flags.
# The C++ compiler's version is tested because the flags are C++ flags, and
# they are appended so that flags supplied by the user are kept.
if ( CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.8.0" )
  ## Add --enable-new-dtags to generate DT_RUNPATH
  set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17 -Wl,--enable-new-dtags" )
endif()

if ( "${CMAKE_BUILD_TYPE}" STREQUAL Release )
  set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2" )
else ()
  set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g" )
endif ()

## Address Sanitize Flag
# Enabled with -DADDRESS_SANITIZER=ON. The variable is tested by name so an
# unset or empty value is simply false, and the linker flag is appended
# rather than replacing flags supplied by the user.
if ( ADDRESS_SANITIZER )
  set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address" )
  set ( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address" )
endif ()
# link_directories() only affects targets created after it, so it must stay
# ahead of add_executable(). Libraries that pkg_check_modules() found in the
# default pkg-config path need no extra link directory.
link_directories(${HSAKMT_LIBRARY_DIRS})

add_executable(kfdtest ${SRC_FILES})
target_link_libraries(kfdtest
  ${HSAKMT_LIBRARIES}
  ${DRM_LDFLAGS}
  ${DRM_AMDGPU_LDFLAGS}
  ${llvm_libs}
  pthread
  m
  stdc++
  rt
  numa
)

# Copy the exclusion list and the runner script next to the built binary.
configure_file(scripts/kfdtest.exclude kfdtest.exclude COPYONLY)
configure_file(scripts/run_kfdtest.sh run_kfdtest.sh COPYONLY)

# Executables go to <prefix>/bin ...
install(
  FILES
    ${CMAKE_CURRENT_BINARY_DIR}/kfdtest
    ${CMAKE_CURRENT_BINARY_DIR}/run_kfdtest.sh
  PERMISSIONS
    OWNER_EXECUTE OWNER_WRITE OWNER_READ
    GROUP_EXECUTE GROUP_WRITE GROUP_READ
    WORLD_EXECUTE WORLD_READ
  DESTINATION bin
)

# ... and the exclusion list to <prefix>/share/kfdtest.
install(
  FILES
    ${CMAKE_CURRENT_BINARY_DIR}/kfdtest.exclude
  PERMISSIONS
    OWNER_WRITE OWNER_READ
    GROUP_WRITE GROUP_READ
    WORLD_READ
  DESTINATION share/kfdtest
)
# Remove dependency on rocm-core if -DROCM_DEP_ROCMCORE=ON not given to cmake
# The inputs are quoted: string(REGEX REPLACE) needs at least one input
# argument, so an unquoted empty variable would be a configure error, and an
# unquoted list would have its elements concatenated before matching.
if(NOT ROCM_DEP_ROCMCORE)
  string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_PACKAGE_REQUIRES "${CPACK_RPM_PACKAGE_REQUIRES}")
  string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_PACKAGE_DEPENDS "${CPACK_DEBIAN_PACKAGE_DEPENDS}")
endif()

# Must come last: CPack snapshots the CPACK_* variables set above.
include ( CPack )
+85
View File
@@ -0,0 +1,85 @@
KFDTest - KFD unit tests LICENSE
/* The following license applies to all parts of kfdtest except the
* SP3 shader assembler. See separate license terms below.
*/
Copyright (C) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
MIT LICENSE:
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-----------------------------------------------
libamdsp3 - AMD SP3 shader assembler library LICENSE
/* The following license applies to the SP3 shader assembler, which is
* named as libamdsp3.a and will be built into the kfdtest binary.
*/
Copyright (C) 2018 Advanced Micro Devices, Inc. All rights reserved.
REDISTRIBUTION: Permission is hereby granted, free of any license fees,
to any person obtaining a copy of this program (the "Software"), to
install, reproduce, copy and distribute copies, in binary form only, of
the Software and to permit persons to whom the Software is provided to
do the same, provided that the following conditions are met:
No reverse engineering, decompilation, or disassembly of this Software
is permitted.
Redistributions must reproduce the above copyright notice, this
permission notice, and the following disclaimers and notices in the
Software documentation and/or other materials provided with the
Software.
DISCLAIMER: THE USE OF THE SOFTWARE IS AT YOUR SOLE RISK. THE SOFTWARE
IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND AND COPYRIGHT
HOLDER AND ITS LICENSORS EXPRESSLY DISCLAIM ALL WARRANTIES, EXPRESS AND
IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
COPYRIGHT HOLDER AND ITS LICENSORS DO NOT WARRANT THAT THE SOFTWARE WILL
MEET YOUR REQUIREMENTS, OR THAT THE OPERATION OF THE SOFTWARE WILL BE
UNINTERRUPTED OR ERROR-FREE. THE ENTIRE RISK ASSOCIATED WITH THE USE OF
THE SOFTWARE IS ASSUMED BY YOU. FURTHERMORE, COPYRIGHT HOLDER AND ITS
LICENSORS DO NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE
OR THE RESULTS OF THE USE OF THE SOFTWARE IN TERMS OF ITS CORRECTNESS,
ACCURACY, RELIABILITY, CURRENTNESS, OR OTHERWISE.
DISCLAIMER: UNDER NO CIRCUMSTANCES INCLUDING NEGLIGENCE, SHALL COPYRIGHT
HOLDER AND ITS LICENSORS OR ITS DIRECTORS, OFFICERS, EMPLOYEES OR AGENTS
("AUTHORIZED REPRESENTATIVES") BE LIABLE FOR ANY INCIDENTAL, INDIRECT,
SPECIAL OR CONSEQUENTIAL DAMAGES (INCLUDING DAMAGES FOR LOSS OF BUSINESS
PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, AND THE
LIKE) ARISING OUT OF THE USE, MISUSE OR INABILITY TO USE THE SOFTWARE,
BREACH OR DEFAULT, INCLUDING THOSE ARISING FROM INFRINGEMENT OR ALLEGED
INFRINGEMENT OF ANY PATENT, TRADEMARK, COPYRIGHT OR OTHER INTELLECTUAL
PROPERTY RIGHT EVEN IF COPYRIGHT HOLDER AND ITS AUTHORIZED
REPRESENTATIVES HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. IN
NO EVENT SHALL COPYRIGHT HOLDER OR ITS AUTHORIZED REPRESENTATIVES TOTAL
LIABILITY FOR ALL DAMAGES, LOSSES, AND CAUSES OF ACTION (WHETHER IN
CONTRACT, TORT (INCLUDING NEGLIGENCE) OR OTHERWISE) EXCEED THE AMOUNT OF
US$10.
Notice: The Software is subject to United States export laws and
regulations. You agree to comply with all domestic and international
export laws and regulations that apply to the Software, including but
not limited to the Export Administration Regulations administered by the
U.S. Department of Commerce and International Traffic in Arm Regulations
administered by the U.S. Department of State. These laws include
restrictions on destinations, end users and end use.
+20
View File
@@ -0,0 +1,20 @@
1. Note on building kfdtest
To build the kfdtest application, the following libraries must already be
installed on the build machine:
libdrm libdrm_amdgpu libhsakmt
If libhsakmt is not installed, but the headers and libraries are present
locally, you can specify its directory by
export LIBHSAKMT_PATH=/*your local libhsakmt folder*/
With that, the headers and libraries are searched under
LIBHSAKMT_PATH/include and LIBHSAKMT_PATH/lib respectively.
2. How to run kfdtest
Run "./run_kfdtest.sh" from the build output folder. You may need
to specify the library path with:
export LD_LIBRARY_PATH=/*your library path containing libhsakmt*/
Note: you can use "run_kfdtest.sh -h" to see more options.
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,76 @@
/*
* Copyright (C) 2015-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef AMDP2PTEST_H_
#define AMDP2PTEST_H_
#include <linux/ioctl.h>
#define AMDP2PTEST_IOCTL_MAGIC 'A'
#define AMDP2PTEST_DEVICE_NAME "amdp2ptest"
#define AMDP2PTEST_DEVICE_PATH "/dev/amdp2ptest"
/*
 * Argument block named by the AMD2P2PTEST_IOCTL_GET_PAGE_SIZE request:
 * an address range in, a page size out.
 * NOTE(review): uint64_t is not declared by this header's only include
 * (<linux/ioctl.h>); the includer presumably pulls in <stdint.h> first
 * -- confirm.
 */
struct AMDRDMA_IOCTL_GET_PAGE_SIZE_PARAM {
/* Input parameters */
uint64_t addr;
uint64_t length;
/* Output parameters */
uint64_t page_size;
};
/*
 * Argument block named by the AMD2P2PTEST_IOCTL_GET_PAGES request.
 * Carries only an address range; there are no output fields.
 */
struct AMDRDMA_IOCTL_GET_PAGES_PARAM {
/* Input parameters */
uint64_t addr;
uint64_t length;
};
/*
 * Argument block named by the AMD2P2PTEST_IOCTL_PUT_PAGES request.
 * Same layout as AMDRDMA_IOCTL_GET_PAGES_PARAM: an address range only.
 */
struct AMDRDMA_IOCTL_PUT_PAGES_PARAM {
/* Input parameters */
uint64_t addr;
uint64_t length;
};
/*
 * Argument block presumably paired with the
 * AMD2P2PTEST_IOCTL_IS_GPU_ADDRESS request -- confirm against the driver.
 * NOTE(review): ret_value sits under the "Input parameters" heading, but
 * its name suggests it is written back by the driver -- TODO confirm.
 */
struct AMDRDMA_IOCTL_IS_GPU_ADDRESS_PARAM {
/* Input parameters */
uint64_t addr;
uint64_t ret_value;
};
/*
 * ioctl request codes for the amdp2ptest device, built from
 * AMDP2PTEST_IOCTL_MAGIC and sequence numbers 1..4.
 *
 * The size argument of each request is a POINTER type, so the size encoded
 * in the request code is sizeof(pointer), not sizeof(struct). That is kept
 * unchanged here because the codes must stay identical to whatever the
 * driver side uses -- NOTE(review): confirm before changing.
 *
 * IS_GPU_ADDRESS names struct AMDRDMA_IOCTL_IS_GPU_ADDRESS_PARAM, the
 * struct this header actually declares. Only the tag spelling changes;
 * the encoded size is still that of a pointer, so the request code is the
 * same value as before.
 */
#define AMD2P2PTEST_IOCTL_GET_PAGE_SIZE \
_IOWR(AMDP2PTEST_IOCTL_MAGIC, 1, struct AMDRDMA_IOCTL_GET_PAGE_SIZE_PARAM *)
#define AMD2P2PTEST_IOCTL_GET_PAGES \
_IOWR(AMDP2PTEST_IOCTL_MAGIC, 2, struct AMDRDMA_IOCTL_GET_PAGES_PARAM *)
#define AMD2P2PTEST_IOCTL_PUT_PAGES \
_IOW(AMDP2PTEST_IOCTL_MAGIC, 3, struct AMDRDMA_IOCTL_PUT_PAGES_PARAM *)
#define AMD2P2PTEST_IOCTL_IS_GPU_ADDRESS \
_IOW(AMDP2PTEST_IOCTL_MAGIC, 4, struct AMDRDMA_IOCTL_IS_GPU_ADDRESS_PARAM *)
#endif /* AMDP2PTEST_H */
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,107 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef KFD_PM4_OPCODES_H
#define KFD_PM4_OPCODES_H
// PM4 packet opcodes.
// NOTE(review): presumably the values placed in the 8-bit opcode field of a
// type-3 packet header -- confirm against the packet builders.
// Entries are listed in ascending numeric order and are not contiguous;
// the largest value defined here is 0xA5 (IT_RUN_LIST).
enum it_opcode_type {
IT_NOP = 0x10,
IT_SET_BASE = 0x11,
IT_CLEAR_STATE = 0x12,
IT_INDEX_BUFFER_SIZE = 0x13,
IT_DISPATCH_DIRECT = 0x15,
IT_DISPATCH_INDIRECT = 0x16,
IT_ATOMIC_GDS = 0x1D,
IT_OCCLUSION_QUERY = 0x1F,
IT_SET_PREDICATION = 0x20,
IT_REG_RMW = 0x21,
IT_COND_EXEC = 0x22,
IT_PRED_EXEC = 0x23,
IT_DRAW_INDIRECT = 0x24,
IT_DRAW_INDEX_INDIRECT = 0x25,
IT_INDEX_BASE = 0x26,
IT_DRAW_INDEX_2 = 0x27,
IT_CONTEXT_CONTROL = 0x28,
IT_INDEX_TYPE = 0x2A,
IT_DRAW_INDIRECT_MULTI = 0x2C,
IT_DRAW_INDEX_AUTO = 0x2D,
IT_NUM_INSTANCES = 0x2F,
IT_DRAW_INDEX_MULTI_AUTO = 0x30,
IT_INDIRECT_BUFFER_CNST = 0x33,
IT_STRMOUT_BUFFER_UPDATE = 0x34,
IT_DRAW_INDEX_OFFSET_2 = 0x35,
IT_DRAW_PREAMBLE = 0x36,
IT_WRITE_DATA = 0x37,
IT_DRAW_INDEX_INDIRECT_MULTI = 0x38,
IT_MEM_SEMAPHORE = 0x39,
IT_COPY_DW = 0x3B,
IT_WAIT_REG_MEM = 0x3C,
IT_INDIRECT_BUFFER = 0x3F,
IT_COPY_DATA = 0x40,
IT_PFP_SYNC_ME = 0x42,
IT_SURFACE_SYNC = 0x43,
IT_COND_WRITE = 0x45,
IT_EVENT_WRITE = 0x46,
IT_EVENT_WRITE_EOP = 0x47,
IT_EVENT_WRITE_EOS = 0x48,
IT_RELEASE_MEM = 0x49,
IT_PREAMBLE_CNTL = 0x4A,
IT_DMA_DATA = 0x50,
IT_ACQUIRE_MEM = 0x58,
IT_REWIND = 0x59,
IT_LOAD_UCONFIG_REG = 0x5E,
IT_LOAD_SH_REG = 0x5F,
IT_LOAD_CONFIG_REG = 0x60,
IT_LOAD_CONTEXT_REG = 0x61,
IT_SET_CONFIG_REG = 0x68,
IT_SET_CONTEXT_REG = 0x69,
IT_SET_CONTEXT_REG_INDIRECT = 0x73,
IT_SET_SH_REG = 0x76,
IT_SET_SH_REG_OFFSET = 0x77,
IT_SET_QUEUE_REG = 0x78,
IT_SET_UCONFIG_REG = 0x79,
IT_SCRATCH_RAM_WRITE = 0x7D,
IT_SCRATCH_RAM_READ = 0x7E,
IT_LOAD_CONST_RAM = 0x80,
IT_WRITE_CONST_RAM = 0x81,
IT_DUMP_CONST_RAM = 0x83,
IT_INCREMENT_CE_COUNTER = 0x84,
IT_INCREMENT_DE_COUNTER = 0x85,
IT_WAIT_ON_CE_COUNTER = 0x86,
IT_WAIT_ON_DE_COUNTER_DIFF = 0x88,
IT_SWITCH_BUFFER = 0x8B,
IT_SET_RESOURCES = 0xA0,
IT_MAP_PROCESS = 0xA1,
IT_MAP_QUEUES = 0xA2,
IT_UNMAP_QUEUES = 0xA3,
IT_QUERY_STATUS = 0xA4,
IT_RUN_LIST = 0xA5,
};
// PM4 packet type identifiers. Only types 0, 2 and 3 are defined here;
// there is no PM4_TYPE_1 macro in this header.
// NOTE(review): presumably the value for a packet header's 2-bit type
// field -- confirm against the header definition in use.
#define PM4_TYPE_0 0
#define PM4_TYPE_2 2
#define PM4_TYPE_3 3
#endif /* KFD_PM4_OPCODES_H */
@@ -0,0 +1,160 @@
/*
* Copyright (C) 2016-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __PM4_PKT_STRUCT_AI_H__
#define __PM4_PKT_STRUCT_AI_H__
#ifndef PM4_MEC_RELEASE_MEM_AI_DEFINED
#define PM4_MEC_RELEASE_MEM_AI_DEFINED
// Value sets for the enum-typed bitfields of PM4_MEC_RELEASE_MEM_AI.
// Each enum below is used as the type of the identically named field
// (event_index, cache_policy, pq_exe_status, dst_sel, int_sel, data_sel).

// event_index field (4 bits wide in the packet); only 5 and 6 are named.
enum AI_MEC_RELEASE_MEM_event_index_enum {
event_index__mec_release_mem__end_of_pipe = 5,
event_index__mec_release_mem__shader_done = 6 };
// cache_policy field (2 bits wide in the packet).
enum AI_MEC_RELEASE_MEM_cache_policy_enum {
cache_policy__mec_release_mem__lru = 0,
cache_policy__mec_release_mem__stream = 1 };
// pq_exe_status field (1 bit wide in the packet).
enum AI_MEC_RELEASE_MEM_pq_exe_status_enum {
pq_exe_status__mec_release_mem__default = 0,
pq_exe_status__mec_release_mem__phase_update = 1 };
// dst_sel field (2 bits wide in the packet).
enum AI_MEC_RELEASE_MEM_dst_sel_enum {
dst_sel__mec_release_mem__memory_controller = 0,
dst_sel__mec_release_mem__tc_l2 = 1,
dst_sel__mec_release_mem__queue_write_pointer_register = 2,
dst_sel__mec_release_mem__queue_write_pointer_poll_mask_bit = 3 };
// int_sel field (3 bits wide in the packet).
enum AI_MEC_RELEASE_MEM_int_sel_enum {
int_sel__mec_release_mem__none = 0,
int_sel__mec_release_mem__send_interrupt_only = 1,
int_sel__mec_release_mem__send_interrupt_after_write_confirm = 2,
int_sel__mec_release_mem__send_data_after_write_confirm = 3,
int_sel__mec_release_mem__unconditionally_send_int_ctxid = 4,
int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_32_bit_compare = 5,
int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_64_bit_compare = 6 };
// data_sel field (3 bits wide in the packet).
enum AI_MEC_RELEASE_MEM_data_sel_enum {
data_sel__mec_release_mem__none = 0,
data_sel__mec_release_mem__send_32_bit_low = 1,
data_sel__mec_release_mem__send_64_bit_data = 2,
data_sel__mec_release_mem__send_gpu_clock_counter = 3,
data_sel__mec_release_mem__send_cp_perfcounter_hi_lo = 4,
data_sel__mec_release_mem__store_gds_data_to_memory = 5 };
// RELEASE_MEM packet layout (AI variant): seven unions followed by
// int_ctxid, i.e. eight words assuming unsigned int is 32 bits wide.
// Every union overlays its named fields with a raw `ordinalN` word, so a
// word can be filled field-by-field or written/cleared in one store.
// Field order and widths define the packet layout; do not reorder.
// PM4_TYPE_3_HEADER is not declared in this header and must be visible
// before this point.
typedef struct PM4_MEC_RELEASE_MEM_AI {
// Word 1: packet header.
union {
PM4_TYPE_3_HEADER header;
unsigned int ordinal1;
};
// Word 2: event selection and cache-action enables (widths sum to 32).
union {
struct {
unsigned int event_type:6;
unsigned int reserved1:2;
AI_MEC_RELEASE_MEM_event_index_enum event_index:4;
unsigned int tcl1_vol_action_ena:1;
unsigned int tc_vol_action_ena:1;
unsigned int reserved2:1;
unsigned int tc_wb_action_ena:1;
unsigned int tcl1_action_ena:1;
unsigned int tc_action_ena:1;
unsigned int reserved3:1;
unsigned int tc_nc_action_ena:1;
unsigned int tc_wc_action_ena:1;
unsigned int tc_md_action_ena:1;
unsigned int reserved4:3;
AI_MEC_RELEASE_MEM_cache_policy_enum cache_policy:2;
unsigned int reserved5:2;
AI_MEC_RELEASE_MEM_pq_exe_status_enum pq_exe_status:1;
unsigned int reserved6:2;
} bitfields2;
unsigned int ordinal2;
};
// Word 3: destination, interrupt and data selectors (widths sum to 32).
union {
struct {
unsigned int reserved7:16;
AI_MEC_RELEASE_MEM_dst_sel_enum dst_sel:2;
unsigned int reserved8:6;
AI_MEC_RELEASE_MEM_int_sel_enum int_sel:3;
unsigned int reserved9:2;
AI_MEC_RELEASE_MEM_data_sel_enum data_sel:3;
} bitfields3;
unsigned int ordinal3;
};
// Word 4: low address bits. Two alternative views: 4a reserves the low
// 2 bits, 4b reserves the low 3 bits.
// NOTE(review): presumably for 4-byte vs 8-byte aligned targets -- confirm.
union {
struct {
unsigned int reserved10:2;
unsigned int address_lo_32b:30;
} bitfields4a;
struct {
unsigned int reserved11:3;
unsigned int address_lo_64b:29;
} bitfields4b;
unsigned int reserved12;
unsigned int ordinal4;
};
// Word 5: high address bits.
union {
unsigned int address_hi;
unsigned int reserved13;
unsigned int ordinal5;
};
// Word 6: low data word, or compare value, or a dword offset/count pair.
union {
unsigned int data_lo;
unsigned int cmp_data_lo;
struct {
unsigned int dw_offset:16;
unsigned int num_dwords:16;
} bitfields6c;
unsigned int reserved14;
unsigned int ordinal6;
};
// Word 7: high data word or high compare value.
union {
unsigned int data_hi;
unsigned int cmp_data_hi;
unsigned int reserved15;
unsigned int reserved16;
unsigned int ordinal7;
};
// Word 8: plain word, no overlay.
unsigned int int_ctxid;
} PM4MEC_RELEASE_MEM_AI, *PPM4MEC_RELEASE_MEM_AI;
#endif // PM4_MEC_RELEASE_MEM_AI_DEFINED
#endif // __PM4_PKT_STRUCT_AI_H__
@@ -0,0 +1,129 @@
/*
* Copyright (C) 2012-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __PM4_PKT_STRUCT_CI_H__
#define __PM4_PKT_STRUCT_CI_H__
// Value sets for the CI-specific fields (atc, engine_sel) of PM4WRITE_DATA_CI.
enum WRITE_DATA_CI_atc_enum { atc_write_data_NOT_USE_ATC_0 = 0, atc_write_data_USE_ATC_1 = 1 };
enum WRITE_DATA_CI_engine_sel { engine_sel_write_data_ci_MICRO_ENGINE_0 = 0, engine_sel_write_data_ci_PREFETCH_PARSER_1 = 1, engine_sel_write_data_ci_CONST_ENG_2 = 2 };
// WRITE_DATA packet layout (CI variant): header word, control word,
// destination address (lo/hi), then the payload.
// PM4_TYPE_3_HEADER and the MEC_WRITE_DATA_* enums are not declared in
// this header and must be visible before this point.
// Field order and widths define the packet layout; do not reorder.
typedef struct _PM4WRITE_DATA_CI {
// Word 1: packet header, also reachable as a raw word.
union {
PM4_TYPE_3_HEADER header;
unsigned int ordinal1;
};
// Word 2: control bitfields (widths sum to 32), overlaid with ordinal2.
union {
struct {
unsigned int reserved1:8;
MEC_WRITE_DATA_dst_sel_enum dst_sel:4;
unsigned int reserved2:4;
MEC_WRITE_DATA_addr_incr_enum addr_incr:1;
unsigned int reserved3:3;
MEC_WRITE_DATA_wr_confirm_enum wr_confirm:1;
unsigned int reserved4:3;
WRITE_DATA_CI_atc_enum atc:1;
MEC_WRITE_DATA_cache_policy_enum cache_policy:2;
unsigned int volatile_setting:1;
unsigned int reserved5:2;
WRITE_DATA_CI_engine_sel engine_sel:2;
} bitfields2;
unsigned int ordinal2;
};
unsigned int dst_addr_lo;
unsigned int dst_address_hi;
// Declared with one element; per the original note the packet carries
// 1..N data words, so sizeof() covers only the first of them.
unsigned int data[1]; // 1..N of these fields
} PM4WRITE_DATA_CI, *PPM4WRITE_DATA_CI;
// Value set for the CI-specific atc field of PM4_RELEASE_MEM_CI.
enum MEC_RELEASE_MEM_CI_atc_enum { atc_mec_release_mem_ci_NOT_USE_ATC_0 = 0, atc_mec_release_mem_ci_USE_ATC_1 = 1 };
// RELEASE_MEM packet layout (CI variant): seven words assuming unsigned
// int is 32 bits wide. Unions overlay named fields with a raw `ordinalN`.
// PM4_TYPE_3_HEADER and the MEC_RELEASE_MEM_* enums are not declared in
// this header and must be visible before this point.
// Field order and widths define the packet layout; do not reorder.
typedef struct _PM4_RELEASE_MEM_CI {
// Word 1: packet header.
union {
PM4_TYPE_3_HEADER header;
unsigned int ordinal1;
};
// Word 2: event selection and cache controls (widths sum to 32).
union {
struct {
unsigned int event_type:6;
unsigned int reserved1:2;
MEC_RELEASE_MEM_event_index_enum event_index:4;
unsigned int l1_vol:1;
unsigned int l2_vol:1;
unsigned int reserved:1;
unsigned int l2_wb:1;
unsigned int l1_inv:1;
unsigned int l2_inv:1;
unsigned int reserved2:6;
MEC_RELEASE_MEM_CI_atc_enum atc:1;
MEC_RELEASE_MEM_cache_policy_enum cache_policy:2;
unsigned int volatile_setting:1;
unsigned int reserved3:4;
} bitfields2;
unsigned int ordinal2;
};
// Word 3: destination, interrupt and data selectors (widths sum to 32).
union {
struct {
unsigned int reserved4:16;
MEC_RELEASE_MEM_dst_sel_enum dst_sel:2;
unsigned int reserved5:6;
MEC_RELEASE_MEM_int_sel_enum int_sel:3;
unsigned int reserved6:2;
MEC_RELEASE_MEM_data_sel_enum data_sel:3;
} bitfields3;
unsigned int ordinal3;
};
// Word 4: low address bits; 4a reserves the low 2 bits (dword aligned),
// 4b reserves the low 3 bits (qword aligned).
union {
struct {
unsigned int reserved7:2;
unsigned int address_lo_dword_aligned:30;
} bitfields4a;
struct {
unsigned int reserved8:3;
unsigned int address_lo_qword_aligned:29;
} bitfields4b;
unsigned int ordinal4;
};
// Word 5: high address bits (plain word, no overlay).
unsigned int addr_hi;
// Word 6: low data word or an offset/count pair. Note the bitfield struct
// is named bitfields5b although the raw overlay is ordinal6.
union {
unsigned int data_lo;
struct {
unsigned int offset:16;
unsigned int num_dwords:16;
} bitfields5b;
unsigned int ordinal6;
};
// Word 7: high data word.
unsigned int data_hi;
} PM4_RELEASE_MEM_CI, *PPM4_RELEASE_MEM_CI;
#endif // __PM4_PKT_STRUCT_CI_H__
@@ -0,0 +1,366 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __PM4_PKT_STRUCT_COMMON_H__
#define __PM4_PKT_STRUCT_COMMON_H__
#ifndef PM4_HEADER_DEFINED
#define PM4_HEADER_DEFINED
// Header word of a PM4 type-3 packet. The anonymous bitfield struct and
// u32All overlay the same storage, so the header can be built per field
// or accessed as one raw word. The field widths sum to 32
// (1 + 1 + 6 + 8 + 14 + 2).
typedef union PM4_TYPE_3_HEADER
{
struct
{
unsigned int predicate : 1; ///< predicated version of packet when set
unsigned int shaderType: 1; ///< 0: Graphics, 1: Compute Shader
unsigned int reserved1 : 6; ///< reserved
unsigned int opcode : 8; ///< IT opcode
unsigned int count : 14;///< number of DWORDs - 1 in the information body.
unsigned int type : 2; ///< packet identifier. It should be 3 for type 3 packets
};
unsigned int u32All;
} PM4_TYPE_3_HEADER;
#endif // PM4_HEADER_DEFINED
//--------------------DISPATCH_DIRECT--------------------
// DISPATCH_DIRECT packet layout: header word followed by four plain words
// (dim_x, dim_y, dim_z, dispatch_initiator).
// NOTE(review): dim_* are presumably the dispatch dimensions; their unit
// is not visible here -- confirm with the packet builder.
typedef struct _PM4_DISPATCH_DIRECT
{
union
{
PM4_TYPE_3_HEADER header; ///header
unsigned int ordinal1;
};
unsigned int dim_x;
unsigned int dim_y;
unsigned int dim_z;
unsigned int dispatch_initiator;
} PM4DISPATCH_DIRECT, *PPM4DISPATCH_DIRECT;
//--------------------INDIRECT_BUFFER--------------------
// Values for the 2-bit cache_policy field of PM4MEC_INDIRECT_BUFFER.
enum INDIRECT_BUFFER_cache_policy_enum { cache_policy_indirect_buffer_LRU_0 = 0, cache_policy_indirect_buffer_STREAM_1 = 1, cache_policy_indirect_buffer_BYPASS_2 = 2 };
//--------------------EVENT_WRITE--------------------
// Values for the 4-bit event_index field of PM4EVENT_WRITE.
enum EVENT_WRITE_event_index_enum { event_index_event_write_OTHER_0 = 0, event_index_event_write_ZPASS_DONE_1 = 1, event_index_event_write_SAMPLE_PIPELINESTAT_2 = 2, event_index_event_write_SAMPLE_STREAMOUTSTAT_3 = 3, event_index_event_write_CS_VS_PS_PARTIAL_FLUSH_4 = 4, event_index_event_write_RESERVED_EOP_5 = 5, event_index_event_write_RESERVED_EOS_6 = 6, event_index_event_write_CACHE_FLUSH_7 = 7 };
// EVENT_WRITE packet layout: four words, each a union of named bitfields
// and a raw `ordinalN` overlay. Field order and widths define the packet
// layout; do not reorder.
typedef struct _PM4_EVENT_WRITE
{
// Word 1: packet header.
union
{
PM4_TYPE_3_HEADER header; ///header
unsigned int ordinal1;
};
// Word 2: event type and index (6 + 2 + 4 + 20 bits).
union
{
struct
{
unsigned int event_type:6;
unsigned int reserved1:2;
EVENT_WRITE_event_index_enum event_index:4;
unsigned int reserved2:20;
} bitfields2;
unsigned int ordinal2;
};
// Word 3: low address bits; the low 3 bits are reserved.
union
{
struct
{
unsigned int reserved3:3;
unsigned int address_lo:29;
} bitfields3;
unsigned int ordinal3;
};
// Word 4: high address bits; only the low 16 bits are used.
union
{
struct
{
unsigned int address_hi:16;
unsigned int reserved4:16;
} bitfields4;
unsigned int ordinal4;
};
} PM4EVENT_WRITE, *PPM4EVENT_WRITE;
//--------------------SET_SH_REG--------------------
// SET_SH_REG packet layout: header word, a 16-bit register offset word,
// then the register values.
typedef struct _PM4_SET_SH_REG
{
union
{
PM4_TYPE_3_HEADER header; ///header
unsigned int ordinal1;
};
union
{
struct
{
unsigned int reg_offset:16;
unsigned int reserved1:16;
} bitfields2;
unsigned int ordinal2;
};
// Declared with one element; per the original note the packet carries
// 1..N values, so sizeof() covers only the first of them.
unsigned int reg_data[1]; //1..N of these fields
} PM4SET_SH_REG, *PPM4SET_SH_REG;
//--------------------ACQUIRE_MEM--------------------
// Values for the 1-bit engine field of PM4ACQUIRE_MEM.
enum ACQUIRE_MEM_engine_enum { engine_acquire_mem_PFP_0 = 0, engine_acquire_mem_ME_1 = 1 };
// ACQUIRE_MEM packet layout: seven words assuming unsigned int is 32 bits
// wide. Note that the bitfield struct names do not track the word index:
// bitfields3 overlays ordinal4, bitfields4 overlays ordinal6 and
// bitfields5 overlays ordinal7. The names are part of the interface and
// are left as they are.
typedef struct _PM4_ACQUIRE_MEM
{
// Word 1: packet header.
union
{
PM4_TYPE_3_HEADER header; ///header
unsigned int ordinal1;
};
// Word 2: 31-bit coher_cntl plus the engine selector in the top bit.
union
{
struct
{
unsigned int coher_cntl:31;
ACQUIRE_MEM_engine_enum engine:1;
} bitfields2;
unsigned int ordinal2;
};
// Word 3: coher_size (plain word).
unsigned int coher_size;
// Word 4: upper 8 bits of the size.
union
{
struct
{
unsigned int coher_size_hi:8;
unsigned int reserved1:24;
} bitfields3;
unsigned int ordinal4;
};
// Word 5: coher_base_lo (plain word).
unsigned int coher_base_lo;
// Word 6: upper 25 bits of the base.
union
{
struct
{
unsigned int coher_base_hi:25;
unsigned int reserved2:7;
} bitfields4;
unsigned int ordinal6;
};
// Word 7: 16-bit poll interval.
union
{
struct
{
unsigned int poll_interval:16;
unsigned int reserved3:16;
} bitfields5;
unsigned int ordinal7;
};
} PM4ACQUIRE_MEM, *PPM4ACQUIRE_MEM;
//--------------------MEC_INDIRECT_BUFFER--------------------
// INDIRECT_BUFFER packet layout (MEC): four words, each a union of named
// bitfields and a raw `ordinalN` overlay. Field order and widths define
// the packet layout; do not reorder.
typedef struct _PM4_MEC_INDIRECT_BUFFER
{
// Word 1: packet header.
union
{
PM4_TYPE_3_HEADER header; ///header
unsigned int ordinal1;
};
// Word 2: swap_function in the low 2 bits, then 30 bits of ib_base_lo.
union
{
struct
{
unsigned int swap_function:2;
unsigned int ib_base_lo:30;
} bitfields2;
unsigned int ordinal2;
};
// Word 3: ib_base_hi in the low 16 bits.
union
{
struct
{
unsigned int ib_base_hi:16;
unsigned int reserved1:16;
} bitfields3;
unsigned int ordinal3;
};
// Word 4: 20-bit ib_size plus control flags (widths sum to 32).
union
{
struct
{
unsigned int ib_size:20;
unsigned int chain:1;
unsigned int offload_polling:1;
unsigned int volatile_setting:1;
unsigned int valid:1;
unsigned int vmid:4;
INDIRECT_BUFFER_cache_policy_enum cache_policy:2;
unsigned int reserved4:2;
} bitfields4;
unsigned int ordinal4;
};
} PM4MEC_INDIRECT_BUFFER, *PPM4MEC_INDIRECT_BUFFER;
//--------------------MEC_WAIT_REG_MEM--------------------
// Value sets for the enum-typed bitfields of PM4MEC_WAIT_REG_MEM.

// function field (3 bits wide in the packet): comparison selector.
enum MEC_WAIT_REG_MEM_function_enum {
function__mec_wait_reg_mem__always_pass = 0,
function__mec_wait_reg_mem__less_than_ref_value = 1,
function__mec_wait_reg_mem__less_than_equal_to_the_ref_value = 2,
function__mec_wait_reg_mem__equal_to_the_reference_value = 3,
function__mec_wait_reg_mem__not_equal_reference_value = 4,
function__mec_wait_reg_mem__greater_than_or_equal_reference_value = 5,
function__mec_wait_reg_mem__greater_than_reference_value = 6 };
// mem_space field (2 bits wide in the packet).
enum MEC_WAIT_REG_MEM_mem_space_enum {
mem_space__mec_wait_reg_mem__register_space = 0,
mem_space__mec_wait_reg_mem__memory_space = 1 };
// operation field (2 bits wide in the packet); value 2 is not named here.
enum MEC_WAIT_REG_MEM_operation_enum {
operation__mec_wait_reg_mem__wait_reg_mem = 0,
operation__mec_wait_reg_mem__wr_wait_wr_reg = 1,
operation__mec_wait_reg_mem__wait_mem_preemptable = 3 };
// WAIT_REG_MEM packet layout (MEC): seven 32-bit words. Unlike the
// neighbouring packet structs this one uses uint32_t rather than
// unsigned int; uint32_t must be declared by the includer.
// Field order and widths define the packet layout; do not reorder.
typedef struct PM4_MEC_WAIT_REG_MEM
{
// Word 1: packet header.
union
{
PM4_TYPE_3_HEADER header; ///header
uint32_t ordinal1;
};
// Word 2: compare function, address space and operation (3+1+2+2+24 bits).
union
{
struct
{
MEC_WAIT_REG_MEM_function_enum function:3;
uint32_t reserved1:1;
MEC_WAIT_REG_MEM_mem_space_enum mem_space:2;
MEC_WAIT_REG_MEM_operation_enum operation:2;
uint32_t reserved2:24;
} bitfields2;
uint32_t ordinal2;
};
// Word 3: three alternative views of the same word -- a memory poll
// address (low 2 bits reserved), an 18-bit register poll address, or an
// 18-bit register write address.
// NOTE(review): which view applies presumably follows mem_space /
// operation in word 2 -- confirm.
union
{
struct
{
uint32_t reserved3:2;
uint32_t mem_poll_addr_lo:30;
} bitfields3a;
struct
{
uint32_t reg_poll_addr:18;
uint32_t reserved4:14;
} bitfields3b;
struct
{
uint32_t reg_write_addr1:18;
uint32_t reserved5:14;
} bitfields3c;
uint32_t ordinal3;
};
// Word 4: high memory poll address, or a second 18-bit register address.
union
{
uint32_t mem_poll_addr_hi;
struct
{
uint32_t reg_write_addr2:18;
uint32_t reserved6:14;
} bitfields4b;
uint32_t ordinal4;
};
// Words 5 and 6: reference value and mask (plain words).
uint32_t reference;
uint32_t mask;
// Word 7: 16-bit poll interval; optimize_ace_offload_mode is the top bit.
union
{
struct
{
uint32_t poll_interval:16;
uint32_t reserved7:15;
uint32_t optimize_ace_offload_mode:1;
} bitfields7;
uint32_t ordinal7;
};
} PM4MEC_WAIT_REG_MEM, *PPM4MEC_WAIT_REG_MEM;
//--------------------MEC_WRITE_DATA--------------------
// Value sets for WRITE_DATA control fields. Only the enums are defined
// in this header; no WRITE_DATA packet struct follows here.
// dst_sel names values 0, 2, 3 and 5 only.
enum MEC_WRITE_DATA_dst_sel_enum { dst_sel_mec_write_data_MEM_MAPPED_REGISTER_0 = 0, dst_sel_mec_write_data_TC_L2_2 = 2, dst_sel_mec_write_data_GDS_3 = 3, dst_sel_mec_write_data_MEMORY_5 = 5 };
enum MEC_WRITE_DATA_addr_incr_enum { addr_incr_mec_write_data_INCREMENT_ADDR_0 = 0, addr_incr_mec_write_data_DO_NOT_INCREMENT_ADDR_1 = 1 };
enum MEC_WRITE_DATA_wr_confirm_enum { wr_confirm_mec_write_data_DO_NOT_WAIT_FOR_CONFIRMATION_0 = 0, wr_confirm_mec_write_data_WAIT_FOR_CONFIRMATION_1 = 1 };
enum MEC_WRITE_DATA_cache_policy_enum { cache_policy_mec_write_data_LRU_0 = 0, cache_policy_mec_write_data_STREAM_1 = 1, cache_policy_mec_write_data_BYPASS_2 = 2 };
//--------------------MEC_RELEASE_MEM--------------------
// Value sets for RELEASE_MEM control fields. Only the enums are defined
// in this header; no RELEASE_MEM packet struct follows here.
// The numeric suffix of each enumerator repeats its value.
enum MEC_RELEASE_MEM_event_index_enum { event_index_mec_release_mem_EVENT_WRITE_EOP_5 = 5, event_index_mec_release_mem_CS_Done_6 = 6 };
enum MEC_RELEASE_MEM_cache_policy_enum { cache_policy_mec_release_mem_LRU_0 = 0, cache_policy_mec_release_mem_STREAM_1 = 1, cache_policy_mec_release_mem_BYPASS_2 = 2 };
enum MEC_RELEASE_MEM_dst_sel_enum { dst_sel_mec_release_mem_MEMORY_CONTROLLER_0 = 0, dst_sel_mec_release_mem_TC_L2_1 = 1 };
enum MEC_RELEASE_MEM_int_sel_enum { int_sel_mec_release_mem_NONE_0 = 0, int_sel_mec_release_mem_SEND_INTERRUPT_ONLY_1 = 1, int_sel_mec_release_mem_SEND_INTERRUPT_AFTER_WRITE_CONFIRM_2 = 2, int_sel_mec_release_mem_SEND_DATA_AFTER_WRITE_CONFIRM_3 = 3 };
enum MEC_RELEASE_MEM_data_sel_enum { data_sel_mec_release_mem_NONE_0 = 0, data_sel_mec_release_mem_SEND_32_BIT_LOW_1 = 1, data_sel_mec_release_mem_SEND_64_BIT_DATA_2 = 2, data_sel_mec_release_mem_SEND_GPU_CLOCK_COUNTER_3 = 3, data_sel_mec_release_mem_SEND_CP_PERFCOUNTER_HI_LO_4 = 4, data_sel_mec_release_mem_STORE_GDS_DATA_TO_MEMORY_5 = 5 };
#endif
@@ -0,0 +1,173 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __PM4__PKT__STRUCT__NV__HPP__
#define __PM4__PKT__STRUCT__NV__HPP__
#include "pm4_pkt_struct_ai.h"
// ACQUIRE_MEM packet layout (NV variant): eight words assuming unsigned
// int is 32 bits wide. Word 2 is a plain reserved word, and an eighth
// word carrying gcr_cntl closes the packet.
// As elsewhere, bitfield struct names do not track the word index
// (bitfields3 overlays ordinal4, bitfields6 overlays ordinal8); the names
// are part of the interface and are left as they are.
typedef struct _PM4_ACQUIRE_MEM_NV
{
// Word 1: packet header.
union
{
PM4_TYPE_3_HEADER header; ///header
unsigned int ordinal1;
};
// Word 2: reserved.
unsigned int reserved;
// Word 3: coher_size (plain word).
unsigned int coher_size;
// Word 4: upper 8 bits of the size.
union
{
struct
{
unsigned int coher_size_hi:8;
unsigned int reserved1:24;
} bitfields3;
unsigned int ordinal4;
};
// Word 5: coher_base_lo (plain word).
unsigned int coher_base_lo;
// Word 6: upper 24 bits of the base.
union
{
struct
{
unsigned int coher_base_hi:24;
unsigned int reserved2:8;
} bitfields4;
unsigned int ordinal6;
};
// Word 7: 16-bit poll interval.
union
{
struct
{
unsigned int poll_interval:16;
unsigned int reserved3:16;
} bitfields5;
unsigned int ordinal7;
};
// Word 8: 18-bit gcr_cntl.
// NOTE(review): width taken as written; verify against the hardware
// packet specification before relying on the upper bits.
union
{
struct
{
unsigned int gcr_cntl:18;
unsigned int reserved4:14;
} bitfields6;
unsigned int ordinal8;
};
} PM4ACQUIRE_MEM_NV, *PPM4ACQUIRE_MEM_NV;
// RELEASE_MEM packet layout (NV variant): seven unions followed by
// int_ctxid, i.e. eight words assuming unsigned int is 32 bits wide.
// It reuses the AI_MEC_RELEASE_MEM_* enums from pm4_pkt_struct_ai.h.
// In word 2 a single 12-bit gcr_cntl field follows event_index.
// Field order and widths define the packet layout; do not reorder.
typedef struct PM4_MEC_RELEASE_MEM_NV {
// Word 1: packet header.
union {
PM4_TYPE_3_HEADER header;
unsigned int ordinal1;
};
// Word 2: event selection, gcr_cntl and cache policy (widths sum to 32).
union {
struct {
unsigned int event_type:6;
unsigned int reserved1:2;
AI_MEC_RELEASE_MEM_event_index_enum event_index:4;
unsigned int gcr_cntl:12;
unsigned int reserved4:1;
AI_MEC_RELEASE_MEM_cache_policy_enum cache_policy:2;
unsigned int reserved5:1;
AI_MEC_RELEASE_MEM_pq_exe_status_enum pq_exe_status:1;
unsigned int reserved6:3;
} bitfields2;
unsigned int ordinal2;
};
// Word 3: destination, interrupt and data selectors (widths sum to 32).
union {
struct {
unsigned int reserved7:16;
AI_MEC_RELEASE_MEM_dst_sel_enum dst_sel:2;
unsigned int reserved8:6;
AI_MEC_RELEASE_MEM_int_sel_enum int_sel:3;
unsigned int reserved9:2;
AI_MEC_RELEASE_MEM_data_sel_enum data_sel:3;
} bitfields3;
unsigned int ordinal3;
};
// Word 4: low address bits; 4a reserves the low 2 bits, 4b the low 3.
union {
struct {
unsigned int reserved10:2;
unsigned int address_lo_32b:30;
} bitfields4a;
struct {
unsigned int reserved11:3;
unsigned int address_lo_64b:29;
} bitfields4b;
unsigned int reserved12;
unsigned int ordinal4;
};
// Word 5: high address bits.
union {
unsigned int address_hi;
unsigned int reserved13;
unsigned int ordinal5;
};
// Word 6: low data word, or compare value, or a dword offset/count pair.
union {
unsigned int data_lo;
unsigned int cmp_data_lo;
struct {
unsigned int dw_offset:16;
unsigned int num_dwords:16;
} bitfields6c;
unsigned int reserved14;
unsigned int ordinal6;
};
// Word 7: high data word or high compare value.
union {
unsigned int data_hi;
unsigned int cmp_data_hi;
unsigned int reserved15;
unsigned int reserved16;
unsigned int ordinal7;
};
// Word 8: plain word, no overlay.
unsigned int int_ctxid;
} PM4MEC_RELEASE_MEM_NV, *PPM4MEC_RELEASE_MEM_NV;
#endif // __PM4__PKT__STRUCT__NV__HPP__
@@ -0,0 +1,387 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __SDMA_PKT_STRUCT_H__
#define __SDMA_PKT_STRUCT_H__
/*
** SDMA opcode / sub-opcode values.
** The packet structs below each expose an 8-bit `op` and an 8-bit `sub_op`
** bitfield in their header dword; these constants are presumably the values
** written into those fields (TODO confirm against the packet builders).
** Note the list is not sorted by value: TIMESTAMP (13) precedes CONST_FILL (11).
*/
const unsigned int SDMA_OP_NOP = 0;
const unsigned int SDMA_OP_COPY = 1;
const unsigned int SDMA_OP_WRITE = 2;
const unsigned int SDMA_OP_FENCE = 5;
const unsigned int SDMA_OP_TRAP = 6;
const unsigned int SDMA_OP_TIMESTAMP = 13;
const unsigned int SDMA_OP_CONST_FILL = 11;
/* Sub-opcodes: both linear variants share the value 0. */
const unsigned int SDMA_SUBOP_COPY_LINEAR = 0;
const unsigned int SDMA_SUBOP_WRITE_LINEAR = 0;
/*
** Definitions for SDMA_PKT_COPY_LINEAR packet
**
** Each member is a union of an anonymous bitfield struct and a raw
** DW_n_DATA word, so a dword can be filled field-by-field or written whole.
** The bitfield widths of every dword add up to 32.
**
** Layout: DW0 header, DW1 count, DW2 parameter, DW3/DW4 source address
** low/high, then DST_ADDR[], a zero-length trailing array of low/high
** destination address pairs (DW5/DW6 for the first entry). Because the
** array is declared with length 0 it adds nothing to sizeof(); code building
** a packet must reserve room for the destination entries it writes.
*/
typedef struct SDMA_PKT_COPY_LINEAR_TAG
{
/* DW0: 8 + 8 + 11 + 1 + 4 bits */
union
{
struct
{
unsigned int op:8;
unsigned int sub_op:8;
unsigned int reserved_0:11;
unsigned int broadcast:1;
unsigned int reserved_1:4;
};
unsigned int DW_0_DATA;
} HEADER_UNION;
/* DW1: 22-bit count + 10 reserved bits */
union
{
struct
{
unsigned int count:22;
unsigned int reserved_0:10;
};
unsigned int DW_1_DATA;
} COUNT_UNION;
/* DW2: per-direction sw/ha fields separated by reserved bits */
union
{
struct
{
unsigned int reserved_0:16;
unsigned int dst_sw:2;
unsigned int reserved_1:4;
unsigned int dst_ha:1;
unsigned int reserved_2:1;
unsigned int src_sw:2;
unsigned int reserved_3:4;
unsigned int src_ha:1;
unsigned int reserved_4:1;
};
unsigned int DW_2_DATA;
} PARAMETER_UNION;
/* DW3: source address, low half (per field name) */
union
{
struct
{
unsigned int src_addr_31_0:32;
};
unsigned int DW_3_DATA;
} SRC_ADDR_LO_UNION;
/* DW4: source address, high half (per field name) */
union
{
struct
{
unsigned int src_addr_63_32:32;
};
unsigned int DW_4_DATA;
} SRC_ADDR_HI_UNION;
/* DW5/DW6 (repeated per entry): destination address low/high */
struct
{
union
{
struct
{
unsigned int dst_addr_31_0:32;
};
unsigned int DW_5_DATA;
} DST_ADDR_LO_UNION;
union
{
struct
{
unsigned int dst_addr_63_32:32;
};
unsigned int DW_6_DATA;
} DST_ADDR_HI_UNION;
} DST_ADDR[0];
} SDMA_PKT_COPY_LINEAR, *PSDMA_PKT_COPY_LINEAR;
/*
** Definitions for SDMA_PKT_WRITE_UNTILED packet
**
** Five dwords, each a union of named bitfields and a raw DW_n_DATA word:
** DW0 header, DW1/DW2 destination address low/high, DW3 count + sw,
** DW4 first data word (data0). Only one data dword is declared here.
*/
typedef struct SDMA_PKT_WRITE_UNTILED_TAG
{
/* DW0: 8-bit op, 8-bit sub_op, 16 reserved bits */
union
{
struct
{
unsigned int op:8;
unsigned int sub_op:8;
unsigned int reserved_0:16;
};
unsigned int DW_0_DATA;
} HEADER_UNION;
/* DW1: destination address, low half (per field name) */
union
{
struct
{
unsigned int dst_addr_31_0:32;
};
unsigned int DW_1_DATA;
} DST_ADDR_LO_UNION;
/* DW2: destination address, high half (per field name) */
union
{
struct
{
unsigned int dst_addr_63_32:32;
};
unsigned int DW_2_DATA;
} DST_ADDR_HI_UNION;
/* DW3: 22 + 2 + 2 + 6 bits */
union
{
struct
{
unsigned int count:22;
unsigned int reserved_0:2;
unsigned int sw:2;
unsigned int reserved_1:6;
};
unsigned int DW_3_DATA;
} DW_3_UNION;
/* DW4: first payload dword */
union
{
struct
{
unsigned int data0:32;
};
unsigned int DW_4_DATA;
} DATA0_UNION;
} SDMA_PKT_WRITE_UNTILED, *PSDMA_PKT_WRITE_UNTILED;
/*
** Definitions for SDMA_PKT_FENCE packet
**
** Four dwords, each a union of named bitfields and a raw DW_n_DATA word:
** DW0 header, DW1/DW2 address low/high, DW3 a 32-bit data value.
*/
typedef struct SDMA_PKT_FENCE_TAG
{
/* DW0: 8-bit op, 8-bit sub_op, 16 reserved bits */
union
{
struct
{
unsigned int op:8;
unsigned int sub_op:8;
unsigned int reserved_0:16;
};
unsigned int DW_0_DATA;
} HEADER_UNION;
/* DW1: address, low half (per field name) */
union
{
struct
{
unsigned int addr_31_0:32;
};
unsigned int DW_1_DATA;
} ADDR_LO_UNION;
/* DW2: address, high half (per field name) */
union
{
struct
{
unsigned int addr_63_32:32;
};
unsigned int DW_2_DATA;
} ADDR_HI_UNION;
/* DW3: 32-bit data value */
union
{
struct
{
unsigned int data:32;
};
unsigned int DW_3_DATA;
} DATA_UNION;
} SDMA_PKT_FENCE, *PSDMA_PKT_FENCE;
/*
** Definitions for SDMA_PKT_CONSTANT_FILL packet
**
** Five dwords, each a union of named bitfields and a raw DW_n_DATA word:
** DW0 header (also carries sw and fillsize), DW1/DW2 destination address
** low/high, DW3 the 32-bit source data, DW4 count.
*/
typedef struct SDMA_PKT_CONSTANT_FILL_TAG
{
/* DW0: 8 + 8 + 2 + 12 + 2 bits */
union
{
struct
{
unsigned int op:8;
unsigned int sub_op:8;
unsigned int sw:2;
unsigned int reserved_0:12;
unsigned int fillsize:2;
};
unsigned int DW_0_DATA;
} HEADER_UNION;
/* DW1: destination address, low half (per field name) */
union
{
struct
{
unsigned int dst_addr_31_0:32;
};
unsigned int DW_1_DATA;
} DST_ADDR_LO_UNION;
/* DW2: destination address, high half (per field name) */
union
{
struct
{
unsigned int dst_addr_63_32:32;
};
unsigned int DW_2_DATA;
} DST_ADDR_HI_UNION;
/* DW3: 32-bit source data */
union
{
struct
{
unsigned int src_data_31_0:32;
};
unsigned int DW_3_DATA;
} DATA_UNION;
/* DW4: 22-bit count + 10 reserved bits */
union
{
struct
{
unsigned int count:22;
unsigned int reserved_0:10;
};
unsigned int DW_4_DATA;
} COUNT_UNION;
} SDMA_PKT_CONSTANT_FILL, *PSDMA_PKT_CONSTANT_FILL;
/*
** Definitions for SDMA_PKT_TRAP packet
**
** Two dwords, each a union of named bitfields and a raw DW_n_DATA word:
** DW0 header, DW1 a 28-bit int_context value plus 4 reserved bits.
*/
typedef struct SDMA_PKT_TRAP_TAG
{
/* DW0: 8-bit op, 8-bit sub_op, 16 reserved bits */
union
{
struct
{
unsigned int op:8;
unsigned int sub_op:8;
unsigned int reserved_0:16;
};
unsigned int DW_0_DATA;
} HEADER_UNION;
/* DW1: 28-bit int_context + 4 reserved bits */
union
{
struct
{
unsigned int int_context:28;
unsigned int reserved_0:4;
};
unsigned int DW_1_DATA;
} INT_CONTEXT_UNION;
} SDMA_PKT_TRAP, *PSDMA_PKT_TRAP;
/*
** Definitions for SDMA_PKT_TIMESTAMP packet
**
** Three dwords, each a union of named bitfields and a raw DW_n_DATA word:
** DW0 header, DW1/DW2 address low/high.
*/
typedef struct SDMA_PKT_TIMESTAMP_TAG
{
/* DW0: 8-bit op, 8-bit sub_op, 16 reserved bits */
union
{
struct
{
unsigned int op:8;
unsigned int sub_op:8;
unsigned int reserved_0:16;
};
unsigned int DW_0_DATA;
} HEADER_UNION;
/* DW1: address, low half (per field name) */
union
{
struct
{
unsigned int addr_31_0:32;
};
unsigned int DW_1_DATA;
} ADDR_LO_UNION;
/* DW2: address, high half (per field name) */
union
{
struct
{
unsigned int addr_63_32:32;
};
unsigned int DW_2_DATA;
} ADDR_HI_UNION;
} SDMA_PKT_TIMESTAMP, *PSDMA_PKT_TIMESTAMP;
/*
** Definitions for SDMA_PKT_NOP packet
**
** Two dwords, each a union of named bitfields and a raw DW_n_DATA word:
** DW0 header (which also carries a 14-bit count), DW1 first data word.
*/
typedef struct SDMA_PKT_NOP_TAG
{
/* DW0: 8 + 8 + 14 + 2 bits */
union
{
struct
{
unsigned int op:8;
unsigned int sub_op:8;
unsigned int count:14;
unsigned int reserved_0:2;
};
unsigned int DW_0_DATA;
} HEADER_UNION;
/* DW1: first data dword */
union
{
struct
{
unsigned int data0:32;
};
unsigned int DW_1_DATA;
} DATA0_UNION;
} SDMA_PKT_NOP, *PSDMA_PKT_NOP;
#endif // __SDMA_PKT_STRUCT_H__
@@ -0,0 +1,343 @@
# FILTER maps a platform/ASIC name to a gtest filter string. run_kfdtest.sh
# sources this file and passes FILTER[<platform>] as --gtest_filter=<value>.
# Entries are built from adjacent quoted strings joined by line continuations,
# so every pattern except the last must end in ':' and the last line of an
# entry must NOT end in a backslash.
declare -A FILTER
# Power management tests
FILTER[pm]=\
"KFDPMTest.SuspendWithActiveProcess:"\
"KFDPMTest.SuspendWithIdleQueue:"\
"KFDPMTest.SuspendWithIdleQueueAfterWork"
# Core tests, used in scenarios like bringup
# Software scheduler mode, i.e. non-HWS mode
FILTER[core_sws]=\
"KFDQMTest.CreateDestroyCpQueue:"\
"KFDQMTest.SubmitNopCpQueue:"\
"KFDQMTest.SubmitPacketCpQueue:"\
"KFDQMTest.AllCpQueues:"\
"KFDQMTest.CreateDestroySdmaQueue:"\
"KFDQMTest.SubmitNopSdmaQueue:"\
"KFDQMTest.SubmitPacketSdmaQueue:"\
"KFDQMTest.AllSdmaQueues:"\
"KFDQMTest.AllXgmiSdmaQueues:"\
"KFDQMTest.AllQueues:"\
"KFDLocalMemoryTest.AccessLocalMem:"\
"KFDEventTest.SignalEvent"
# HWS mode: everything in core_sws plus the CWSR basic test
FILTER[core]=\
"${FILTER[core_sws]}:"\
"KFDCWSRTest.BasicTest"
# Permanent exclusions
# These tests are included for debugging, but are not executed in normal execution on any ASIC:
# FILTER[pm] needs human intervention, so it is listed here. Developers can run
# those tests manually through the "-p pm" option.
#
# Fragmentation test fails intermittently on different systems (see KFD-344). It
# stresses memory management in unusual ways by having lots of memory allocated but
# not mapped, which is not relevant to compute applications over ROCr.
#
# CU Masking Linear tests are not working correctly due to how the HW distributes work over CUs.
# They are available for testing but are not currently expected to pass on CI/VI/AI.
#
# CU Masking Even is added here due to some non-obvious baseline measurements. Though
# using wallclock to measure performance is always risky, there are just too many ASICs
# where this test is failing. Ideally we'll get better CU Masking coverage via rocrtst.
#
# The CheckZeroInitializationVram test is no longer expected to pass as KFD no longer
# clears memory at allocation time.
#
# The leading '-' on the first pattern starts the negative (exclude) part of the
# gtest filter; every list that embeds this variable first inherits it.
PERMANENT_BLACKLIST_ALL_ASICS=\
"-KFDEventTest.MeasureInterruptConsumption:"\
"${FILTER[pm]}:"\
"KFDLocalMemoryTest.Fragmentation:"\
"KFDQMTest.BasicCuMaskingLinear:"\
"KFDQMTest.BasicCuMaskingEven:"\
"RDMATest.GPUDirect:"\
"KFDRASTest.*:"\
"KFDLocalMemoryTest.CheckZeroInitializationVram"
# This is the temporary blacklist for all ASICs. This is to be used when a test is failing consistently
# on every ASIC (Kaveri, Carrizo, Hawaii, Tonga, Fiji, Polaris10, Polaris11 and Vega10).
# TODO means that a JIRA ticket needs to be created for this issue, as no documentation regarding
# failures can be found
# NOTE: If you update this alphabetical listing, add the corresponding JIRA ticket for reference
#
# KFDQMTest.GPUDoorbellWrite fails intermittently (KFD-318)
# KFDQMTest.mGPUShareBO (KFD-334)
# KFDHWSTest.* (SWDEV-193035)
# KFDEvictTest.BurstyTest (ROCMOPS-464)
# KFDEvictTest.BurstyTest (SWDEV-291256)
# KFDEvictTest.BurstyTest (KFD-425)
# KFDDBGTest.SuspendQueues (SWDEV-417850)
# KFDDBGTest.HitAddressWatch (SWDEV-420281)
TEMPORARY_BLACKLIST_ALL_ASICS=\
"KFDQMTest.GPUDoorbellWrite:"\
"KFDQMTest.mGPUShareBO:"\
"KFDQMTest.SdmaEventInterrupt:"\
"KFDMemoryTest.CacheInvalidateOnRemoteWrite:"\
"KFDEvictTest.BurstyTest:"\
"KFDHWSTest.*:"\
"KFDSVMRangeTest.ReadOnlyRangeTest*:"\
"KFDDBGTest.SuspendQueues:"\
"KFDDBGTest.HitAddressWatch"
# Union of the permanent and temporary lists; the base of every per-ASIC filter below
BLACKLIST_ALL_ASICS=\
"$PERMANENT_BLACKLIST_ALL_ASICS:"\
"$TEMPORARY_BLACKLIST_ALL_ASICS"
# Shared exclusion fragments. These carry no leading '-' of their own: they are
# only meant to be appended after BLACKLIST_ALL_ASICS inside a FILTER entry.
#
# SDMA-based tests (KFDIPCTest.BasicTest, KFDQM.*Sdma*, KFDMemoryTest.MMBench) are all
# disabled on non-Hawaii due to SDMA instability - SWDEV-101666
SDMA_BLACKLIST=\
"KFDIPCTest.*:"\
"KFDLocalMemoryTest.CheckZeroInitializationVram:"\
"KFDMemoryTest.MemoryRegister:"\
"KFDMemoryTest.MMBench:"\
"KFDMemoryTest.SignalHandling:"\
"KFDQMTest.AllQueues:"\
"KFDQMTest.*Sdma*:"\
"KFDQMTest.CreateQueueStressSingleThreaded:"\
"KFDQMTest.GPUDoorbellWrite:"\
"KFDQMTest.P2PTest:"\
"KFDPerformanceTest.P2PBandWidthTest:"\
"KFDPerformanceTest.P2POverheadTest"
# Anything involving CP queue creation is failing on Kaveri. Separate them here for convenience (KFD-336)
KV_QUEUE_BLACKLIST=\
"KFDExceptionTest.AddressFault:"\
"KFDExceptionTest.PermissionFault:"\
"KFDLocalMemoryTest.*:"\
"KFDEventTest.Signal*Event*:"\
"KFDQMTest.CreateQueueStressSingleThreaded:"\
"KFDQMTest.*CpQueue*:"\
"KFDQMTest.*Dispatch*:"\
"KFDQMTest.Atomics:"\
"KFDQMTest.GPUDoorbellWrite"
# KFDCWSRTest.BasicTest*: SWDEV-353206
BLACKLIST_GFX10=\
"KFDMemoryTest.DeviceHdpFlush:"\
"KFDSVMEvictTest.*:"\
"KFDCWSRTest.BasicTest*"
# GFX10 list plus the perf-counter tests
BLACKLIST_GFX10_NV2X=\
"$BLACKLIST_GFX10:"\
"KFDPerfCountersTest.*"
# KFDMemoryTest.FlatScratchAccess - SWDEV-329877
# KFDGWSTest.*: GFX11 will no longer use global wave sync
BLACKLIST_GFX11=\
"KFDQMTest.CreateAqlCpQueue:"\
"KFDCWSRTest.InterruptRestore:"\
"KFDPerfCountersTest.*:"\
"KFDMemoryTest.FlatScratchAccess:"\
"KFDGWSTest.*"
# Per-ASIC filters. Each starts with BLACKLIST_ALL_ASICS (which supplies the
# leading '-') and appends ASIC-specific exclusions.
#
# KFDQMTest.CpuWriteCoherence fails. 0 dwordsAvailable (KFD-338)
# KFDMemoryTest.MemoryRegister fails on SDMA queue creation (KFD-337)
FILTER[kaveri]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST:"\
"$KV_QUEUE_BLACKLIST:"\
"KFDMemoryTest.MemoryRegister:"\
"KFDQMTest.CpuWriteCoherence"
# KFDLocalMemoryTest.BasicTest is failing intermittently (KFD-368)
# KFDMemoryTest.BigSysBufferStressTest was failing intermittently on 4.9
# and hangs when executed twice (KFD-312)
# KFDQMTest.GPUDoorbellWrite fails on Hawaii. Could be HW-related (KFD-342)
FILTER[hawaii]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDLocalMemoryTest.BasicTest:"\
"KFDMemoryTest.BigSysBufferStressTest:"\
"KFDQMTest.GPUDoorbellWrite"
FILTER[carrizo]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST:"\
"KFDExceptionTest.PermissionFault"
# KFDPerfCountersTest.*Trace fail (KFD-339)
# KFDMemoryTest.QueryPointerInfo/MemoryRegister* (KFD-341)
# The remaining tests listed here fail on map memory to GPU with a VA conflict (KFD-340)
FILTER[tonga]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST:"\
"KFDCWSRTest.BasicTest:"\
"KFDPerfCountersTest.*:"\
"KFDQMTest.OverSubscribeCpQueues"
# Since Navi10 was merged, the PM4Event test takes 6min to run
FILTER[fiji]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDQMTest.PM4EventInterrupt:"\
"$SDMA_BLACKLIST"
FILTER[polaris10]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST"
FILTER[polaris11]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST"
FILTER[polaris12]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST"
# KFDIPCTest.BasicTest (ROCMOPS-459) .CMABasicTest (ROCMOPS-460) .CrossMemoryAttachTest (ROCMOPS-461)
# KFDQMTest.AllSdmaQueues (ROCMOPS-463)
FILTER[vega10]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDIPCTest.BasicTest:"\
"KFDIPCTest.CMABasicTest:"\
"KFDIPCTest.CrossMemoryAttachTest:"\
"KFDQMTest.AllSdmaQueues"
# The final line of an entry must not end in a backslash: a trailing
# continuation would splice the next assignment's text into this value and
# leave the next FILTER entry undefined.
FILTER[vega12]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST"
FILTER[vega20]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST:"\
"KFDQMTest.GPUDoorbellWrite"
# "raven_dgpuFallback" is not a name reported by the driver: run_kfdtest.sh
# substitutes it for "raven" when the node reports no CPU cores but has SIMDs.
FILTER[raven_dgpuFallback]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST:"\
"KFDEvictTest.*:"\
"KFDMemoryTest.MemoryRegister:"\
"KFDSVMRangeTest.BasicSystemMemTest:"\
"KFDSVMRangeTest.BasicVramTest:"\
"KFDSVMRangeTest.EvictSystemRangeTest:"\
"KFDSVMRangeTest.PartialUnmapSysMemTest:"\
"KFDSVMRangeTest.MigrateTest:"\
"KFDSVMRangeTest.MigratePolicyTest:"\
"KFDSVMRangeTest.MigrateGranularityTest:"\
"KFDSVMRangeTest.MigrateLargeBufTest:"\
"KFDSVMRangeTest.MultiThreadMigrationTest:"\
"KFDSVMRangeTest.MigrateAccessInPlaceTest:"\
"KFDSVMEvictTest.QueueTest"
FILTER[raven]=\
"$BLACKLIST_ALL_ASICS:"\
"$SDMA_BLACKLIST:"\
"KFDEvictTest.*:"\
"KFDSVMRangeTest.EvictSystemRangeTest:"\
"KFDSVMRangeTest.PartialUnmapSysMemTest:"\
"KFDSVMRangeTest.PrefetchTest:"\
"KFDSVMRangeTest.MultiThreadMigrationTest:"\
"KFDSVMEvictTest.QueueTest:"\
"KFDQMTest.MultipleCpQueuesStressDispatch"
FILTER[renoir]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDEvictTest.*:"\
"KFDMemoryTest.LargestSysBufferTest:"\
"KFDMemoryTest.SignalHandling"
# KFDExceptionTest.* (KFD-435)
# NOTE: the last pattern of an entry must not be followed by a backslash,
# otherwise the next assignment is concatenated into this value.
FILTER[arcturus]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDExceptionTest.FaultStorm"
FILTER[aldebaran]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDExceptionTest.FaultStorm:"\
"KFDMemoryTest.PtraceAccess:"\
"KFDMemoryTest.DeviceHdpFlush"
# GFX10 family: BLACKLIST_ALL_ASICS + BLACKLIST_GFX10 (or the NV2X superset)
FILTER[navi10]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10:"\
"KFDMemoryTest.MMBench"
# Need to verify the following failed tests on another machine:
# Exceptions not being received during exception tests
# PerfCounters return HSAKMT_STATUS_INVALID_PARAMETER
# P2PBandwidth failing (wait times out) on node-to-multiple-nodes by [push, NONE]
FILTER[navi12]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10:"\
"KFDExceptionTest.*:"\
"KFDPerfCountersTest.*:"\
"KFDPerformanceTest.P2PBandWidthTest"
FILTER[navi14]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10"
FILTER[sienna_cichlid]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10_NV2X"
FILTER[navy_flounder]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10_NV2X"
FILTER[dimgrey_cavefish]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10_NV2X"
FILTER[beige_goby]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10_NV2X"
FILTER[yellow_carp]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10_NV2X"
# GFX11 family, keyed by gfx target name: BLACKLIST_ALL_ASICS + BLACKLIST_GFX11
FILTER[gfx1100]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX11"
# SWDEV-384028
FILTER[gfx1101]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX11:"\
"KFDExceptionTest.SdmaQueueException"
FILTER[gfx1102]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX11"
FILTER[gfx1103]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX11"
FILTER[gfx1150]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX11"
FILTER[gfx1151]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX11"
# gfx1036 reuses the GFX10 NV2X list
FILTER[gfx1036]=\
"$BLACKLIST_ALL_ASICS:"\
"$BLACKLIST_GFX10_NV2X"
# gfx94x: the three targets share the same exclusions.
# Every pattern except the last needs its trailing ':' separator; without it
# two adjacent patterns fuse into one name that matches no test.
FILTER[gfx940]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDMemoryTest.LargestSysBufferTest:"\
"KFDMemoryTest.BigSysBufferStressTest:"\
"KFDMemoryTest.FlatScratchAccess:"\
"KFDIPCTest.BasicTest:"\
"KFDQMTest.QueueLatency"
FILTER[gfx941]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDMemoryTest.LargestSysBufferTest:"\
"KFDMemoryTest.BigSysBufferStressTest:"\
"KFDMemoryTest.FlatScratchAccess:"\
"KFDIPCTest.BasicTest:"\
"KFDQMTest.QueueLatency"
FILTER[gfx942]=\
"$BLACKLIST_ALL_ASICS:"\
"KFDMemoryTest.LargestSysBufferTest:"\
"KFDMemoryTest.BigSysBufferStressTest:"\
"KFDMemoryTest.FlatScratchAccess:"\
"KFDIPCTest.BasicTest:"\
"KFDQMTest.QueueLatency"
+302
View File
@@ -0,0 +1,302 @@
#!/bin/bash
#
# Copyright (C) 2018 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
#
# See if we can find the SHARE/BIN dirs in their expected locations
#
# CWD is the directory containing this script. dirname is used instead of
# "${BASH_SOURCE%/*}" because the latter leaves the bare script name behind
# when the script is invoked without any '/' (e.g. "bash run_kfdtest.sh").
CWD="$(dirname "${BASH_SOURCE[0]}")"
# Probe the candidate directories in priority order and source the first
# kfdtest.exclude found. Empty candidates (KFDTEST_SHARE_DIR unset) are
# skipped so that /kfdtest.exclude is never probed or sourced by accident.
while read -r candidate; do
    if [ -n "$candidate" ] && [ -e "$candidate/kfdtest.exclude" ]; then
        source "$candidate/kfdtest.exclude"
        break
    fi
done <<EOF
$KFDTEST_SHARE_DIR
$CWD
$CWD/../share/kfdtest
/opt/rocm/share/kfdtest
EOF
# Keep these checks until automation starts using the package install
if [ -z "${FILTER[core]}" ]; then
    if [ -e "$CWD/../bin/kfdtest/kfdtest.exclude" ]; then
        source "$CWD/../bin/kfdtest/kfdtest.exclude"
    elif [ -e "$CWD/../../share/kfdtest.exclude" ]; then
        source "$CWD/../../share/kfdtest.exclude"
    fi
fi
# This filter will always exist if we sourced a valid kfdtest.exclude
if [ -z "${FILTER[core]}" ]; then
    echo "Unable to locate kfdtest.exclude."
    echo "Please set KFDTEST_SHARE_DIR or ensure that kfdtest.exclude is present inside $CWD, $CWD/../share/kfdtest or /opt/rocm/share/kfdtest"
    exit 1
fi
# Using "which" produces different results in different
# OSes so use command -v instead. It returns "" if the
# command isn't in the PATH
#
# Resolution order when kfdtest is not in PATH: $BIN_DIR if set, else the
# directory of this script if it contains kfdtest, else /opt/rocm/bin.
# dirname (rather than "${0%/*}") yields "." when the script was started
# without a '/' in its path, so the script-local lookup still works.
SCRIPT_DIR="$(dirname "$0")"
if [ -z "$(command -v kfdtest)" ]; then
    if [ -z "$BIN_DIR" ]; then
        if [ -e "$SCRIPT_DIR/kfdtest" ]; then
            BIN_DIR="$SCRIPT_DIR"
        else
            # The default location
            BIN_DIR="/opt/rocm/bin"
        fi
    fi
    if [ -e "$BIN_DIR/kfdtest" ]; then
        KFDTEST="$BIN_DIR/kfdtest"
    else
        echo "Unable to locate kfdtest."
        echo "Please set BIN_DIR, ensure that kfdtest is in $PATH, or ensure that kfdtest is present inside $SCRIPT_DIR or /opt/rocm/bin"
        exit 1
    fi
else
    KFDTEST="kfdtest"
fi
# Defaults for the command-line options parsed at the bottom of this script.
# NOTE(review): GTEST_ARGS is not reset here, so a value inherited from the
# environment stays in effect unless gtest arguments are given - confirm intended.
PLATFORM=""
GDB=""
NODE=""
FORCE_HIGH=""
RUN_IN_DOCKER=""
ADDITIONAL_EXCLUDE=""
# Print the command-line help text to stdout. Always returns 0.
# Long option descriptions are split over several quoted strings joined by
# backslash line continuations; keep that structure intact when editing.
printUsage() {
echo
echo "Usage: $(basename $0) [options ...] [gtest arguments]"
echo
echo "Options:"
echo " -p <platform> , --platform <platform> Only run tests that"\
"pass on the specified platform. Usually you"\
"don't need this option"
echo " -g , --gdb Run in debugger"
echo " -n <node(s)> , --node <node(s)> NodeId(s) to test. Takes a single integer, or a"\
"quoted, space-separated string as an argument"\
"(e.g. -n 1 OR -n \"1 2 3\")"\
"NOTE: Node numbers come from /sys/class/kfd/kfd/topology/nodes/#"
echo " -l , --list List available nodes"
echo " --high Force clocks to high for test execution"
echo " -d , --docker Run in docker container"
echo " -e <list> , --exclude <list> Additional tests to exclude, in addition to kfdtest.exclude."\
"Takes a colon-separated string as an argument"\
"(e.g. -e KFDEvictTest.*:KFDSVMEvictTest.*)"
echo " -h , --help Prints this help"
echo
echo "Gtest arguments will be forwarded to the app"
echo
echo "Valid platform options: core_sws, core, polaris10, vega10, vega20, pm, all, and so on"
echo "'all' option runs all tests"
return 0
}
# Build the gtest filter argument for the given platform.
# param - Platform: a key of the FILTER array, or "all" to run every test.
# Result is stored in the global gtestFilter (empty when nothing is filtered);
# nothing is printed on success. Exits with status 1 for an unknown platform.
#
# ADDITIONAL_EXCLUDE (from -e) is always added as an exclusion:
#  - if the platform filter already has a negative part (contains '-'), the
#    extra patterns are appended to it with ':';
#  - otherwise (positive-only filter such as "core", or "all" with no filter)
#    a negative part is started with '-'.
getFilter() {
    local platform=$1
    local filter=""
    case "$platform" in
        all ) ;;
        * )
            if [ -z "${FILTER[$platform]}" ]; then
                echo "Unsupported platform $platform. Exiting"
                exit 1
            fi
            filter="${FILTER[$platform]}"
            ;;
    esac
    if [ -n "$ADDITIONAL_EXCLUDE" ]; then
        case "$filter" in
            *-* ) filter="$filter:$ADDITIONAL_EXCLUDE" ;;
            * )   filter="$filter-$ADDITIONAL_EXCLUDE" ;;
        esac
    fi
    if [ -n "$filter" ]; then
        gtestFilter="--gtest_filter=$filter"
    else
        gtestFilter=""
    fi
}
TOPOLOGY_SYSFS_DIR=/sys/devices/virtual/kfd/kfd/topology/nodes
# Prints list of HSA Nodes. HSA Nodes are identified from sysfs KFD topology. The nodes
# should have valid SIMD count
#
# Output: the basenames of all node directories whose properties file reports
# a non-zero simd_count, each followed by a single space, on one line.
# The list is kept in a local variable so repeated calls in the same shell do
# not accumulate, and nodes without a readable simd_count are skipped.
getHsaNodes() {
    local hsaNodeList=""
    local nodeDir simdcount
    for nodeDir in $(find "$TOPOLOGY_SYSFS_DIR" -maxdepth 1 -mindepth 1 -type d); do
        simdcount=$(grep simd_count "$nodeDir/properties" | awk '{print $2}')
        if [ -n "$simdcount" ] && [ "$simdcount" != 0 ]; then
            hsaNodeList+="$(basename "$nodeDir") "
        fi
    done
    echo "$hsaNodeList"
}
# Print the platform name used to pick a FILTER entry for one node.
# param - Node ID (directory name under $TOPOLOGY_SYSFS_DIR)
# The sysfs "name" is printed as-is except for two cases:
#  - "raven" with no CPU cores but with SIMDs becomes "raven_dgpuFallback";
#  - "ip discovery" becomes "gfx<major><minor><step>", taken from
#    HSA_OVERRIDE_GFX_VERSION ("M.m.s") when set, otherwise from the decimal
#    gfx_target_version property (two digits per component, zero-padded to
#    six digits). Major is printed in decimal, minor and step in hex.
getNodeName() {
    local node=$1
    local propsFile=$TOPOLOGY_SYSFS_DIR/$node/properties
    local name
    name=$(cat $TOPOLOGY_SYSFS_DIR/$node/name)
    case "$name" in
        "raven" )
            local cpuCores simds
            cpuCores=$(grep cpu_cores_count $propsFile | awk '{print $2}')
            simds=$(grep simd_count $propsFile | awk '{print $2}')
            if [ "$cpuCores" -eq 0 ] && [ "$simds" -gt 0 ]; then
                name="raven_dgpuFallback"
            fi
            ;;
        "ip discovery" )
            if [ -z "$HSA_OVERRIDE_GFX_VERSION" ]; then
                local targetVersion
                targetVersion=$(grep gfx_target_version $propsFile | awk '{print $2}')
                # A five-digit value has a single-digit major; pad to six digits
                if [[ ${#targetVersion} = 5 ]]; then
                    targetVersion="0${targetVersion}"
                fi
                name="gfx$(printf "$targetVersion" | fold -w2 | awk 'BEGIN {FS="\n"; RS=""} {printf "%d%x%x", $1, $2, $3}')"
            else
                name="gfx$(echo "$HSA_OVERRIDE_GFX_VERSION" | awk 'BEGIN {FS="."; RS=""} {printf "%d%x%x", $1, $2, $3 }')"
            fi
            ;;
    esac
    echo "$name"
}
# Run KfdTest independently. Two global variables set by command-line
# will influence the tests as indicated below
# PLATFORM - If set all tests will run with this platform filter
# NODE - If set tests will be run only on this NODE, else it will be
# run on all available HSA Nodes
#
# Other globals read: RUN_IN_DOCKER, GTEST_ARGS, ADDITIONAL_EXCLUDE, GDB, KFDTEST.
# Configuration errors (docker inactive, -e combined with gtest arguments,
# no GPU found) terminate the script with a non-zero status.
# NOTE(review): the exit status of kfdtest itself is not propagated; the
# script's final status does not reflect test failures.
runKfdTest() {
    if [ "$RUN_IN_DOCKER" == "true" ]; then
        # Quoted so an empty reply from systemctl cannot break the test
        if [ "$(sudo systemctl is-active docker)" != "active" ]; then
            echo "docker isn't active, install and setup docker first!!!!"
            exit 1
        fi
        # NOTE(review): getPackageRoot is not defined in this script or in
        # kfdtest.exclude; it must be provided by the environment - confirm.
        PKG_ROOT="$(getPackageRoot)"
    fi
    if [ -n "$GTEST_ARGS" ] && [ -n "$ADDITIONAL_EXCLUDE" ]; then
        echo "Cannot use -e and --gtest_filter flags together"
        exit 1
    fi
    if [ "$NODE" == "" ]; then
        hsaNodes=$(getHsaNodes)
        if [ "$hsaNodes" == "" ]; then
            echo "No GPU found in the system."
            exit 1
        fi
    else
        hsaNodes=$NODE
    fi
    # $hsaNodes is deliberately unquoted: it is a space-separated node list
    for hsaNode in $hsaNodes; do
        nodeName=$(getNodeName "$hsaNode")
        if [ "$PLATFORM" != "" ] && [ "$PLATFORM" != "$nodeName" ]; then
            echo "WARNING: Actual ASIC $nodeName treated as $PLATFORM"
            nodeName="$PLATFORM"
        fi
        # Sets the global gtestFilter for this node's platform
        getFilter "$nodeName"
        if [ "$RUN_IN_DOCKER" == "true" ]; then
            if [ "$NODE" == "" ]; then
                DEVICE_NODE="/dev/dri"
            else
                # Render node number is derived as <node id> + 127
                RENDER_NODE=$(($hsaNode + 127))
                DEVICE_NODE="/dev/dri/renderD${RENDER_NODE}"
            fi
            echo "Starting testing node $hsaNode ($nodeName) in docker container"
            sudo docker run -it --name kfdtest_docker --user="jenkins" --network=host \
                --device=/dev/kfd --device=${DEVICE_NODE} --group-add video --cap-add=SYS_PTRACE \
                --security-opt seccomp=unconfined -v $PKG_ROOT:/home/jenkins/rocm \
                compute-artifactory.amd.com:5000/yuho/tianli-ubuntu1604-kfdtest:01 \
                /home/jenkins/rocm/utils/run_kfdtest.sh -n $hsaNode $gtestFilter $GTEST_ARGS
            if [ "$?" = "0" ]; then
                echo "Finished node $hsaNode ($nodeName) successfully in docker container"
            else
                echo "Testing failed for node $hsaNode ($nodeName) in docker container"
            fi
            sudo docker rm kfdtest_docker
        else
            echo ""
            echo "++++ Starting testing node $hsaNode ($nodeName) ++++"
            # $GDB, $gtestFilter and $GTEST_ARGS are left unquoted on purpose so
            # that empty values vanish and multi-word values split into arguments
            $GDB $KFDTEST "--node=$hsaNode" $gtestFilter $GTEST_ARGS
            echo "---- Finished testing node $hsaNode ($nodeName) ----"
        fi
    done
}
# Print how many HSA nodes getHsaNodes reports (i.e. nodes with SIMDs).
# Leaves the node list in the global array gNodes and the count in gpuCount.
getGPUCount() {
    # Word-split the space-separated node list straight into an array
    gNodes=( $(getHsaNodes) )
    gpuCount=${#gNodes[@]}
    echo "$gpuCount"
}
# Parse command-line options. The first argument that is not a recognised
# option ends option parsing; it and everything after it are forwarded to
# kfdtest through GTEST_ARGS.
while [ "$1" != "" ]; do
    case "$1" in
        -p | --platform )
            shift 1; PLATFORM=$1 ;;
        -g | --gdb )
            GDB="gdb --args" ;;
        -l | --list )
            # printGpuNodelist is not defined by this script or kfdtest.exclude;
            # use it if the environment provides it, otherwise list the nodes
            # with the helpers defined above.
            if declare -F printGpuNodelist > /dev/null; then
                printGpuNodelist
            else
                for listedNode in $(getHsaNodes); do
                    echo "Node $listedNode: $(getNodeName "$listedNode")"
                done
            fi
            exit 0 ;;
        -n | --node )
            shift 1; NODE=$1 ;;
        --high)
            FORCE_HIGH="true" ;;
        -d | --docker )
            RUN_IN_DOCKER="true" ;;
        -e | --exclude )
            shift 1; ADDITIONAL_EXCLUDE="$1" ;;
        -h | --help )
            printUsage; exit 0 ;;
        *)
            GTEST_ARGS=$@; break;;
    esac
    shift 1
done
# Locate rocm-smi (last symlink named rocm-smi under /opt/rocm*). A missing
# SMI is only reported, never fatal.
SMI="$(find /opt/rocm* -type l -name rocm-smi | tail -1)"
if [ "$FORCE_HIGH" == "true" ]; then
    if [ ! -e "$SMI" ]; then
        echo "Unable to set clocks to high, cannot find rocm-smi"
    else
        # Remember the current level so it can be restored after the run
        OLDPERF=$($SMI -p | awk '/Performance Level:/ {print $NF; exit}')
        if ! $SMI --setperflevel high &> /dev/null; then
            echo "SMI failed to set perf level"
            OLDPERF=""
        fi
    fi
fi
# Set HSA_DEBUG env to run KFDMemoryTest.PtraceAccessInvisibleVram
export HSA_DEBUG=1
runKfdTest
# OLDPERF is only set if FORCE_HIGH and SMI both exist
if [ -n "$OLDPERF" ]; then
    $SMI --setperflevel $OLDPERF &> /dev/null
fi
+52
View File
@@ -0,0 +1,52 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "AqlQueue.hpp"
#include "GoogleTestExtension.hpp"
// Default constructor: performs no work of its own.
AqlQueue::AqlQueue(void) {
}
// Destructor: performs no work of its own.
AqlQueue::~AqlQueue(void) {
}
// Returns the value currently stored at m_Resources.Queue_write_ptr.
unsigned int AqlQueue::Wptr() {
return *m_Resources.Queue_write_ptr;
}
// Returns the value currently stored at m_Resources.Queue_read_ptr.
unsigned int AqlQueue::Rptr() {
return *m_Resources.Queue_read_ptr;
}
// Read-pointer value expected once every submitted packet has been consumed;
// for this queue type that is simply the current write pointer.
unsigned int AqlQueue::RptrWhenConsumed() {
return Wptr();
}
// Publishes pending packets: stores m_pendingWptr into the queue write
// pointer, then writes that same value (read back via Wptr()) to the doorbell.
// The write pointer must be stored before the doorbell write.
void AqlQueue::SubmitPacket() {
// m_pending Wptr is in dwords
*m_Resources.Queue_write_ptr = m_pendingWptr;
*(m_Resources.Queue_DoorBell) = Wptr();
}
+50
View File
@@ -0,0 +1,50 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __KFD_AQL_QUEUE__H__
#define __KFD_AQL_QUEUE__H__
#include "BaseQueue.hpp"
// Queue specialisation of BaseQueue that reports PACKETTYPE_AQL packets and
// the HSA_QUEUE_COMPUTE_AQL queue type.
class AqlQueue : public BaseQueue {
public:
AqlQueue();
virtual ~AqlQueue();
// @brief Updates queue write pointer and sets the queue doorbell to the queue write pointer
virtual void SubmitPacket();
// @return Read pointer in dwords
virtual unsigned int Rptr();
// @return Write pointer in dwords
virtual unsigned int Wptr();
// @return Expected m_Resources.Queue_read_ptr when all packets are consumed
virtual unsigned int RptrWhenConsumed();
protected:
// @return Packet type accepted by this queue (always PACKETTYPE_AQL)
virtual PACKETTYPE PacketTypeSupported() { return PACKETTYPE_AQL; }
// @return Queue type of this queue (always HSA_QUEUE_COMPUTE_AQL)
virtual _HSA_QUEUE_TYPE GetQueueType() { return HSA_QUEUE_COMPUTE_AQL; }
};
#endif // __KFD_AQL_QUEUE__H__
+388
View File
@@ -0,0 +1,388 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2022, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
/**
* Self-contained assembler that uses the LLVM MC API to assemble AMDGCN
* instructions
*/
#include <llvm/Config/llvm-config.h>
#include <llvm/MC/MCAsmBackend.h>
#include <llvm/MC/MCAsmInfo.h>
#include <llvm/MC/MCCodeEmitter.h>
#include <llvm/MC/MCContext.h>
#include <llvm/MC/MCInstPrinter.h>
#include <llvm/MC/MCInstrInfo.h>
#include <llvm/MC/MCObjectFileInfo.h>
#include <llvm/MC/MCObjectWriter.h>
#include <llvm/MC/MCParser/AsmLexer.h>
#include <llvm/MC/MCParser/MCTargetAsmParser.h>
#include <llvm/MC/MCRegisterInfo.h>
#include <llvm/MC/MCStreamer.h>
#include <llvm/MC/MCSubtargetInfo.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Support/InitLLVM.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/SourceMgr.h>
#include <llvm/Support/TargetSelect.h>
#if LLVM_VERSION_MAJOR > 13
#include <llvm/MC/TargetRegistry.h>
#else
#include <llvm/Support/TargetRegistry.h>
#endif
#include <linux/elf.h>
#include "OSWrapper.hpp"
#include "Assemble.hpp"
using namespace llvm;
/**
 * Construct an assembler for the given GFX version
 *
 * Starts with an empty instruction stream, derives the target name (MCPU)
 * from Gfxv and initializes the LLVM AMDGPU components.
 *
 * @param Gfxv GFX version, decoded by SetTargetAsic()
 */
Assembler::Assembler(const uint32_t Gfxv) {
    // No assembled text yet
    TextSize = 0;
    TextData = nullptr;

    SetTargetAsic(Gfxv);
    LLVMInit();
}
/**
 * Release the stored instruction stream and shut LLVM down
 *
 * NOTE(review): llvm_shutdown() is called per instance, so this presumably
 * assumes a single Assembler is alive at a time - confirm with callers.
 */
Assembler::~Assembler() {
FlushText();
llvm_shutdown();
}
/**
 * @return Pointer to the stored instruction stream (TextData); nullptr when
 *         nothing is stored. The buffer is owned by the Assembler.
 */
const char* Assembler::GetInstrStream() {
return TextData;
}
/**
 * @return Size in bytes of the stored instruction stream (TextSize)
 */
const size_t Assembler::GetInstrStreamSize() {
return TextSize;
}
/**
 * Copy the stored instruction stream into a caller-provided buffer
 *
 * @param OutBuf Destination buffer
 * @param BufSize Capacity of OutBuf in bytes
 * @return 0 on success, -2 if the stream does not fit in BufSize
 */
int Assembler::CopyInstrStream(char* OutBuf, const size_t BufSize) {
    const bool Fits = (TextSize <= BufSize);
    if (!Fits)
        return -2;

    const char* const First = TextData;
    const char* const Last = First + TextSize;
    std::copy(First, Last, OutBuf);
    return 0;
}
/**
 * @return The target name string (MCPU) last written by SetTargetAsic()
 */
const char* Assembler::GetTargetAsic() {
return MCPU;
}
/**
 * Derive the target name (MCPU) from a packed GFX version
 *
 * Gfxv carries one byte per component: bits 23:16 major, 15:8 minor,
 * 7:0 stepping. The name is "gfx" followed by major and minor in decimal
 * and the stepping in hex, truncated to ASM_MCPU_LEN.
 *
 * @param Gfxv Packed GFX version
 */
void Assembler::SetTargetAsic(const uint32_t Gfxv) {
    const uint8_t Step = static_cast<uint8_t>(Gfxv);
    const uint8_t Minor = static_cast<uint8_t>(Gfxv >> 8);
    const uint8_t Major = static_cast<uint8_t>(Gfxv >> 16);

    snprintf(MCPU, ASM_MCPU_LEN, "gfx%d%d%x", Major, Minor, Step);
}
/**
 * Initialize the LLVM AMDGPU components this class relies on:
 * target info, target MC layer and assembly parser
 */
void Assembler::LLVMInit() {
LLVMInitializeAMDGPUTargetInfo();
LLVMInitializeAMDGPUTargetMC();
LLVMInitializeAMDGPUAsmParser();
}
/**
 * Drop the stored instruction stream
 *
 * Frees TextData (if any) and returns TextData/TextSize to their initial
 * empty state. Safe to call repeatedly.
 */
void Assembler::FlushText() {
    // delete[] on a null pointer is a no-op, so no guard is needed
    delete[] TextData;
    TextSize = 0;
    TextData = nullptr;
}
/**
 * Dump an ELF object as hex bytes to stdout (debug aid)
 *
 * Bytes are printed 16 per row; every row is prefixed with its starting
 * offset in hex followed by a tab.
 *
 * @param Data Raw bytes of the ELF object
 */
void Assembler::PrintELFHex(const std::string Data) {
    const size_t NumBytes = Data.length();

    outs() << "ASM Info: assembled ELF hex data (length " << NumBytes << "):\n";
    outs() << "0x00:\t";
    // Pos is the 1-based count of bytes printed so far
    for (size_t Pos = 1; Pos <= NumBytes; ++Pos) {
        const uint8_t Byte = static_cast<uint8_t>(Data[Pos - 1]);
        outs() << format_hex(Byte, 4);

        const bool RowFull = (Pos % 16 == 0);
        if (RowFull)
            outs() << "\n" << format_hex(Pos, 4) << ":\t";
        else
            outs() << " ";
    }
    outs() << "\n";
}
/**
 * Dump the stored instruction stream as hex bytes to stdout (debug aid)
 *
 * Bytes are printed 16 per row; every row is prefixed with its starting
 * offset in hex followed by a tab.
 */
void Assembler::PrintTextHex() {
    outs() << "ASM Info: assembled .text hex data (length " << TextSize << "):\n";
    outs() << "0x00:\t";
    // Pos is the 1-based count of bytes printed so far
    for (size_t Pos = 1; Pos <= TextSize; ++Pos) {
        const uint8_t Byte = static_cast<uint8_t>(TextData[Pos - 1]);
        outs() << format_hex(Byte, 4);

        const bool RowFull = (Pos % 16 == 0);
        if (RowFull)
            outs() << "\n" << format_hex(Pos, 4) << ":\t";
        else
            outs() << " ";
    }
    outs() << "\n";
}
/**
 * Locate the .text section of an in-memory 64-bit ELF object and copy its
 * contents into a freshly allocated TextData buffer (TextSize bytes).
 *
 * The section header table is walked in order and the first section whose
 * name is ".text" is taken. No size is passed in, so offsets read from the
 * ELF headers are trusted as-is.
 *
 * @param RawData Pointer to the start of the ELF object
 * @return 0 on success, -1 if RawData is null, the object is not ELFCLASS64,
 *         or no .text section exists
 */
int Assembler::ExtractELFText(const char* RawData) {
    const Elf64_Ehdr* const Header = reinterpret_cast<const Elf64_Ehdr*>(RawData);
    if (Header == nullptr) {
        outs() << "ASM Error: elf data is invalid or corrupted\n";
        return -1;
    }

    if (Header->e_ident[EI_CLASS] != ELFCLASS64) {
        outs() << "ASM Error: elf object must be of 64-bit type\n";
        return -1;
    }

    // Section header table, and the string table that holds section names.
    const Elf64_Shdr* const Sections =
        reinterpret_cast<const Elf64_Shdr*>(RawData + Header->e_shoff);
    const char* const SectionNames =
        static_cast<const char*>(RawData + Sections[Header->e_shstrndx].sh_offset);

    const unsigned SectionCount = Header->e_shnum;
    for (unsigned Idx = 0; Idx < SectionCount; Idx++) {
        const Elf64_Shdr& Section = Sections[Idx];
        if (std::string(SectionNames + Section.sh_name) != std::string(".text"))
            continue;

        // Found it: take a private copy of the section contents.
        TextSize = Section.sh_size;
        TextData = new char[TextSize];
        memcpy(TextData, RawData + Section.sh_offset, TextSize);
        return 0;
    }

    outs() << "ASM Error: couldn't locate .text section\n";
    return -1;
}
/**
 * Assemble a shader and copy the resulting instruction stream to OutBuf.
 *
 * The four-argument overload additionally takes a Gfxv value that selects
 * the target ASIC for that one call.
 *
 * @param AssemblySource Shader source represented as a raw C string
 * @param OutBuf Raw instruction stream output buffer
 * @param BufSize Size of OutBuf (defaults to PAGE_SIZE)
 * @return Non-zero result of RunAssemble() if assembly failed, otherwise
 *         the result of CopyInstrStream() (0 on success)
 */
int Assembler::RunAssembleBuf(const char* const AssemblySource, char* OutBuf,
                              const size_t BufSize) {
    const int AsmStatus = RunAssemble(AssemblySource);
    if (AsmStatus)
        return AsmStatus;

    return CopyInstrStream(OutBuf, BufSize);
}
int Assembler::RunAssembleBuf(const char* const AssemblySource, char* OutBuf,
const size_t BufSize, const uint32_t Gfxv) {
const char* defaultMCPU = GetTargetAsic();
SetTargetAsic(Gfxv);
int ret = RunAssemble(AssemblySource);
strncpy(MCPU, defaultMCPU, ASM_MCPU_LEN);
return ret ? ret : CopyInstrStream(OutBuf, BufSize);
}
/**
 * Assemble shader and fill member vars
 *
 * Runs the LLVM MC assembler over AssemblySource for the target named by
 * MCPU, captures the emitted object in an in-memory string and hands it to
 * ExtractELFText(), which fills TextData/TextSize. Any instruction stream
 * left over from a previous run is discarded first. Diagnostics are written
 * to outs().
 *
 * @param AssemblySource Shader source represented as a raw C string
 * @return 0 on success; -1 on any setup or parse failure; otherwise the
 *         value returned by ExtractELFText() when extraction fails
 */
int Assembler::RunAssemble(const char* const AssemblySource) {
// Ensure target ASIC has been set
if (!*MCPU) {
outs() << "ASM Error: target asic is uninitialized\n";
return -1;
}
// Delete TextData for any previous runs
FlushText();
#if 0
outs() << "ASM Info: running assembly for target: " << MCPU << "\n";
outs() << "ASM Info: source:\n";
outs() << AssemblySource << "\n";
#endif
// Initialize MCOptions and target triple
const MCTargetOptions MCOptions;
Triple TheTriple;
// NOTE(review): TheTriple is still default-constructed at this point; the
// lookup appears to be driven by ArchName - confirm against the
// TargetRegistry::lookupTarget documentation.
const Target* TheTarget =
TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
if (!TheTarget) {
outs() << Error;
return -1;
}
// Compose the triple from the arch/vendor/OS members, cache its string form
// in TripleName, then re-set TheTriple from the normalized string.
TheTriple.setArchName(ArchName);
TheTriple.setVendorName(VendorName);
TheTriple.setOSName(OSName);
TripleName = TheTriple.getTriple();
TheTriple.setTriple(Triple::normalize(TripleName));
// Create MemoryBuffer for assembly source
StringRef AssemblyRef(AssemblySource);
std::unique_ptr<MemoryBuffer> BufferPtr =
MemoryBuffer::getMemBuffer(AssemblyRef, "", false);
if (!BufferPtr->getBufferSize()) {
outs() << "ASM Error: assembly source is empty\n";
return -1;
}
// Instantiate SrcMgr and transfer BufferPtr ownership
SourceMgr SrcMgr;
SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc());
// Initialize MC interfaces and base class objects
std::unique_ptr<const MCRegisterInfo> MRI(
TheTarget->createMCRegInfo(TripleName));
if (!MRI) {
outs() << "ASM Error: no register info for target " << MCPU << "\n";
return -1;
}
// createMCAsmInfo takes the extra MCOptions argument on LLVM 10 and newer.
#if LLVM_VERSION_MAJOR > 9
std::unique_ptr<const MCAsmInfo> MAI(
TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
#else
std::unique_ptr<const MCAsmInfo> MAI(
TheTarget->createMCAsmInfo(*MRI, TripleName));
#endif
if (!MAI) {
outs() << "ASM Error: no assembly info for target " << MCPU << "\n";
return -1;
}
std::unique_ptr<MCInstrInfo> MCII(
TheTarget->createMCInstrInfo());
if (!MCII) {
outs() << "ASM Error: no instruction info for target " << MCPU << "\n";
return -1;
}
// The subtarget is created for MCPU with an empty feature string; an MCPU
// name the subtarget does not recognize is rejected here.
std::unique_ptr<MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MCPU, std::string()));
if (!STI || !STI->isCPUStringValid(MCPU)) {
outs() << "ASM Error: no subtarget info for target " << MCPU << "\n";
return -1;
}
// Set up the MCContext for creating symbols and MCExpr's
// (the constructor signature differs between LLVM <= 12 and newer releases)
#if LLVM_VERSION_MAJOR > 12
MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr, &MCOptions);
#else
MCObjectFileInfo MOFI;
MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr, &MCOptions);
MOFI.InitMCObjectFileInfo(TheTriple, true, Ctx);
#endif
// Finalize setup for output object code stream
// Object bytes flow: streamer -> BOS (buffer_ostream) -> DataStream -> Data.
std::string Data;
std::unique_ptr<raw_string_ostream> DataStream(std::make_unique<raw_string_ostream>(Data));
std::unique_ptr<buffer_ostream> BOS(std::make_unique<buffer_ostream>(*DataStream));
raw_pwrite_stream* OS = BOS.get();
#if LLVM_VERSION_MAJOR > 14
MCCodeEmitter* CE = TheTarget->createMCCodeEmitter(*MCII, Ctx);
#else
MCCodeEmitter* CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
#endif
MCAsmBackend* MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions);
// CE and MAB are raw pointers only until the call below, where each one is
// wrapped in a std::unique_ptr temporary passed to createMCObjectStreamer.
std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
TheTriple, Ctx,
std::unique_ptr<MCAsmBackend>(MAB), MAB->createObjectWriter(*OS),
std::unique_ptr<MCCodeEmitter>(CE), *STI, MCOptions.MCRelaxAll,
MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ false));
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, Ctx, *Streamer, *MAI));
// Set parser to target parser and run
std::unique_ptr<MCTargetAsmParser> TAP(
TheTarget->createMCAsmParser(*STI, *Parser, *MCII, MCOptions));
if (!TAP) {
outs() << "ASM Error: no assembly parsing support for target " << MCPU << "\n";
return -1;
}
Parser->setTargetParser(*TAP);
// A true return from Run() is treated as a parse failure.
if (Parser->Run(true)) {
outs() << "ASM Error: assembly parser failed\n";
return -1;
}
// Destroy the buffered stream and flush the string stream before Data is
// read - presumably this is what pushes the buffered object bytes into
// Data; confirm against the buffer_ostream documentation.
BOS.reset();
DataStream->flush();
int ret = ExtractELFText(Data.data());
if (ret < 0 || !TextData) {
outs() << "ASM Error: .text extraction failed\n";
return ret;
}
#if 0
PrintELFHex(Data);
PrintTextHex();
#endif
return 0;
}
+88
View File
@@ -0,0 +1,88 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2022, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _ASSEMBLE_H_
#define _ASSEMBLE_H_
#include "OSWrapper.hpp"
#define ASM_MCPU_LEN 16
/**
 * In-process shader assembler built on the LLVM MC layer.
 *
 * An instance is bound to a target ASIC at construction, assembles shader
 * source with RunAssemble()/RunAssembleBuf(), and keeps the raw instruction
 * stream of the most recent successful run in TextData/TextSize.
 *
 * NOTE(review): the class owns TextData through new[]/delete[] but declares
 * no copy constructor or copy assignment; copying an instance would
 * presumably make two objects delete the same buffer - confirm no caller
 * copies Assembler objects.
 */
class Assembler {
private:
// Components of the target triple composed in RunAssemble().
const char* ArchName = "amdgcn";
const char* VendorName = "amd";
const char* OSName = "amdhsa";
// Target name string (written by SetTargetAsic(), at most ASM_MCPU_LEN bytes).
char MCPU[ASM_MCPU_LEN];
// Triple string and target-lookup error text, both filled by RunAssemble().
std::string TripleName;
std::string Error;
// Raw instruction stream copied out of the .text section, and its size in
// bytes; nullptr/0 when nothing is stored.
char* TextData;
size_t TextSize;
// Derives MCPU from a packed GFX version.
void SetTargetAsic(const uint32_t Gfxv);
// Registers the LLVM AMDGPU target components.
void LLVMInit();
// Frees TextData and resets TextData/TextSize.
void FlushText();
// Debug dump of an ELF object as hex.
void PrintELFHex(const std::string Data);
// Copies the .text section of an ELF object into TextData/TextSize.
int ExtractELFText(const char* RawData);
public:
Assembler(const uint32_t Gfxv);
~Assembler();
// Debug dump of the stored instruction stream as hex.
void PrintTextHex();
// Accessors; the returned pointers refer to storage owned by this object.
const char* GetTargetAsic();
const char* GetInstrStream();
const size_t GetInstrStreamSize();
// Copies the stored stream to OutBuf; returns -2 if BufSize is too small.
// PAGE_SIZE is not defined in this header - presumably it comes from
// OSWrapper.hpp; confirm.
int CopyInstrStream(char* OutBuf, const size_t BufSize = PAGE_SIZE);
// Assembles AssemblySource; returns 0 on success.
int RunAssemble(const char* const AssemblySource);
// Assembles and then copies the stream to OutBuf. The overload taking Gfxv
// targets that ASIC for the one call.
int RunAssembleBuf(const char* const AssemblySource, char* OutBuf,
const size_t BufSize = PAGE_SIZE);
int RunAssembleBuf(const char* const AssemblySource, char* OutBuf,
const size_t BufSize, const uint32_t Gfxv);
};
#endif // _ASSEMBLE_H_
+311
View File
@@ -0,0 +1,311 @@
/*
* Copyright (C) 2023 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "BaseDebug.hpp"
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <hsakmt/linux/kfd_ioctl.h>
#include <fcntl.h>
#include "unistd.h"
/*
 * Start in the detached state.
 *
 * m_Pid must be zero until Attach() succeeds because the destructor uses it
 * to decide whether the polling FIFO has to be closed and unlinked. The
 * pollfd is given a negative descriptor so that a poll() issued before
 * Attach() ignores the entry instead of polling an arbitrary descriptor.
 */
BaseDebug::BaseDebug(void) : m_Pid(0) {
    m_Fd.fd = -1;
    m_Fd.events = 0;
    m_Fd.revents = 0;
}
/*
 * Tear down the polling FIFO if a process is still attached.
 *
 * Only the local file descriptor and FIFO path are cleaned up here; on
 * process termination the KFD automatically disables processes that are
 * still runtime enabled and debug enabled, so no detach ioctl is issued.
 */
BaseDebug::~BaseDebug(void) {
    if (!m_Pid)
        return;

    close(m_Fd.fd);
    unlink(m_Fd_Name);
}
/*
 * Create the event FIFO, open it for polling and debug-attach to a process.
 *
 * @param rInfo           Buffer whose address is passed to the KFD as rinfo_ptr
 * @param rInfoSize       Size of the rInfo buffer in bytes
 * @param pid             Process to attach to
 * @param exceptionEnable Exception mask passed with the enable request
 * @return HSAKMT_STATUS_SUCCESS when attached; HSAKMT_STATUS_ERROR if the
 *         FIFO could not be opened or the enable ioctl failed. On failure
 *         the FIFO is unlinked and the object stays detached.
 */
HSAKMT_STATUS BaseDebug::Attach(struct kfd_runtime_info *rInfo,
                                int rInfoSize,
                                unsigned int pid,
                                uint64_t exceptionEnable) {
    struct kfd_ioctl_dbg_trap_args args;

    memset(&args, 0x00, sizeof(args));

    // The mkfifo() result is deliberately not checked: a FIFO left over at
    // this path is still usable, and any real failure surfaces in open().
    mkfifo(m_Fd_Name, 0666);
    m_Fd.fd = open(m_Fd_Name, O_CLOEXEC | O_NONBLOCK | O_RDWR);
    if (m_Fd.fd < 0) {
        // Never hand an invalid descriptor to the KFD.
        unlink(m_Fd_Name);
        return HSAKMT_STATUS_ERROR;
    }
    m_Fd.events = POLLIN | POLLRDNORM;

    args.pid = pid;
    args.op = KFD_IOC_DBG_TRAP_ENABLE;
    args.enable.rinfo_ptr = (uint64_t)rInfo;
    args.enable.rinfo_size = rInfoSize;
    args.enable.dbg_fd = m_Fd.fd;
    args.enable.exception_mask = exceptionEnable;

    if (hsaKmtDebugTrapIoctl(&args, NULL, NULL)) {
        close(m_Fd.fd);
        unlink(m_Fd_Name);
        return HSAKMT_STATUS_ERROR;
    }

    // Recording the pid marks the object as attached.
    m_Pid = pid;

    return HSAKMT_STATUS_SUCCESS;
}
void BaseDebug::Detach(void) {
struct kfd_ioctl_dbg_trap_args args = {0};
memset(&args, 0x00, sizeof(args));
args.pid = m_Pid;
args.op = KFD_IOC_DBG_TRAP_DISABLE;
hsaKmtDebugTrapIoctl(&args, NULL, NULL);
close(m_Fd.fd);
unlink(m_Fd_Name);
m_Pid = 0;
m_Fd.fd = 0;
m_Fd.events = 0;
}
/*
 * Issue the SEND_RUNTIME_EVENT debug-trap operation for the attached process.
 *
 * @param exceptions Exception mask to send
 * @param gpuId      GPU id placed in the request
 * @param queueId    Queue id placed in the request
 * @return Status of the debug-trap ioctl
 */
HSAKMT_STATUS BaseDebug::SendRuntimeEvent(uint64_t exceptions, int gpuId, int queueId)
{
    struct kfd_ioctl_dbg_trap_args trapArgs;

    memset(&trapArgs, 0x00, sizeof(trapArgs));
    trapArgs.op = KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT;
    trapArgs.pid = m_Pid;
    trapArgs.send_runtime_event.queue_id = queueId;
    trapArgs.send_runtime_event.gpu_id = gpuId;
    trapArgs.send_runtime_event.exception_mask = exceptions;

    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&trapArgs, NULL, NULL);
    return status;
}
/*
 * Wait for a debug event notification on the polling FIFO, then query it.
 *
 * @param exceptions  In: exception mask passed with the query.
 *                    Out: exception mask reported back. Must not be NULL.
 * @param gpuId       Optional out: GPU id reported by the query
 * @param queueId     Optional out: queue id reported by the query
 * @param timeoutMsec poll() timeout in milliseconds
 * @return HSAKMT_STATUS_ERROR if poll() timed out or failed, otherwise the
 *         status of the query ioctl
 */
HSAKMT_STATUS BaseDebug::QueryDebugEvent(uint64_t *exceptions,
                                         uint32_t *gpuId, uint32_t *queueId,
                                         int timeoutMsec)
{
    struct kfd_ioctl_dbg_trap_args args;
    HSAKMT_STATUS result;

    // Exactly one descriptor is polled, so a positive result is always 1.
    if (poll(&m_Fd, 1, timeoutMsec) <= 0)
        return HSAKMT_STATUS_ERROR;

    // Consume one notification byte. A fixed one-byte buffer replaces the
    // former variable-length array, which is not standard C++. The read is
    // best effort: its result is intentionally ignored, as before.
    char notification;
    const ssize_t consumed = read(m_Fd.fd, &notification, sizeof(notification));
    (void)consumed;

    memset(&args, 0x00, sizeof(args));
    args.pid = m_Pid;
    args.op = KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT;
    args.query_debug_event.exception_mask = *exceptions;

    result = hsaKmtDebugTrapIoctl(&args, NULL, NULL);

    *exceptions = args.query_debug_event.exception_mask;

    if (gpuId)
        *gpuId = args.query_debug_event.gpu_id;

    if (queueId)
        *queueId = args.query_debug_event.queue_id;

    return result;
}
void BaseDebug::SetExceptionsEnabled(uint64_t exceptions)
{
struct kfd_ioctl_dbg_trap_args args = {0};
memset(&args, 0x00, sizeof(args));
args.pid = m_Pid;
args.op = KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED;
args.set_exceptions_enabled.exception_mask = exceptions;
hsaKmtDebugTrapIoctl(&args, NULL, NULL);
}
/*
 * Issue the SUSPEND_QUEUES debug-trap operation for the attached process.
 *
 * @param numQueues         In: number of queues in the request.
 *                          Out: low 32 bits of the value the ioctl wrapper
 *                          leaves in its 64-bit return slot.
 * @param queues            Queue handles forwarded to hsaKmtDebugTrapIoctl()
 * @param queueIds          Array whose address is passed as queue_array_ptr
 * @param exceptionsToClear Exception mask placed in the request
 * @return Status of the debug-trap ioctl
 */
HSAKMT_STATUS BaseDebug::SuspendQueues(unsigned int *numQueues,
                                       HSA_QUEUEID *queues,
                                       uint32_t *queueIds,
                                       uint64_t exceptionsToClear)
{
    struct kfd_ioctl_dbg_trap_args args;

    memset(&args, 0x00, sizeof(args));
    args.pid = m_Pid;
    args.op = KFD_IOC_DBG_TRAP_SUSPEND_QUEUES;
    args.suspend_queues.num_queues = *numQueues;
    args.suspend_queues.queue_array_ptr = (uint64_t)queueIds;
    args.suspend_queues.exception_mask = exceptionsToClear;

    // The wrapper's last parameter is an HSAuint64*. Casting the caller's
    // 32-bit counter to that type would let the callee access 8 bytes of a
    // 4-byte object, so a properly sized temporary is used and the result
    // narrowed back afterwards.
    HSAuint64 debugReturn = *numQueues;
    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&args, queues, &debugReturn);
    *numQueues = static_cast<unsigned int>(debugReturn);

    return status;
}
/*
 * Issue the RESUME_QUEUES debug-trap operation for the attached process.
 *
 * @param numQueues In: number of queues in the request.
 *                  Out: low 32 bits of the value the ioctl wrapper leaves
 *                  in its 64-bit return slot.
 * @param queues    Queue handles forwarded to hsaKmtDebugTrapIoctl()
 * @param queueIds  Array whose address is passed as queue_array_ptr
 * @return Status of the debug-trap ioctl
 */
HSAKMT_STATUS BaseDebug::ResumeQueues(unsigned int *numQueues,
                                      HSA_QUEUEID *queues,
                                      uint32_t *queueIds)
{
    struct kfd_ioctl_dbg_trap_args args;

    memset(&args, 0x00, sizeof(args));
    args.pid = m_Pid;
    args.op = KFD_IOC_DBG_TRAP_RESUME_QUEUES;
    args.resume_queues.num_queues = *numQueues;
    args.resume_queues.queue_array_ptr = (uint64_t)queueIds;

    // The wrapper's last parameter is an HSAuint64*. Casting the caller's
    // 32-bit counter to that type would let the callee access 8 bytes of a
    // 4-byte object, so a properly sized temporary is used and the result
    // narrowed back afterwards.
    HSAuint64 debugReturn = *numQueues;
    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&args, queues, &debugReturn);
    *numQueues = static_cast<unsigned int>(debugReturn);

    return status;
}
/*
 * Issue the GET_QUEUE_SNAPSHOT debug-trap operation for the attached process.
 *
 * @param exceptionsToClear Exception mask placed in the request
 * @param snapshotBufAddr   Address of the snapshot buffer, passed as
 *                          snapshot_buf_ptr; entries are sized as
 *                          struct kfd_queue_snapshot_entry
 * @param numSnapshots      In: num_queues value for the request.
 *                          Out: num_queues value after the ioctl. Must not
 *                          be NULL.
 * @return Status of the debug-trap ioctl
 */
HSAKMT_STATUS BaseDebug::QueueSnapshot(uint64_t exceptionsToClear,
                                       uint64_t snapshotBufAddr,
                                       uint32_t *numSnapshots)
{
    struct kfd_ioctl_dbg_trap_args trapArgs;

    memset(&trapArgs, 0x00, sizeof(trapArgs));
    trapArgs.op = KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT;
    trapArgs.pid = m_Pid;
    trapArgs.queue_snapshot.entry_size = sizeof(struct kfd_queue_snapshot_entry);
    trapArgs.queue_snapshot.num_queues = *numSnapshots;
    trapArgs.queue_snapshot.snapshot_buf_ptr = snapshotBufAddr;
    trapArgs.queue_snapshot.exception_mask = exceptionsToClear;

    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&trapArgs, NULL, NULL);

    // Report the count back regardless of the ioctl status.
    *numSnapshots = trapArgs.queue_snapshot.num_queues;

    return status;
}
/*
 * Issue the GET_DEVICE_SNAPSHOT debug-trap operation for the attached process.
 *
 * @param exceptionsToClear Exception mask placed in the request
 * @param snapshotBufAddr   Address of the snapshot buffer, passed as
 *                          snapshot_buf_ptr; entries are sized as
 *                          struct kfd_dbg_device_info_entry
 * @param numSnapshots      In: num_devices value for the request.
 *                          Out: num_devices value after the ioctl. Must not
 *                          be NULL.
 * @return Status of the debug-trap ioctl
 */
HSAKMT_STATUS BaseDebug::DeviceSnapshot(uint64_t exceptionsToClear,
                                        uint64_t snapshotBufAddr,
                                        uint32_t *numSnapshots)
{
    struct kfd_ioctl_dbg_trap_args trapArgs;

    memset(&trapArgs, 0x00, sizeof(trapArgs));
    trapArgs.op = KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT;
    trapArgs.pid = m_Pid;
    trapArgs.device_snapshot.entry_size = sizeof(struct kfd_dbg_device_info_entry);
    trapArgs.device_snapshot.num_devices = *numSnapshots;
    trapArgs.device_snapshot.snapshot_buf_ptr = snapshotBufAddr;
    trapArgs.device_snapshot.exception_mask = exceptionsToClear;

    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&trapArgs, NULL, NULL);

    // Report the count back regardless of the ioctl status.
    *numSnapshots = trapArgs.device_snapshot.num_devices;

    return status;
}
/*
 * Issue the SET_WAVE_LAUNCH_OVERRIDE debug-trap operation for the attached
 * process.
 *
 * @param mode        Value placed in override_mode
 * @param enableMask  In: enable_mask for the request.
 *                    Out: enable_mask after the ioctl. Must not be NULL.
 * @param supportMask In: support_request_mask for the request.
 *                    Out: support_request_mask after the ioctl. Must not be
 *                    NULL.
 * @return Status of the debug-trap ioctl
 */
HSAKMT_STATUS BaseDebug::SetWaveLaunchOverride(int mode,
                                               uint32_t *enableMask,
                                               uint32_t *supportMask)
{
    struct kfd_ioctl_dbg_trap_args trapArgs;

    memset(&trapArgs, 0x00, sizeof(trapArgs));
    trapArgs.op = KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE;
    trapArgs.pid = m_Pid;
    trapArgs.launch_override.support_request_mask = *supportMask;
    trapArgs.launch_override.enable_mask = *enableMask;
    trapArgs.launch_override.override_mode = mode;

    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&trapArgs, NULL, NULL);

    // Both masks are written back regardless of the ioctl status.
    *enableMask = trapArgs.launch_override.enable_mask;
    *supportMask = trapArgs.launch_override.support_request_mask;

    return status;
}
/*
 * Issue the SET_NODE_ADDRESS_WATCH debug-trap operation for the attached
 * process.
 *
 * @param address Value placed in the request's address field
 * @param mode    Value placed in the request's mode field
 * @param mask    Value placed in the request's mask field
 * @param gpuId   GPU id placed in the request
 * @param id      Out: id field of the request after the ioctl. Must not be
 *                NULL.
 * @return Status of the debug-trap ioctl
 */
HSAKMT_STATUS BaseDebug::SetAddressWatch(uint64_t address,
                                         int mode,
                                         uint64_t mask,
                                         uint32_t gpuId,
                                         uint32_t *id)
{
    struct kfd_ioctl_dbg_trap_args trapArgs = {};

    trapArgs.op = KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH;
    trapArgs.pid = m_Pid;
    trapArgs.set_node_address_watch.gpu_id = gpuId;
    trapArgs.set_node_address_watch.mask = mask;
    trapArgs.set_node_address_watch.mode = mode;
    trapArgs.set_node_address_watch.address = address;

    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&trapArgs, NULL, NULL);

    // The id is written back regardless of the ioctl status.
    *id = trapArgs.set_node_address_watch.id;

    return status;
}
/*
 * Issue the CLEAR_NODE_ADDRESS_WATCH debug-trap operation for the attached
 * process.
 *
 * @param gpuId GPU id placed in the request
 * @param id    Watch id placed in the request
 * @return Status of the debug-trap ioctl
 */
HSAKMT_STATUS BaseDebug::ClearAddressWatch(uint32_t gpuId,
                                           uint32_t id)
{
    struct kfd_ioctl_dbg_trap_args trapArgs = {};

    trapArgs.op = KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH;
    trapArgs.pid = m_Pid;
    trapArgs.clear_node_address_watch.id = id;
    trapArgs.clear_node_address_watch.gpu_id = gpuId;

    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&trapArgs, NULL, NULL);
    return status;
}
/*
 * Issue the SET_FLAGS debug-trap operation for the attached process.
 *
 * @param flags In: flags value for the request.
 *              Out: flags field of the request after the ioctl. Must not be
 *              NULL.
 * @return Status of the debug-trap ioctl
 */
HSAKMT_STATUS BaseDebug::SetFlags(uint32_t *flags)
{
    struct kfd_ioctl_dbg_trap_args trapArgs = {};

    trapArgs.op = KFD_IOC_DBG_TRAP_SET_FLAGS;
    trapArgs.pid = m_Pid;
    trapArgs.set_flags.flags = *flags;

    const HSAKMT_STATUS status = hsaKmtDebugTrapIoctl(&trapArgs, NULL, NULL);

    // The flags are written back regardless of the ioctl status.
    *flags = trapArgs.set_flags.flags;

    return status;
}
+66
View File
@@ -0,0 +1,66 @@
/*
* Copyright (C) 2023 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __KFD_BASE_DEBUG__H__
#define __KFD_BASE_DEBUG__H__
#include "hsakmt/hsakmt.h"
#include <poll.h>
#include <stdlib.h>
// @class BaseDebug
// Thin wrapper around the KFD debug-trap ioctl (hsaKmtDebugTrapIoctl) for a
// single target process. Each method fills a kfd_ioctl_dbg_trap_args request
// for one operation and issues it; debug event notifications are awaited by
// polling a FIFO that Attach() creates.
class BaseDebug {
public:
BaseDebug(void);
virtual ~BaseDebug(void);
// Creates and opens the FIFO, then issues the debug enable request for pid.
HSAKMT_STATUS Attach(struct kfd_runtime_info *rInfo,
int rInfoSize,
unsigned int pid,
uint64_t exceptionEnable);
// Issues the debug disable request and releases the FIFO.
void Detach(void);
HSAKMT_STATUS SendRuntimeEvent(uint64_t exceptions, int gpuId, int queueId);
// Polls the FIFO for up to timeoutMsec, then queries the pending event.
// exceptions is in/out; gpuId and queueId are optional outputs.
HSAKMT_STATUS QueryDebugEvent(uint64_t *exceptions,
uint32_t *gpuId, uint32_t *queueId,
int timeoutMsec);
void SetExceptionsEnabled(uint64_t exceptions);
// numQueues is in/out for both suspend and resume.
HSAKMT_STATUS SuspendQueues(unsigned int *numQueues, HSA_QUEUEID *queues, uint32_t *queueIds,
uint64_t exceptionsToClear);
HSAKMT_STATUS ResumeQueues(unsigned int *numQueues, HSA_QUEUEID *queues, uint32_t *queueIds);
// numSnapshots is in/out for both snapshot calls.
HSAKMT_STATUS QueueSnapshot(uint64_t exceptionsToClear, uint64_t snapshotBufAddr,
uint32_t *numSnapshots);
HSAKMT_STATUS DeviceSnapshot(uint64_t exceptionsToClear, uint64_t snapshotBuffAddr,
uint32_t *numSnapshots);
// enableMask and supportMask are in/out.
HSAKMT_STATUS SetWaveLaunchOverride(int mode, uint32_t *enableMask, uint32_t *supportMask);
// id receives the watch id from the request after the ioctl.
HSAKMT_STATUS SetAddressWatch(uint64_t address, int mode, uint64_t mask, uint32_t gpuId, uint32_t *id);
HSAKMT_STATUS ClearAddressWatch(uint32_t gpuId, uint32_t id);
// flags is in/out.
HSAKMT_STATUS SetFlags(uint32_t *flags);
private:
// Pid of the attached process; set by Attach(), cleared to 0 by Detach().
// The destructor treats a non-zero value as "still attached".
unsigned int m_Pid;
// Poll descriptor for the event FIFO.
struct pollfd m_Fd;
// Path of the event FIFO.
// NOTE(review): the path is fixed, so two BaseDebug objects attached at the
// same time would presumably share one FIFO - confirm this is acceptable.
const char *m_Fd_Name = "/tmp/dbg_fifo";
};
#endif // __KFD_BASE_DEBUG__H__
@@ -0,0 +1,60 @@
/*
* Copyright (C) 2017-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "BasePacket.hpp"
#include "KFDTestUtil.hpp"
#include "KFDBaseComponentTest.hpp"
// Start with no packet allocation and cache the family id of the default
// node as reported by the global base test object.
BasePacket::BasePacket(void)
    : m_FamilyId(g_baseTest->GetFamilyIdFromDefaultNode()),
      m_packetAllocation(NULL) {
}
// Release the buffer obtained through AllocPacket(), if any.
BasePacket::~BasePacket(void) {
    // free() accepts a null pointer and does nothing in that case.
    free(m_packetAllocation);
}
void BasePacket::Dump() const {
unsigned int size = SizeInDWords();
const HSAuint32 *packet = (const HSAuint32 *)GetPacket();
std::ostream &log = LOG();
unsigned int i;
log << "Packet dump:" << std::hex;
for (i = 0; i < size; i++)
log << " " << std::setw(8) << std::setfill('0') << packet[i];
log << std::endl;
}
// Allocate a zero-filled buffer of SizeInBytes() bytes for the packet and
// remember it in m_packetAllocation. A zero size or a failed allocation is
// reported as a test expectation failure.
//
// @returns The new buffer, or NULL when the packet size is zero or the
//          allocation failed
void *BasePacket::AllocPacket(void) {
    unsigned int byteCount = SizeInBytes();

    EXPECT_NE(0, byteCount);
    if (byteCount == 0)
        return NULL;

    m_packetAllocation = calloc(1, byteCount);
    EXPECT_NOTNULL(m_packetAllocation);

    return m_packetAllocation;
}
@@ -0,0 +1,61 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __KFD_BASE_PACKET__H__
#define __KFD_BASE_PACKET__H__
/**
 * All packet profiles must be defined here.
 * Every type defined here has sub-types.
 *
 * BaseQueue::PlacePacket() compares a packet's type against the type the
 * queue supports before copying the packet into the queue.
 */
enum PACKETTYPE {
PACKETTYPE_PM4,
PACKETTYPE_SDMA,
PACKETTYPE_AQL
};
// @class BasePacket
// Abstract base for the packets that tests place into queues. Derived
// classes supply the packet type, a pointer to the packet data and its size.
class BasePacket {
public:
BasePacket(void);
virtual ~BasePacket(void);
// @returns Packet type
virtual PACKETTYPE PacketType() const = 0;
// @returns Pointer to the packet
virtual const void *GetPacket() const = 0;
// @returns Packet size in bytes
virtual unsigned int SizeInBytes() const = 0;
// @returns Packet size in dwords (SizeInBytes() divided by sizeof(unsigned int))
unsigned int SizeInDWords() const { return SizeInBytes()/sizeof(unsigned int); }
// Logs the packet contents as hexadecimal dwords
void Dump() const;
protected:
// Family id of the default node, read at construction
unsigned int m_FamilyId;
// Buffer returned by AllocPacket(); freed by the destructor
void *m_packetAllocation;
// Allocates a zeroed buffer of SizeInBytes() bytes into m_packetAllocation
void *AllocPacket(void);
};
#endif // __KFD_BASE_PACKET__H__
+206
View File
@@ -0,0 +1,206 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "BaseQueue.hpp"
#include "SDMAQueue.hpp"
#include "PM4Queue.hpp"
#include "AqlQueue.hpp"
#include "hsakmt/hsakmt.h"
#include "KFDBaseComponentTest.hpp"
// Construct a queue object without a hardware queue: no queue buffer exists
// until Create() is called. Only m_QueueBuf and m_SkipWaitConsumption are
// initialized here; the pending write pointers, node id and family id are
// assigned by Create().
BaseQueue::BaseQueue()
:m_QueueBuf(NULL),
m_SkipWaitConsumption(true) {
}
// Destroy the queue (if one was created) when the object goes away. The
// status returned by Destroy() is not checked here.
BaseQueue::~BaseQueue(void) {
Destroy();
}
/**
 * Allocate the queue buffer and create the queue on the given node.
 *
 * If this object already owns a queue it is destroyed first (one queue per
 * object). On failure of hsaKmtCreateQueue() the freshly allocated buffer is
 * released again, so the object is left without a queue and a later
 * Destroy() is a no-op.
 *
 * @param NodeId   Node on which the queue is created
 * @param size     Size of the queue buffer in bytes
 * @param pointers Used only for AQL queues: pointers[0] and pointers[1]
 *                 become the read and write pointer locations. Must not be
 *                 NULL for AQL queues.
 * @return Status of hsaKmtCreateQueue(), or HSAKMT_STATUS_ERROR if the queue
 *         was created but reported a NULL read or write pointer
 */
HSAKMT_STATUS BaseQueue::Create(unsigned int NodeId, unsigned int size, HSAuint64 *pointers) {
    HSAKMT_STATUS status;
    HSA_QUEUE_TYPE type = GetQueueType();

    if (m_QueueBuf != NULL) {
        // Queue already exists, one queue per object
        Destroy();
    }

    memset(&m_Resources, 0, sizeof(m_Resources));

    m_QueueBuf = new HsaMemoryBuffer(size, NodeId, true/*zero*/, false/*local*/, true/*exec*/,
                                     /*isScratch */ false, /* isReadOnly */false, /* isUncached */true);

    if (type == HSA_QUEUE_COMPUTE_AQL) {
        m_Resources.Queue_read_ptr_aql = &pointers[0];
        m_Resources.Queue_write_ptr_aql = &pointers[1];
    }

    status = hsaKmtCreateQueue(NodeId,
                               type,
                               DEFAULT_QUEUE_PERCENTAGE,
                               DEFAULT_PRIORITY,
                               m_QueueBuf->As<unsigned int*>(),
                               m_QueueBuf->Size(),
                               NULL,
                               &m_Resources);

    if (status != HSAKMT_STATUS_SUCCESS) {
        // No queue was created, so nothing must be left behind that makes
        // this object look like it owns one: otherwise Destroy() would call
        // hsaKmtDestroyQueue() on the zeroed queue id and, if that fails,
        // never free the buffer.
        delete m_QueueBuf;
        m_QueueBuf = NULL;
        return status;
    }

    // The queue exists from here on; m_QueueBuf stays set so that Destroy()
    // can tear it down even when the checks below report an error.
    if (m_Resources.Queue_read_ptr == NULL) {
        WARN() << "CreateQueue: read pointer value should be 0" << std::endl;
        status = HSAKMT_STATUS_ERROR;
    }

    if (m_Resources.Queue_write_ptr == NULL) {
        WARN() << "CreateQueue: write pointer value should be 0" << std::endl;
        status = HSAKMT_STATUS_ERROR;
    }

    // Needs to match the queue write ptr
    m_pendingWptr = 0;
    m_pendingWptr64 = 0;
    m_Node = NodeId;
    m_FamilyId = g_baseTest->GetFamilyIdFromNodeId(NodeId);

    return status;
}
/**
 * Update the queue's percentage and priority through hsaKmtUpdateQueue().
 *
 * @param percent       New queue percentage
 * @param priority      New queue priority
 * @param nullifyBuffer If true, pass a NULL buffer address and a size of 0;
 *                      otherwise pass the current queue buffer and its size
 * @return Status of hsaKmtUpdateQueue()
 */
HSAKMT_STATUS BaseQueue::Update(unsigned int percent, HSA_QUEUE_PRIORITY priority, bool nullifyBuffer) {
    void *bufferAddr = NULL;
    HSAuint64 bufferSize = 0;

    if (!nullifyBuffer) {
        bufferAddr = m_QueueBuf->As<void*>();
        bufferSize = m_QueueBuf->Size();
    }

    return hsaKmtUpdateQueue(m_Resources.QueueId, percent, priority, bufferAddr, bufferSize, NULL);
}
/**
 * Forward a CU mask for this queue to hsaKmtSetQueueCUMask().
 *
 * Note that the wrapped call takes the count before the mask pointer, the
 * reverse of this method's parameter order.
 *
 * @param mask       Mask array passed through unchanged
 * @param mask_count Count passed through unchanged
 * @return Status of hsaKmtSetQueueCUMask()
 */
HSAKMT_STATUS BaseQueue::SetCUMask(unsigned int *mask, unsigned int mask_count) {
return hsaKmtSetQueueCUMask(m_Resources.QueueId, mask_count, mask);
}
/**
 * Destroy the queue and free its buffer.
 *
 * Does nothing when no queue buffer exists. The buffer is released only if
 * hsaKmtDestroyQueue() succeeds; on failure it is kept and the failure
 * status is returned.
 *
 * @return HSAKMT_STATUS_SUCCESS when there was nothing to destroy or the
 *         queue was destroyed, otherwise the status of hsaKmtDestroyQueue()
 */
HSAKMT_STATUS BaseQueue::Destroy() {
    if (m_QueueBuf == NULL)
        return HSAKMT_STATUS_SUCCESS;

    const HSAKMT_STATUS destroyStatus = hsaKmtDestroyQueue(m_Resources.QueueId);
    if (destroyStatus != HSAKMT_STATUS_SUCCESS)
        return destroyStatus;

    delete m_QueueBuf;
    m_QueueBuf = NULL;

    return destroyStatus;
}
// Convenience wrapper: copy the packet into the queue with PlacePacket()
// and then call SubmitPacket().
void BaseQueue::PlaceAndSubmitPacket(const BasePacket &packet) {
PlacePacket(packet);
SubmitPacket();
}
// Wait, via WaitOnValue(), for the location m_Resources.Queue_read_ptr to
// hold the value returned by RptrWhenConsumed(). Event-based waiting is not
// supported in the base class: a non-NULL event fails the first assertion.
// NOTE(review): the polling behaviour and the unit of timeOut are defined by
// WaitOnValue(), which is not visible here - confirm against its definition.
void BaseQueue::Wait4PacketConsumption(HsaEvent *event, unsigned int timeOut) {
ASSERT_TRUE(!event) << "Not supported!" << std::endl;
ASSERT_TRUE(WaitOnValue(m_Resources.Queue_read_ptr, RptrWhenConsumed(), timeOut));
}
// @returns true when the queue's write pointer equals its read pointer
//          (Wptr() == Rptr())
bool BaseQueue::AllPacketsSubmitted() {
return Wptr() == Rptr();
}
/**
 * Copy a packet into the queue buffer at the pending write position and
 * advance the pending write pointers. SubmitPacket() is not called here.
 *
 * All positions and sizes below are in dwords. A packet is never split
 * across the end of the buffer: if it does not fit in the remaining tail,
 * the tail is filled with CMD_NOP dwords and the packet is placed at the
 * start of the buffer.
 *
 * Fails a test assertion (and returns early) when the packet type does not
 * match the queue, when there is not enough free space, or when the packet
 * is larger than the whole queue.
 */
void BaseQueue::PlacePacket(const BasePacket &packet) {
ASSERT_EQ(packet.PacketType(), PacketTypeSupported())
<< "Cannot add a packet since packet type doesn't match queue";
unsigned int readPtr = Rptr();
unsigned int writePtr = m_pendingWptr;
HSAuint64 writePtr64 = m_pendingWptr64;
unsigned int packetSizeInDwords = packet.SizeInDWords();
unsigned int dwordsRequired = packetSizeInDwords;
unsigned int queueSizeInDWord = m_QueueBuf->Size() / sizeof(uint32_t);
if (writePtr + packetSizeInDwords > queueSizeInDWord) {
// Wraparound expected. We need enough room to also place NOPs to avoid crossing the buffer end.
dwordsRequired += queueSizeInDWord - writePtr;
}
// Free space between the write and read positions, modulo the queue size.
// One dword is always held back (the "- 1") - presumably so that a full
// queue can be told apart from an empty one; confirm.
unsigned int dwordsAvailable = (readPtr - 1 - writePtr + queueSizeInDWord) % queueSizeInDWord;
ASSERT_GE(dwordsAvailable, dwordsRequired) << "Cannot add a packet, buffer overrun";
ASSERT_GE(queueSizeInDWord, packetSizeInDwords) << "Cannot add a packet, packet size too large";
if (writePtr + packetSizeInDwords >= queueSizeInDWord) {
// Wraparound
// Pad with NOPs until the write position wraps to 0. When the packet ends
// exactly at the buffer end the loop body does not run and the positions
// are stored back unchanged.
while (writePtr + packetSizeInDwords > queueSizeInDWord) {
m_QueueBuf->As<unsigned int *>()[writePtr] = CMD_NOP;
writePtr = (writePtr + 1) % queueSizeInDWord;
writePtr64++;
}
// Not updating Wptr since we might want to place the packet without submission
m_pendingWptr = (writePtr % queueSizeInDWord);
m_pendingWptr64 = writePtr64;
}
// Copy the packet itself; the byte count is dwords * 4.
memcpy(m_pendingWptr + m_QueueBuf->As<unsigned int*>(), packet.GetPacket(), packetSizeInDwords * 4);
// The 32-bit position wraps at the queue size; the 64-bit one keeps counting.
m_pendingWptr = (m_pendingWptr + packetSizeInDwords) % queueSizeInDWord;
m_pendingWptr64 += packetSizeInDwords;
}
/**
 * Return the queue associated with a node, creating it on first use.
 *
 * The list is searched for a queue whose node id matches; if none exists, a
 * queue of the array's type (PM4, SDMA or AQL) is created on the node,
 * remembered in the list and returned. The status of the queue's Create()
 * call is not checked.
 *
 * @param Node Node id the queue belongs to
 * @return The existing or newly created queue, or NULL when the array's
 *         queue type is not one of the supported types
 */
BaseQueue* QueueArray::GetQueue(unsigned int Node) {
    // Reuse the queue already created for this node, if any.
    for (unsigned int idx = 0; idx < m_QueueList.size(); ++idx) {
        BaseQueue *existing = m_QueueList.at(idx);
        if (existing->GetNodeId() == Node)
            return existing;
    }

    BaseQueue *created = NULL;

    if (m_QueueType == HSA_QUEUE_COMPUTE)
        created = new PM4Queue();
    else if (m_QueueType == HSA_QUEUE_SDMA)
        created = new SDMAQueue();
    else if (m_QueueType == HSA_QUEUE_COMPUTE_AQL)
        created = new AqlQueue();
    else
        return NULL;

    if (created != NULL) {
        created->Create(Node);
        m_QueueList.push_back(created);
    }

    return created;
}
// Delete every queue held by the array, in list order, and empty the list.
void QueueArray::Destroy() {
    unsigned int idx = 0;
    while (idx < m_QueueList.size()) {
        delete m_QueueList.at(idx);
        ++idx;
    }

    m_QueueList.clear();
}
+135
View File
@@ -0,0 +1,135 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __KFD_BASE_QUEUE__H__
#define __KFD_BASE_QUEUE__H__
#include <vector>
#include "KFDTestUtil.hpp"
#include "BasePacket.hpp"
// @class BaseQueue
// Abstract base class for the queues used by the tests. Derived classes must
// implement SubmitPacket(), GetQueueType(), Wptr(), Rptr(), RptrWhenConsumed()
// and PacketTypeSupported().
class BaseQueue {
public:
// Default values offered to callers of Create()/Update().
static const unsigned int DEFAULT_QUEUE_SIZE = PAGE_SIZE;
static const HSA_QUEUE_PRIORITY DEFAULT_PRIORITY = HSA_QUEUE_PRIORITY_NORMAL;
static const unsigned int DEFAULT_QUEUE_PERCENTAGE = 100;
static const unsigned int ZERO_QUEUE_PERCENTAGE = 0;
// NOTE(review): presumably a timeout ("TO") for flushing GPU caches; units not visible here - confirm.
static const unsigned int FLUSH_GPU_CACHES_TO = 1000;
BaseQueue(void);
virtual ~BaseQueue(void);
/** Create the queue.
* @see hsaKmtCreateQueue
* @param NodeId node on which the queue is created
* @param size queue size, defaults to DEFAULT_QUEUE_SIZE
* @param pointers is used only for creating AQL queues. Otherwise it is omitted.
*/
virtual HSAKMT_STATUS Create(unsigned int NodeId, unsigned int size = DEFAULT_QUEUE_SIZE,
HSAuint64 *pointers = NULL);
/** Update the queue.
* @see hsaKmtUpdateQueue
* @param percent New queue percentage
* @param priority New queue priority
* @param nullifyBuffer
* If 'true', set the new buffer address to NULL and the size to 0. Otherwise
* don't change the queue buffer address/size.
*/
virtual HSAKMT_STATUS Update(unsigned int percent, HSA_QUEUE_PRIORITY priority, bool nullifyBuffer);
/** Set the compute-unit mask of the queue.
* @param mask pointer to the mask words
* @param mask_count NOTE(review): unit of the count (bits or dwords) is not visible here - confirm
*/
virtual HSAKMT_STATUS SetCUMask(unsigned int *mask, unsigned int mask_count);
/** Destroy the queue.
* @see hsaKmtDestroyQueue
*/
virtual HSAKMT_STATUS Destroy();
/** Wait for all the packets submitted to the queue to be consumed. (i.e. wait until RPTR=WPTR).
* Note that all packets being consumed is not the same as all packets being processed.
*/
virtual void Wait4PacketConsumption(HsaEvent *event = NULL, unsigned int timeOut = g_TestTimeOut);
/** @brief Place packet and submit it in one function
*/
virtual void PlaceAndSubmitPacket(const BasePacket &packet);
/** @brief Copy packet to queue and update write pointer
*/
virtual void PlacePacket(const BasePacket &packet);
/** @brief Update queue write pointer and set the queue doorbell to the queue write pointer
*/
virtual void SubmitPacket() = 0;
/** @brief Check if all packets in queue are already processed
* Compare queue read and write pointers
*/
bool AllPacketsSubmitted();
// Flag read by Dispatch::Submit(): a non-zero value makes it skip Wait4PacketConsumption().
void SetSkipWaitConsump(int val) { m_SkipWaitConsumption = val; }
int GetSkipWaitConsump() { return m_SkipWaitConsumption; }
// Size reported by the queue buffer (m_QueueBuf->Size()).
int Size() { return m_QueueBuf->Size(); }
HsaQueueResource *GetResource() { return &m_Resources; }
unsigned int GetPendingWptr() { return m_pendingWptr; }
HSAuint64 GetPendingWptr64() { return m_pendingWptr64; }
virtual _HSA_QUEUE_TYPE GetQueueType() = 0;
unsigned int GetNodeId() { return m_Node; }
unsigned int GetFamilyId() { return m_FamilyId; }
protected:
// NOP command encodings. NOTE(review): CMD_NOP is presumably set to one of the
// two constants by code outside this header - confirm.
static const unsigned int CMD_NOP_TYPE_2 = 0x80000000;
static const unsigned int CMD_NOP_TYPE_3 = 0xFFFF1002;
unsigned int CMD_NOP;
// Write pointer values tracked on the host side (32-bit and 64-bit variants).
unsigned int m_pendingWptr;
HSAuint64 m_pendingWptr64;
HsaQueueResource m_Resources;
HsaMemoryBuffer *m_QueueBuf;
unsigned int m_Node;
unsigned int m_FamilyId;
// @return Write pointer modulo queue size in dwords
virtual unsigned int Wptr() = 0;
// @return Read pointer modulo queue size in dwords
virtual unsigned int Rptr() = 0;
// @return Expected m_Resources.Queue_read_ptr when all packets consumed
virtual unsigned int RptrWhenConsumed() = 0;
// @return The packet type this queue accepts
virtual PACKETTYPE PacketTypeSupported() = 0;
private:
// Some tests(such as exception) may not need wait pm4 packet consumption on CZ.
int m_SkipWaitConsumption;
};
// @class QueueArray
// Managed QueueArray for different GPU Nodes
class QueueArray {
 public:
    // Remember which queue type GetQueue() should instantiate.
    QueueArray(_HSA_QUEUE_TYPE type): m_QueueType(type) {}

    // Releases every queue still held in the list.
    ~QueueArray() {
        Destroy();
    }

    // Returns a queue for the given node. Queues created here are stored in
    // the list and deleted by Destroy().
    BaseQueue* GetQueue(unsigned int Node);

    // Delete all queues in the list and clear it.
    void Destroy();

 private:
    // List of Queues. One for each GPU
    std::vector<BaseQueue*> m_QueueList;
    _HSA_QUEUE_TYPE m_QueueType;
};
#endif // __KFD_BASE_QUEUE__H__
+262
View File
@@ -0,0 +1,262 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "Dispatch.hpp"
#include "PM4Packet.hpp"
#include "asic_reg/gfx_7_2_d.h"
#include "asic_reg/gfx_7_2_sh_mask.h"
#include "KFDBaseComponentTest.hpp"
#define mmCOMPUTE_PGM_RSRC3 0x2e2d
// Build a dispatch for the shader in isaBuf: allocates a one-page PM4 indirect
// buffer on the ISA buffer's node and creates the end-of-pipe signal event.
// When eventAutoReset is false the event is created as manual-reset.
Dispatch::Dispatch(const HsaMemoryBuffer& isaBuf, const bool eventAutoReset)
    : m_IsaBuf(isaBuf),
      m_IndirectBuf(PACKETTYPE_PM4, PAGE_SIZE / sizeof(unsigned int), isaBuf.Node()),
      m_DimX(1),
      m_DimY(1),
      m_DimZ(1),
      m_pArg1(NULL),
      m_pArg2(NULL),
      m_pEop(NULL),
      m_ScratchEn(false),
      m_ComputeTmpringSize(0),
      m_scratch_base(0ll),
      m_SpiPriority(0) {
    // Describe a signal event without a sync variable.
    HsaEventDescriptor eventDesc;
    eventDesc.SyncVar.SyncVarSize = 0;
    eventDesc.SyncVar.SyncVar.UserData = NULL;
    eventDesc.NodeId = isaBuf.Node();
    eventDesc.EventType = HSA_EVENTTYPE_SIGNAL;

    // The result is not checked here; Submit() asserts that m_pEop is non-NULL.
    hsaKmtCreateEvent(&eventDesc, !eventAutoReset, false, &m_pEop);

    m_FamilyId = g_baseTest->GetFamilyIdFromNodeId(isaBuf.Node());
    m_NeedCwsrWA = g_baseTest->NeedCwsrWA(isaBuf.Node());
}
// Destroy the end-of-pipe event, if one was created.
Dispatch::~Dispatch() {
    if (m_pEop == NULL)
        return;

    hsaKmtDestroyEvent(m_pEop);
}
void Dispatch::SetArgs(void* pArg1, void* pArg2) {
m_pArg1 = pArg1;
m_pArg2 = pArg2;
}
void Dispatch::SetDim(unsigned int x, unsigned int y, unsigned int z) {
m_DimX = x;
m_DimY = y;
m_DimZ = z;
}
// Enable scratch for this dispatch and record its base address.
// The COMPUTE_TMPRING_SIZE value is packed as waveSize shifted left by 12,
// OR-ed with numWaves.
void Dispatch::SetScratch(int numWaves, int waveSize, HSAuint64 scratch_base) {
    m_scratch_base = scratch_base;
    m_ScratchEn = true;
    m_ComputeTmpringSize = (waveSize << 12) | numWaves;
}
// Store the SPI priority. BuildIb() uses only the low two bits of this value
// (masked with 3) when composing PGM_RSRC1.
void Dispatch::SetSpiPriority(unsigned int priority) {
m_SpiPriority = priority;
}
// Override the PRIV setting chosen in the constructor. The value is stored in
// m_NeedCwsrWA, which BuildIb() writes into bit 20 of PGM_RSRC1.
void Dispatch::SetPriv(bool priv) {
m_NeedCwsrWA = priv;
}
// Build the indirect buffer and submit it to the queue, followed by an
// optional write of the event ID to the event's sync variable and a
// release-mem packet that targets the end-of-pipe event. Unless the queue is
// flagged to skip it, waits for the queue to consume the packets.
void Dispatch::Submit(BaseQueue& queue) {
    ASSERT_NE(m_pEop, (void*)0);
    EXPECT_EQ(m_FamilyId, queue.GetFamilyId());

    BuildIb();
    queue.PlaceAndSubmitPacket(PM4IndirectBufPacket(&m_IndirectBuf));

    // Write data to SyncVar for synchronization purpose
    const auto syncVarData = m_pEop->EventData.EventData.SyncVar.SyncVar.UserData;
    if (syncVarData != NULL) {
        queue.PlaceAndSubmitPacket(PM4WriteDataPacket((unsigned int*)syncVarData, m_pEop->EventId));
    }

    queue.PlaceAndSubmitPacket(
        PM4ReleaseMemoryPacket(m_FamilyId, false, m_pEop->EventData.HWData2, m_pEop->EventId));

    if (queue.GetSkipWaitConsump())
        return;

    queue.Wait4PacketConsumption();
}
// Wait on the end-of-pipe event for up to `timeout`; a wait result other than
// HSAKMT_STATUS_SUCCESS is reported as a fatal gtest failure.
void Dispatch::Sync(unsigned int timeout) {
ASSERT_SUCCESS(hsaKmtWaitOnEvent(m_pEop, timeout));
}
// Returning with status in order to allow actions to be performed before process termination
// Wait on the end-of-pipe event without raising a gtest failure.
// @return 0 when the wait returned HSAKMT_STATUS_SUCCESS, 1 otherwise.
int Dispatch::SyncWithStatus(unsigned int timeout) {
    const bool waitFailed = (hsaKmtWaitOnEvent(m_pEop, timeout) != HSAKMT_STATUS_SUCCESS);
    return waitFailed;
}
// Fill m_IndirectBuf with the PM4 packets for one dispatch: acquire-mem, the
// compute register setup (dims, program address, resource words, scratch,
// user data), the dispatch-direct packet and a release-mem/wait-reg-mem pair
// that stands in for a partial flush.
// NOTE(review): every call appends to m_IndirectBuf and nothing in this
// function resets it, so calling Submit() twice on one Dispatch appears to
// queue two dispatches in the same buffer - confirm this is intended.
void Dispatch::BuildIb() {
// The program address registers take the ISA address shifted right by 8 bits.
HSAuint64 shiftedIsaAddr = m_IsaBuf.As<uint64_t>() >> 8;
unsigned int arg0, arg1, arg2, arg3;
// Each pointer argument is split into two 32-bit user-data values.
SplitU64(reinterpret_cast<uint64_t>(m_pArg1), arg0, arg1);
SplitU64(reinterpret_cast<uint64_t>(m_pArg2), arg2, arg3);
// Starts at COMPUTE_START_X
const unsigned int COMPUTE_DISPATCH_DIMS_VALUES[] = {
0, // START_X
0, // START_Y
0, // START_Z
1, // NUM_THREADS_X - this is actually the number of threads in a thread group
1, // NUM_THREADS_Y
1, // NUM_THREADS_Z
0, // COMPUTE_PIPELINESTAT_ENABLE
0, // COMPUTE_PERFCOUNT_ENABLE
};
// Compose COMPUTE_PGM_RSRC2 field by field.
unsigned int pgmRsrc2 = 0;
pgmRsrc2 |= (m_ScratchEn << COMPUTE_PGM_RSRC2__SCRATCH_EN__SHIFT)
& COMPUTE_PGM_RSRC2__SCRATCH_EN_MASK;
// Six user SGPRs when a scratch base is set (user data 0-5), otherwise four.
pgmRsrc2 |= ((m_scratch_base ? 6 : 4) << COMPUTE_PGM_RSRC2__USER_SGPR__SHIFT)
& COMPUTE_PGM_RSRC2__USER_SGPR_MASK;
pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT)
& COMPUTE_PGM_RSRC2__TRAP_PRESENT_MASK;
pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__TGID_X_EN__SHIFT)
& COMPUTE_PGM_RSRC2__TGID_X_EN_MASK;
pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__TIDIG_COMP_CNT__SHIFT)
& COMPUTE_PGM_RSRC2__TIDIG_COMP_CNT_MASK;
pgmRsrc2 |= (0 << COMPUTE_PGM_RSRC2__EXCP_EN__SHIFT)
& COMPUTE_PGM_RSRC2__EXCP_EN_MASK;
pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__EXCP_EN_MSB__SHIFT)
& COMPUTE_PGM_RSRC2__EXCP_EN_MSB_MASK;
/*
* For some special asics in the list of DEGFX11_12113
* COMPUTE_PGM_RSRC needs priv=1 to prevent hardware traps
*/
const bool priv = m_NeedCwsrWA;
const unsigned int COMPUTE_PGM_RSRC[] = {
// PGM_RSRC1 = { VGPRS: 16 SGPRS: 16 PRIORITY: m_SpiPriority FLOAT_MODE: c0
// PRIV: 0 (1 for GFX11) DX10_CLAMP: 0 DEBUG_MODE: 0 IEEE_MODE: 0 BULKY: 0 CDBG_USER: 0 }
0x000c0084 | ((m_SpiPriority & 3) << 10) | (priv << 20),
pgmRsrc2
};
// Starts at COMPUTE_PGM_LO
const unsigned int COMPUTE_PGM_VALUES_GFX8[] = {
static_cast<uint32_t>(shiftedIsaAddr), // PGM_LO
static_cast<uint32_t>(shiftedIsaAddr >> 32) // PGM_HI
| (is_dgpu() ? 0 : (1<<8)) // including PGM_ATC=?
};
// Starts at COMPUTE_PGM_LO
const unsigned int COMPUTE_PGM_VALUES_GFX9[] = {
static_cast<uint32_t>(shiftedIsaAddr), // PGM_LO
static_cast<uint32_t>(shiftedIsaAddr >> 32) // PGM_HI
| (is_dgpu() ? 0 : (1<<8)), // including PGM_ATC=?
0,
0,
static_cast<uint32_t>(m_scratch_base >> 8), // compute_dispatch_scratch_base
static_cast<uint32_t>(m_scratch_base >> 40)
};
// Starts at COMPUTE_RESOURCE_LIMITS
const unsigned int COMPUTE_RESOURCE_LIMITS[] = {
0, // COMPUTE_RESOURCE_LIMITS
};
// Starts at COMPUTE_TMPRING_SIZE
const unsigned int COMPUTE_TMPRING_SIZE[] = {
m_ComputeTmpringSize, // COMPUTE_TMPRING_SIZE
};
// Starts at COMPUTE_RESTART_X
const unsigned int COMPUTE_RESTART_VALUES[] = {
0, // COMPUTE_RESTART_X
0, // COMPUTE_RESTART_Y
0, // COMPUTE_RESTART_Z
0 // COMPUTE_THREAD_TRACE_ENABLE
};
// Starts at COMPUTE_USER_DATA_0
const unsigned int COMPUTE_USER_DATA_VALUES[] = {
// Reg name - use in KFDtest - use in ABI
arg0, // COMPUTE_USER_DATA_0 - arg0 - resource descriptor for the scratch buffer - 1st dword
arg1, // COMPUTE_USER_DATA_1 - arg1 - resource descriptor for the scratch buffer - 2nd dword
arg2, // COMPUTE_USER_DATA_2 - arg2 - resource descriptor for the scratch buffer - 3rd dword
arg3, // COMPUTE_USER_DATA_3 - arg3 - resource descriptor for the scratch buffer - 4th dword
static_cast<uint32_t>(m_scratch_base), // COMPUTE_USER_DATA_4 - flat_scratch_lo
static_cast<uint32_t>(m_scratch_base >> 32), // COMPUTE_USER_DATA_5 - flat_scratch_hi
0, // COMPUTE_USER_DATA_6 - - AQL queue address, low part
0, // COMPUTE_USER_DATA_7 - - AQL queue address, high part
0, // COMPUTE_USER_DATA_8 - - kernel arguments block, low part
0, // COMPUTE_USER_DATA_9 - - kernel arguments block, high part
0, // COMPUTE_USER_DATA_10 - - unused
0, // COMPUTE_USER_DATA_11 - - unused
0, // COMPUTE_USER_DATA_12 - - unused
0, // COMPUTE_USER_DATA_13 - - unused
0, // COMPUTE_USER_DATA_14 - - unused
0, // COMPUTE_USER_DATA_15 - - unused
};
// 0x1000 is added on non-dGPU systems and 0x8000 on FAMILY_NV and newer.
const unsigned int DISPATCH_INIT_VALUE = 0x00000021 | (is_dgpu() ? 0 : 0x1000) |
((m_FamilyId >= FAMILY_NV) ? 0x8000 : 0);
// {COMPUTE_SHADER_EN=1, PARTIAL_TG_EN=0, FORCE_START_AT_000=0, ORDERED_APPEND_ENBL=0,
// ORDERED_APPEND_MODE=0, USE_THREAD_DIMENSIONS=1, ORDER_MODE=0, DISPATCH_CACHE_CNTL=0,
// SCALAR_L1_INV_VOL=0, VECTOR_L1_INV_VOL=0, DATA_ATC=?, RESTORE=0}
// Set CS_W32_EN for wave32 workloads for gfx10 since all the shaders used in KFDTest are 32 bit.
m_IndirectBuf.AddPacket(PM4AcquireMemoryPacket(m_FamilyId));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_START_X, COMPUTE_DISPATCH_DIMS_VALUES,
ARRAY_SIZE(COMPUTE_DISPATCH_DIMS_VALUES)));
// FAMILY_AI and newer use the longer GFX9 value list, which also carries the scratch base.
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_PGM_LO,
(m_FamilyId >= FAMILY_AI) ? COMPUTE_PGM_VALUES_GFX9 : COMPUTE_PGM_VALUES_GFX8,
(m_FamilyId >= FAMILY_AI) ? ARRAY_SIZE(COMPUTE_PGM_VALUES_GFX9) : ARRAY_SIZE(COMPUTE_PGM_VALUES_GFX8)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_PGM_RSRC1, COMPUTE_PGM_RSRC,
ARRAY_SIZE(COMPUTE_PGM_RSRC)));
// Only FAMILY_AL and FAMILY_AV additionally program COMPUTE_PGM_RSRC3.
if (m_FamilyId == FAMILY_AL || m_FamilyId == FAMILY_AV) {
const unsigned int COMPUTE_PGM_RSRC3[] = {9};
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_PGM_RSRC3, COMPUTE_PGM_RSRC3,
ARRAY_SIZE(COMPUTE_PGM_RSRC3)));
}
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESOURCE_LIMITS, COMPUTE_RESOURCE_LIMITS,
ARRAY_SIZE(COMPUTE_RESOURCE_LIMITS)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_TMPRING_SIZE, COMPUTE_TMPRING_SIZE,
ARRAY_SIZE(COMPUTE_TMPRING_SIZE)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_RESTART_X, COMPUTE_RESTART_VALUES,
ARRAY_SIZE(COMPUTE_RESTART_VALUES)));
m_IndirectBuf.AddPacket(PM4SetShaderRegPacket(mmCOMPUTE_USER_DATA_0, COMPUTE_USER_DATA_VALUES,
ARRAY_SIZE(COMPUTE_USER_DATA_VALUES)));
m_IndirectBuf.AddPacket(PM4DispatchDirectPacket(m_DimX, m_DimY, m_DimZ, DISPATCH_INIT_VALUE));
// EVENT_WRITE.partial_flush causes problems with preemptions in
// GWS testing. Since this is specific to this PM4 command and
// doesn't affect AQL, it's easier to fix KFDTest than the
// firmware.
//
// Replace PartialFlush with a ReleaseMem (with no interrupt) + WaitRegMem
//
// Original: m_IndirectBuf.AddPacket(PM4PartialFlushPacket());
uint32_t *nop = m_IndirectBuf.AddPacket(PM4NopPacket(2)); // NOP packet with one dword payload for the release-mem fence
// The release-mem writes 0xdeadbeef into the NOP payload dword and the wait packet polls that same address.
m_IndirectBuf.AddPacket(PM4ReleaseMemoryPacket(m_FamilyId, true, (uint64_t)&nop[1], 0xdeadbeef));
m_IndirectBuf.AddPacket(PM4WaitRegMemPacket(true, (uint64_t)&nop[1], 0xdeadbeef, 4));
}
+79
View File
@@ -0,0 +1,79 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __KFD_DISPATCH__H__
#define __KFD_DISPATCH__H__
#include "KFDTestUtil.hpp"
#include "IndirectBuffer.hpp"
#include "BaseQueue.hpp"
// Helper that builds a PM4 indirect buffer launching the shader held in an ISA
// buffer, submits it to a queue and waits on an end-of-pipe event.
class Dispatch {
public:
// @param isaBuf buffer holding the shader; stored by reference, so it must outlive this object
// @param eventAutoReset when false the end-of-pipe event is created as manual-reset
Dispatch(const HsaMemoryBuffer& isaBuf, const bool eventAutoReset = false);
~Dispatch();
// Pointer arguments passed to the shader through COMPUTE_USER_DATA_0..3.
void SetArgs(void* pArg1, void* pArg2);
// Dimensions passed to the dispatch-direct packet (each defaults to 1).
void SetDim(unsigned int x, unsigned int y, unsigned int z);
// Build the indirect buffer and submit it, plus the end-of-pipe packets, to the queue.
void Submit(BaseQueue& queue);
// Wait on the end-of-pipe event; failure is reported as a fatal gtest failure.
void Sync(unsigned int timeout = HSA_EVENTTIMEOUT_INFINITE);
// Same wait, but returns 0 on success and 1 otherwise instead of asserting.
int SyncWithStatus(unsigned int timeout);
// Enable scratch and record its size encoding and base address.
void SetScratch(int numWaves, int waveSize, HSAuint64 scratch_base);
// Only the low two bits of priority are used when the buffer is built.
void SetSpiPriority(unsigned int priority);
// Override the PRIV bit value chosen by the constructor.
void SetPriv(bool priv);
HsaEvent *GetHsaEvent() { return m_pEop; }
private:
void BuildIb();
private:
// Reference to the caller's ISA buffer (not owned).
const HsaMemoryBuffer& m_IsaBuf;
IndirectBuffer m_IndirectBuf;
unsigned int m_DimX;
unsigned int m_DimY;
unsigned int m_DimZ;
void* m_pArg1;
void* m_pArg2;
// End-of-pipe event created in the constructor and destroyed in the destructor.
HsaEvent* m_pEop;
bool m_ScratchEn;
unsigned int m_ComputeTmpringSize;
HSAuint64 m_scratch_base;
unsigned int m_SpiPriority;
unsigned int m_FamilyId;
// Written into the PRIV bit of PGM_RSRC1; can be overridden through SetPriv().
bool m_NeedCwsrWA;
};
#endif // __KFD_DISPATCH__H__
@@ -0,0 +1,76 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "GoogleTestExtension.hpp"
#include "OSWrapper.hpp"
// Decide whether a test may run under the current run profile.
// @param testProfile bit mask of the profiles the test belongs to
// @return true when testProfile shares at least one bit with g_TestRunProfile;
//         otherwise logs a warning and returns false.
bool Ok2Run(unsigned int testProfile) {
    if ((testProfile & g_TestRunProfile) != 0)
        return true;

    // Spelling of "because" corrected in the user-visible message.
    WARN() << "Test is skipped because profile does not match current run mode" << std::endl;
    return false;
}
// This predicate is used when specific HW capabilities must exist for the test to succeed.
// @return true when every bit of envCaps is also set in g_TestENVCaps;
//         otherwise logs a warning and returns false.
bool TestReqEnvCaps(unsigned int envCaps) {
    const unsigned int presentCaps = envCaps & g_TestENVCaps;
    if (presentCaps == envCaps)
        return true;

    WARN() << "Test is skipped due to HW capability issues" << std::endl;
    return false;
}
// This predicate is used when specific HW capabilities must be absent for the test to succeed.
// e.g. testing capabilities not supported by HW scheduling
// @return true when none of the bits of envCaps is set in g_TestENVCaps;
//         otherwise logs a warning and returns false.
bool TestReqNoEnvCaps(unsigned int envCaps) {
    const unsigned int presentCaps = envCaps & g_TestENVCaps;
    if (presentCaps == 0)
        return true;

    WARN() << "Test is skipped due to HW capability issues" << std::endl;
    return false;
}
// Write the colored heading for a log line to std::clog, restore the console
// color to white and return std::clog so the caller can stream the message.
// Warnings get a yellow heading, everything else a green one.
std::ostream& operator<< (KFDLog log, LOGTYPE level) {
    const bool isWarning = (level == LOGTYPE_WARNING);

    SetConsoleTextColor(isWarning ? TEXTCOLOR_YELLOW : TEXTCOLOR_GREEN);
    const char *heading = isWarning ? "[----------] " : "[ ] ";

    std::clog << heading;
    SetConsoleTextColor(TEXTCOLOR_WHITE);

    return std::clog;
}
@@ -0,0 +1,92 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __GOOGLETEST_EXTENSION__H__
#define __GOOGLETEST_EXTENSION__H__
#include <gtest/gtest.h>
#include "hsakmt/hsakmt.h"
#include "KFDTestFlags.hpp"
// Kind of message written through LOG()/WARN(); selects the heading and its color.
enum LOGTYPE {
LOGTYPE_INFO, // msg header in green
LOGTYPE_WARNING // msg header in yellow
};
// Empty tag type: it exists only so that `KFDLog() << LOGTYPE_x` selects the operator declared below.
class KFDLog{};
// Prints the heading for the given level and returns the stream that the rest of the `<<` chain writes to.
std::ostream& operator << (KFDLog log, LOGTYPE level);
// @brief Log additional details, to be displayed in the same format as other google test outputs
// Currently not supported by gtest
// Should be used like cout: LOG() << "message" << value << std::endl;
#define LOG() KFDLog() << LOGTYPE_INFO
#define WARN() KFDLog() << LOGTYPE_WARNING
// Helper behind the RECORD() macro. The constructor captures the value as a
// string, the caller streams the key into get_key_stream(), and the
// destructor hands the key/value pair to gtest's RecordProperty().
class KFDRecord: public testing::Test {
 public:
    KFDRecord(const char *val): m_val(val) {}
    // Takes a const reference so that const strings and temporaries are
    // accepted too; the value is only copied, never modified.
    KFDRecord(const std::string &val): m_val(val) {}
    KFDRecord(HSAint64 val): m_val(std::to_string(val)) {}
    KFDRecord(HSAuint64 val): m_val(std::to_string(val)) {}
    KFDRecord(double val): m_val(std::to_string(val)) {}

    // Records the property using whatever key has been streamed so far.
    ~KFDRecord() {
        RecordProperty(m_key.str().c_str(), m_val.c_str());
    }

    // Stream into which the caller writes the property key.
    std::stringstream &get_key_stream() {
        return m_key;
    }

    // Empty override of the test body; this class is never run as a test.
    virtual void TestBody() {}

 private:
    std::string m_val;
    std::stringstream m_key;
};
// Usage: RECORD(value) << "key";
// Creates a temporary KFDRecord holding the value and yields its key stream;
// the property is recorded when the temporary is destroyed.
#define RECORD(val) (KFDRecord(val).get_key_stream())
// All tests MUST be in a try catch since the gtest flag to throw an exception on any fatal failure is enabled
// TEST_START opens `if (Ok2Run(profile)) try {` and TEST_END closes it with a
// catch-all handler that discards the exception.
#define TEST_START(testProfile) if (Ok2Run(testProfile)) try {
#define TEST_END } catch (...) {}
// Used to wrap setup and teardown functions, anything that is built-in gtest and is not a test
#define ROUTINE_START try {
#define ROUTINE_END }catch(...) {}
// Return from the enclosing function when the capability check fails.
// Both macros already end with ';' and expand to a bare `if` statement.
#define TEST_REQUIRE_ENV_CAPABILITIES(envCaps) if (!TestReqEnvCaps(envCaps)) return;
#define TEST_REQUIRE_NO_ENV_CAPABILITIES(envCaps) if (!TestReqNoEnvCaps(envCaps)) return;
// Assert/expect that a call returned HSAKMT_STATUS_SUCCESS.
#define ASSERT_SUCCESS(_val) ASSERT_EQ(HSAKMT_STATUS_SUCCESS, (_val))
#define EXPECT_SUCCESS(_val) EXPECT_EQ(HSAKMT_STATUS_SUCCESS, (_val))
// Assert/expect that a pointer is not NULL.
#define ASSERT_NOTNULL(_val) ASSERT_NE((void *)NULL, _val)
#define EXPECT_NOTNULL(_val) EXPECT_NE((void *)NULL, _val)
// @brief Determines if it is ok to run a test given input flags
bool Ok2Run(unsigned int testProfile);
// @brief Checks if all HW capabilities needed for a test to run exist
bool TestReqEnvCaps(unsigned int hwCaps);
// @brief Checks if all HW capabilities that prevent a test from running are absent
bool TestReqNoEnvCaps(unsigned int hwCaps);
#endif
@@ -0,0 +1,53 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "IndirectBuffer.hpp"
#include "GoogleTestExtension.hpp"
#include "pm4_pkt_struct_common.h"
#include "PM4Packet.hpp"
// Allocate the backing memory for an indirect buffer of sizeInDWords dwords
// on the given node. The buffer starts out empty and accepts only packets of
// the given type.
IndirectBuffer::IndirectBuffer(PACKETTYPE type, unsigned int sizeInDWords, unsigned int NodeId)
    : m_NumOfPackets(0),
      m_MaxSize(sizeInDWords),
      m_ActualSize(0),
      m_PacketTypeAllowed(type) {
    m_IndirectBuf = new HsaMemoryBuffer(
        sizeInDWords*sizeof(unsigned int),
        NodeId,
        true/*zero*/,
        false/*local*/,
        true/*exec*/,
        false/*isScratch*/,
        false/*isReadOnly*/,
        true/*isUncached*/);
}
// Release the memory buffer allocated by the constructor.
IndirectBuffer::~IndirectBuffer(void) {
delete m_IndirectBuf;
}
// Append a packet at the current write position of the indirect buffer.
// @param packet packet to copy; its type is expected to match the type this
//        buffer was created for (a mismatch is reported as a non-fatal failure)
// @return pointer to the first dword of the copied packet inside the buffer,
//         or NULL when the packet does not fit. In that case a gtest failure
//         is reported and nothing is written.
uint32_t *IndirectBuffer::AddPacket(const BasePacket &packet) {
    EXPECT_EQ(packet.PacketType(), m_PacketTypeAllowed) << "Cannot add a packet since packet type doesn't match queue";

    const unsigned int writePtr = m_ActualSize;

    EXPECT_GE(m_MaxSize, packet.SizeInDWords() + writePtr) << "Cannot add a packet, not enough room";
    // EXPECT_GE is non-fatal, so execution continues after a failed check.
    // Bail out here instead of letting memcpy write past the end of the buffer.
    if (m_MaxSize < packet.SizeInDWords() + writePtr)
        return NULL;

    memcpy(m_IndirectBuf->As<unsigned int*>() + writePtr , packet.GetPacket(), packet.SizeInBytes());
    m_ActualSize += packet.SizeInDWords();
    m_NumOfPackets++;

    return m_IndirectBuf->As<HSAuint32 *>() + writePtr;
}
@@ -0,0 +1,60 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __INDIRECT_BUFFER__H__
#define __INDIRECT_BUFFER__H__
#include "BasePacket.hpp"
#include "KFDTestUtil.hpp"
/** @class IndirectBuffer
* When working with an indirect buffer, create IndirectBuffer, fill it with all the packets you want,
* create an indirect packet to point to it, and submit the packet to queue
*/
class IndirectBuffer {
public:
// @param type Packet type allowed in the buffer
// @param sizeInDWords Max size of the buffer in DWords
// @param NodeId Node on which the buffer memory is allocated
IndirectBuffer(PACKETTYPE type, unsigned int sizeInDWords, unsigned int NodeId);
~IndirectBuffer(void);
// @brief Add packet to queue, all validations are done with gtest ASSERT and EXPECT
// @returns Pointer to the start of the added packet inside the buffer
uint32_t *AddPacket(const BasePacket &packet);
// @returns Actual size of the indirect queue in DWords, equivalent to write pointer
unsigned int SizeInDWord() { return m_ActualSize; }
// @returns Indirect queue address
unsigned int *Addr() { return m_IndirectBuf->As<unsigned int*>(); }
protected:
// Number of packets in the queue
unsigned int m_NumOfPackets;
// Max size of queue in DWords
unsigned int m_MaxSize;
// Current size of queue in DWords
unsigned int m_ActualSize;
// Backing memory, allocated in the constructor and deleted in the destructor
HsaMemoryBuffer *m_IndirectBuf;
// What packets are supported in this queue
PACKETTYPE m_PacketTypeAllowed;
};
#endif // __INDIRECT_BUFFER__H__
@@ -0,0 +1,74 @@
/*
* Copyright (C) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "GoogleTestExtension.hpp"
#include "KFDASMTest.hpp"
#include "ShaderStore.hpp"
#include "Assemble.hpp"
// The assembler test needs no per-test setup or cleanup: both hooks are intentionally empty.
void KFDASMTest::SetUp() {}
void KFDASMTest::TearDown() {}
// Target identifiers for which the AssembleShaders test constructs an Assembler.
// NOTE(review): the values look like a gfx version packed as
// (major << 16) | (minor << 8) | stepping, e.g. 0x090400 - confirm against Assembler.
static const std::vector<uint32_t> TargetList = {
0x080001,
0x080002,
0x080003,
0x080005,
0x080100,
0x090000,
0x090002,
0x090004,
0x090006,
0x090008,
0x090009,
0x09000a,
0x09000c,
0x090400,
0x0a0100,
0x0a0101,
0x0a0102,
0x0a0103,
0x0a0300,
0x0a0301,
0x0a0302,
0x0a0303,
0x0a0304,
0x0a0305,
0x0a0306,
};
// Assemble every shader in ShaderList once for each entry of TargetList and
// expect each assembly to succeed.
TEST_F(KFDASMTest, AssembleShaders) {
    TEST_START(TESTPROFILE_RUNALL)

    for (size_t targetIdx = 0; targetIdx < TargetList.size(); ++targetIdx) {
        // One assembler instance per target.
        Assembler asmblr(TargetList[targetIdx]);

        LOG() << "Running ASM test for target " << asmblr.GetTargetAsic() << std::endl;

        for (auto &s : ShaderList) {
            EXPECT_SUCCESS(asmblr.RunAssemble(s));
        }
    }

    TEST_END
}
@@ -0,0 +1,39 @@
/*
* Copyright (C) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __KFD_ASM_TEST__H__
#define __KFD_ASM_TEST__H__
#include <gtest/gtest.h>
// gtest fixture for the shader assembler tests; it holds no state of its own.
class KFDASMTest : public testing::Test {
public:
KFDASMTest() {}
~KFDASMTest() {}
protected:
// Per-test hooks, defined in the accompanying source file.
virtual void SetUp();
virtual void TearDown();
};
#endif // __KFD_ASM_TEST__H__
@@ -0,0 +1,233 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <syslog.h>
#include "KFDBaseComponentTest.hpp"
#include "KFDTestUtil.hpp"
// No per-test-case setup or cleanup is performed: both hooks are intentionally empty.
void KFDBaseComponentTest::SetUpTestCase() {
}
void KFDBaseComponentTest::TearDownTestCase() {
}
// Per-test setup: opens the KFD, takes a topology snapshot, initializes the
// default memory flags and default-GPU-node information, creates the
// assembler for that node and writes a "STARTED" line to syslog.
// The whole body runs inside ROUTINE_START/ROUTINE_END (try / catch-all).
void KFDBaseComponentTest::SetUp() {
ROUTINE_START
ASSERT_SUCCESS(hsaKmtOpenKFD());
EXPECT_SUCCESS(hsaKmtGetVersion(&m_VersionInfo));
memset( &m_SystemProperties, 0, sizeof(m_SystemProperties) );
// Zeroed so that TearDown() can skip render nodes that were never opened (fd <= 0).
memset(m_RenderNodes, 0, sizeof(m_RenderNodes));
/** In order to be correctly testing the KFD interfaces and ensure
* that the KFD acknowledges relevant node parameters
* for the rest of the tests and used for more specific topology tests,
* call to GetSystemProperties for a system snapshot of the topology here
*/
ASSERT_SUCCESS(hsaKmtAcquireSystemProperties(&m_SystemProperties));
ASSERT_GT(m_SystemProperties.NumNodes, HSAuint32(0)) << "HSA has no nodes.";
m_NodeInfo.Init(m_SystemProperties.NumNodes);
// setting memory flags with default values , can be modified according to needs
m_MemoryFlags.ui32.NonPaged = 0; // Paged
m_MemoryFlags.ui32.CachePolicy = HSA_CACHING_NONCACHED; // Non cached
m_MemoryFlags.ui32.ReadOnly = 0; // Read/Write
m_MemoryFlags.ui32.PageSize = HSA_PAGE_SIZE_4KB; // 4KB page
m_MemoryFlags.ui32.HostAccess = 1; // Host accessible
m_MemoryFlags.ui32.NoSubstitute = 0; // Fall back to node 0 if needed
m_MemoryFlags.ui32.GDSMemory = 0;
m_MemoryFlags.ui32.Scratch = 0;
const HsaNodeProperties *nodeProperties = m_NodeInfo.HsaDefaultGPUNodeProperties();
ASSERT_NOTNULL(nodeProperties) << "failed to get HSA default GPU Node properties";
m_FamilyId = FamilyIdFromNode(nodeProperties);
GetHwQueueInfo(nodeProperties, &m_numCpQueues, &m_numSdmaEngines,
&m_numSdmaXgmiEngines, &m_numSdmaQueuesPerEngine);
// Publish this fixture through the global pointer; Dispatch's constructor reads it.
g_baseTest = this;
// Released again in TearDown().
m_pAsm = new Assembler(GetGfxVersion(nodeProperties));
const testing::TestInfo* curr_test_info =
::testing::UnitTest::GetInstance()->current_test_info();
// Mark the start of the test in syslog; the matching closelog() is in TearDown().
openlog("KFDTEST", LOG_CONS , LOG_USER);
syslog(LOG_INFO, "[Node#%03d] STARTED ========== %s.%s ==========",
m_NodeInfo.HsaDefaultGPUNode(),
curr_test_info->test_case_name(), curr_test_info->name());
ROUTINE_END
}
void KFDBaseComponentTest::TearDown() {
ROUTINE_START
for (int i = 0; i < MAX_RENDER_NODES; i++) {
if (m_RenderNodes[i].fd <= 0)
continue;
amdgpu_device_deinitialize(m_RenderNodes[i].device_handle);
drmClose(m_RenderNodes[i].fd);
}
EXPECT_SUCCESS(hsaKmtReleaseSystemProperties());
EXPECT_SUCCESS(hsaKmtCloseKFD());
g_baseTest = NULL;
if (m_pAsm)
delete m_pAsm;
m_pAsm = nullptr;
const testing::TestInfo* curr_test_info =
::testing::UnitTest::GetInstance()->current_test_info();
if (curr_test_info->result()->Passed())
syslog(LOG_INFO, "[Node#%03d] PASSED ========== %s.%s ==========",
m_NodeInfo.HsaDefaultGPUNode(),
curr_test_info->test_case_name(), curr_test_info->name());
else
syslog(LOG_WARNING, "[Node#%03d] FAILED ========== %s.%s ==========",
m_NodeInfo.HsaDefaultGPUNode(),
curr_test_info->test_case_name(), curr_test_info->name());
closelog();
ROUTINE_END
}
/**
 * Computes the total system memory size.
 *
 * Walks every node of the topology snapshot and, for each node that has CPU
 * cores and at least one memory bank, adds the size of its first memory bank.
 *
 * @return Sum in bytes over all CPU nodes; a node whose memory query fails is
 *         reported through EXPECT_SUCCESS and contributes 0.
 */
HSAuint64 KFDBaseComponentTest::GetSysMemSize() {
    HSAuint64 systemMemSize = 0;

    /* Find System Memory size */
    for (unsigned node = 0; node < m_SystemProperties.NumNodes; node++) {
        const HsaNodeProperties *nodeProps = m_NodeInfo.GetNodeProperties(node);

        if (nodeProps == NULL || nodeProps->NumCPUCores == 0 || nodeProps->NumMemoryBanks == 0)
            continue;

        /* For NUMA nodes, memory is distributed among different nodes.
         * Compute total system memory size. KFD driver also computes
         * the system memory (si_meminfo) similarly
         */
        // Zero-initialised per node so a failed query cannot add stale or
        // uninitialised bytes to the total.
        HsaMemoryProperties cpuMemoryProps = {};
        EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(node, 1, &cpuMemoryProps));
        systemMemSize += cpuMemoryProps.SizeInBytes;
    }

    return systemMemSize;
}
/**
 * Returns the framebuffer (VRAM) size of a GPU node.
 *
 * @param defaultGPUNode  Node id to query.
 * @return Size in bytes of the first private or public framebuffer bank of the
 *         node; 0 when the node is unknown or has no framebuffer bank.
 */
HSAuint64 KFDBaseComponentTest::GetVramSize(int defaultGPUNode) {
    /* Find framebuffer size */
    const HsaNodeProperties *nodeProps = m_NodeInfo.GetNodeProperties(defaultGPUNode);
    EXPECT_NE((const HsaNodeProperties *)NULL, nodeProps);
    // EXPECT_NE is non-fatal, so bail out explicitly instead of dereferencing NULL.
    if (nodeProps == NULL)
        return 0;

    const HSAuint32 numBanks = nodeProps->NumMemoryBanks;

    // Value-initialised storage: replaces the non-standard variable-length
    // array and guarantees defined contents if the query below fails.
    std::vector<HsaMemoryProperties> memoryProps(numBanks);
    EXPECT_SUCCESS(hsaKmtGetNodeMemoryProperties(defaultGPUNode, numBanks, memoryProps.data()));

    for (HSAuint32 bank = 0; bank < numBanks; bank++) {
        if (memoryProps[bank].HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE
            || memoryProps[bank].HeapType == HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC)
            return memoryProps[bank].SizeInBytes;
    }

    return 0;
}
unsigned int KFDBaseComponentTest::GetFamilyIdFromNodeId(unsigned int nodeId)
{
return FamilyIdFromNode(m_NodeInfo.GetNodeProperties(nodeId));
}
/*
* Some asics need CWSR workround for DEGFX11_12113
*/
bool KFDBaseComponentTest::NeedCwsrWA(unsigned int nodeId)
{
bool needCwsrWA = false;
const HsaNodeProperties *props = m_NodeInfo.GetNodeProperties(nodeId);
needCwsrWA = props->EngineId.ui32.Major == 11 &&
props->EngineId.ui32.Minor == 0 &&
(props->EngineId.ui32.Stepping == 0 ||
props->EngineId.ui32.Stepping == 1 ||
props->EngineId.ui32.Stepping == 2 ||
props->EngineId.ui32.Stepping == 5 ||
(props->EngineId.ui32.Stepping == 3 && props->NumArrays > 1));
return needCwsrWA;
}
// Reports whether the node's family id is FAMILY_GFX11 or newer.
bool KFDBaseComponentTest::NeedNonPagedWptr(unsigned int nodeId)
{
    const unsigned int familyId = GetFamilyIdFromNodeId(nodeId);
    const bool isGfx11OrNewer = familyId >= FAMILY_GFX11;
    return isGfx11OrNewer;
}
int KFDBaseComponentTest::FindDRMRenderNode(int gpuNode) {
HsaNodeProperties *nodeProperties;
_HSAKMT_STATUS status;
nodeProperties = new HsaNodeProperties();
status = hsaKmtGetNodeProperties(gpuNode, nodeProperties);
EXPECT_SUCCESS(status) << "Node index: " << gpuNode << "hsaKmtGetNodeProperties returned status " << status;
if (status != HSAKMT_STATUS_SUCCESS) {
delete nodeProperties;
return -EINVAL;
}
int minor = nodeProperties->DrmRenderMinor;
if (minor < 128) {
LOG() << "Failed to get minor number " << minor << std::endl;
return -EINVAL;
}
int index = minor - 128;
if (m_RenderNodes[index].fd == 0) {
m_RenderNodes[index].fd = drmOpenRender(minor);
if (m_RenderNodes[index].fd < 0) {
LOG() << "Failed to open render node" << std::endl;
return -EINVAL;
}
if (amdgpu_device_initialize(m_RenderNodes[index].fd,
&m_RenderNodes[index].major_version,
&m_RenderNodes[index].minor_version,
&m_RenderNodes[index].device_handle) != 0) {
drmClose(m_RenderNodes[index].fd);
m_RenderNodes[index].fd = 0;
LOG() << "Failed to initialize amdgpu device" << std::endl;
return -EINVAL;
}
}
return index;
}
@@ -0,0 +1,142 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __KFD_BASE_COMPONENT_TEST__H__
#define __KFD_BASE_COMPONENT_TEST__H__
#include <gtest/gtest.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <sys/param.h>
#include "hsakmt/hsakmt.h"
#include "OSWrapper.hpp"
#include "KFDTestUtil.hpp"
#include "Assemble.hpp"
#include "ShaderStore.hpp"
// @class KFDBaseComponentTest
class KFDBaseComponentTest : public testing::Test {
public:
KFDBaseComponentTest(void) { m_MemoryFlags.Value = 0; }
~KFDBaseComponentTest(void) {}
HSAuint64 GetSysMemSize();
HSAuint64 GetVramSize(int defaultGPUNode);
#define MAX_RENDER_NODES 64
struct {
int fd;
uint32_t major_version;
uint32_t minor_version;
amdgpu_device_handle device_handle;
uint32_t bdf;
} m_RenderNodes[MAX_RENDER_NODES];
// @brief Finds DRM Render node corresponding to gpuNode
// @return DRM Render Node if successful or -1 on failure
int FindDRMRenderNode(int gpuNode);
unsigned int GetFamilyIdFromNodeId(unsigned int nodeId);
bool NeedCwsrWA(unsigned int nodeId);
bool NeedNonPagedWptr(unsigned int nodeId);
unsigned int GetFamilyIdFromDefaultNode(){ return m_FamilyId; }
// @brief Executed before the first test that uses KFDBaseComponentTest.
static void SetUpTestCase();
// @brief Executed after the last test from KFDBaseComponentTest.
static void TearDownTestCase();
protected:
HsaVersionInfo m_VersionInfo;
HsaSystemProperties m_SystemProperties;
unsigned int m_FamilyId;
unsigned int m_numCpQueues;
unsigned int m_numSdmaEngines;
unsigned int m_numSdmaXgmiEngines;
unsigned int m_numSdmaQueuesPerEngine;
HsaMemFlags m_MemoryFlags;
HsaNodeInfo m_NodeInfo;
HSAint32 m_xnack;
Assembler* m_pAsm;
// @brief Executed before every test that uses KFDBaseComponentTest class and sets all common settings for the tests.
virtual void SetUp();
// @brief Executed after every test that uses KFDBaseComponentTest class.
virtual void TearDown();
bool SVMAPISupported() {
bool supported = m_NodeInfo.HsaDefaultGPUNodeProperties()
->Capability.ui32.SVMAPISupported;
if (!supported)
LOG() << "SVM API not supported" << std::endl;
return supported;
}
// Set xnack_override to -1 if parameter is not passed in, to avoid unnecessary code churn
void SVMSetXNACKMode(int xnack_override = -1) {
if (!SVMAPISupported())
return;
m_xnack = -1;
HSAKMT_STATUS ret = hsaKmtGetXNACKMode(&m_xnack);
if (ret != HSAKMT_STATUS_SUCCESS) {
LOG() << "Failed " << ret << " to get XNACK mode" << std::endl;
return;
}
HSAint32 xnack_on = -1;
char *hsa_xnack = getenv("HSA_XNACK");
// HSA_XNACK takes priority over kfdtest parameters
if (hsa_xnack)
xnack_on = strncmp(hsa_xnack, "0", 1);
else if (xnack_override > -1)
xnack_on = xnack_override;
else
return;
// No need to set XNACK if it's already the current value
if (xnack_on == m_xnack)
return;
ret = hsaKmtSetXNACKMode(xnack_on);
if (ret != HSAKMT_STATUS_SUCCESS)
LOG() << "Failed " << ret << " to set XNACK mode " << xnack_on << std::endl;
else
LOG() << "Setting XNACK mode to " << xnack_on << std::endl;
}
void SVMRestoreXNACKMode() {
if (!SVMAPISupported())
return;
if (m_xnack == -1)
return;
hsaKmtSetXNACKMode(m_xnack);
}
};
extern KFDBaseComponentTest* g_baseTest;
#endif // __KFD_BASE_COMPONENT_TEST__H__
+226
View File
@@ -0,0 +1,226 @@
/*
* Copyright (C) 2015-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "KFDCWSRTest.hpp"
#include "Dispatch.hpp"
// Per-test setup: the CWSR suite adds nothing on top of the base fixture.
void KFDCWSRTest::SetUp() {
    ROUTINE_START

    this->KFDBaseComponentTest::SetUp();

    ROUTINE_END
}
// Per-test teardown: the CWSR suite adds nothing on top of the base fixture.
void KFDCWSRTest::TearDown() {
    ROUTINE_START

    this->KFDBaseComponentTest::TearDown();

    ROUTINE_END
}
// Returns the value of the amdgpu "cwsr_enable" module parameter as read via
// fscanf_dec(). The result starts at 0, so 0 is returned whenever the helper
// leaves it untouched (presumably when the file cannot be read - TODO confirm).
static inline uint32_t checkCWSREnabled() {
    uint32_t enabled = 0;

    fscanf_dec("/sys/module/amdgpu/parameters/cwsr_enable", &enabled);

    return enabled;
}
/**
 * KFDCWSRTest.BasicTest
 *
 * Dispatches the PersistentIterateIsa shader, which continuously increments a
 * vgpr for (num_witems / WAVE_SIZE) waves. While the shader is running,
 * dequeue/requeue requests are sent in a loop to trigger CWSRs.
 *
 * This is a parameterized test; see the INSTANTIATE_TEST_CASE_P below for an
 * explanation of the parameters.
 *
 * The test defines a CWSR threshold. The shader keeps looping until inputBuf
 * is filled with the known stop value, which happens once cwsr_thresh CWSRs
 * have been successfully triggered.
 *
 * 4 parameterized tests are defined:
 *
 *   KFDCWSRTest.BasicTest/0 :    1 work-item,                CWSR threshold of 10
 *   KFDCWSRTest.BasicTest/1 :  256 work-items (multi-wave),  CWSR threshold of 50
 *   KFDCWSRTest.BasicTest/2 :  512 work-items (multi-wave),  CWSR threshold of 100
 *   KFDCWSRTest.BasicTest/3 : 1024 work-items (multi-wave),  CWSR threshold of 1000
 */
TEST_P(KFDCWSRTest, BasicTest) {
    TEST_START(TESTPROFILE_RUNALL);

    int num_witems = std::get<0>(GetParam());
    int cwsr_thresh = std::get<1>(GetParam());
    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

    // Delays are stretched by this factor when running on the emulator.
    const int delayMult = g_IsEmuMode ? 20 : 1;

    // CWSR needs a VI-or-newer family and the amdgpu module parameter enabled.
    const bool cwsrAvailable = (m_FamilyId >= FAMILY_VI) && (checkCWSREnabled());

    if (!cwsrAvailable) {
        LOG() << "Skipping test: No CWSR present for family ID 0x" << m_FamilyId << "." << std::endl;
    } else {
        HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true, false, true);

        ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(PersistentIterateIsa, isaBuffer.As<char*>()));

        unsigned stopval = 0x1234'5678;
        unsigned outval = 0x8765'4321;

        // 4B per work-item ==> 1 page per 1024 work-items (take ceiling)
        const int fullPages = num_witems / 1024;
        const int partialPage = (num_witems % 1024 != 0);
        unsigned bufSize = PAGE_SIZE * (fullPages + partialPage);

        HsaMemoryBuffer inputBuf(bufSize, defaultGPUNode, true, false, false);
        HsaMemoryBuffer outputBuf(bufSize, defaultGPUNode, true, false, false);

        unsigned int* input = inputBuf.As<unsigned int*>();
        unsigned int* output = outputBuf.As<unsigned int*>();

        inputBuf.Fill(0);
        outputBuf.Fill(outval);

        PM4Queue queue;
        ASSERT_SUCCESS(queue.Create(defaultGPUNode));

        Dispatch dispatch(isaBuffer);
        dispatch.SetArgs(input, output);
        dispatch.SetDim(num_witems, 1, 1);
        dispatch.Submit(queue);

        Delay(5 * delayMult);

        LOG() << "Starting iteration for " << std::dec << num_witems
              << " work items(s) (targeting " << std::dec << cwsr_thresh
              << " CWSRs)" << std::endl;

        int num_cwsrs = 0;
        while (num_cwsrs < cwsr_thresh) {
            // Send dequeue request
            EXPECT_SUCCESS(queue.Update(0, BaseQueue::DEFAULT_PRIORITY, false));
            Delay(5 * delayMult);

            // Send requeue request
            EXPECT_SUCCESS(queue.Update(100, BaseQueue::DEFAULT_PRIORITY, false));
            Delay(50 * delayMult);

            // Check for reg mangling
            for (int i = 0; i < num_witems; i++) {
                EXPECT_EQ(outval, output[i]);
            }

            num_cwsrs++;
        }

        LOG() << "Successful completion for " << std::dec << num_witems
              << " work item(s) (CWSRs triggered: " << std::dec << cwsr_thresh
              << ")" << std::endl;

        LOG() << "Signalling shader stop..." << std::endl;
        inputBuf.Fill(stopval);

        // Wait for shader to finish or timeout if shader has vm page fault
        EXPECT_EQ(0, dispatch.SyncWithStatus(180000));

        EXPECT_SUCCESS(queue.Destroy());
    }

    TEST_END
}
/**
 * Parameter sets for KFDCWSRTest.BasicTest.
 *
 * Each tuple is (num_witems, cwsr_thresh):
 *   num_witems:  number of work-items to dispatch.
 *   cwsr_thresh: number of CWSRs to trigger.
 *
 * The instantiation prefix is intentionally left empty.
 */
INSTANTIATE_TEST_CASE_P(
    , KFDCWSRTest,
    ::testing::Values(
        /* Single Wave Test, 10 CWSR Triggers */
        std::make_tuple(1, 10),
        /* Multi Wave Test, 50 CWSR Triggers */
        std::make_tuple(256, 50),
        /* Multi Wave Test, 100 CWSR Triggers */
        std::make_tuple(512, 100),
        /* Multi Wave Test, 1000 CWSR Triggers */
        std::make_tuple(1024, 1000)
    )
);
/**
 * KFDCWSRTest.InterruptRestore
 *
 * This test verifies that CP can preempt an HQD while it is restoring a dispatch.
 *
 * Create queue 1.
 * Start a dispatch on queue 1 which runs indefinitely and fills all CU wave slots.
 * Create queue 2, triggering context save on queue 1.
 * Start a dispatch on queue 2 which runs indefinitely and fills all CU wave slots.
 * Create queue 3, triggering context save and restore on queues 1 and 2.
 * Preempt runlist. One or both queues must interrupt context restore to preempt.
 */
TEST_F(KFDCWSRTest, InterruptRestore) {
    TEST_START(TESTPROFILE_RUNALL);

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

    if ((m_FamilyId >= FAMILY_VI) && (checkCWSREnabled())) {
        HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);

        ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(InfiniteLoopIsa, isaBuffer.As<char*>()));

        // The dispatches are automatic objects declared BEFORE the queues:
        // they can no longer leak when a fatal assertion below returns early,
        // and on every exit path the queues are torn down first and the
        // dispatches last - the same order the explicit cleanup used.
        Dispatch dispatch1(isaBuffer);
        Dispatch dispatch2(isaBuffer);

        PM4Queue queue1, queue2, queue3;

        ASSERT_SUCCESS(queue1.Create(defaultGPUNode));

        dispatch1.SetDim(0x10000, 1, 1);
        dispatch2.SetDim(0x10000, 1, 1);

        dispatch1.Submit(queue1);

        // Creating queue 2 triggers a context save on queue 1.
        ASSERT_SUCCESS(queue2.Create(defaultGPUNode));

        dispatch2.Submit(queue2);

        // Give waves time to launch.
        Delay(1);

        // Creating queue 3 triggers context save and restore on queues 1 and 2.
        ASSERT_SUCCESS(queue3.Create(defaultGPUNode));

        EXPECT_SUCCESS(queue1.Destroy());
        EXPECT_SUCCESS(queue2.Destroy());
        EXPECT_SUCCESS(queue3.Destroy());
    } else {
        LOG() << "Skipping test: No CWSR present for family ID 0x" << m_FamilyId << "." << std::endl;
    }

    TEST_END
}
@@ -0,0 +1,43 @@
/*
* Copyright (C) 2015-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __KFD_CWSR_TEST__H__
#define __KFD_CWSR_TEST__H__
#include <gtest/gtest.h>
#include "PM4Queue.hpp"
#include "KFDBaseComponentTest.hpp"
// Fixture for the CWSR tests. It extends the common KFD fixture with a
// (num_witems, cwsr_thresh) parameter tuple consumed by the parameterized
// BasicTest.
class KFDCWSRTest : public KFDBaseComponentTest,
                    public ::testing::WithParamInterface<std::tuple<int, int>> {
 public:
    KFDCWSRTest() {}
    ~KFDCWSRTest() {}

 protected:
    // Per-test hooks; both forward to the KFDBaseComponentTest implementation.
    void SetUp() override;
    void TearDown() override;
};
#endif // __KFD_CWSR_TEST__H__
+731
View File
@@ -0,0 +1,731 @@
/*
* Copyright (C) 2016-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "BaseDebug.hpp"
#include "KFDDBGTest.hpp"
#include <sys/ptrace.h>
#include <poll.h>
#include "hsakmt/linux/kfd_ioctl.h"
#include "KFDQMTest.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "Dispatch.hpp"
#include <string>
// Per-test setup: the debug suite adds nothing on top of the base fixture.
void KFDDBGTest::SetUp() {
    ROUTINE_START

    this->KFDBaseComponentTest::SetUp();

    ROUTINE_END
}
void KFDDBGTest::TearDown() {
ROUTINE_START
/* Reset the user trap handler */
hsaKmtSetTrapHandler(m_NodeInfo.HsaDefaultGPUNode(), 0, 0, 0, 0);
KFDBaseComponentTest::TearDown();
ROUTINE_END
}
/*
 * To test debug attaching to a spawned process (i.e. attach prior to the tracee
 * opening a KFD device), have the child request the parent to PTRACE attach and
 * wait for the parent to debug attach then allow the child to runtime enable.
 *
 * The following will be exercised:
 * - The KFD shall create a KFD process on behalf of the tracee during debug
 *   attach since the tracee has not opened a KFD device.
 * - Runtime enable on the tracee shall raise an event to the debugging parent
 *   and block until parent has signalled that it has received the runtime
 *   enable event.
 * - Tracee should follow a similar hand shake for runtime disable and debug
 *   detach should follow.
 *
 * */
TEST_F(KFDDBGTest, AttachToSpawnedProcess) {
    TEST_START(TESTPROFILE_RUNALL)

    if (m_FamilyId >= FAMILY_AI) {
        if (hsaKmtCheckRuntimeDebugSupport()) {
            LOG() << "Skip test as debug API not supported";
            goto exit;
        }

        pid_t childPid = fork();
        // A failed fork() would otherwise fall into the parent branch with pid -1.
        ASSERT_GE(childPid, 0) << "fork failed";

        if (childPid == 0) { /* Debugged process */
            uint32_t rDebug;
            int r;

            /* Let parent become the debugger and wait for attach. */
            ptrace(PTRACE_TRACEME);
            raise(SIGSTOP);

            r = hsaKmtOpenKFD();

            if (r != HSAKMT_STATUS_SUCCESS) {
                WARN() << "KFD open failed in debugged process" << std::endl;
                exit(1);
            }

            LOG() << std::dec << "--- Debugged PID " << getpid() << " runtime enable" << std::endl;

            r = hsaKmtRuntimeEnable(&rDebug, true);

            if (r != HSAKMT_STATUS_SUCCESS) {
                WARN() << "Runtime enabled failed" << std::endl;
                exit(1);
            }

            LOG() << std::dec << "--- Debugged PID " << getpid() << " runtime disable and exit" << std::endl;

            hsaKmtRuntimeDisable();

            exit(0);
        } else {
            struct kfd_runtime_info r_info;
            memset(&r_info, 0, sizeof(struct kfd_runtime_info));

            uint64_t runtimeMask = KFD_EC_MASK(EC_PROCESS_RUNTIME);
            int childStatus = 0;

            // Wait for the child's self-raised SIGSTOP. waitpid() returns
            // either for a stop of the traced child or for its termination, so
            // fail the test on anything but a stop rather than spinning on a
            // status word that can never change.
            ASSERT_EQ(waitpid(childPid, &childStatus, 0), childPid);
            ASSERT_TRUE(WIFSTOPPED(childStatus)) << "debugged process did not stop";

            // NOTE(review): debug is never deleted; BaseDebug's destructor is
            // not visible here, so its ownership is left unchanged - confirm.
            BaseDebug *debug = new BaseDebug();

            /* Attach and let new debugged process continue with runtime enable */
            LOG() << std::dec << "Attaching to PID " << childPid << std::endl;
            ASSERT_SUCCESS(debug->Attach(&r_info, sizeof(r_info), childPid, runtimeMask));
            ASSERT_EQ(r_info.runtime_state, DEBUG_RUNTIME_STATE_DISABLED);
            ASSERT_EQ(r_info.ttmp_setup, false);

            ptrace(PTRACE_CONT, childPid, NULL, NULL);

            /* Wait and unblock runtime enable */
            ASSERT_SUCCESS(debug->QueryDebugEvent(&runtimeMask, NULL, NULL, 5000));
            ASSERT_EQ(runtimeMask, KFD_EC_MASK(EC_PROCESS_RUNTIME));
            ASSERT_SUCCESS(debug->SendRuntimeEvent(runtimeMask, 0, 0));

            /* Wait and unblock runtime disable */
            ASSERT_SUCCESS(debug->QueryDebugEvent(&runtimeMask, NULL, NULL, 5000));
            ASSERT_EQ(runtimeMask, KFD_EC_MASK(EC_PROCESS_RUNTIME));
            ASSERT_SUCCESS(debug->SendRuntimeEvent(runtimeMask, 0, 0));

            LOG() << std::dec << "Detaching from PID " << childPid << std::endl;
            debug->Detach();
            ptrace(PTRACE_DETACH, childPid, NULL, NULL);

            LOG() << std::dec << "Waiting on PID " << childPid << " to exit" << std::endl;
            waitpid(childPid, &childStatus, 0);
            EXPECT_EQ(WIFEXITED(childStatus), true);
            EXPECT_EQ(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS);
        }
    } else {
        LOG() << "Skipping test: Test not supported on family ID 0x"
              << m_FamilyId << "." << std::endl;
    }

exit:
    LOG() << std::endl;

    TEST_END
}
/*
 * Unlike AttachToSpawnedProcess, the debug parent will only attach after
 * a non-blocked runtime enable by the tracee. The parent should expect
 * a status update that the tracee is runtime enabled on debug attach.
 * Cleanup with appropriate runtime disable and debug detach handshake.
 */
TEST_F(KFDDBGTest, AttachToRunningProcess) {
    TEST_START(TESTPROFILE_RUNALL)

    if (m_FamilyId >= FAMILY_AI) {
        if (hsaKmtCheckRuntimeDebugSupport()) {
            LOG() << "Skip test as debug API not supported";
            goto exit;
        }

        pid_t childPid = fork();
        // A failed fork() would otherwise fall into the parent branch with pid -1.
        ASSERT_GE(childPid, 0) << "fork failed";

        if (childPid == 0) { /* Debugged process */
            uint32_t rDebug;
            int r;

            r = hsaKmtOpenKFD();

            if (r != HSAKMT_STATUS_SUCCESS) {
                WARN() << "KFD open failed in debugged process" << std::endl;
                exit(1);
            }

            LOG() << std::dec << "--- Debugged PID " << getpid() << " runtime enable" << std::endl;

            r = hsaKmtRuntimeEnable(&rDebug, true);

            if (r != HSAKMT_STATUS_SUCCESS) {
                WARN() << "Runtime enabled failed" << std::endl;
                exit(1);
            }

            /* Let parent become the debugger and wait for attach. */
            ptrace(PTRACE_TRACEME);
            raise(SIGSTOP);

            LOG() << std::dec << "--- Debugged PID " << getpid() << " runtime disable and exit" << std::endl;

            hsaKmtRuntimeDisable();

            exit(0);
        } else {
            struct kfd_runtime_info r_info;
            memset(&r_info, 0, sizeof(struct kfd_runtime_info));

            uint64_t runtimeMask = KFD_EC_MASK(EC_PROCESS_RUNTIME);
            int childStatus = 0;

            // Wait for the child's self-raised SIGSTOP. waitpid() returns
            // either for a stop of the traced child or for its termination
            // (e.g. the child bailed out with exit(1)), so fail the test on
            // anything but a stop rather than spinning on a status word that
            // can never change.
            ASSERT_EQ(waitpid(childPid, &childStatus, 0), childPid);
            ASSERT_TRUE(WIFSTOPPED(childStatus)) << "debugged process did not stop";

            // NOTE(review): debug is never deleted; BaseDebug's destructor is
            // not visible here, so its ownership is left unchanged - confirm.
            BaseDebug *debug = new BaseDebug();

            /* Attach to running process and let it continue */
            LOG() << std::dec << "Attaching to PID " << childPid << std::endl;
            ASSERT_SUCCESS(debug->Attach(&r_info, sizeof(r_info), childPid, runtimeMask));
            ASSERT_EQ(r_info.runtime_state, DEBUG_RUNTIME_STATE_ENABLED);
            ASSERT_EQ(r_info.ttmp_setup, true);

            ptrace(PTRACE_CONT, childPid, NULL, NULL);

            /* Wait and unblock runtime disable */
            ASSERT_SUCCESS(debug->QueryDebugEvent(&runtimeMask, NULL, NULL, 5000));
            ASSERT_EQ(runtimeMask, KFD_EC_MASK(EC_PROCESS_RUNTIME));
            ASSERT_SUCCESS(debug->SendRuntimeEvent(runtimeMask, 0, 0));

            LOG() << std::dec << "Detaching from PID " << childPid << std::endl;
            debug->Detach();
            ptrace(PTRACE_DETACH, childPid, NULL, NULL);

            LOG() << std::dec << "Waiting on PID " << childPid << " to exit" << std::endl;
            waitpid(childPid, &childStatus, 0);
            EXPECT_EQ(WIFEXITED(childStatus), true);
            EXPECT_EQ(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS);
        }
    } else {
        LOG() << "Skipping test: Test not supported on family ID 0x"
              << m_FamilyId << "." << std::endl;
    }

exit:
    LOG() << std::endl;

    TEST_END
}
/*
 * KFDDBGTest.HitTrapEvent
 *
 * Installs a second-level trap handler, runtime-enables and debug-attaches to
 * the test process itself, then dispatches JumpToTrapIsa with wave-trap
 * exceptions enabled. The test expects a debug event reporting
 * EC_QUEUE_WAVE_TRAP together with EC_QUEUE_NEW for a valid queue id, and a
 * non-zero first word in the trap status buffer passed to the shader.
 */
TEST_F(KFDDBGTest, HitTrapEvent) {
    TEST_START(TESTPROFILE_RUNALL)

    if (m_FamilyId >= FAMILY_AI) {
        int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

        if (hsaKmtCheckRuntimeDebugSupport()) {
            LOG() << "Skip test as debug API not supported";
            goto exit;
        }

        ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

        // create shader and trap bufs then enable 2nd level trap
        HsaMemoryBuffer isaBuf(PAGE_SIZE, defaultGPUNode, true, false, true);
        HsaMemoryBuffer trapStatusBuf(PAGE_SIZE, defaultGPUNode, true, false, false);
        HsaMemoryBuffer trap(PAGE_SIZE*2, defaultGPUNode, true, false, true);
        HsaMemoryBuffer tmaBuf(PAGE_SIZE, defaultGPUNode, false, false, false);

        ASSERT_SUCCESS(hsaKmtSetTrapHandler(defaultGPUNode,
                                            trap.As<void *>(),
                                            0x1000,
                                            tmaBuf.As<void*>(),
                                            0x1000));

        // compile and dispatch shader
        ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(JumpToTrapIsa, isaBuf.As<char*>()));
        ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(TrapHandlerIsa, trap.As<char*>()));

        uint32_t rDebug;
        ASSERT_SUCCESS(hsaKmtRuntimeEnable(&rDebug, true));

        // NOTE(review): debug is never deleted; BaseDebug's destructor is not
        // visible here, so its ownership is left unchanged - confirm.
        BaseDebug *debug = new BaseDebug();
        struct kfd_runtime_info r_info;
        memset(&r_info, 0, sizeof(struct kfd_runtime_info));
        ASSERT_SUCCESS(debug->Attach(&r_info, sizeof(r_info), getpid(), 0));
        ASSERT_EQ(r_info.runtime_state, DEBUG_RUNTIME_STATE_ENABLED);

        PM4Queue queue;
        ASSERT_SUCCESS(queue.Create(defaultGPUNode));

        unsigned int* trapStatus = trapStatusBuf.As<unsigned int*>();
        trapStatus[0] = 0;

        Dispatch *dispatch;
        dispatch = new Dispatch(isaBuf);
        dispatch->SetArgs(&trapStatus[0], NULL);
        dispatch->SetDim(1, 1, 1);

        /* Subscribe to trap events and submit the queue */
        uint64_t trapMask = KFD_EC_MASK(EC_QUEUE_WAVE_TRAP);
        debug->SetExceptionsEnabled(trapMask);
        dispatch->Submit(queue);

        /* Wait for trap event */
        // Sentinel of the same unsigned type as the id, so the check below
        // does not compare signed with unsigned.
        const uint32_t invalidQueueId = static_cast<uint32_t>(-1);
        uint32_t QueueId = invalidQueueId;
        ASSERT_SUCCESS(debug->QueryDebugEvent(&trapMask, NULL, &QueueId, 5000));
        ASSERT_NE(QueueId, invalidQueueId);
        ASSERT_EQ(trapMask, KFD_EC_MASK(EC_QUEUE_WAVE_TRAP) | KFD_EC_MASK(EC_QUEUE_NEW));

        dispatch->Sync();

        EXPECT_SUCCESS(queue.Destroy());
        // Release the dispatch once its queue is gone (it used to leak).
        delete dispatch;

        ASSERT_NE(trapStatus[0], 0);

        debug->Detach();
        hsaKmtRuntimeDisable();
    } else {
        LOG() << "Skipping test: Test not supported on family ID 0x"
              << m_FamilyId << "." << std::endl;
    }

exit:
    LOG() << std::endl;

    TEST_END
}
/*
 * KFDDBGTest.HitTrapOnWaveStartEndEvent
 *
 * Installs a second-level trap handler, runtime-enables and debug-attaches to
 * the test process itself, then runs two iterations: the first requests the
 * trap-on-wave-start launch override, the second trap-on-wave-end. Each
 * iteration dispatches NoopIsa and expects a debug event reporting
 * EC_QUEUE_WAVE_TRAP together with EC_QUEUE_NEW. The test is skipped when the
 * requested override is not reported as supported.
 */
TEST_F(KFDDBGTest, HitTrapOnWaveStartEndEvent) {
    TEST_START(TESTPROFILE_RUNALL)

    if (m_FamilyId >= FAMILY_AI) {
        int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

        if (hsaKmtCheckRuntimeDebugSupport()) {
            LOG() << "Skip test as debug API not supported";
            goto exit;
        }

        ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

        // create shader and trap bufs then enable 2nd level trap
        HsaMemoryBuffer isaBuf(PAGE_SIZE, defaultGPUNode, true, false, true);
        HsaMemoryBuffer trap(PAGE_SIZE*2, defaultGPUNode, true, false, true);
        HsaMemoryBuffer tmaBuf(PAGE_SIZE, defaultGPUNode, false, false, false);

        ASSERT_SUCCESS(hsaKmtSetTrapHandler(defaultGPUNode,
                                            trap.As<void *>(),
                                            0x1000,
                                            tmaBuf.As<void*>(),
                                            0x1000));

        // compile and dispatch shader
        ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(NoopIsa, isaBuf.As<char*>()));
        ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(TrapHandlerIsa, trap.As<char*>()));

        uint32_t rDebug;
        ASSERT_SUCCESS(hsaKmtRuntimeEnable(&rDebug, true));

        // NOTE(review): debug is never deleted; BaseDebug's destructor is not
        // visible here, so its ownership is left unchanged - confirm.
        BaseDebug *debug = new BaseDebug();
        struct kfd_runtime_info r_info;
        memset(&r_info, 0, sizeof(struct kfd_runtime_info));
        ASSERT_SUCCESS(debug->Attach(&r_info, sizeof(r_info), getpid(), 0));
        ASSERT_EQ(r_info.runtime_state, DEBUG_RUNTIME_STATE_ENABLED);

        PM4Queue queue;
        ASSERT_SUCCESS(queue.Create(defaultGPUNode));

        // One dispatch per iteration. They are released only after the queue
        // has been destroyed (they used to leak).
        const int numIterations = 2;
        Dispatch *dispatches[numIterations] = { NULL, NULL };

        for (int i = 0; i < numIterations; i++) {
            // Even iteration: trap on wave start; odd iteration: trap on wave end.
            uint32_t enableMask = (i % 2 == 0) ? KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START :
                                                 KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
            uint32_t supportedMask = enableMask;

            debug->SetWaveLaunchOverride(KFD_DBG_TRAP_OVERRIDE_OR,
                                         &enableMask,
                                         &supportedMask);

            if ((supportedMask & enableMask) == 0) {
                EXPECT_SUCCESS(queue.Destroy());
                for (int j = 0; j < numIterations; j++)
                    delete dispatches[j];
                debug->Detach();
                hsaKmtRuntimeDisable();
                LOG() << "Skipping test: Trap on start/end override not supported." << std::endl;
                goto exit;
            }

            ASSERT_EQ(enableMask, 0);  // previous set mask

            dispatches[i] = new Dispatch(isaBuf);
            dispatches[i]->SetArgs(NULL, NULL);
            dispatches[i]->SetDim(1, 1, 1);

            /* Subscribe to trap events and submit the queue */
            uint64_t trapMask = KFD_EC_MASK(EC_QUEUE_WAVE_TRAP);
            debug->SetExceptionsEnabled(trapMask);
            dispatches[i]->Submit(queue);

            /* Wait for trap event */
            // Sentinel of the same unsigned type as the id, so the check
            // below does not compare signed with unsigned.
            const uint32_t invalidQueueId = static_cast<uint32_t>(-1);
            uint32_t QueueId = invalidQueueId;
            ASSERT_SUCCESS(debug->QueryDebugEvent(&trapMask, NULL, &QueueId, 5000));
            ASSERT_NE(QueueId, invalidQueueId);
            ASSERT_EQ(trapMask, KFD_EC_MASK(EC_QUEUE_WAVE_TRAP) | KFD_EC_MASK(EC_QUEUE_NEW));

            dispatches[i]->Sync();
        }

        EXPECT_SUCCESS(queue.Destroy());
        for (int j = 0; j < numIterations; j++)
            delete dispatches[j];

        debug->Detach();
        hsaKmtRuntimeDisable();
    } else {
        LOG() << "Skipping test: Test not supported on family ID 0x"
              << m_FamilyId << "." << std::endl;
    }

exit:
    LOG() << std::endl;

    TEST_END
}
/*
 * KFDDBGTest.SuspendQueues
 *
 * Runtime-enables and debug-attaches to the test process itself, submits a
 * dispatch on a fresh queue and then checks the suspend/resume handshake:
 *  - suspending a queue that is still flagged as new must suspend nothing and
 *    mark the queue id invalid;
 *  - a snapshot that does not clear EC_QUEUE_NEW reports a zero context
 *    save/restore area size, while one that clears it reports a non-zero size;
 *  - after that, suspend and resume must both succeed for the queue.
 */
TEST_F(KFDDBGTest, SuspendQueues) {
    TEST_START(TESTPROFILE_RUNALL)

    if (m_FamilyId >= FAMILY_AI) {
        int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

        if (hsaKmtCheckRuntimeDebugSupport()) {
            LOG() << "Skip test as debug API not supported";
            goto exit;
        }

        ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

        // create the shader buffer
        HsaMemoryBuffer isaBuf(PAGE_SIZE, defaultGPUNode, true, false, true);

        // compile and dispatch shader
        ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(JumpToTrapIsa, isaBuf.As<char*>()));

        uint32_t rDebug;
        ASSERT_SUCCESS(hsaKmtRuntimeEnable(&rDebug, true));

        // NOTE(review): debug is never deleted; BaseDebug's destructor is not
        // visible here, so its ownership is left unchanged - confirm.
        BaseDebug *debug = new BaseDebug();
        struct kfd_runtime_info r_info;
        memset(&r_info, 0, sizeof(struct kfd_runtime_info));
        ASSERT_SUCCESS(debug->Attach(&r_info, sizeof(r_info), getpid(), 0));
        ASSERT_EQ(r_info.runtime_state, DEBUG_RUNTIME_STATE_ENABLED);

        PM4Queue queue;
        HsaQueueResource *qResources;
        ASSERT_SUCCESS(queue.Create(defaultGPUNode));

        qResources = queue.GetResource();
        HSA_QUEUEID Queues[] = { qResources->QueueId };

        Dispatch *dispatch;
        dispatch = new Dispatch(isaBuf);
        dispatch->SetDim(1, 1, 1);
        dispatch->Submit(queue);

        // Compile-time bound for the arrays below (previously variable-length
        // arrays sized by the mutable in/out counter).
        const uint32_t maxQueues = 1;
        uint32_t NumQueues = maxQueues;
        uint32_t QueueIds[maxQueues] = { 0 };
        struct kfd_queue_snapshot_entry Snapshots[maxQueues];
        memset(Snapshots, 0, sizeof(Snapshots));

        ASSERT_SUCCESS(debug->SuspendQueues(&NumQueues, Queues, &QueueIds[0], 0));

        // Suspend should fail as new queues cannot be suspended
        ASSERT_EQ(NumQueues, 0);
        ASSERT_NE(QueueIds[0] & KFD_DBG_QUEUE_INVALID_MASK, 0);

        // Snapshot queue, clear new queue status and suspend successfully.
        ASSERT_SUCCESS(debug->QueueSnapshot(0, (uint64_t)(&(Snapshots[0])), &NumQueues));
        ASSERT_EQ(NumQueues, 1);
        ASSERT_EQ(Snapshots[0].ctx_save_restore_area_size, 0);
        ASSERT_SUCCESS(debug->QueueSnapshot(KFD_EC_MASK(EC_QUEUE_NEW), (uint64_t)(&(Snapshots[0])),
                                            &NumQueues));
        ASSERT_EQ(NumQueues, 1);
        ASSERT_GT(Snapshots[0].ctx_save_restore_area_size, 0);
        ASSERT_SUCCESS(debug->SuspendQueues(&NumQueues, Queues, &QueueIds[0], 0));
        ASSERT_EQ(NumQueues, 1);
        ASSERT_EQ(QueueIds[0] & KFD_DBG_QUEUE_INVALID_MASK, 0);

        // Resume and destroy queue then clean up.
        ASSERT_SUCCESS(debug->ResumeQueues(&NumQueues, Queues, &QueueIds[0]));
        ASSERT_EQ(NumQueues, 1);
        ASSERT_EQ(QueueIds[0] & KFD_DBG_QUEUE_INVALID_MASK, 0);

        EXPECT_SUCCESS(queue.Destroy());
        // Release the dispatch once its queue is gone (it used to leak).
        delete dispatch;

        debug->Detach();
        hsaKmtRuntimeDisable();
    } else {
        LOG() << "Skipping test: Test not supported on family ID 0x"
              << m_FamilyId << "." << std::endl;
    }

exit:
    LOG() << std::endl;

    TEST_END
}
/* Forks a child that enables the runtime and dispatches PersistentIterateIsa,
 * while the parent attaches as debugger, waits for a memory-violation debug
 * event and checks that the device snapshot reports the violation on the same
 * GPU id that the event named.
 *
 * Only runs when m_FamilyId >= FAMILY_AI and hsaKmtCheckRuntimeDebugSupport()
 * returns 0; otherwise the test logs a skip message.
 */
TEST_F(KFDDBGTest, HitMemoryViolation) {
    TEST_START(TESTPROFILE_RUNALL)

    if (m_FamilyId >= FAMILY_AI) {
        int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
        ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

        if (hsaKmtCheckRuntimeDebugSupport()) {
            LOG() << "Skip test as debug API not supported";
            goto exit;
        }

        pid_t childPid = fork();
        // fork() returns -1 on failure; without this check the parent branch
        // below would call waitpid(-1, ...) and ptrace a non-existent child.
        ASSERT_GE(childPid, 0) << "fork failed";

        if (childPid == 0) {  // Debugged process
            uint32_t rDebug;
            int r;

            // Refresh setup for HSA device and mem buffer use in child
            KFDBaseComponentTest::TearDown();
            KFDBaseComponentTest::SetUp();

            // Let parent become the debugger and wait for attach.
            ptrace(PTRACE_TRACEME);
            raise(SIGSTOP);

            r = hsaKmtRuntimeEnable(&rDebug, true);
            if (r != HSAKMT_STATUS_SUCCESS) {
                WARN() << "Runtime enabled failed" << std::endl;
                exit(1);
            }

            HsaMemoryBuffer isaBuf(PAGE_SIZE, defaultGPUNode, true, false, true);
            ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(PersistentIterateIsa, isaBuf.As<char*>()));

            PM4Queue queue;
            HsaQueueResource *qResources;
            ASSERT_SUCCESS(queue.Create(defaultGPUNode));

            // Create memory violation event on dispatch
            HsaEvent *vmFaultEvent;
            HsaEventDescriptor eventDesc;
            eventDesc.EventType = HSA_EVENTTYPE_MEMORY;
            eventDesc.NodeId = defaultGPUNode;
            eventDesc.SyncVar.SyncVar.UserData = NULL;
            eventDesc.SyncVar.SyncVarSize = 0;

            r = hsaKmtCreateEvent(&eventDesc, true, false, &vmFaultEvent);
            if (r != HSAKMT_STATUS_SUCCESS) {
                WARN() << "Creating VM fault event failed" << std::endl;
                exit(1);
            }

            Dispatch *dispatch;
            dispatch = new Dispatch(isaBuf);
            dispatch->SetDim(1, 1, 1);
            dispatch->SetPriv(false);  // Override GFX11 CWSR WA
            dispatch->Submit(queue);

            // Queue immediately dies so halt process for tracer device inspection.
            raise(SIGSTOP);
            exit(0);
        } else {
            BaseDebug *debug = new BaseDebug();
            struct kfd_runtime_info r_info;
            memset(&r_info, 0, sizeof(struct kfd_runtime_info));

            uint64_t runtimeMask = KFD_EC_MASK(EC_PROCESS_RUNTIME);
            uint64_t memViolMask = KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION);
            uint64_t subscribeMask = runtimeMask | memViolMask;
            uint64_t queryMask = 0;

            int childStatus;

            // Wait for the child's first SIGSTOP. The status is checked once:
            // if the child exited or was killed instead of stopping, fail the
            // test rather than spinning forever on a status that cannot change.
            ASSERT_EQ(childPid, waitpid(childPid, &childStatus, 0));
            ASSERT_TRUE(WIFSTOPPED(childStatus)) << "debugged child did not stop before attach";

            ASSERT_SUCCESS(debug->Attach(&r_info, sizeof(r_info), childPid, subscribeMask));
            ASSERT_EQ(r_info.runtime_state, DEBUG_RUNTIME_STATE_DISABLED);
            ASSERT_EQ(r_info.ttmp_setup, false);

            ptrace(PTRACE_CONT, childPid, NULL, NULL);

            // Wait and unblock runtime enable
            ASSERT_SUCCESS(debug->QueryDebugEvent(&runtimeMask, NULL, NULL, 5000));
            ASSERT_EQ(runtimeMask, KFD_EC_MASK(EC_PROCESS_RUNTIME));
            ASSERT_SUCCESS(debug->SendRuntimeEvent(runtimeMask, 0, 0));

            // Wait for memory violation
            uint32_t deviceId = -1;
            ASSERT_SUCCESS(debug->QueryDebugEvent(&queryMask, &deviceId, NULL, 5000));
            ASSERT_NE(deviceId, -1);
            ASSERT_EQ(queryMask, memViolMask);

            // Assume tracee queue has died and halted process
            ptrace(PTRACE_CONT, childPid, NULL, NULL);

            const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
            uint32_t snapshotSize = gpuNodes.size();
            struct kfd_dbg_device_info_entry deviceInfo[snapshotSize];
            memset(deviceInfo, 0, snapshotSize * sizeof(struct kfd_dbg_device_info_entry));

            // Check device snapshot aligns with memory violation on target device.
            ASSERT_SUCCESS(debug->DeviceSnapshot(memViolMask, (uint64_t)(&deviceInfo[0]),
                                                 &snapshotSize));
            ASSERT_EQ(snapshotSize, gpuNodes.size());

            for (int i = 0; i < snapshotSize; i++) {
                if (deviceInfo[i].exception_status & memViolMask) {
                    ASSERT_EQ(deviceInfo[i].gpu_id, deviceId);
                    break;
                }
            }

            debug->Detach();
            ptrace(PTRACE_DETACH, childPid, NULL, NULL);

            // Reap the child and require a clean exit(0).
            waitpid(childPid, &childStatus, 0);
            EXPECT_EQ(WIFEXITED(childStatus), true);
            EXPECT_EQ(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS);
        }
    } else {
        LOG() << "Skipping test: Test not supported on family ID 0x"
              << m_FamilyId << "." << std::endl;
    }

exit:
    LOG() << std::endl;
    TEST_END
}
// Installs a trap handler, attaches the debugger to this process and, for the
// READ and NONREAD address-watch modes, sets a watch on targetBuf, dispatches
// the matching reader/writer shader and checks the bits written to result[0].
// Runs only when m_FamilyId >= FAMILY_VI and hsaKmtCheckRuntimeDebugSupport()
// returns 0; otherwise a skip message is logged.
TEST_F(KFDDBGTest, HitAddressWatch) {
TEST_START(TESTPROFILE_RUNALL)
if (m_FamilyId >= FAMILY_VI) {
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
if (hsaKmtCheckRuntimeDebugSupport()) {
LOG() << "Skip test as debug API not supported";
goto exit;
}
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
HsaNodeProperties nodeProps;
ASSERT_SUCCESS(hsaKmtGetNodeProperties(defaultGPUNode, &nodeProps));
// One buffer per shader (read watch, write watch, trap handler) plus the
// buffer passed as the second (TMA) address to hsaKmtSetTrapHandler.
HsaMemoryBuffer readerBuf(PAGE_SIZE, defaultGPUNode, true, false, true);
HsaMemoryBuffer writerBuf(PAGE_SIZE, defaultGPUNode, true, false, true);
HsaMemoryBuffer trap(PAGE_SIZE*2, defaultGPUNode, true, false, true);
HsaMemoryBuffer tmaBuf(PAGE_SIZE, defaultGPUNode, false, false, false);
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(WatchReadIsa, readerBuf.As<char*>()));
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(WatchWriteIsa, writerBuf.As<char*>()));
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(TrapHandlerIsa, trap.As<char*>()));
ASSERT_SUCCESS(hsaKmtSetTrapHandler(defaultGPUNode,
trap.As<void *>(),
0x1000,
tmaBuf.As<void*>(),
0x1000));
uint32_t rDebug;
ASSERT_SUCCESS(hsaKmtRuntimeEnable(&rDebug, true));
struct kfd_runtime_info r_info;
memset(&r_info, 0, sizeof(struct kfd_runtime_info));
// Attach to our own pid; the runtime was enabled above, so the reported
// state is expected to be ENABLED.
BaseDebug *debug = new BaseDebug();
ASSERT_SUCCESS(debug->Attach(&r_info, sizeof(r_info), getpid(), 0));
ASSERT_EQ(r_info.runtime_state, DEBUG_RUNTIME_STATE_ENABLED);
// Snapshot one entry per GPU node; deviceInfo[0].gpu_id is the id used for
// the watch calls below.
const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
uint32_t numDevices = gpuNodes.size();
struct kfd_dbg_device_info_entry deviceInfo[numDevices];
memset(deviceInfo, 0, numDevices * sizeof(struct kfd_dbg_device_info_entry));
ASSERT_SUCCESS(debug->DeviceSnapshot(0, (uint64_t)(&deviceInfo[0]), &numDevices));
ASSERT_EQ(numDevices, gpuNodes.size());
// The single-mem-op trap flag is requested only when the node advertises
// PreciseMemoryOperationsSupported.
bool is_precise = nodeProps.Capability.ui32.PreciseMemoryOperationsSupported;
if (is_precise) {
uint32_t trapFlags = KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP;
ASSERT_SUCCESS(debug->SetFlags(&trapFlags));
}
// Request the address-watch trap mask. After the call, supportedMask must
// contain the bit and enableMask (per the trailing comment, the previously
// set mask) must not.
uint32_t enableMask = KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
uint32_t supportedMask = enableMask;
ASSERT_SUCCESS(debug->SetWaveLaunchOverride(KFD_DBG_TRAP_OVERRIDE_OR,
&enableMask,
&supportedMask));
ASSERT_NE(supportedMask & KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH, 0);
ASSERT_EQ(enableMask & KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH, 0); // previous set mask
PM4Queue queue;
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
// All 32 mask bits set.
const uint32_t watchMask = -1 & UINT_MAX;
HsaMemoryBuffer targetBuf(PAGE_SIZE, defaultGPUNode, true, false, false);
HsaMemoryBuffer resultBuf(PAGE_SIZE, defaultGPUNode, true, false, false);
unsigned int *target = targetBuf.As<unsigned int*>();
unsigned int *result = resultBuf.As<unsigned int*>();
for (int mode = KFD_DBG_TRAP_ADDRESS_WATCH_MODE_READ;
mode < KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ALL; mode++) {
// atomics may not be supported on all devices so skip for now.
if (mode != KFD_DBG_TRAP_ADDRESS_WATCH_MODE_READ &&
mode != KFD_DBG_TRAP_ADDRESS_WATCH_MODE_NONREAD)
continue;
// The watch is cleared at the end of every iteration, so each new watch
// is expected to receive id 0.
uint32_t watchId = -1;
ASSERT_SUCCESS(debug->SetAddressWatch((uint64_t)(&target[0]), mode,
watchMask, deviceInfo[0].gpu_id, &watchId));
ASSERT_EQ(watchId, 0);
// READ mode runs the reader shader; NONREAD runs the writer shader.
const HsaMemoryBuffer &shaderBuf =
mode == KFD_DBG_TRAP_ADDRESS_WATCH_MODE_READ ? readerBuf : writerBuf;
uint32_t preciseMask = 0x1;
uint32_t watchStsMask = 0x80;
// result[0] is seeded with the precise bit before dispatch.
// NOTE(review): how the shader/trap handler updates result[0] is not
// visible in this function — confirm against the ISA sources.
result[0] = preciseMask;
Dispatch dispatch(shaderBuf);
dispatch.SetDim(1, 1, 1);
dispatch.SetArgs(&target[0], &result[0]);
dispatch.SetPriv(false); // Override GFX11 CWSR WA
dispatch.Submit(queue);
dispatch.Sync();
// The watch-status bit must be set; the precise bit is only checked when
// the node reports precise memory operations.
ASSERT_EQ(result[0] & watchStsMask, watchStsMask);
if (is_precise)
ASSERT_EQ(result[0] & preciseMask, preciseMask);
ASSERT_SUCCESS(debug->ClearAddressWatch(deviceInfo[0].gpu_id, watchId));
// Reset both buffers before the next watch mode.
resultBuf.Fill(0);
targetBuf.Fill(0);
}
ASSERT_SUCCESS(queue.Destroy());
debug->Detach();
hsaKmtRuntimeDisable();
} else {
LOG() << "Skipping test: Test not supported on family ID 0x"
<< m_FamilyId << "." << std::endl;
}
exit:
LOG() << std::endl;
TEST_END
}
@@ -0,0 +1,41 @@
/*
* Copyright (C) 2016-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __KFD_DBG_TEST__H__
#define __KFD_DBG_TEST__H__
#include <gtest/gtest.h>
#include "KFDBaseComponentTest.hpp"
// Test fixture for the TEST_F(KFDDBGTest, ...) debugger test cases.
// Adds no state of its own on top of KFDBaseComponentTest.
class KFDDBGTest : public KFDBaseComponentTest {
public:
KFDDBGTest() {}
~KFDDBGTest() {}
protected:
// Per-test setup/teardown hooks; they are only declared here, the
// definitions are not part of this header.
virtual void SetUp();
virtual void TearDown();
};
#endif // __KFD_DBG_TEST__H__
@@ -0,0 +1,444 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <math.h>
#include <limits.h>
#include "KFDEventTest.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
void KFDEventTest::SetUp() {
ROUTINE_START
KFDBaseComponentTest::SetUp();
m_pHsaEvent = NULL;
ROUTINE_END
}
// Per-test teardown: destroys the fixture-owned event, if a test created one,
// and then runs the base teardown.
void KFDEventTest::TearDown() {
    ROUTINE_START

    // m_pHsaEvent stays NULL for tests that never create an event. The
    // destroy call lives here so that it runs for every test that did.
    if (m_pHsaEvent) {
        EXPECT_SUCCESS(hsaKmtDestroyEvent(m_pHsaEvent));
    }

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}
// Creates one event on the default GPU node and checks that its HWData2 field
// is non-zero. The event is stored in m_pHsaEvent and released by TearDown().
TEST_F(KFDEventTest, CreateDestroyEvent) {
    TEST_START(TESTPROFILE_RUNALL);

    const int gpuNode = m_NodeInfo.HsaDefaultGPUNode();

    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, gpuNode, &m_pHsaEvent));
    EXPECT_NE(0, m_pHsaEvent->EventData.HWData2);

    // No explicit destroy here: the fixture's TearDown() handles m_pHsaEvent.

    TEST_END;
}
// Creates MAX_EVENT_NUMBER (256) events on the default GPU node, then
// destroys every one of them.
TEST_F(KFDEventTest, CreateMaxEvents) {
    TEST_START(TESTPROFILE_RUNALL);

    static const unsigned int MAX_EVENT_NUMBER = 256;
    HsaEvent* events[MAX_EVENT_NUMBER];

    // Creation pass: a failed creation aborts the test immediately.
    for (unsigned int slot = 0; slot < MAX_EVENT_NUMBER; ++slot) {
        events[slot] = NULL;
        ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, m_NodeInfo.HsaDefaultGPUNode(), &events[slot]));
    }

    // Destruction pass: failures are reported but do not stop the loop.
    for (unsigned int slot = 0; slot < MAX_EVENT_NUMBER; ++slot) {
        EXPECT_SUCCESS(hsaKmtDestroyEvent(events[slot]));
    }

    TEST_END;
}
// Signals m_pHsaEvent from the GPU with a release-memory packet and waits for
// it on the CPU.
TEST_F(KFDEventTest, SignalEvent) {
    TEST_START(TESTPROFILE_RUNALL);

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    // A throw-away event is created first so that the event under test,
    // created second, intentionally ends up with a non-zero event id.
    HsaEvent *fillerEvent;
    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, defaultGPUNode, &fillerEvent));
    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, defaultGPUNode, &m_pHsaEvent));
    ASSERT_NE(0, m_pHsaEvent->EventData.HWData2);

    PM4Queue queue;
    ASSERT_SUCCESS(queue.Create(defaultGPUNode));

    /* From gfx9 onward, m_pHsaEvent->EventId will also be passed to int_ctxid in
     * the Release Mem packet, which is used as context id in ISR.
     */
    queue.PlaceAndSubmitPacket(
        PM4ReleaseMemoryPacket(m_FamilyId, false,
                               m_pHsaEvent->EventData.HWData2,
                               m_pHsaEvent->EventId));
    queue.Wait4PacketConsumption();

    EXPECT_SUCCESS(hsaKmtWaitOnEvent(m_pHsaEvent, g_TestTimeOut));

    // m_pHsaEvent itself is destroyed by TearDown().
    EXPECT_SUCCESS(hsaKmtDestroyEvent(fillerEvent));
    EXPECT_SUCCESS(queue.Destroy());

    TEST_END;
}
/* Test event signaling with event-age enabled wait (hsaKmtWaitOnEvent_Ext).
 *
 * Returns early, with a log message, when the kernel interface version is
 * 1.x with x < 14. Each numbered step below signals the event and checks the
 * value that the wait writes back into event_age.
 */
TEST_F(KFDEventTest, SignalEventExt) {
    TEST_START(TESTPROFILE_RUNALL);

    PM4Queue queue;
    HsaEvent *tmp_event;
    uint64_t event_age;

    if (m_VersionInfo.KernelInterfaceMajorVersion == 1 &&
        m_VersionInfo.KernelInterfaceMinorVersion < 14) {
        LOG() << "event age tracking isn't supported in KFD. Exiting." << std::endl;
        return;
    }

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, defaultGPUNode, &tmp_event));
    /* Intentionally let event id for m_pHsaEvent be non zero */
    ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, defaultGPUNode, &m_pHsaEvent));
    ASSERT_NE(0, m_pHsaEvent->EventData.HWData2);

    ASSERT_SUCCESS(queue.Create(defaultGPUNode));

    /* 1. event_age gets incremented every time when the event signals */
    event_age = 1;
    queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(m_FamilyId, false,
                        m_pHsaEvent->EventData.HWData2, m_pHsaEvent->EventId));
    EXPECT_SUCCESS(hsaKmtWaitOnEvent_Ext(m_pHsaEvent, g_TestTimeOut, &event_age));
    ASSERT_EQ(event_age, 2);

    queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(m_FamilyId, false,
                        m_pHsaEvent->EventData.HWData2, m_pHsaEvent->EventId));
    EXPECT_SUCCESS(hsaKmtWaitOnEvent_Ext(m_pHsaEvent, g_TestTimeOut, &event_age));
    ASSERT_EQ(event_age, 3);

    /* 2. event wait return without sleep after the event signals */
    queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(m_FamilyId, false,
                        m_pHsaEvent->EventData.HWData2, m_pHsaEvent->EventId));
    sleep(1); /* wait for event signaling */
    EXPECT_SUCCESS(hsaKmtWaitOnEvent_Ext(m_pHsaEvent, g_TestTimeOut, &event_age));
    ASSERT_EQ(event_age, 4);

    /* 3. signaling from CPU */
    // Check the signal call itself so that a failure is reported at its
    // source instead of surfacing only as a timeout in the wait below.
    EXPECT_SUCCESS(hsaKmtSetEvent(m_pHsaEvent));
    EXPECT_SUCCESS(hsaKmtWaitOnEvent_Ext(m_pHsaEvent, g_TestTimeOut, &event_age));
    ASSERT_EQ(event_age, 5);

    /* 4. when event_age is 0, hsaKmtWaitOnEvent_Ext always sleeps */
    event_age = 0;
    ASSERT_EQ(HSAKMT_STATUS_WAIT_TIMEOUT, hsaKmtWaitOnEvent_Ext(m_pHsaEvent, g_TestTimeOut, &event_age));

    /* 5. when event_age is 0, it always stays 0 after the event signals */
    queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(m_FamilyId, false,
                        m_pHsaEvent->EventData.HWData2, m_pHsaEvent->EventId));
    EXPECT_SUCCESS(hsaKmtWaitOnEvent_Ext(m_pHsaEvent, g_TestTimeOut, &event_age));
    ASSERT_EQ(event_age, 0);

    // m_pHsaEvent is destroyed by TearDown(); only the helper event and the
    // queue are released here.
    EXPECT_SUCCESS(hsaKmtDestroyEvent(tmp_event));
    EXPECT_SUCCESS(queue.Destroy());

    TEST_END;
}
// Returns the current CLOCK_MONOTONIC reading in nanoseconds.
static uint64_t gettime() {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    // Widen the seconds to 64 bits before scaling so the product is computed
    // in 64-bit arithmetic.
    const int64_t wholeSecondsNs = static_cast<int64_t>(now.tv_sec) * 1000 * 1000 * 1000;
    return wholeSecondsNs + now.tv_nsec;
}
// Rounds num up to the nearest power of two and returns it as a double.
//
// For num >= 1 the result is built by exact doubling. The former
// log(num)/log(2) quotient can come out marginally above an integer for an
// exact power of two, in which case ceil() pushed the result to the next
// power. For num < 1 the original floating-point expression is kept
// unchanged, so existing callers see the same values (0 for num == 0, NaN
// for negative num).
static inline double pow2_round_up(int num) {
    if (num < 1)
        return pow(2, ceil(log(num)/log(2)));

    double rounded = 1.0;
    while (rounded < num)
        rounded *= 2.0;  // doubling a power of two is exact in binary floating point
    return rounded;
}
class QueueAndSignalBenchmark {
private:
static const int HISTORY_SIZE = 100;
int mNumEvents;
int mHistorySlot;
uint64_t mTimeHistory[HISTORY_SIZE];
uint64_t mLatHistory[HISTORY_SIZE];
public:
QueueAndSignalBenchmark(int events) : mNumEvents(events), mHistorySlot(0) {
memset(mTimeHistory, 0, sizeof(mTimeHistory));
memset(mLatHistory, 0, sizeof(mLatHistory));
}
int queueAndSignalEvents(int node, int eventCount, uint64_t &time, uint64_t &latency) {
int r;
uint64_t startTime;
PM4Queue queue;
unsigned int familyId = g_baseTest->GetFamilyIdFromNodeId(node);
HsaEvent** pHsaEvent = reinterpret_cast<HsaEvent**>(calloc(eventCount, sizeof(HsaEvent*)));
size_t packetSize = PM4ReleaseMemoryPacket(familyId, false, 0, 0).SizeInBytes();
int qSize = fmax(PAGE_SIZE, pow2_round_up(packetSize*eventCount + 1));
time = 0;
r = queue.Create(node, qSize);
if (r != HSAKMT_STATUS_SUCCESS)
goto exit;
for (int i = 0; i < eventCount; i++) {
r = CreateQueueTypeEvent(false, false, node, &pHsaEvent[i]);
if (r != HSAKMT_STATUS_SUCCESS)
goto exit;
queue.PlacePacket(PM4ReleaseMemoryPacket(familyId, false, pHsaEvent[i]->EventData.HWData2, pHsaEvent[i]->EventId));
}
startTime = gettime();
queue.SubmitPacket();
for (int i = 0; i < eventCount; i++) {
r = hsaKmtWaitOnEvent(pHsaEvent[i], g_TestTimeOut);
if (r != HSAKMT_STATUS_SUCCESS)
goto exit;
if (i == 0)
latency = gettime() - startTime;
}
time = gettime() - startTime;
exit:
for (int i = 0; i < eventCount; i++) {
if (pHsaEvent[i])
hsaKmtDestroyEvent(pHsaEvent[i]);
}
queue.Destroy();
return r;
}
void run(int node) {
int r = 0;
uint64_t time = 0, latency = 0;
uint64_t avgLat = 0, avgTime = 0;
uint64_t minTime = ULONG_MAX, maxTime = 0;
uint64_t minLat = ULONG_MAX, maxLat = 0;
ASSERT_EQ(queueAndSignalEvents(node, mNumEvents, time, latency), HSAKMT_STATUS_SUCCESS);
mTimeHistory[mHistorySlot%HISTORY_SIZE] = time;
mLatHistory[mHistorySlot%HISTORY_SIZE] = latency;
for (int i = 0; i < HISTORY_SIZE; i++) {
minTime = mTimeHistory[i] < minTime ? mTimeHistory[i] : minTime;
maxTime = mTimeHistory[i] > maxTime ? mTimeHistory[i] : maxTime;
avgTime += mTimeHistory[i];
minLat = mLatHistory[i] < minLat ? mLatHistory[i] : minLat;
maxLat = mLatHistory[i] > maxLat ? mLatHistory[i] : maxLat;
avgLat += mLatHistory[i];
}
avgTime /= HISTORY_SIZE;
avgLat /= HISTORY_SIZE;
mHistorySlot++;
printf("\033[KEvents: %d History: %d/%d\n", mNumEvents, mHistorySlot, HISTORY_SIZE);
printf("\033[KMin Latency: %f ms\n", (float)minLat/1000000);
printf("\033[KMax Latency: %f ms\n", (float)maxLat/1000000);
printf("\033[KAvg Latency: %f ms\n", (float)avgLat/1000000);
printf("\033[K Min Rate: %f IH/ms\n", ((float)mNumEvents)/maxTime*1000000);
printf("\033[K Max Rate: %f IH/ms\n", ((float)mNumEvents)/minTime*1000000);
printf("\033[K Avg Rate: %f IH/ms\n", ((float)mNumEvents)/avgTime*1000000);
}
};
// Interactive benchmark: repeatedly runs a 128-event and a 4095-event
// QueueAndSignalBenchmark on the default GPU node and redraws their
// statistics in place on the terminal.
// NOTE(review): the loop has no exit condition in this function, so
// TEST_END is never reached — confirm this test is meant to be run manually.
TEST_F(KFDEventTest, MeasureInterruptConsumption) {
    TEST_START(TESTPROFILE_RUNALL);

    QueueAndSignalBenchmark latencyBench(128);
    QueueAndSignalBenchmark sustainedBench(4095);

    static const char kRule[] = "--------------------------\n";

    printf("\033[2J");  // ANSI: clear the screen once up front
    for (;;) {
        printf("\033[H");  // ANSI: move the cursor home so output is overwritten
        printf("%s", kRule);
        latencyBench.run(m_NodeInfo.HsaDefaultGPUNode());
        printf("%s", kRule);
        sustainedBench.run(m_NodeInfo.HsaDefaultGPUNode());
        printf("%s", kRule);
    }

    TEST_END;
}
// Creates and signals MAX_EVENT_NUMBER (4095) events in one batch and
// requires the whole create/submit/wait sequence to succeed.
TEST_F(KFDEventTest, SignalMaxEvents) {
    TEST_START(TESTPROFILE_RUNALL);

    static const unsigned int MAX_EVENT_NUMBER = 4095;
    // Timing outputs are required by the helper's signature but are not
    // evaluated by this test.
    uint64_t time = 0, latency = 0;

    QueueAndSignalBenchmark maxEventTest(MAX_EVENT_NUMBER);

    // The status used to be discarded, so this test could never fail.
    EXPECT_EQ(maxEventTest.queueAndSignalEvents(m_NodeInfo.HsaDefaultGPUNode(), MAX_EVENT_NUMBER,
                                                time, latency),
              HSAKMT_STATUS_SUCCESS);

    TEST_END;
}
// Signals EVENT_NUMBER events one at a time from the GPU, pausing between
// submissions, then waits for all of them with a single
// hsaKmtWaitOnMultipleEvents call.
TEST_F(KFDEventTest, SignalMultipleEventsWaitForAll) {
    TEST_START(TESTPROFILE_RUNALL);

    static const unsigned int EVENT_NUMBER = 64;  // 64 is the maximum for hsaKmtWaitOnMultipleEvents
    static const unsigned int WAIT_BETWEEN_SUBMISSIONS_MS = 50;

    HsaEvent* pHsaEvent[EVENT_NUMBER];
    unsigned int i = 0;

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    for (i = 0; i < EVENT_NUMBER; i++) {
        pHsaEvent[i] = NULL;
        ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, defaultGPUNode, &pHsaEvent[i]));
    }

    PM4Queue queue;
    ASSERT_SUCCESS(queue.Create(defaultGPUNode));

    // Submit one signalling packet per event and let each one be consumed
    // before the next is sent.
    for (i = 0; i < EVENT_NUMBER; i++) {
        queue.PlaceAndSubmitPacket(PM4ReleaseMemoryPacket(m_FamilyId, false, pHsaEvent[i]->EventData.HWData2,
                                   pHsaEvent[i]->EventId));
        queue.Wait4PacketConsumption();

        Delay(WAIT_BETWEEN_SUBMISSIONS_MS);
    }

    // The third argument (true) is assumed to select wait-for-all, matching
    // the test name — confirm against the hsaKmtWaitOnMultipleEvents API.
    EXPECT_SUCCESS(hsaKmtWaitOnMultipleEvents(pHsaEvent, EVENT_NUMBER, true, g_TestTimeOut));

    EXPECT_SUCCESS(queue.Destroy());

    for (i = 0; i < EVENT_NUMBER; i++)
        EXPECT_SUCCESS(hsaKmtDestroyEvent(pHsaEvent[i]));

    TEST_END;
}
/* Send an event interrupt with 0 context ID. Test that KFD handles it
 * gracefully and with good performance. On current GPUs and firmware it
 * should be handled on a fast path.
 *
 * Each of the REPS iterations queues one invalid signal packet (event data and
 * id both 0) followed by one valid packet for m_pHsaEvent, and measures the
 * time from submission until the valid event is received. The test logs
 * min/mean/max and standard deviation with one minimum and one maximum sample
 * discarded; it does not assert on the timing values.
 */
TEST_F(KFDEventTest, SignalInvalidEvent) {
TEST_START(TESTPROFILE_RUNALL);
PM4Queue queue;
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
// Create some dummy events, to make the slow path a bit slower
static const unsigned int EVENT_NUMBER = 4094;
HsaEvent* pHsaEvent[EVENT_NUMBER];
for (int i = 0; i < EVENT_NUMBER; i++) {
pHsaEvent[i] = NULL;
ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, defaultGPUNode, &pHsaEvent[i]));
}
// The event that is actually signalled; it is destroyed by TearDown().
ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, defaultGPUNode, &m_pHsaEvent));
ASSERT_NE(0, m_pHsaEvent->EventData.HWData2);
ASSERT_SUCCESS(queue.Create(defaultGPUNode));
static const unsigned int REPS = 2000;
HSAuint64 duration[REPS];
// NOTE(review): min starts at 1000000 (us) rather than at the type's maximum;
// if every sample were larger, no sample would equal min and the skip-min
// logic further down would never trigger — confirm this is acceptable.
HSAuint64 total = 0, min = 1000000, max = 0;
for (int i = 0; i < REPS; i++) {
// Invalid signal packet
queue.PlacePacket(PM4ReleaseMemoryPacket(m_FamilyId, false, 0, 0));
// Submit valid signal packet
queue.PlacePacket(PM4ReleaseMemoryPacket(m_FamilyId, false,
m_pHsaEvent->EventData.HWData2, m_pHsaEvent->EventId));
// Time from submission of both packets to reception of the valid event.
HSAuint64 startTime = GetSystemTickCountInMicroSec();
queue.SubmitPacket();
EXPECT_SUCCESS(hsaKmtWaitOnEvent(m_pHsaEvent, g_TestTimeOut));
duration[i] = GetSystemTickCountInMicroSec() - startTime;
total += duration[i];
if (duration[i] < min)
min = duration[i];
if (duration[i] > max)
max = duration[i];
}
// Mean over the samples with one min and one max removed.
double mean = (double)(total - min - max) / (REPS - 2);
double variance = 0;
bool skippedMin = false, skippedMax = false;
HSAuint64 newMin = max, newMax = min;
// Second pass: skip the first occurrence of min and of max, then track the
// remaining extremes and accumulate the squared deviation from the mean.
for (int i = 0; i < REPS; i++) {
if (!skippedMin && duration[i] == min) {
skippedMin = true;
continue;
}
if (!skippedMax && duration[i] == max) {
skippedMax = true;
continue;
}
if (duration[i] < newMin)
newMin = duration[i];
if (duration[i] > newMax)
newMax = duration[i];
double diff = mean - duration[i];
variance += diff*diff;
}
variance /= REPS - 2;
double stdDev = sqrt(variance);
LOG() << "Time for event handling (min/avg/max [std.dev] in us) " << std::dec
<< newMin << "/" << mean << "/" << newMax << " [" << stdDev << "]\n";
EXPECT_SUCCESS(queue.Destroy());
for (int i = 0; i < EVENT_NUMBER; i++)
EXPECT_SUCCESS(hsaKmtDestroyEvent(pHsaEvent[i]));
TEST_END;
}
@@ -0,0 +1,44 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "KFDBaseComponentTest.hpp"
#ifndef __KFD_EVENT_TEST__H__
#define __KFD_EVENT_TEST__H__
// Fixture for the TEST_F(KFDEventTest, ...) event test cases. It adds one
// optional event handle on top of KFDBaseComponentTest.
class KFDEventTest : public KFDBaseComponentTest {
 public:
    KFDEventTest() {}
    ~KFDEventTest() {}

    // @brief Runs before each test case of this fixture.
    virtual void SetUp();

    // @brief Runs after each test case of this fixture.
    virtual void TearDown();

 protected:
    // The original annotation reads "5 seconds", so the unit is presumably
    // milliseconds.
    static const unsigned int EVENT_TIMEOUT = 5000;

    // Event owned by the fixture; tests that need one store it here.
    HsaEvent* m_pHsaEvent;
};
#endif // __KFD_EVENT_TEST__H__
@@ -0,0 +1,579 @@
/*
* Copyright (C) 2017-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <vector>
#include <string>
#include "KFDEvictTest.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAPacket.hpp"
#include "SDMAQueue.hpp"
#include "Dispatch.hpp"
#define N_PROCESSES (2) /* Number of processes running in parallel, must be at least 2 */
#define ALLOCATE_BUF_SIZE_MB (64)
#define ALLOCATE_RETRY_TIMES (3)
#define MAX_WAVEFRONTS (512)
#define SDMA_NOP 0x0
// Per-test setup for KFDEvictTest: delegates entirely to the base fixture's
// SetUp(), wrapped in the ROUTINE_START/ROUTINE_END macros.
void KFDEvictTest::SetUp() {
ROUTINE_START
KFDBaseComponentTest::SetUp();
ROUTINE_END
}
// Per-test teardown for KFDEvictTest: delegates entirely to the base
// fixture's TearDown(), wrapped in the ROUTINE_START/ROUTINE_END macros.
void KFDEvictTest::TearDown() {
ROUTINE_START
KFDBaseComponentTest::TearDown();
ROUTINE_END
}
// Allocates up to `count` non-paged, non-host-accessible buffers of
// `vramBufSize` bytes on `defaultGPUNode` and appends them to `pBuffers`.
//
// On a dGPU each buffer is also mapped to the node; a buffer whose mapping
// fails is freed again and counts as a failed attempt. After a failed attempt
// the function sleeps one second and retries; once a single buffer has failed
// more often than ALLOCATE_RETRY_TIMES allows, it stops. `pBuffers` may
// therefore end up with fewer than `count` entries — callers must check its
// size.
//
// Side effects: overwrites the members m_Flags and m_pBuf.
void KFDEvictTest::AllocBuffers(HSAuint32 defaultGPUNode, HSAuint32 count, HSAuint64 vramBufSize,
                                std::vector<void *> &pBuffers) {
    HSAuint64 totalMB;

    totalMB = N_PROCESSES*count*(vramBufSize>>20);
    if (m_IsParent) {
        LOG() << "Allocating " << N_PROCESSES << "*" << count << "*" << (vramBufSize>>20) << "(="
              << totalMB << ")MB VRAM in KFD" << std::endl;
    }

    HsaMemMapFlags mapFlags = {0};
    HSAKMT_STATUS ret;
    HSAuint32 retry = 0;

    m_Flags.Value = 0;
    m_Flags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
    m_Flags.ui32.HostAccess = 0;
    m_Flags.ui32.NonPaged = 1;

    for (HSAuint32 i = 0; i < count; ) {
        bool bufferReady = false;

        ret = hsaKmtAllocMemory(defaultGPUNode, vramBufSize, m_Flags, &m_pBuf);
        if (ret == HSAKMT_STATUS_SUCCESS) {
            bufferReady = true;

            // Any non-success status means the buffer is not mapped, not just
            // HSAKMT_STATUS_ERROR; release it and treat the attempt as failed.
            if (is_dgpu() &&
                hsaKmtMapMemoryToGPUNodes(m_pBuf, vramBufSize, NULL,
                                          mapFlags, 1, &defaultGPUNode) != HSAKMT_STATUS_SUCCESS) {
                EXPECT_SUCCESS(hsaKmtFreeMemory(m_pBuf, vramBufSize));
                LOG() << "Map failed for " << i << "/" << count << " buffer. Retrying allocation" << std::endl;
                bufferReady = false;
            }
        }

        if (bufferReady) {
            pBuffers.push_back(m_pBuf);
            i++;
            retry = 0;  // the retry budget is per buffer
            continue;
        }

        if (retry++ > ALLOCATE_RETRY_TIMES) {
            break;
        }

        /* Wait for 1 second to try allocate again */
        sleep(1);
    }
}
// Releases every non-NULL buffer in `pBuffers`, each of size `vramBufSize`.
// On a dGPU a buffer is unmapped from the GPU before it is freed. The vector
// itself is left untouched; the member m_pBuf is overwritten with each
// visited entry.
void KFDEvictTest::FreeBuffers(std::vector<void *> &pBuffers, HSAuint64 vramBufSize) {
    for (std::vector<void *>::iterator it = pBuffers.begin(); it != pBuffers.end(); ++it) {
        m_pBuf = *it;
        if (m_pBuf == NULL)
            continue;

        if (is_dgpu()) {
            EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(m_pBuf));
        }
        EXPECT_SUCCESS(hsaKmtFreeMemory(m_pBuf, vramBufSize));
    }
}
void KFDEvictTest::AllocAmdgpuBo(int rn, HSAuint64 vramBufSize, amdgpu_bo_handle &handle) {
struct amdgpu_bo_alloc_request alloc;
alloc.alloc_size = vramBufSize / N_PROCESSES;
alloc.phys_alignment = PAGE_SIZE;
alloc.preferred_heap = AMDGPU_GEM_DOMAIN_VRAM;
alloc.flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
if (m_IsParent) {
LOG() << "Allocating " << N_PROCESSES << "*" << (vramBufSize >> 20) / N_PROCESSES << "(="
<< (vramBufSize >> 20) << ")MB VRAM in GFX" << std::endl;
}
ASSERT_EQ(0, amdgpu_bo_alloc(m_RenderNodes[rn].device_handle, &alloc, &handle));
}
// Frees the amdgpu buffer object `handle` and raises a fatal test assertion
// if amdgpu_bo_free returns non-zero.
void KFDEvictTest::FreeAmdgpuBo(amdgpu_bo_handle handle) {
ASSERT_EQ(0, amdgpu_bo_free(handle));
}
// Allocates a buffer object, reserves a GPU virtual address range for it,
// maps the buffer at that address and maps it for CPU access.
//
// Parameters:
//   dev, size, alignment, heap, flags - passed to the allocation request
//   bo         - receives the buffer object handle
//   cpu        - receives the CPU mapping
//   mc_address - receives the GPU virtual address
//   va_handle  - receives the VA range handle
//
// Returns 0 on success. On failure returns the first non-zero error code
// after undoing exactly the steps that had already succeeded; *bo,
// *mc_address and *va_handle are not written in that case.
static int amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size,
                                   unsigned alignment, unsigned heap, uint64_t flags,
                                   amdgpu_bo_handle *bo, void **cpu, uint64_t *mc_address,
                                   amdgpu_va_handle *va_handle) {
    struct amdgpu_bo_alloc_request request = {};
    amdgpu_bo_handle buf_handle;
    amdgpu_va_handle handle;
    uint64_t vmc_addr;
    int r;

    request.alloc_size = size;
    request.phys_alignment = alignment;
    request.preferred_heap = heap;
    request.flags = flags;

    r = amdgpu_bo_alloc(dev, &request, &buf_handle);
    if (r)
        return r;

    r = amdgpu_va_range_alloc(dev,
                              amdgpu_gpu_va_range_general,
                              size, alignment, 0, &vmc_addr,
                              &handle, 0);
    if (r)
        goto error_va_alloc;

    r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_MAP);
    if (r)
        goto error_va_map;

    r = amdgpu_bo_cpu_map(buf_handle, cpu);
    if (r)
        goto error_cpu_map;

    *bo = buf_handle;
    *mc_address = vmc_addr;
    *va_handle = handle;

    return 0;

    // Each label undoes the step *before* the one that failed, then falls
    // through to the earlier stages.
error_cpu_map:
    // CPU map failed: the GPU VA mapping exists and must be removed.
    amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_map:
    // VA map failed (or was just undone): release the reserved VA range.
    amdgpu_va_range_free(handle);

error_va_alloc:
    // VA range allocation failed (or was just released): free the buffer.
    amdgpu_bo_free(buf_handle);
    return r;
}
// Tears down a buffer set up by amdgpu_bo_alloc_and_map: removes the CPU
// mapping, unmaps the GPU virtual address, frees the VA range and frees the
// buffer object.
//
// All four steps are always attempted, so a failure in one does not leak the
// remaining resources. Returns 0 when every step succeeded, otherwise the
// error code of the first step that failed.
static inline int amdgpu_bo_unmap_and_free(amdgpu_bo_handle bo, amdgpu_va_handle va_handle,
                                           uint64_t mc_addr, uint64_t size) {
    int firstError = 0;
    int r;

    r = amdgpu_bo_cpu_unmap(bo);
    if (r && !firstError)
        firstError = r;

    r = amdgpu_bo_va_op(bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP);
    if (r && !firstError)
        firstError = r;

    r = amdgpu_va_range_free(va_handle);
    if (r && !firstError)
        firstError = r;

    r = amdgpu_bo_free(bo);
    if (r && !firstError)
        firstError = r;

    return firstError;
}
// Creates a buffer-object list holding bo1 and, when bo2 is non-null, bo2 as
// well. Returns the result of amdgpu_bo_list_create; the new list is written
// to *list.
static inline int amdgpu_get_bo_list(amdgpu_device_handle dev, amdgpu_bo_handle bo1,
                                     amdgpu_bo_handle bo2, amdgpu_bo_list_handle *list) {
    amdgpu_bo_handle handles[2];
    handles[0] = bo1;
    handles[1] = bo2;

    // A null second handle shrinks the list to its first entry only.
    unsigned handleCount = 1;
    if (bo2)
        handleCount = 2;

    return amdgpu_bo_list_create(dev, handleCount, handles, NULL, list);
}
// Submits an indirect buffer filled with SDMA NOPs to the DMA ring of render
// node `rn` 100 times, with `handle` included in the submission's buffer list.
// After every successful submission it queries the fence for completion. When
// `computeQueue` is given, a NOP packet is also submitted on that queue every
// 16th iteration. At most two failed submissions are tolerated.
void KFDEvictTest::AmdgpuCommandSubmissionSdmaNop(int rn, amdgpu_bo_handle handle,
PM4Queue *computeQueue = NULL) {
amdgpu_context_handle contextHandle;
amdgpu_bo_handle ibResultHandle;
void *ibResultCpu;
uint64_t ibResultMcAddress;
struct amdgpu_cs_request ibsRequest;
struct amdgpu_cs_ib_info ibInfo;
struct amdgpu_cs_fence fenceStatus;
amdgpu_bo_list_handle boList;
amdgpu_va_handle vaHandle;
uint32_t *ptr;
uint32_t expired;
unsigned failCount = 0;
ASSERT_EQ(0, amdgpu_cs_ctx_create(m_RenderNodes[rn].device_handle, &contextHandle));
// One page in GTT holds the indirect buffer (IB).
ASSERT_EQ(0, amdgpu_bo_alloc_and_map(m_RenderNodes[rn].device_handle,
PAGE_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT, 0,
&ibResultHandle, &ibResultCpu,
&ibResultMcAddress, &vaHandle));
// The buffer list contains the IB and, when non-null, the caller's buffer.
ASSERT_EQ(0, amdgpu_get_bo_list(m_RenderNodes[rn].device_handle, ibResultHandle, handle,
&boList));
/* Fill Nop commands in IB */
ptr = reinterpret_cast<uint32_t *>(ibResultCpu);
for (int i = 0; i < 16; i++)
ptr[i] = SDMA_NOP;
memset(&ibInfo, 0, sizeof(struct amdgpu_cs_ib_info));
ibInfo.ib_mc_address = ibResultMcAddress;
// Matches the 16 entries written above (presumably a dword count — confirm
// against the amdgpu_cs_ib_info definition).
ibInfo.size = 16;
memset(&ibsRequest, 0, sizeof(struct amdgpu_cs_request));
ibsRequest.ip_type = AMDGPU_HW_IP_DMA;
ibsRequest.ring = 0;
ibsRequest.number_of_ibs = 1;
ibsRequest.ibs = &ibInfo;
ibsRequest.resources = boList;
ibsRequest.fence_info.handle = NULL;
memset(&fenceStatus, 0, sizeof(struct amdgpu_cs_fence));
for (int i = 0; i < 100; i++) {
int r = amdgpu_cs_submit(contextHandle, 0, &ibsRequest, 1);
// The delay is applied after every submission, successful or not.
Delay(50);
if (r) {
// A failed submission skips the fence query; a third failure is fatal.
failCount++;
ASSERT_LE(failCount, 2);
continue;
}
fenceStatus.context = contextHandle;
fenceStatus.ip_type = AMDGPU_HW_IP_DMA;
fenceStatus.ip_instance = 0;
fenceStatus.ring = 0;
fenceStatus.fence = ibsRequest.seq_no;
EXPECT_EQ(0, amdgpu_cs_query_fence_status(&fenceStatus,
g_TestTimeOut*1000000,
0, &expired));
if (!expired)
WARN() << "CS did not signal completion" << std::endl;
/* If a compute queue is given, submit a short compute job
 * every 16 loops (about once a second). If the process was
 * evicted, restore can take quite long.
 */
if (computeQueue && (i & 0xf) == 0) {
computeQueue->PlaceAndSubmitPacket(PM4NopPacket());
computeQueue->Wait4PacketConsumption(NULL, 10000);
}
}
// Release in reverse order of creation: buffer list, IB, context.
EXPECT_EQ(0, amdgpu_bo_list_destroy(boList));
EXPECT_EQ(0, amdgpu_bo_unmap_and_free(ibResultHandle, vaHandle,
ibResultMcAddress, PAGE_SIZE));
EXPECT_EQ(0, amdgpu_cs_ctx_free(contextHandle));
}
/* Evict and restore procedure basic test
*
* Use N_PROCESSES processes to allocate vram buf size larger than total vram size
*
* ALLOCATE_BUF_SIZE_MB buf allocation size
*
* The number of buffers per process is (vramSize * 7 / 8) / (vramBufSize * N_PROCESSES)
* Total vram all processes allocated: 8GB for 4GB Fiji, and 20GB for 16GB Vega10
*
* Eviction and restore will happen many times:
* ttm will evict buffers of another process if there is not enough free vram
* process restore will evict buffers of another process
*
* Sometimes the allocation may fail (maybe that is normal)
* ALLOCATE_RETRY_TIMES max retry times to allocate
*
* This is basic test with no queue, so vram is not used by the GPU during test
*
* TODO:
* - Synchronization between the processes, so they know for sure when
* they are done allocating memory
*/
TEST_F(KFDEvictTest, BasicTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    /* The node ID is kept signed so that the assertion below can actually
     * fail; with an unsigned variable ">= 0" is always true.
     */
    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    HSAuint64 vramBufSize = ALLOCATE_BUF_SIZE_MB * 1024 * 1024;

    HSAuint64 vramSize = GetVramSize(defaultGPUNode);
    HSAuint64 sysMemSize = GetSysMemSize();

    if (!vramSize) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    if (m_NodeInfo.IsAppAPU(defaultGPUNode)) {
        LOG() << "Skipping test on AppAPU." << std::endl;
        return;
    }

    LOG() << "Found VRAM of " << std::dec << (vramSize >> 20) << "MB" << std::endl;
    LOG() << "Found System RAM of " << std::dec << (sysMemSize >> 20) << "MB" << std::endl;

    // Use 7/8 of VRAM between all processes
    HSAuint64 testSize = vramSize * 7 / 8;
    HSAuint32 count = testSize / (vramBufSize * N_PROCESSES);

    /* count only depends on the VRAM size: zero means that not even one
     * buffer per process fits into the VRAM share used by the test.
     */
    if (count == 0) {
        LOG() << "Skipping test: Not enough VRAM available." << std::endl;
        return;
    }

    /* Fork the child processes */
    ForkChildProcesses(N_PROCESSES);

    int rn = FindDRMRenderNode(defaultGPUNode);
    if (rn < 0) {
        LOG() << "Skipping test: Could not find render node for default GPU." << std::endl;
        WaitChildProcesses();
        return;
    }

    std::vector<void *> pBuffers;
    AllocBuffers(defaultGPUNode, count, vramBufSize, pBuffers);

    /* Allocate gfx vram size of at most one third system memory */
    HSAuint64 size = sysMemSize / 3 < testSize / 2 ? sysMemSize / 3 : testSize / 2;
    amdgpu_bo_handle handle;
    AllocAmdgpuBo(rn, size, handle);

    /* Keep the graphics BO busy with SDMA submissions; no compute queue is
     * used in this test.
     */
    AmdgpuCommandSubmissionSdmaNop(rn, handle);

    FreeAmdgpuBo(handle);

    LOG() << m_psName << "free buffer" << std::endl;
    FreeBuffers(pBuffers, vramBufSize);

    WaitChildProcesses();

    TEST_END
}
/* Evict and restore queue test
*
* N_PROCESSES processes read all local buffers in parallel while buffers are evicted and restored
* If a GPU VM page fault happens, the test shader stops and fails to write the expected value
* to the result buffer, so the test reports a failure.
*
* Steps:
* - fork N_PROCESSES processes, each process does the same below
* - allocate local buffers, each buffer size is 64MB
* - allocate zero initialized host access address buffer and result buffer
* address buffer to pass address of local buffers to shader
* result buffer to store shader output result
* - submit queue to run ReadMemory shader
* - shader start m_DimX wavefronts, each wavefront keep reading one local buffer
* - notify shader to quit
* - check result buffer with specific value to confirm all wavefronts quit normally
*/
TEST_F(KFDEvictTest, QueueTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL)

    /* The node ID is kept signed so that the assertion below can actually
     * fail; with an unsigned variable ">= 0" is always true.
     */
    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    unsigned int count = MAX_WAVEFRONTS;

    const HsaNodeProperties *pNodeProperties = m_NodeInfo.HsaDefaultGPUNodeProperties();

    /* Skip test for chip if it doesn't have CWSR, which the test depends on */
    if (m_FamilyId < FAMILY_VI || isTonga(pNodeProperties)) {
        LOG() << std::hex << "Skipping test: No CWSR present for family ID 0x" << m_FamilyId << "." << std::endl;
        return;
    }

    if (m_NodeInfo.IsAppAPU(defaultGPUNode)) {
        LOG() << "Skipping test on AppAPU." << std::endl;
        return;
    }

    HSAuint32 i;
    HSAuint64 vramSize = GetVramSize(defaultGPUNode);
    HSAuint64 sysMemSize = GetSysMemSize();

    if (!vramSize) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    LOG() << "Found VRAM of " << std::dec << (vramSize >> 20) << "MB" << std::endl;
    LOG() << "Found System RAM of " << std::dec << (sysMemSize >> 20) << "MB" << std::endl;

    // Use 7/8 of VRAM between all processes
    HSAuint64 testSize = vramSize * 7 / 8;
    HSAuint32 vramBufSize = testSize / (count * N_PROCESSES);
    /* Round the buffer size down to a multiple of 1MB */
    vramBufSize = (vramBufSize / (1024 * 1024)) * (1024 * 1024);

    /* vramBufSize only depends on the VRAM size: zero means that the VRAM
     * share of one wavefront is below 1MB.
     */
    if (vramBufSize == 0) {
        LOG() << "Skipping test: Not enough VRAM available." << std::endl;
        return;
    }

    /* Assert all buffer address can be stored within one page
     * because only one page host memory srcBuf is allocated
     */
    ASSERT_LE(count, PAGE_SIZE/sizeof(unsigned int *));

    /* Fork the child processes */
    ForkChildProcesses(N_PROCESSES);

    int rn = FindDRMRenderNode(defaultGPUNode);
    if (rn < 0) {
        LOG() << "Skipping test: Could not find render node for default GPU." << std::endl;
        WaitChildProcesses();
        return;
    }

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer addrBuffer(PAGE_SIZE, defaultGPUNode);
    HsaMemoryBuffer resultBuffer(PAGE_SIZE, defaultGPUNode);

    ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(ReadMemoryIsa, isaBuffer.As<char*>()));

    PM4Queue pm4Queue;
    ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode));

    Dispatch dispatch0(isaBuffer);

    std::vector<void *> pBuffers;
    AllocBuffers(defaultGPUNode, count, vramBufSize, pBuffers);

    /* Allocate gfx vram size of at most one third system memory */
    HSAuint64 size = sysMemSize / 3 < testSize / 2 ? sysMemSize / 3 : testSize / 2;
    amdgpu_bo_handle handle;
    AllocAmdgpuBo(rn, size, handle);

    /* One wavefront is started per buffer that was actually allocated */
    unsigned int wavefront_num = pBuffers.size();
    LOG() << m_psName << "wavefront number " << wavefront_num << std::endl;

    void **localBufAddr = addrBuffer.As<void **>();
    unsigned int *result = resultBuffer.As<uint32_t *>();

    /* Pass the address of each local buffer to the shader */
    for (i = 0; i < wavefront_num; i++)
        *(localBufAddr + i) = pBuffers[i];

    /* Pre-load each result slot with the buffer size */
    for (i = 0; i < wavefront_num; i++)
        *(result + i) = vramBufSize;

    dispatch0.SetArgs(localBufAddr, result);
    dispatch0.SetDim(wavefront_num, 1, 1);
    /* Submit the packet and start shader */
    dispatch0.Submit(pm4Queue);

    AmdgpuCommandSubmissionSdmaNop(rn, handle);

    /* Uncomment this line for debugging */
    // LOG() << m_psName << "notify shader to quit" << std::endl;

    /* Fill address buffer so shader quits */
    addrBuffer.Fill(0x5678);

    /* Wait for shader to finish or timeout if shader has vm page fault */
    EXPECT_EQ(0, dispatch0.SyncWithStatus(180000));

    EXPECT_SUCCESS(pm4Queue.Destroy());

    FreeAmdgpuBo(handle);

    /* Uncomment this line for debugging */
    // LOG() << m_psName << "free buffer" << std::endl;

    /* Cleanup */
    FreeBuffers(pBuffers, vramBufSize);

    /* Check if all wavefronts finished successfully */
    for (i = 0; i < wavefront_num; i++)
        EXPECT_EQ(0x5678, *(result + i));

    WaitChildProcesses();

    TEST_END
}
/* Evict a queue running in bursts, so that the process has a chance
* to be idle when restored but the queue needs to resume to perform
* more work later. This test is designed to stress the idle process
* eviction optimization in KFD that leaves idle processes evicted
* until the next time the doorbell page is accessed.
*/
TEST_F(KFDEvictTest, BurstyTest) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    /* The node ID is kept signed so that the assertion below can actually
     * fail; with an unsigned variable ">= 0" is always true.
     */
    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    HSAuint64 vramBufSize = ALLOCATE_BUF_SIZE_MB * 1024 * 1024;

    if (m_NodeInfo.IsAppAPU(defaultGPUNode)) {
        LOG() << "Skipping test on AppAPU." << std::endl;
        return;
    }

    HSAuint64 vramSize = GetVramSize(defaultGPUNode);
    HSAuint64 sysMemSize = GetSysMemSize();

    if (!vramSize) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    LOG() << "Found VRAM of " << std::dec << (vramSize >> 20) << "MB" << std::endl;
    LOG() << "Found System RAM of " << std::dec << (sysMemSize >> 20) << "MB" << std::endl;

    // Use 7/8 of VRAM between all processes
    HSAuint64 testSize = vramSize * 7 / 8;
    HSAuint32 count = testSize / (vramBufSize * N_PROCESSES);

    /* count only depends on the VRAM size: zero means that not even one
     * buffer per process fits into the VRAM share used by the test.
     */
    if (count == 0) {
        LOG() << "Skipping test: Not enough VRAM available." << std::endl;
        return;
    }

    /* Fork the child processes */
    ForkChildProcesses(N_PROCESSES);

    int rn = FindDRMRenderNode(defaultGPUNode);
    if (rn < 0) {
        LOG() << "Skipping test: Could not find render node for default GPU." << std::endl;
        WaitChildProcesses();
        return;
    }

    PM4Queue pm4Queue;
    ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode));

    std::vector<void *> pBuffers;
    AllocBuffers(defaultGPUNode, count, vramBufSize, pBuffers);

    /* Allocate gfx vram size of at most one third system memory */
    HSAuint64 size = sysMemSize / 3 < testSize / 2 ? sysMemSize / 3 : testSize / 2;
    amdgpu_bo_handle handle;
    AllocAmdgpuBo(rn, size, handle);

    /* Passing the compute queue makes the helper submit a PM4 NOP in
     * bursts between the SDMA submissions.
     */
    AmdgpuCommandSubmissionSdmaNop(rn, handle, &pm4Queue);

    FreeAmdgpuBo(handle);

    LOG() << m_psName << "free buffer" << std::endl;
    FreeBuffers(pBuffers, vramBufSize);

    EXPECT_SUCCESS(pm4Queue.Destroy());

    WaitChildProcesses();

    TEST_END
}
@@ -0,0 +1,56 @@
/*
* Copyright (C) 2017-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __KFD_EVICT_TEST__H__
#define __KFD_EVICT_TEST__H__
#include <string>
#include <vector>
#include "KFDMultiProcessTest.hpp"
#include "PM4Queue.hpp"
// @class KFDEvictTest
// Test eviction and restore procedure using two processes
class KFDEvictTest : public KFDMultiProcessTest {
 public:
    KFDEvictTest() {}
    ~KFDEvictTest() {}

 protected:
    // Test fixture set-up and tear-down hooks
    virtual void SetUp();
    virtual void TearDown();

    // Allocate up to count buffers of vramBufSize bytes on defaultGPUNode and
    // collect their addresses in pBuffers
    // NOTE(review): summary inferred from the signature and call sites; the
    // implementation is not part of this header - confirm
    void AllocBuffers(HSAuint32 defaultGPUNode, HSAuint32 count, HSAuint64 vramBufSize,
                      std::vector<void *> &pBuffers);

    // Release the buffers in pBuffers, each of vramBufSize bytes
    void FreeBuffers(std::vector<void *> &pBuffers, HSAuint64 vramBufSize);

    // Allocate / free an amdgpu buffer object on render node rn
    void AllocAmdgpuBo(int rn, HSAuint64 vramBufSize, amdgpu_bo_handle &handle);
    void FreeAmdgpuBo(amdgpu_bo_handle handle);

    // Submit SDMA NOP command buffers on render node rn, optionally
    // interleaved with work on computeQueue
    void AmdgpuCommandSubmissionSdmaNop(int rn, amdgpu_bo_handle handle,
                                        PM4Queue *computeQueue);

    // Members
    HsaMemFlags m_Flags;
    void* m_pBuf;
};
#endif // __KFD_EVICT_TEST__H__
@@ -0,0 +1,392 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "KFDExceptionTest.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAPacket.hpp"
#include "SDMAQueue.hpp"
#include "Dispatch.hpp"
#include <sys/mman.h>
// Per-test setup: runs the base component setup and then logs a notice that
// the exception tests may leave page fault messages in the kernel log.
// ROUTINE_START / ROUTINE_END are project macros defined outside this file.
void KFDExceptionTest::SetUp() {
ROUTINE_START
KFDBaseComponentTest::SetUp();
// The faults provoked by these tests are intentional; tell the user up front
LOG() << "This Exception test might cause expected page fault "
"error logs at kernel level." << std::endl;
ROUTINE_END
}
// Per-test teardown: runs the base component teardown and then pauses before
// the next test starts.
// ROUTINE_START / ROUTINE_END are project macros defined outside this file.
void KFDExceptionTest::TearDown() {
ROUTINE_START
KFDBaseComponentTest::TearDown();
// WORKAROUND: This needs to be fixed in the kernel
// Wait 500ms for the kernel to process any fault storms before the
// next test to avoid reporting incorrect faults in the next test.
Delay(500);
ROUTINE_END
}
/* Test for memory exception. The function expects a Memory Fault to be
* triggered by the GPU when it tries to copy dword from pSrc to pDst.
* Should be called from a Child Process since the Memory Fault causes
* all the queues to be halted.
*/
void KFDExceptionTest::TestMemoryException(int defaultGPUNode, HSAuint64 pSrc,
HSAuint64 pDst, unsigned int dimX,
unsigned int dimY, unsigned int dimZ) {
PM4Queue queue;
HsaEvent *vmFaultEvent;
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
HSAuint64 faultAddress, page_mask = ~((HSAuint64)PAGE_SIZE - 1);
Dispatch dispatch(isaBuffer, false);
HsaEventDescriptor eventDesc;
eventDesc.EventType = HSA_EVENTTYPE_MEMORY;
eventDesc.NodeId = defaultGPUNode;
eventDesc.SyncVar.SyncVar.UserData = NULL;
eventDesc.SyncVar.SyncVarSize = 0;
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));
m_ChildStatus = queue.Create(defaultGPUNode);
if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
WARN() << "Queue create failed" << std::endl;
return;
}
m_ChildStatus = hsaKmtCreateEvent(&eventDesc, true, false, &vmFaultEvent);
if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
WARN() << "Event create failed" << std::endl;
goto queuefail;
}
dispatch.SetDim(dimX, dimY, dimZ);
dispatch.SetArgs(reinterpret_cast<void *>(pSrc), reinterpret_cast<void *>(pDst));
dispatch.Submit(queue);
m_ChildStatus = hsaKmtWaitOnEvent(vmFaultEvent, g_TestTimeOut);
if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
WARN() << "Wait failed. No Exception triggered" << std::endl;
goto eventfail;
}
if (vmFaultEvent->EventData.EventType != HSA_EVENTTYPE_MEMORY) {
WARN() << "Unexpected Event Received " << vmFaultEvent->EventData.EventType
<< std::endl;
m_ChildStatus = HSAKMT_STATUS_ERROR;
goto eventfail;
}
faultAddress = vmFaultEvent->EventData.EventData.MemoryAccessFault.VirtualAddress;
if (faultAddress != (pSrc & page_mask) &&
faultAddress != (pDst & page_mask) ) {
WARN() << "Unexpected Fault Address " << faultAddress
<< " expected " << (pSrc & page_mask) << " or "
<< (pDst & page_mask) << std::endl;
m_ChildStatus = HSAKMT_STATUS_ERROR;
}
eventfail:
hsaKmtDestroyEvent(vmFaultEvent);
queuefail:
queue.Destroy();
}
void KFDExceptionTest::TestSdmaException(int defaultGPUNode, void *pDst) {
SDMAQueue queue;
HsaEvent *vmFaultEvent;
HSAuint64 faultAddress, page_mask = ~((HSAuint64)PAGE_SIZE - 1);
HsaEventDescriptor eventDesc;
eventDesc.EventType = HSA_EVENTTYPE_MEMORY;
eventDesc.NodeId = defaultGPUNode;
eventDesc.SyncVar.SyncVar.UserData = NULL;
eventDesc.SyncVar.SyncVarSize = 0;
m_ChildStatus = queue.Create(defaultGPUNode);
if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
WARN() << "Queue create failed" << std::endl;
return;
}
m_ChildStatus = hsaKmtCreateEvent(&eventDesc, true, false, &vmFaultEvent);
if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
WARN() << "Event create failed" << std::endl;
goto queuefail;
}
queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(queue.GetFamilyId(),
reinterpret_cast<void *>(pDst),
0x02020202));
m_ChildStatus = hsaKmtWaitOnEvent(vmFaultEvent, g_TestTimeOut);
if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
WARN() << "Wait failed. No Exception triggered" << std::endl;
goto eventfail;
}
if (vmFaultEvent->EventData.EventType != HSA_EVENTTYPE_MEMORY) {
WARN() << "Unexpected Event Received " << vmFaultEvent->EventData.EventType
<< std::endl;
m_ChildStatus = HSAKMT_STATUS_ERROR;
goto eventfail;
}
faultAddress = vmFaultEvent->EventData.EventData.MemoryAccessFault.VirtualAddress;
if (faultAddress != ((HSAuint64)pDst & page_mask) ) {
WARN() << "Unexpected Fault Address " << faultAddress
<< " expected " << ((HSAuint64)pDst & page_mask) << std::endl;
m_ChildStatus = HSAKMT_STATUS_ERROR;
}
eventfail:
hsaKmtDestroyEvent(vmFaultEvent);
queuefail:
queue.Destroy();
}
/* Test Bad Address access in a child process */
TEST_F(KFDExceptionTest, AddressFault) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (m_FamilyId == FAMILY_RV) {
        LOG() << "Skipping test: IOMMU issues on Raven." << std::endl;
        return;
    }

    m_ChildPid = fork();
    /* Without a child there is no exit status to evaluate: waitpid() would
     * fail and leave childStatus unset.
     */
    ASSERT_GE(m_ChildPid, 0) << "fork failed";

    if (m_ChildPid == 0) {
        /* Child: re-run the base fixture teardown and setup in this process
         * before touching the GPU.
         */
        KFDBaseComponentTest::TearDown();
        KFDBaseComponentTest::SetUp();

        HsaMemoryBuffer srcBuffer(PAGE_SIZE, defaultGPUNode, false);

        srcBuffer.Fill(0xAA55AA55);
        /* Copy from a valid source to a bad destination address */
        TestMemoryException(defaultGPUNode, srcBuffer.As<HSAuint64>(),
                            0x12345678ULL);
    } else {
        /* Parent: a dGPU child is expected to exit normally with
         * HSAKMT_STATUS_SUCCESS; otherwise the child is expected to be
         * terminated by SIGSEGV.
         */
        int childStatus = 0;

        waitpid(m_ChildPid, &childStatus, 0);
        if (is_dgpu()) {
            EXPECT_EQ(WIFEXITED(childStatus), true);
            EXPECT_EQ(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS);
        } else {
            EXPECT_EQ(WIFSIGNALED(childStatus), true);
            EXPECT_EQ(WTERMSIG(childStatus), SIGSEGV);
        }
    }

    TEST_END
}
/* Allocate Read Only buffer. Test Memory Exception failure by
* attempting to write to that buffer in the child process.
*/
TEST_F(KFDExceptionTest, PermissionFault) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (m_FamilyId == FAMILY_RV) {
        LOG() << "Skipping test: IOMMU issues on Raven." << std::endl;
        return;
    }

    m_ChildPid = fork();
    /* Without a child there is no exit status to evaluate: waitpid() would
     * fail and leave childStatus unset.
     */
    ASSERT_GE(m_ChildPid, 0) << "fork failed";

    if (m_ChildPid == 0) {
        /* Child: re-run the base fixture teardown and setup in this process
         * before touching the GPU.
         */
        KFDBaseComponentTest::TearDown();
        KFDBaseComponentTest::SetUp();

        HsaMemoryBuffer readOnlyBuffer(PAGE_SIZE, defaultGPUNode, false /*zero*/,
                                       false /*isLocal*/, true /*isExec*/,
                                       false /*isScratch*/, true /*isReadOnly*/);

        HsaMemoryBuffer srcSysBuffer(PAGE_SIZE, defaultGPUNode, false);

        srcSysBuffer.Fill(0xAA55AA55);
        /* Copy from a valid source into the read-only buffer */
        TestMemoryException(defaultGPUNode, srcSysBuffer.As<HSAuint64>(),
                            readOnlyBuffer.As<HSAuint64>());
    } else {
        /* Parent: a dGPU child is expected to exit normally with
         * HSAKMT_STATUS_SUCCESS; otherwise the child is expected to be
         * terminated by SIGSEGV.
         */
        int childStatus = 0;

        waitpid(m_ChildPid, &childStatus, 0);
        if (is_dgpu()) {
            EXPECT_EQ(WIFEXITED(childStatus), true);
            EXPECT_EQ(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS);
        } else {
            EXPECT_EQ(WIFSIGNALED(childStatus), true);
            EXPECT_EQ(WTERMSIG(childStatus), SIGSEGV);
        }
    }

    TEST_END
}
/* Allocate Read Only user pointer buffer. Test Memory Exception failure by
* attempting to write to that buffer in the child process.
*/
TEST_F(KFDExceptionTest, PermissionFaultUserPointer) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (m_FamilyId == FAMILY_RV) {
        LOG() << "Skipping test: IOMMU issues on Raven." << std::endl;
        return;
    }

    m_ChildPid = fork();
    /* Without a child there is no exit status to evaluate: waitpid() would
     * fail and leave childStatus unset.
     */
    ASSERT_GE(m_ChildPid, 0) << "fork failed";

    if (m_ChildPid == 0) {
        /* Child: re-run the base fixture teardown and setup in this process
         * before touching the GPU.
         */
        KFDBaseComponentTest::TearDown();
        KFDBaseComponentTest::SetUp();

        /* Anonymous page that the CPU may only read, used as the user
         * pointer destination of the copy.
         */
        void *pBuf = mmap(NULL, PAGE_SIZE, PROT_READ,
                          MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
        ASSERT_NE(pBuf, MAP_FAILED);
        EXPECT_SUCCESS(hsaKmtRegisterMemory(pBuf, PAGE_SIZE));
        EXPECT_SUCCESS(hsaKmtMapMemoryToGPU(pBuf, PAGE_SIZE, NULL));

        HsaMemoryBuffer srcSysBuffer(PAGE_SIZE, defaultGPUNode, false);

        srcSysBuffer.Fill(0xAA55AA55);
        TestMemoryException(defaultGPUNode, srcSysBuffer.As<HSAuint64>(),
                            (HSAuint64)pBuf);
    } else {
        /* Parent: a dGPU child is expected to exit normally with
         * HSAKMT_STATUS_SUCCESS; otherwise the child is expected to be
         * terminated by SIGSEGV.
         */
        int childStatus = 0;

        waitpid(m_ChildPid, &childStatus, 0);
        if (is_dgpu()) {
            EXPECT_EQ(WIFEXITED(childStatus), true);
            EXPECT_EQ(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS);
        } else {
            EXPECT_EQ(WIFSIGNALED(childStatus), true);
            EXPECT_EQ(WTERMSIG(childStatus), SIGSEGV);
        }
    }

    TEST_END
}
/* Test VM fault storm handling by copying to/from invalid pointers
* with lots of work items at the same time
*/
TEST_F(KFDExceptionTest, FaultStorm) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (m_FamilyId == FAMILY_RV) {
        LOG() << "Skipping test: IOMMU issues on Raven." << std::endl;
        return;
    }

    m_ChildPid = fork();
    /* Without a child there is no exit status to evaluate: waitpid() would
     * fail and leave childStatus unset.
     */
    ASSERT_GE(m_ChildPid, 0) << "fork failed";

    if (m_ChildPid == 0) {
        /* Child: re-run the base fixture teardown and setup in this process
         * before touching the GPU.
         */
        KFDBaseComponentTest::TearDown();
        KFDBaseComponentTest::SetUp();

        /* Both addresses are invalid and the dispatch is 1024 x 1024 x 1 */
        TestMemoryException(defaultGPUNode, 0x12345678, 0x76543210, 1024, 1024, 1);
    } else {
        /* Parent: a dGPU child is expected to exit normally with
         * HSAKMT_STATUS_SUCCESS; otherwise the child is expected to be
         * terminated by SIGSEGV.
         */
        int childStatus = 0;

        waitpid(m_ChildPid, &childStatus, 0);
        if (is_dgpu()) {
            EXPECT_EQ(WIFEXITED(childStatus), true);
            EXPECT_EQ(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS);
        } else {
            EXPECT_EQ(WIFSIGNALED(childStatus), true);
            EXPECT_EQ(WTERMSIG(childStatus), SIGSEGV);
        }
    }

    TEST_END
}
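/* Test that an SDMA write to a buffer that was unmapped from the GPU
 * triggers a memory exception in a child process
 */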
TEST_F(KFDExceptionTest, SdmaQueueException) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (m_FamilyId == FAMILY_RV) {
        LOG() << "Skipping test: IOMMU issues on Raven." << std::endl;
        return;
    }

    m_ChildPid = fork();
    /* Without a child there is no exit status to evaluate: waitpid() would
     * fail and leave childStatus unset.
     */
    ASSERT_GE(m_ChildPid, 0) << "fork failed";

    if (m_ChildPid == 0) {
        unsigned int* pDb = NULL;
        unsigned int *nullPtr = NULL;

        /* Child: re-run the base fixture teardown and setup in this process
         * before touching the GPU.
         */
        KFDBaseComponentTest::TearDown();
        KFDBaseComponentTest::SetUp();

        m_MemoryFlags.ui32.NonPaged = 1;
        m_MemoryFlags.ui32.HostAccess = 0;
        ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, PAGE_SIZE, m_MemoryFlags,
                                         reinterpret_cast<void**>(&pDb)));
        // verify that pDb is not null before it's being used
        ASSERT_NE(nullPtr, pDb) << "hsaKmtAllocMemory returned a null pointer";
        /* Map and immediately unmap the buffer so that the SDMA write below
         * targets an address that is no longer mapped to the GPU.
         */
        ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(pDb, PAGE_SIZE, NULL));
        EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(pDb));

        TestSdmaException(defaultGPUNode, pDb);
        EXPECT_SUCCESS(hsaKmtFreeMemory(pDb, PAGE_SIZE));
    } else {
        /* Parent: a dGPU child is expected to exit normally with
         * HSAKMT_STATUS_SUCCESS; otherwise the child is expected to be
         * terminated by SIGSEGV.
         */
        int childStatus = 0;

        waitpid(m_ChildPid, &childStatus, 0);
        if (is_dgpu()) {
            EXPECT_EQ(WIFEXITED(childStatus), true);
            EXPECT_EQ(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS);
        } else {
            EXPECT_EQ(WIFSIGNALED(childStatus), true);
            EXPECT_EQ(WTERMSIG(childStatus), SIGSEGV);
        }
    }

    TEST_END
}
@@ -0,0 +1,66 @@
/*
* Copyright (C) 2014-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __KFD_EXCEPTION_TEST__H__
#define __KFD_EXCEPTION_TEST__H__
#include <gtest/gtest.h>
#include "KFDBaseComponentTest.hpp"
class KFDExceptionTest : public KFDBaseComponentTest {
public:
KFDExceptionTest() : m_ChildPid(-1) {
/* Because there could be early return before m_ChildPid is set
* by fork(), we should initialize m_ChildPid to a non-zero value
* to avoid possible exit of the main process.
*/
}
~KFDExceptionTest() {
/* exit() is necessary for the child process. Otherwise when the
* child process finishes, gtest assumes the test has finished and
* starts the next test while the parent is still active.
*/
if (m_ChildPid == 0) {
if (!m_ChildStatus && HasFatalFailure())
m_ChildStatus = HSAKMT_STATUS_ERROR;
exit(m_ChildStatus);
}
}
protected:
virtual void SetUp();
virtual void TearDown();
void TestMemoryException(int defaultGPUNode, HSAuint64 pSrc, HSAuint64 pDst,
unsigned int dimX = 1, unsigned int dimY = 1,
unsigned int dimZ = 1);
void TestSdmaException(int defaultGPUNode, void *pDst);
protected: // Members
pid_t m_ChildPid;
HSAKMT_STATUS m_ChildStatus;
};
#endif // __KFD_EXCEPTION_TEST__H__
+111
View File
@@ -0,0 +1,111 @@
/*
* Copyright (C) 2014-2019 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "KFDGWSTest.hpp"
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "Dispatch.hpp"
// Per-test setup: only runs the base component setup.
// ROUTINE_START / ROUTINE_END are project macros defined outside this file.
void KFDGWSTest::SetUp() {
ROUTINE_START
KFDBaseComponentTest::SetUp();
ROUTINE_END
}
// Per-test teardown: only runs the base component teardown.
// ROUTINE_START / ROUTINE_END are project macros defined outside this file.
void KFDGWSTest::TearDown() {
ROUTINE_START
KFDBaseComponentTest::TearDown();
ROUTINE_END
}
/* Allocate all GWS resources of the default GPU node for a PM4 queue and
 * check that the first GWS index reported is 0.
 */
TEST_F(KFDGWSTest, Allocate) {
    TEST_START(TESTPROFILE_RUNALL);

    HSAuint32 firstGWS;
    PM4Queue queue;

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    const HsaNodeProperties *pNodeProperties = m_NodeInfo.HsaDefaultGPUNodeProperties();

    /* The test needs node properties that report at least one GWS */
    const bool gwsAvailable = pNodeProperties && pNodeProperties->NumGws;
    if (!gwsAvailable) {
        LOG() << "Skip test: GPU node doesn't support GWS" << std::endl;
        return;
    }

    ASSERT_SUCCESS(queue.Create(defaultGPUNode));

    /* Request every GWS the node reports for this queue */
    ASSERT_SUCCESS(hsaKmtAllocQueueGWS(queue.GetResource()->QueueId,
                                       pNodeProperties->NumGws, &firstGWS));
    EXPECT_EQ(0, firstGWS);

    EXPECT_SUCCESS(queue.Destroy());

    TEST_END
}
/* Run the GwsInit shader once and then the GwsAtomicIncrease shader with a
 * 1024 x 16 x 16 dispatch on the same buffer, and check the final value of
 * the first dword of that buffer.
 */
TEST_F(KFDGWSTest, Semaphore) {
    TEST_START(TESTPROFILE_RUNALL);

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    const HsaNodeProperties *pNodeProperties = m_NodeInfo.HsaDefaultGPUNodeProperties();
    HSAuint32 firstGWS;
    HSAuint32 numResources = 1;
    PM4Queue queue;

    /* The test needs node properties that report at least one GWS */
    const bool gwsAvailable = pNodeProperties && pNodeProperties->NumGws;
    if (!gwsAvailable) {
        LOG() << "Skip test: GPU node doesn't support GWS" << std::endl;
        return;
    }

    HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
    HsaMemoryBuffer buffer(PAGE_SIZE, defaultGPUNode, true, false, false);

    ASSERT_SUCCESS(queue.Create(defaultGPUNode));

    /* Request every GWS the node reports for this queue */
    ASSERT_SUCCESS(hsaKmtAllocQueueGWS(queue.GetResource()->QueueId,
                                       pNodeProperties->NumGws, &firstGWS));
    EXPECT_EQ(0, firstGWS);

    /* First pass: init shader, with numResources stored in the buffer */
    ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(GwsInitIsa, isaBuffer.As<char*>()));

    Dispatch dispatch0(isaBuffer);
    buffer.Fill(numResources, 0, 4);
    dispatch0.SetArgs(buffer.As<void*>(), NULL);
    dispatch0.Submit(queue);
    dispatch0.Sync();

    /* Second pass: atomic increase shader, reusing the same ISA buffer */
    ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(GwsAtomicIncreaseIsa, isaBuffer.As<char*>()));

    Dispatch dispatch(isaBuffer);
    dispatch.SetArgs(buffer.As<void*>(), NULL);
    dispatch.SetDim(1024, 16, 16);

    dispatch.Submit(queue);
    dispatch.Sync();

    EXPECT_EQ(1024*16*16+1, *buffer.As<uint32_t *>());

    EXPECT_SUCCESS(queue.Destroy());

    TEST_END
}
@@ -0,0 +1,41 @@
/*
* Copyright (C) 2014-2019 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __KFD_GWS_TEST__H__
#define __KFD_GWS_TEST__H__
#include <gtest/gtest.h>
#include "KFDBaseComponentTest.hpp"
// @class KFDGWSTest
// Test fixture for the GWS tests; adds no state of its own on top of
// KFDBaseComponentTest.
class KFDGWSTest : public KFDBaseComponentTest {
public:
KFDGWSTest() {}
~KFDGWSTest() {}
protected:
// Test fixture set-up and tear-down hooks
virtual void SetUp();
virtual void TearDown();
};
#endif // __KFD_GWS_TEST__H__
@@ -0,0 +1,233 @@
/*
* Copyright (C) 2016-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "KFDGraphicsInterop.hpp"
#include "Dispatch.hpp"
#include "PM4Queue.hpp"
/* Register a graphics buffer object with KFD and check it end to end.
 *
 * A VRAM buffer object is allocated through libdrm on the render node of the
 * default GPU node, filled with 0xaa bytes from the CPU, tagged with a
 * metadata string and exported as a dma-buf. The dma-buf is registered with
 * hsaKmtRegisterGraphicsHandleToNodes; the test then checks the reported
 * size and metadata, reads the first dword back through a CopyDwordIsa
 * dispatch, and validates hsaKmtQueryPointerInfo before cleaning up.
 */
TEST_F(KFDGraphicsInterop, RegisterGraphicsHandle) {
    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    const HsaNodeProperties *pNodeProps =
        m_NodeInfo.GetNodeProperties(defaultGPUNode);
    const HSAuint32 familyID = FamilyIdFromNode(pNodeProps);

    if (isTonga(pNodeProps)) {
        LOG() << "Skipping test: Tonga workaround in thunk returns incorrect allocation size." << std::endl;
        return;
    }

    HSAuint32 nodes[1] = {(uint32_t)defaultGPUNode};

    const char metadata[] = "This data is really meta.";
    unsigned metadata_size = strlen(metadata)+1;  // include the terminating NUL
    int rn = FindDRMRenderNode(defaultGPUNode);

    if (rn < 0) {
        LOG() << "Skipping test: Could not find render node for default GPU node." << std::endl;
        return;
    }

    // Create the buffer with metadata and get a dmabuf handle to it
    struct amdgpu_bo_alloc_request alloc;
    amdgpu_bo_handle handle;
    // Tonga already returned above, so only the FAMILY_CZ half of this
    // condition can still select the larger allocation here.
    if (familyID == FAMILY_CZ || isTonga(pNodeProps))
        alloc.alloc_size = PAGE_SIZE * 8;
    else
        alloc.alloc_size = PAGE_SIZE;
    alloc.phys_alignment = PAGE_SIZE;
    alloc.preferred_heap = AMDGPU_GEM_DOMAIN_VRAM;
    alloc.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
    ASSERT_EQ(0, amdgpu_bo_alloc(m_RenderNodes[rn].device_handle, &alloc, &handle));

    // Fill the first page with the pattern the shader is expected to read back
    void *pCpuMap;
    ASSERT_EQ(0, amdgpu_bo_cpu_map(handle, &pCpuMap));
    memset(pCpuMap, 0xaa, PAGE_SIZE);
    EXPECT_EQ(0, amdgpu_bo_cpu_unmap(handle));

    struct amdgpu_bo_metadata meta;
    meta.flags = 0;
    meta.tiling_info = 0;
    meta.size_metadata = metadata_size;
    memcpy(meta.umd_metadata, metadata, metadata_size);
    EXPECT_EQ(0, amdgpu_bo_set_metadata(handle, &meta));

    // The export must be fatal on failure: dmabufFd is only written on
    // success, and everything below (register, close) consumes it.
    uint32_t dmabufFd;
    ASSERT_EQ(0, amdgpu_bo_export(handle, amdgpu_bo_handle_type_dma_buf_fd, &dmabufFd));

    // Register it with HSA
    HsaGraphicsResourceInfo info;
    ASSERT_SUCCESS(hsaKmtRegisterGraphicsHandleToNodes(dmabufFd, &info,
                                                       1, nodes));

    /* DMA buffer handle and GEM handle are no longer needed, KFD
     * should have taken a reference to the BO
     */
    EXPECT_EQ(0, close(dmabufFd));
    EXPECT_EQ(0, amdgpu_bo_free(handle));

    // Check that buffer size and metadata match
    EXPECT_EQ(info.SizeInBytes, alloc.alloc_size);
    EXPECT_EQ(info.MetadataSizeInBytes, metadata_size);
    // EXPECT_STREQ tolerates a NULL Metadata pointer (reported as a mismatch)
    // and prints both strings on failure, unlike a raw strcmp().
    EXPECT_STREQ(metadata, (const char *)info.Metadata);

    // Map the buffer
    ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(info.MemoryAddress,
                                        info.SizeInBytes,
                                        NULL));

    // Copy contents to a system memory buffer for comparison
    HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);

    ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(CopyDwordIsa, isaBuffer.As<char*>()));

    HsaMemoryBuffer dstBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/);

    PM4Queue queue;
    ASSERT_SUCCESS(queue.Create(defaultGPUNode));
    Dispatch dispatch(isaBuffer);

    dispatch.SetArgs(info.MemoryAddress, dstBuffer.As<void*>());
    dispatch.Submit(queue);
    dispatch.Sync(g_TestTimeOut);

    EXPECT_SUCCESS(queue.Destroy());

    // First dword of the destination must carry the 0xaa fill pattern
    EXPECT_EQ(dstBuffer.As<unsigned int *>()[0], 0xaaaaaaaa);

    // Test QueryMem before the cleanup
    HsaPointerInfo ptrInfo;
    EXPECT_SUCCESS(hsaKmtQueryPointerInfo((const void *)info.MemoryAddress, &ptrInfo));
    EXPECT_EQ(ptrInfo.Type, HSA_POINTER_REGISTERED_GRAPHICS);
    EXPECT_EQ(ptrInfo.Node, (HSAuint32)defaultGPUNode);
    EXPECT_EQ(ptrInfo.GPUAddress, (HSAuint64)info.MemoryAddress);
    EXPECT_EQ(ptrInfo.SizeInBytes, alloc.alloc_size);
    EXPECT_EQ(ptrInfo.MemFlags.ui32.CoarseGrain, 1);

    // Cleanup
    EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(info.MemoryAddress));
    EXPECT_SUCCESS(hsaKmtDeregisterMemory(info.MemoryAddress));

    TEST_END
}
#if 0
/* This test isn't testing things the way we wanted it to. It is flaky and
* will end up failing if the memory is evicted, which isn't possible for what
* it is intended to test. It needs a rework
*/
/* Third-party device memory can be registered for GPU access in
* ROCm stack. Test this feature. Third party device is mimicked
* in multi-GPU system using Graphics stack (libdrm). CPU accessible
* device memory is allocated using Graphics stack on gpuNode2 and
* this memory will be registered on gpuNode1 for GPU access.
*/
// NOTE(review): this whole test sits inside the enclosing "#if 0" and is not
// compiled. The notes below are things to confirm before re-enabling it.
TEST_F(KFDGraphicsInterop, RegisterForeignDeviceMem) {
TEST_START(TESTPROFILE_RUNALL)
if (!is_dgpu()) {
LOG() << "Skipping test: Only supported on multi-dGPU system." << std::endl;
return;
}
const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
if (gpuNodes.size() < 2) {
LOG() << "Skipping test: At least two GPUs are required." << std::endl;
return;
}
/* gpuNode2 must have public memory (large bar) to allocate CPU accessible
* device memory.
*/
HSAint32 gpuNode1 = m_NodeInfo.HsaDefaultGPUNode(), gpuNode2 = 0;
// NOTE(review): pNodeProperties is declared but never used in this test.
const HsaNodeProperties *pNodeProperties;
gpuNode2 = m_NodeInfo.FindLargeBarGPUNode();
if (gpuNode2 < 0) {
LOG() << "Skipping test: At least one large bar GPU is required." << std::endl;
return;
}
// If the default node is also the large-bar node, pick the first other GPU
// node as the one that will access the memory.
if (gpuNode1 == gpuNode2) {
for (unsigned i = 0; i < gpuNodes.size(); i++) {
if (gpuNodes.at(i) != gpuNode2) {
gpuNode1 = gpuNodes.at(i);
break;
}
}
}
const HsaNodeProperties *pNodeProps =
m_NodeInfo.GetNodeProperties(gpuNode2);
const HSAuint32 familyID = FamilyIdFromNode(pNodeProps);
int rn = FindDRMRenderNode(gpuNode2);
if (rn < 0) {
LOG() << "Skipping test: Cound not find render node for 2nd GPU." << std::endl;
return;
}
// Allocate CPU accessible device memory on gpuNode2
struct amdgpu_bo_alloc_request alloc;
amdgpu_bo_handle handle;
if (familyID == FAMILY_CZ || isTonga(pNodeProps))
alloc.alloc_size = PAGE_SIZE * 8;
else
alloc.alloc_size = PAGE_SIZE;
alloc.phys_alignment = PAGE_SIZE;
alloc.preferred_heap = AMDGPU_GEM_DOMAIN_VRAM;
alloc.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
ASSERT_EQ(0, amdgpu_bo_alloc(m_RenderNodes[rn].device_handle, &alloc, &handle));
// Fill the first page with 0xAA through the CPU mapping; the mapping is kept
// until the end of the test because it is the address handed to KFD below.
void *pCpuMap;
ASSERT_EQ(0, amdgpu_bo_cpu_map(handle, &pCpuMap));
memset(pCpuMap, 0xAA, PAGE_SIZE);
/* Register third-party device memory in KFD. Test GPU access
* by carrying out a simple copy test
*/
HsaMemoryBuffer lockDeviceMemory(pCpuMap, PAGE_SIZE);
HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode1, true/*zero*/, false/*local*/, true/*exec*/);
HsaMemoryBuffer dstBuffer(PAGE_SIZE, gpuNode1, true/*zero*/);
PM4Queue queue;
// The Dispatch is constructed before the ISA is written into isaBuffer.
Dispatch dispatch(isaBuffer);
// NOTE(review): the enabled RegisterGraphicsHandle test in this file builds
// its shader with m_pAsm->RunAssembleBuf(CopyDwordIsa, ...); confirm that
// m_pIsaGen still exists before re-enabling this test.
m_pIsaGen->GetCopyDwordIsa(isaBuffer);
ASSERT_SUCCESS(queue.Create(gpuNode1));
dispatch.SetArgs(lockDeviceMemory.As<void*>(), dstBuffer.As<void*>());
dispatch.Submit(queue);
dispatch.Sync(g_TestTimeOut);
EXPECT_SUCCESS(queue.Destroy());
// The copied dword must carry the 0xAA fill pattern
EXPECT_EQ(dstBuffer.As<HSAuint32*>()[0], 0xAAAAAAAA);
EXPECT_EQ(0, amdgpu_bo_cpu_unmap(handle));
EXPECT_EQ(0, amdgpu_bo_free(handle));
TEST_END
}
#endif
@@ -0,0 +1,37 @@
/*
* Copyright (C) 2016-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "KFDMemoryTest.hpp"
#ifndef __KFD_GRAPHICS_INTEROP_TEST__H__
#define __KFD_GRAPHICS_INTEROP_TEST__H__
// @class KFDGraphicsInterop
// Adds access to graphics device for interoperability testing
// Fixture for the graphics interoperability tests. It adds no members of
// its own; everything is inherited from KFDMemoryTest.
class KFDGraphicsInterop : public KFDMemoryTest {
 public:
    KFDGraphicsInterop(void) {}
    ~KFDGraphicsInterop(void) {}
};
#endif
+129
View File
@@ -0,0 +1,129 @@
/*
* Copyright (C) 2019 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "KFDHWSTest.hpp"
/* Per-test initialization: forwards to KFDBaseComponentTest::SetUp(),
 * wrapped in the ROUTINE_START/ROUTINE_END macros (defined elsewhere).
 */
void KFDHWSTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}
/* Per-test cleanup: forwards to KFDBaseComponentTest::TearDown(),
 * wrapped in the ROUTINE_START/ROUTINE_END macros (defined elsewhere).
 */
void KFDHWSTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}
void KFDHWSTest::RunTest(unsigned nProcesses, unsigned nQueues, unsigned nLoops) {
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
unsigned q, l;
bool timeout = false;
/* Fork the child processes */
ForkChildProcesses(nProcesses);
// Create queues
PM4Queue *queues = new PM4Queue[nQueues];
for (q = 0; q < nQueues; q++)
ASSERT_SUCCESS(queues[q].Create(defaultGPUNode));
// Create dispatch pointers. Each loop iteration creates fresh dispatches
Dispatch **dispatch = new Dispatch*[nQueues];
for (q = 0; q < nQueues; q++)
dispatch[q] = NULL;
// Logging: Each process prints its index after each loop iteration, all in one line.
std::ostream &log = LOG() << std::dec << "Process " << m_ProcessIndex << " starting." << std::endl;
// Run work on all queues
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(NoopIsa, isaBuffer.As<char*>()));
for (l = 0; l < nLoops; l++) {
for (q = 0; q < nQueues; q++) {
if (dispatch[q])
delete dispatch[q];
dispatch[q] = new Dispatch(isaBuffer);
dispatch[q]->SetArgs(NULL, NULL);
dispatch[q]->SetDim(1, 1, 1);
dispatch[q]->Submit(queues[q]);
}
for (q = 0; q < nQueues; q++) {
timeout = dispatch[q]->SyncWithStatus(g_TestTimeOut);
if (timeout)
goto timeout;
}
log << m_ProcessIndex;
}
timeout:
log << std::endl;
if (timeout) {
WARN() << "Process " << m_ProcessIndex << " timeout." << std::endl;
} else {
LOG() << "Process " << m_ProcessIndex << " done. Waiting ..." << std::endl;
// Wait here before destroying queues. If another process' queues
// are soft-hanging, destroying queues can resolve the soft-hang
// by changing the run list. Make sure the other process's
// dispatches have a chance to time out first.
Delay(g_TestTimeOut+1000);
}
// Destroy queues and dispatches. Destroying the queues first
// ensures that the memory allocated by the Dispatch is no longer
// accessed by the GPU.
LOG() << "Process " << m_ProcessIndex << " cleaning up." << std::endl;
for (q = 0; q < nQueues; q++) {
EXPECT_SUCCESS(queues[q].Destroy());
if (dispatch[q])
delete dispatch[q];
}
delete[] queues;
delete[] dispatch;
// This is after all the cleanup to avoid leaving any garbage
// behind, but before WaitChildProcesses to ensure a child process
// with a timeout exits with an error that can be detected by the
// parent.
ASSERT_FALSE(timeout);
WaitChildProcesses();
}
/* Drives RunTest() with 3 processes, 13 queues and 40 loop iterations.
 * Requires the ENVCAPS_64BITLINUX environment capability.
 */
TEST_F(KFDHWSTest, MultiProcessOversubscribed) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL);

    // Arguments in RunTest() order: nProcesses, nQueues, nLoops
    const unsigned processCount = 3;
    const unsigned queueCount = 13;
    const unsigned loopCount = 40;

    RunTest(processCount, queueCount, loopCount);

    TEST_END
}
@@ -0,0 +1,45 @@
/*
* Copyright (C) 2019 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __KFD_HWS_TEST__H__
#define __KFD_HWS_TEST__H__
#include <gtest/gtest.h>
#include "PM4Queue.hpp"
#include "KFDMultiProcessTest.hpp"
#include "Dispatch.hpp"
// Fixture for the KFDHWSTest suite, built on KFDMultiProcessTest.
class KFDHWSTest : public KFDMultiProcessTest {
 public:
    KFDHWSTest() {}
    ~KFDHWSTest() {}

 protected:
    // Per-test hooks, defined out of line.
    virtual void SetUp();
    virtual void TearDown();

    // Shared test body: takes the number of processes, the number of queues
    // and the number of loop iterations to run.
    void RunTest(unsigned nProcesses, unsigned nQueues, unsigned nLoops);
};
#endif // __KFD_HWS_TEST__H__
+218
View File
@@ -0,0 +1,218 @@
/*
* Copyright (C) 2017-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "KFDIPCTest.hpp"
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <vector>
#include "PM4Queue.hpp"
#include "PM4Packet.hpp"
#include "SDMAQueue.hpp"
#include "SDMAPacket.hpp"
/* Per-test initialization: forwards to KFDBaseComponentTest::SetUp(),
 * wrapped in the ROUTINE_START/ROUTINE_END macros (defined elsewhere).
 */
void KFDIPCTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}
/* Per-test cleanup: forwards to KFDBaseComponentTest::TearDown(),
 * wrapped in the ROUTINE_START/ROUTINE_END macros (defined elsewhere).
 */
void KFDIPCTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}
/* In the forked child (m_ChildPid == 0) the destructor terminates the
 * process, using the current test's Failed() result as the exit status.
 * In any other process it does nothing.
 */
KFDIPCTest::~KFDIPCTest(void) {
    if (m_ChildPid != 0)
        return;

    /* exit() is necessary for the child process. Otherwise when the
     * child process finishes, gtest assumes the test has finished and
     * starts the next test while the parent is still active.
     */
    const ::testing::TestInfo *currentTest =
        ::testing::UnitTest::GetInstance()->current_test_info();
    exit(currentTest->result()->Failed());
}
/* Import shared Local Memory from parent process. Check for the pattern
* filled in by the parent process. Then fill a new pattern.
*
* Check import handle has same HsaMemFlags as export handle to verify thunk and KFD
* import export handle ioctl pass HsaMemFlags correctly.
*/
void KFDIPCTest::BasicTestChildProcess(int defaultGPUNode, int *pipefd, HsaMemFlags mflags) {
/* Open KFD device for child process. This needs to called before
* any memory definitions
*/
TearDown();
SetUp();
SDMAQueue sdmaQueue;
HsaSharedMemoryHandle sharedHandleLM;
HSAuint64 size = PAGE_SIZE, sharedSize;
HsaMemoryBuffer tempSysBuffer(size, defaultGPUNode, false);
HSAuint32 *sharedLocalBuffer = NULL;
HsaMemMapFlags mapFlags = {0};
/* Read from Pipe the shared Handle. Import shared Local Memory */
ASSERT_GE(read(pipefd[0], reinterpret_cast<void*>(&sharedHandleLM), sizeof(sharedHandleLM)), 0);
ASSERT_SUCCESS(hsaKmtRegisterSharedHandle(&sharedHandleLM,
reinterpret_cast<void**>(&sharedLocalBuffer), &sharedSize));
ASSERT_SUCCESS(hsaKmtMapMemoryToGPUNodes(sharedLocalBuffer, sharedSize, NULL,
mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode)));
/* Check for pattern in the shared Local Memory */
ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode));
size = size < sharedSize ? size : sharedSize;
sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(), tempSysBuffer.As<HSAuint32*>(),
sharedLocalBuffer, size));
sdmaQueue.Wait4PacketConsumption();
EXPECT_TRUE(WaitOnValue(tempSysBuffer.As<HSAuint32*>(), 0xAAAAAAAA));
/* Fill in the Local Memory with different pattern */
sdmaQueue.PlaceAndSubmitPacket(SDMAWriteDataPacket(sdmaQueue.GetFamilyId(), sharedLocalBuffer, 0xBBBBBBBB));
sdmaQueue.Wait4PacketConsumption();
HsaPointerInfo ptrInfo;
EXPECT_SUCCESS(hsaKmtQueryPointerInfo(sharedLocalBuffer, &ptrInfo));
EXPECT_EQ(ptrInfo.Type, HSA_POINTER_REGISTERED_SHARED);
EXPECT_EQ(ptrInfo.Node, (HSAuint32)defaultGPUNode);
EXPECT_EQ(ptrInfo.GPUAddress, (HSAuint64)sharedLocalBuffer);
EXPECT_EQ(ptrInfo.SizeInBytes, sharedSize);
EXPECT_EQ(ptrInfo.MemFlags.Value, mflags.Value);
/* Clean up */
EXPECT_SUCCESS(sdmaQueue.Destroy());
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(sharedLocalBuffer));
EXPECT_SUCCESS(hsaKmtDeregisterMemory(sharedLocalBuffer));
}
/* Fill a pattern into Local Memory and share with the child process.
* Then wait for the child process to exit and check for the new pattern
* filled in by the child process.
*/
void KFDIPCTest::BasicTestParentProcess(int defaultGPUNode, pid_t cpid, int *pipefd, HsaMemFlags mflags) {
HSAuint64 size = PAGE_SIZE, sharedSize;
int status;
HSAuint64 AlternateVAGPU;
void *toShareLocalBuffer;
HsaMemoryBuffer tempSysBuffer(PAGE_SIZE, defaultGPUNode, false);
SDMAQueue sdmaQueue;
HsaSharedMemoryHandle sharedHandleLM;
HsaMemMapFlags mapFlags = {0};
ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, size, mflags, &toShareLocalBuffer));
/* Fill a Local Buffer with a pattern */
ASSERT_SUCCESS(hsaKmtMapMemoryToGPUNodes(toShareLocalBuffer, size, &AlternateVAGPU,
mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode)));
tempSysBuffer.Fill(0xAAAAAAAA);
/* Copy pattern in Local Memory before sharing it */
ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode));
sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(), toShareLocalBuffer,
tempSysBuffer.As<HSAuint32*>(), size));
sdmaQueue.Wait4PacketConsumption();
/* Share it with the child process */
ASSERT_SUCCESS(hsaKmtShareMemory(toShareLocalBuffer, size, &sharedHandleLM));
ASSERT_GE(write(pipefd[1], reinterpret_cast<void*>(&sharedHandleLM), sizeof(sharedHandleLM)), 0);
/* Wait for the child to finish */
waitpid(cpid, &status, 0);
EXPECT_EQ(WIFEXITED(status), 1);
EXPECT_EQ(WEXITSTATUS(status), 0);
/* Check for the new pattern filled in by child process */
sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(), tempSysBuffer.As<HSAuint32*>(),
toShareLocalBuffer, size));
sdmaQueue.Wait4PacketConsumption();
EXPECT_TRUE(WaitOnValue(tempSysBuffer.As<HSAuint32*>(), 0xBBBBBBBB));
/* Clean up */
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(toShareLocalBuffer));
EXPECT_SUCCESS(sdmaQueue.Destroy());
}
/* Test IPC memory.
* 1. Parent Process [Create/Fill] LocalMemory (LM) --share--> Child Process
* 2. Child Process import LM and check for the pattern.
* 3. Child Process fill in a new pattern and quit.
* 4. Parent Process wait for the Child process to finish and then check for
* the new pattern in LM
*
* IPC support is limited to Local Memory.
*/
/* Forks a child and runs the parent/child halves of the basic IPC test,
 * using a pipe to pass the shared memory handle. Skipped when the default
 * GPU node reports no VRAM.
 */
TEST_F(KFDIPCTest, BasicTest) {
    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    int pipefd[2];
    HsaMemFlags mflags = {0};

    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (!GetVramSize(defaultGPUNode)) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    /* Test libhsakmt fork() clean up by defining some buffers. These
     * buffers get duplicated in the child process but are not valid
     * there, as they have no proper mapping in the GPU. The clean up code
     * in libhsakmt should handle it
     */
    volatile HSAuint32 stackData[1];
    HsaMemoryBuffer tmpSysBuffer(PAGE_SIZE, defaultGPUNode, false);
    HsaMemoryBuffer tmpUserptrBuffer((void *)&stackData[0], sizeof(HSAuint32));

    /* Create Pipes for communicating shared handles */
    ASSERT_EQ(pipe(pipefd), 0);

    /* Create a child process and share the above Local Memory with it */
    mflags.ui32.NonPaged = 1;
    mflags.ui32.CoarseGrain = 1;

    m_ChildPid = fork();
    if (m_ChildPid < 0) {
        /* fork() failed: there is no child to talk to. Without this check
         * the parent path would run with cpid == -1 and wait on an
         * arbitrary child.
         */
        const int forkErrno = errno;  // close() below may overwrite errno
        close(pipefd[1]);
        close(pipefd[0]);
        FAIL() << "fork() failed, errno " << forkErrno;
    }

    if (m_ChildPid == 0)
        BasicTestChildProcess(defaultGPUNode, pipefd, mflags); /* Child Process */
    else
        BasicTestParentProcess(defaultGPUNode, m_ChildPid, pipefd, mflags); /* Parent process */

    /* Code path executed by both parent and child with respective fds */
    close(pipefd[1]);
    close(pipefd[0]);

    TEST_END
}
+134
View File
@@ -0,0 +1,134 @@
/*
* Copyright (C) 2017-2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "KFDBaseComponentTest.hpp"
#include "BaseQueue.hpp"
#ifndef __KFD_MEMORY_TEST__H__
#define __KFD_MEMORY_TEST__H__
#define CMA_MEMORY_TEST_ARRAY_SIZE 4
#define CMA_TEST_COUNT 3
/* Buffer kinds a CMA test descriptor can carry in
 * testMemoryDescriptor::m_MemType.
 */
enum CMA_MEM_TYPE {
    CMA_MEM_TYPE_SYSTEM    = 0,
    CMA_MEM_TYPE_USERPTR   = 1,
    CMA_MEM_TYPE_LOCAL_MEM = 2
};
/* Selects which cross-memory-attach direction a test run exercises. */
enum CMA_TEST_TYPE {
    CMA_READ_TEST  = 0,
    CMA_WRITE_TEST = 1
};
/* Result codes returned by the CMA test helpers. Zero is success; every
 * other value identifies a distinct failure. The numeric values are spelled
 * out so they stay stable if entries are reordered.
 */
enum CMA_TEST_STATUS {
    CMA_TEST_SUCCESS        = 0,
    CMA_IPC_PIPE_ERROR      = 1,
    CMA_CHECK_PATTERN_ERROR = 2,
    CMA_TEST_ABORT          = 3,
    CMA_TEST_NOMEM          = 4,
    CMA_PARENT_FAIL         = 5,
    CMA_TEST_HSA_READ_FAIL  = 6,
    CMA_TEST_HSA_WRITE_FAIL = 7
};
/* @struct testMemoryDescriptor
* @brief Describes test buffers for Cross Memory Attach Test.
*/
/* Plain description of one Cross Memory Attach test buffer: its memory
 * type and size, the pattern it is filled with, and the patterns expected
 * in its first and last word once the CMA test has run.
 */
struct testMemoryDescriptor {
    CMA_MEM_TYPE m_MemType;
    HSAuint64 m_MemSize;

    /* Pattern the buffer is initialized with */
    HSAuint32 m_FillPattern;

    /* Pattern expected in the first word after the CMA test */
    HSAuint32 m_CheckFirstWordPattern;

    /* Pattern expected in the last word after the CMA test */
    HSAuint32 m_CheckLastWordPattern;

    /* Stores each argument in the member of the matching name. */
    testMemoryDescriptor(CMA_MEM_TYPE memType, HSAuint64 memSize,
                         HSAuint32 fillPattern, HSAuint32 firstCheckPattern,
                         HSAuint32 lastCheckPattern)
        : m_MemType(memType),
          m_MemSize(memSize),
          m_FillPattern(fillPattern),
          m_CheckFirstWordPattern(firstCheckPattern),
          m_CheckLastWordPattern(lastCheckPattern) {}

    ~testMemoryDescriptor() {}
};
/* @class KFDCMAArray
* @brief Array of buffers that will be passed between the parent and child
* process for Cross memory read and write tests
*/
/* Fixed-capacity set of test buffers exchanged between the parent and the
 * child process for the cross-memory read and write tests. Capacity is
 * CMA_MEMORY_TEST_ARRAY_SIZE; only the first m_ValidCount entries are in use.
 */
class KFDCMAArray {
 private:
    /* The actual buffers */
    HsaMemoryBuffer* m_MemArray[CMA_MEMORY_TEST_ARRAY_SIZE];

    /* Range descriptors in the form the thunk CMA functions take */
    HsaMemoryRange m_HsaMemoryRange[CMA_MEMORY_TEST_ARRAY_SIZE];

    /* Number of valid entries in the two fixed-size arrays above */
    HSAuint64 m_ValidCount;

    QueueArray m_QueueArray;

 public:
    KFDCMAArray();

    /* Calls Destroy(); its status is ignored. */
    ~KFDCMAArray() {
        Destroy();
    }

    CMA_TEST_STATUS Init(testMemoryDescriptor(*memDescriptor)[CMA_MEMORY_TEST_ARRAY_SIZE], int node);
    CMA_TEST_STATUS Destroy();

    /* Accessors for the range array and the count of valid entries */
    HsaMemoryRange* getMemoryRange() { return m_HsaMemoryRange; }
    HSAuint64 getValidRangeCount() { return m_ValidCount; }

    void FillPattern(testMemoryDescriptor(*memDescriptor)[CMA_MEMORY_TEST_ARRAY_SIZE]);
    CMA_TEST_STATUS checkPattern(testMemoryDescriptor(*memDescriptor)[CMA_MEMORY_TEST_ARRAY_SIZE]);

    /* Exchange the array over a pipe file descriptor */
    CMA_TEST_STATUS sendCMAArray(int writePipe);
    CMA_TEST_STATUS recvCMAArray(int readPipe);
};
// @class KFDIPCTest
/* Fixture for the IPC and CMA tests. m_ChildPid starts at -1 and is read
 * by the out-of-line destructor.
 */
class KFDIPCTest : public KFDBaseComponentTest {
 public:
    KFDIPCTest(void) : m_ChildPid(-1) {}
    ~KFDIPCTest(void);

 protected:
    virtual void SetUp();
    virtual void TearDown();

    /* For IPC testing */
    void BasicTestChildProcess(int defaultGPUNode, int *pipefd, HsaMemFlags mflags);
    void BasicTestParentProcess(int defaultGPUNode, pid_t childPid, int *pipefd, HsaMemFlags mflags);

    /* For CMA testing */
    CMA_TEST_STATUS CrossMemoryAttachChildProcess(int defaultGPUNode, int writePipe,
                                                  int readPipe, CMA_TEST_TYPE testType);
    CMA_TEST_STATUS CrossMemoryAttachParentProcess(int defaultGPUNode, pid_t cid,
                                                   int writePipe, int readPipe, CMA_TEST_TYPE testType);

    /* Initialized to -1 by the constructor */
    pid_t m_ChildPid;
};
#endif // __KFD_MEMORY_TEST__H__

Some files were not shown because too many files have changed in this diff Show More