Move Images code to hsa-runtime folder

Change-Id: I53c1845d985ac3e9708d952865009c0021f3bb4f


[ROCm/ROCR-Runtime commit: 7e3db20826]
Esse commit está contido em:
Sean Keely
2020-04-30 00:18:36 -05:00
commit 1fc7f2dec7
70 arquivos alterados com 73713 adições e 47 exclusões
@@ -55,7 +55,9 @@ if(UNIX)
set(PS ":")
set(CMAKE_CXX_FLAGS "-Wall -std=c++11 ${EXTRA_CFLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpic")
if (CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--unresolved-symbols=ignore-in-shared-libs")
endif ()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing")
if ( CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" )
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2" )
@@ -63,7 +65,7 @@ if(UNIX)
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32" )
endif ()
if ( "${CMAKE_BUILD_TYPE}" STREQUAL Debug )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -ggdb" )
endif ()
add_definitions(-D __STDC_LIMIT_MACROS)
add_definitions(-D __STDC_CONSTANT_MACROS)
@@ -18,7 +18,7 @@ if (ROCM_CCACHE_BUILD)
endif() # if (ROCM_CCACHE_BUILD)
## Include the cmake_modules utils.cmake
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../cmake_modules" )
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../hsa-runtime/cmake_modules" )
include ( utils )
## Compiler preproc definitions.
@@ -59,12 +59,12 @@ include_directories( ${HSA_INC_PATH} )
link_directories( ${HSA_LIB_PATH} )
## Find self
if( "${EXT_SOURCE_DIR}" STREQUAL "" )
get_include_path( EXT_SOURCE_FILE null NAMES "image/image_runtime.h" HINTS "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/../../ext/" )
get_filename_component( EXT_SOURCE_DIR "${EXT_SOURCE_FILE}/.." ABSOLUTE )
unset( EXT_SOURCE_FILE CACHE )
if( "${IMAGE_SOURCE_DIR}" STREQUAL "" )
get_include_path( IMG_SOURCE_FILE null NAMES "image_runtime.h" HINTS "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/../hsa-runtime/image/" )
get_filename_component( IMAGE_SOURCE_DIR "${IMG_SOURCE_FILE}" ABSOLUTE )
unset( IMG_SOURCE_FILE CACHE )
endif()
set( EXT_SOURCE_DIR ${EXT_SOURCE_DIR} CACHE PATH "Image lib source dir" FORCE )
set( IMAGE_SOURCE_DIR ${IMAGE_SOURCE_DIR} CACHE PATH "Image lib source dir" FORCE )
get_filename_component( OPEN_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/.." ABSOLUTE CACHE )
set( OPEN_SOURCE_DIR ${OPEN_SOURCE_DIR} CACHE PATH "Open source root dir" FORCE )
@@ -87,58 +87,58 @@ if ( "${CMAKE_BUILD_TYPE}" STREQUAL Debug )
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb" )
endif ()
set ( DRVDEF "${EXT_SOURCE_DIR}/make/image.so.def" )
set ( DRVDEF "${IMAGE_SOURCE_DIR}/image.so.def" )
set ( CMAKE_SHARED_LINKER_FLAGS "-Wl,-Bdynamic -Wl,-z,noexecstack -Wl,--version-script=${DRVDEF} -Wl,--enable-new-dtags" )
## Library path(s).
include_directories(${EXT_SOURCE_DIR}/..)
include_directories(${IMAGE_SOURCE_DIR}/)
include_directories(${OPEN_SOURCE_DIR}/hsa-runtime)
include_directories(${OPEN_SOURCE_DIR}/hsa-runtime/inc)
include_directories(${OPEN_SOURCE_DIR}/hsa-runtime/core/inc)
include_directories(${EXT_SOURCE_DIR}/image/addrlib)
include_directories(${EXT_SOURCE_DIR}/image/addrlib/inc)
include_directories(${EXT_SOURCE_DIR}/image/addrlib/src)
include_directories(${EXT_SOURCE_DIR}/image/addrlib/src/core)
include_directories(${EXT_SOURCE_DIR}/image/addrlib/src/r800)
include_directories(${EXT_SOURCE_DIR}/image/addrlib/src/gfx9)
include_directories(${EXT_SOURCE_DIR}/image/addrlib/src/gfx10)
include_directories(${EXT_SOURCE_DIR}/image/addrlib/src/chip/r800)
include_directories(${EXT_SOURCE_DIR}/image/addrlib/src/chip/gfx9)
include_directories(${EXT_SOURCE_DIR}/image/addrlib/src/chip/gfx10)
include_directories(${IMAGE_SOURCE_DIR}/addrlib)
include_directories(${IMAGE_SOURCE_DIR}/addrlib/inc)
include_directories(${IMAGE_SOURCE_DIR}/addrlib/src)
include_directories(${IMAGE_SOURCE_DIR}/addrlib/src/core)
include_directories(${IMAGE_SOURCE_DIR}/addrlib/src/r800)
include_directories(${IMAGE_SOURCE_DIR}/addrlib/src/gfx9)
include_directories(${IMAGE_SOURCE_DIR}/addrlib/src/gfx10)
include_directories(${IMAGE_SOURCE_DIR}/addrlib/src/chip/r800)
include_directories(${IMAGE_SOURCE_DIR}/addrlib/src/chip/gfx9)
include_directories(${IMAGE_SOURCE_DIR}/addrlib/src/chip/gfx10)
set ( IMAGE_SRCS ${EXT_SOURCE_DIR}/image/addrlib/src/addrinterface.cpp
${EXT_SOURCE_DIR}/image/addrlib/src/core/coord.cpp
${EXT_SOURCE_DIR}/image/addrlib/src/core/addrlib.cpp
${EXT_SOURCE_DIR}/image/addrlib/src/core/addrlib1.cpp
${EXT_SOURCE_DIR}/image/addrlib/src/core/addrlib2.cpp
${EXT_SOURCE_DIR}/image/addrlib/src/core/addrobject.cpp
${EXT_SOURCE_DIR}/image/addrlib/src/core/addrelemlib.cpp
${EXT_SOURCE_DIR}/image/addrlib/src/r800/ciaddrlib.cpp
${EXT_SOURCE_DIR}/image/addrlib/src/r800/egbaddrlib.cpp
${EXT_SOURCE_DIR}/image/addrlib/src/r800/siaddrlib.cpp
${EXT_SOURCE_DIR}/image/addrlib/src/gfx9/gfx9addrlib.cpp
${EXT_SOURCE_DIR}/image/addrlib/src/gfx10/gfx10addrlib.cpp
${EXT_SOURCE_DIR}/image/amd_ext.cpp
${EXT_SOURCE_DIR}/image/device_info.cpp
${EXT_SOURCE_DIR}/image/hsa_ext_image.cpp
${EXT_SOURCE_DIR}/image/image_runtime.cpp
${EXT_SOURCE_DIR}/image/image_manager.cpp
${EXT_SOURCE_DIR}/image/image_manager_kv.cpp
${EXT_SOURCE_DIR}/image/image_manager_ai.cpp
${EXT_SOURCE_DIR}/image/image_manager_nv.cpp
${EXT_SOURCE_DIR}/image/image_lut_kv.cpp
${EXT_SOURCE_DIR}/image/blit_object_gfx7xx.cpp
${EXT_SOURCE_DIR}/image/blit_object_gfx8xx.cpp
${EXT_SOURCE_DIR}/image/blit_object_gfx9xx.cpp
${EXT_SOURCE_DIR}/image/opencl_blit_objects.cpp
${EXT_SOURCE_DIR}/image/blit_kernel.cpp
set ( IMAGE_SRCS ${IMAGE_SOURCE_DIR}/addrlib/src/addrinterface.cpp
${IMAGE_SOURCE_DIR}/addrlib/src/core/coord.cpp
${IMAGE_SOURCE_DIR}/addrlib/src/core/addrlib.cpp
${IMAGE_SOURCE_DIR}/addrlib/src/core/addrlib1.cpp
${IMAGE_SOURCE_DIR}/addrlib/src/core/addrlib2.cpp
${IMAGE_SOURCE_DIR}/addrlib/src/core/addrobject.cpp
${IMAGE_SOURCE_DIR}/addrlib/src/core/addrelemlib.cpp
${IMAGE_SOURCE_DIR}/addrlib/src/r800/ciaddrlib.cpp
${IMAGE_SOURCE_DIR}/addrlib/src/r800/egbaddrlib.cpp
${IMAGE_SOURCE_DIR}/addrlib/src/r800/siaddrlib.cpp
${IMAGE_SOURCE_DIR}/addrlib/src/gfx9/gfx9addrlib.cpp
${IMAGE_SOURCE_DIR}/addrlib/src/gfx10/gfx10addrlib.cpp
${IMAGE_SOURCE_DIR}/amd_ext.cpp
${IMAGE_SOURCE_DIR}/device_info.cpp
${IMAGE_SOURCE_DIR}/hsa_ext_image.cpp
${IMAGE_SOURCE_DIR}/image_runtime.cpp
${IMAGE_SOURCE_DIR}/image_manager.cpp
${IMAGE_SOURCE_DIR}/image_manager_kv.cpp
${IMAGE_SOURCE_DIR}/image_manager_ai.cpp
${IMAGE_SOURCE_DIR}/image_manager_nv.cpp
${IMAGE_SOURCE_DIR}/image_lut_kv.cpp
${IMAGE_SOURCE_DIR}/blit_object_gfx7xx.cpp
${IMAGE_SOURCE_DIR}/blit_object_gfx8xx.cpp
${IMAGE_SOURCE_DIR}/blit_object_gfx9xx.cpp
${IMAGE_SOURCE_DIR}/opencl_blit_objects.cpp
${IMAGE_SOURCE_DIR}/blit_kernel.cpp
${OPEN_SOURCE_DIR}/hsa-runtime/core/common/shared.cpp
${OPEN_SOURCE_DIR}/hsa-runtime/core/common/hsa_table_interface.cpp
)
add_subdirectory(${EXT_SOURCE_DIR}/image/blit_src ${CMAKE_BINARY_DIR}/image_blit)
set_source_files_properties(${EXT_SOURCE_DIR}/image/opencl_blit_objects.cpp PROPERTIES GENERATED 1)
add_subdirectory(${IMAGE_SOURCE_DIR}/blit_src ${CMAKE_CURRENT_BINARY_DIR}/image_blit)
set_source_files_properties(${IMAGE_SOURCE_DIR}/opencl_blit_objects.cpp PROPERTIES GENERATED 1)
add_library ( ${IMAGE_TARGET} SHARED ${IMAGE_SRCS} )
add_dependencies( ${IMAGE_TARGET} opencl_blit_objects.cpp )
@@ -205,3 +205,15 @@ function ( get_version DEFAULT_VERSION_STRING )
#message("${VERSION_JOB}")
endfunction()
## Collects the names of the immediate subdirectories of DIRPATH.
##
## Arguments:
##   DIRPATH        - directory whose direct children are inspected.
##   SUBDIRECTORIES - name of the variable, set in the caller's scope, that
##                    receives the list of subdirectory names (relative to
##                    DIRPATH). It is set to an empty string when DIRPATH has
##                    no subdirectories.
function ( listsubdirs DIRPATH SUBDIRECTORIES )
  file( GLOB CONTENTS RELATIVE "${DIRPATH}" "${DIRPATH}/*" )
  # Reset the accumulator explicitly: function scopes start with a copy of the
  # caller's variables, so a FOLDERS variable defined by the caller would
  # otherwise be prepended to the result.
  set ( FOLDERS "" )
  foreach( ITEM IN LISTS CONTENTS )
    if( IS_DIRECTORY "${DIRPATH}/${ITEM}" )
      list( APPEND FOLDERS "${ITEM}" )
    endif()
  endforeach()
  set ( ${SUBDIRECTORIES} "${FOLDERS}" PARENT_SCOPE )
endfunction()
@@ -0,0 +1,63 @@
include $(EXT_DEPTH)/make/extdefs
include $(EXT_DEPTH)/make/Makefile.$(EXT_OS_PLATFORM).ext
LCXXINCS += -I$(EXT_DEPTH)/.. \
-I$(RUNTIME_SRC_TOP) \
-I$(DEPTH)/drivers/inc/asic_reg \
-I$(EXT_DEPTH)/image/addrlib \
-I$(EXT_DEPTH)/image/addrlib/core \
-I$(EXT_DEPTH)/image/addrlib/inc \
-I$(EXT_DEPTH)/image/addrlib/inc/chip \
-I$(EXT_DEPTH)/image/addrlib/inc/chip/r800 \
-I$(EXT_DEPTH)/image/addrlib/r800 \
-I$(EXT_DEPTH)/image/addrlib/r800/chip \
-I$(EXT_DEPTH)/image/addrlib/inc/chip/gfx9 \
-I$(EXT_DEPTH)/image/addrlib/gfx9 \
-I$(EXT_DEPTH)/image/addrlib/gfx9/chip \
-I$(SCLIB_DEPTH)/Interface \
-I$(DEPTH)/drivers/hsathk/include \
-DBRAHMA_BUILD=1
# Source files compiled into the image library. The last entry deliberately
# has no line continuation so that whatever follows this assignment can never
# be folded into the list.
CPPFILES = addrinterface.cpp \
           addrelemlib.cpp \
           addrlib.cpp \
           addrlib1.cpp \
           addrlib2.cpp \
           addrobject.cpp \
           ciaddrlib.cpp \
           egbaddrlib.cpp \
           siaddrlib.cpp \
           hsa_ext_image.cpp \
           image_runtime.cpp \
           image_manager.cpp \
           image_manager_kv.cpp \
           image_manager_ai.cpp \
           image_lut_kv.cpp \
           blit_kernel.cpp \
           coord.cpp \
           gfx9addrlib.cpp \
           rbmap.cpp \
           blit_object_gfx7xx.cpp \
           blit_object_gfx8xx.cpp \
           blit_object_gfx9xx.cpp \
           opencl_blit_objects.cpp
EXT_IMGDIR = $(EXT_DEPTH)/image
$(EXT_IMGDIR)/opencl_blit_objects.cpp: $(EXT_IMGDIR)/opencl_blit_objects.cpp_TC
cp -f $< $@
LLOPTS += -L "$(DEPTH)/drivers/hsathk/build/$(EXT_OS_BUILD)/B_$(BUILD_TYPE)" -lhsakmt
LIB_TARGET = image
EXT_SRCDIR = $(EXT_DEPTH)/image \
$(EXT_DEPTH)/image/addrlib \
$(EXT_DEPTH)/image/addrlib/core \
$(EXT_DEPTH)/image/addrlib/r800 \
$(EXT_DEPTH)/image/addrlib/gfx9
SRCPATH = $(EXT_SRCDIR)
include $(EXT_DEPTH)/make/extrules
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
@@ -0,0 +1,754 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/**
****************************************************************************************************
* @file addrtypes.h
* @brief Contains the helper function and constants
****************************************************************************************************
*/
#ifndef __ADDR_TYPES_H__
#define __ADDR_TYPES_H__
#if defined(__APPLE__) && !defined(HAVE_TSERVER)
// External definitions header maintained by Apple driver team, but not for diag team under Mac.
// Helps address compilation issues & reduces code covered by NDA
#include "addrExtDef.h"
#else
// Windows and/or Linux
#if !defined(VOID)
typedef void VOID;
#endif
#if !defined(FLOAT)
typedef float FLOAT;
#endif
#if !defined(CHAR)
typedef char CHAR;
#endif
#if !defined(INT)
typedef int INT;
#endif
#include <stdarg.h> // va_list...etc need this header
#endif // defined (__APPLE__) && !defined(HAVE_TSERVER)
/**
****************************************************************************************************
* Calling conventions
****************************************************************************************************
*/
#ifndef ADDR_CDECL
#if defined(__GNUC__)
#define ADDR_CDECL __attribute__((cdecl))
#else
#define ADDR_CDECL __cdecl
#endif
#endif
#ifndef ADDR_STDCALL
#if defined(__GNUC__)
#if defined(__amd64__) || defined(__x86_64__)
#define ADDR_STDCALL
#else
#define ADDR_STDCALL __attribute__((stdcall))
#endif
#else
#define ADDR_STDCALL __stdcall
#endif
#endif
#ifndef ADDR_FASTCALL
#if defined(BRAHMA_ARM)
#define ADDR_FASTCALL
#elif defined(__GNUC__)
#define ADDR_FASTCALL __attribute__((regparm(0)))
#else
#define ADDR_FASTCALL __fastcall
#endif
#endif
#ifndef GC_CDECL
#define GC_CDECL ADDR_CDECL
#endif
#ifndef GC_STDCALL
#define GC_STDCALL ADDR_STDCALL
#endif
#ifndef GC_FASTCALL
#define GC_FASTCALL ADDR_FASTCALL
#endif
#if defined(__GNUC__)
#define ADDR_INLINE static inline // inline needs to be static to link
#else
// win32, win64, other platforms
#define ADDR_INLINE __inline
#endif // #if defined(__GNUC__)
#define ADDR_API ADDR_FASTCALL //default call convention is fast call
/**
****************************************************************************************************
* Global defines used by other modules
****************************************************************************************************
*/
#if !defined(TILEINDEX_INVALID)
#define TILEINDEX_INVALID -1
#endif
#if !defined(TILEINDEX_LINEAR_GENERAL)
#define TILEINDEX_LINEAR_GENERAL -2
#endif
#if !defined(TILEINDEX_LINEAR_ALIGNED)
#define TILEINDEX_LINEAR_ALIGNED 8
#endif
/**
****************************************************************************************************
* Return codes
****************************************************************************************************
*/
// Status codes of the address library. Only the first two enumerators carry
// explicit values; the specific error codes continue the sequence implicitly.
typedef enum _ADDR_E_RETURNCODE
{
// General Return
ADDR_OK = 0,
ADDR_ERROR = 1,
// Specific Errors (implicitly numbered 2..7, in declaration order)
ADDR_OUTOFMEMORY,
ADDR_INVALIDPARAMS,
ADDR_NOTSUPPORTED,
ADDR_NOTIMPLEMENTED,
ADDR_PARAMSIZEMISMATCH,
ADDR_INVALIDGBREGVALUES,
} ADDR_E_RETURNCODE;
/**
****************************************************************************************************
* @brief
* Neutral enums that define tile modes for all H/W
* @note
* R600/R800 tiling mode can be cast to hw enums directly but never cast into HW enum from
* ADDR_TM_2D_TILED_XTHICK
*
****************************************************************************************************
*/
typedef enum _AddrTileMode
{
ADDR_TM_LINEAR_GENERAL = 0, ///< Least restrictions, pitch: multiple of 8 if not buffer
ADDR_TM_LINEAR_ALIGNED = 1, ///< Requests pitch or slice to be multiple of 64 pixels
ADDR_TM_1D_TILED_THIN1 = 2, ///< Linear array of 8x8 tiles
ADDR_TM_1D_TILED_THICK = 3, ///< Linear array of 8x8x4 tiles
ADDR_TM_2D_TILED_THIN1 = 4, ///< A set of macro tiles consisting of 8x8 tiles
ADDR_TM_2D_TILED_THIN2 = 5, ///< 600 HWL only, macro tile ratio is 1:4
ADDR_TM_2D_TILED_THIN4 = 6, ///< 600 HWL only, macro tile ratio is 1:16
ADDR_TM_2D_TILED_THICK = 7, ///< A set of macro tiles consisting of 8x8x4 tiles
ADDR_TM_2B_TILED_THIN1 = 8, ///< 600 HWL only, with bank swap
ADDR_TM_2B_TILED_THIN2 = 9, ///< 600 HWL only, with bank swap and ratio is 1:4
ADDR_TM_2B_TILED_THIN4 = 10, ///< 600 HWL only, with bank swap and ratio is 1:16
ADDR_TM_2B_TILED_THICK = 11, ///< 600 HWL only, with bank swap, consists of 8x8x4 tiles
ADDR_TM_3D_TILED_THIN1 = 12, ///< Macro tiling w/ pipe rotation between slices
ADDR_TM_3D_TILED_THICK = 13, ///< Macro tiling w/ pipe rotation between slices, thick
ADDR_TM_3B_TILED_THIN1 = 14, ///< 600 HWL only, with bank swap
ADDR_TM_3B_TILED_THICK = 15, ///< 600 HWL only, with bank swap, thick
ADDR_TM_2D_TILED_XTHICK = 16, ///< Tile is 8x8x8, valid from NI
ADDR_TM_3D_TILED_XTHICK = 17, ///< Tile is 8x8x8, valid from NI
ADDR_TM_POWER_SAVE = 18, ///< Power save mode, only used by KMD on NI
ADDR_TM_PRT_TILED_THIN1 = 19, ///< No bank/pipe rotation or hashing beyond macrotile size
ADDR_TM_PRT_2D_TILED_THIN1 = 20, ///< Same as 2D_TILED_THIN1, PRT only
ADDR_TM_PRT_3D_TILED_THIN1 = 21, ///< Same as 3D_TILED_THIN1, PRT only
ADDR_TM_PRT_TILED_THICK = 22, ///< No bank/pipe rotation or hashing beyond macrotile size
ADDR_TM_PRT_2D_TILED_THICK = 23, ///< Same as 2D_TILED_THICK, PRT only
ADDR_TM_PRT_3D_TILED_THICK = 24, ///< Same as 3D_TILED_THICK, PRT only
ADDR_TM_UNKNOWN = 25, ///< Unknown tile mode, should be decided by address lib
ADDR_TM_COUNT = 26, ///< Must be the value of the last tile mode
} AddrTileMode;
/**
****************************************************************************************************
* @brief
* Neutral enums that define swizzle modes for Gfx9+ ASIC
* @note
*
* ADDR_SW_LINEAR linear aligned addressing mode, for 1D/2D/3D resource
* ADDR_SW_256B_* addressing block aligned size is 256B, for 2D/3D resource
* ADDR_SW_4KB_* addressing block aligned size is 4KB, for 2D/3D resource
* ADDR_SW_64KB_* addressing block aligned size is 64KB, for 2D/3D resource
*
* ADDR_SW_*_Z For GFX9:
- for 2D resource, represents Z-order swizzle mode for depth/stencil/FMask
- for 3D resource, represents a swizzle mode similar to legacy thick tile mode
For GFX10:
- represents Z-order swizzle mode for depth/stencil/FMask
* ADDR_SW_*_S For GFX9+:
- represents standard swizzle mode defined by MS
* ADDR_SW_*_D For GFX9:
- for 2D resource, represents a swizzle mode for displayable resource
* - for 3D resource, represents a swizzle mode which places each slice in order & pixel
within slice is placed as 2D ADDR_SW_*_S. Don't use this combination if possible!
For GFX10:
- for 2D resource, represents a swizzle mode for displayable resource
- for 3D resource, represents a swizzle mode similar to legacy thick tile mode
* ADDR_SW_*_R For GFX9:
- 2D resource only, represents a swizzle mode for rotated displayable resource
For GFX10:
- represents a swizzle mode for render target resource
*
****************************************************************************************************
*/
// Swizzle mode identifiers. Values 12-15, 29 and 30 are placeholders named
// ADDR_SW_RESERVED*; the block at the end defines aliases, not new values.
typedef enum _AddrSwizzleMode
{
ADDR_SW_LINEAR = 0,
ADDR_SW_256B_S = 1,
ADDR_SW_256B_D = 2,
ADDR_SW_256B_R = 3,
ADDR_SW_4KB_Z = 4,
ADDR_SW_4KB_S = 5,
ADDR_SW_4KB_D = 6,
ADDR_SW_4KB_R = 7,
ADDR_SW_64KB_Z = 8,
ADDR_SW_64KB_S = 9,
ADDR_SW_64KB_D = 10,
ADDR_SW_64KB_R = 11,
ADDR_SW_RESERVED0 = 12,
ADDR_SW_RESERVED1 = 13,
ADDR_SW_RESERVED2 = 14,
ADDR_SW_RESERVED3 = 15,
ADDR_SW_64KB_Z_T = 16,
ADDR_SW_64KB_S_T = 17,
ADDR_SW_64KB_D_T = 18,
ADDR_SW_64KB_R_T = 19,
ADDR_SW_4KB_Z_X = 20,
ADDR_SW_4KB_S_X = 21,
ADDR_SW_4KB_D_X = 22,
ADDR_SW_4KB_R_X = 23,
ADDR_SW_64KB_Z_X = 24,
ADDR_SW_64KB_S_X = 25,
ADDR_SW_64KB_D_X = 26,
ADDR_SW_64KB_R_X = 27,
ADDR_SW_VAR_Z_X = 28,
ADDR_SW_RESERVED4 = 29,
ADDR_SW_RESERVED5 = 30,
ADDR_SW_VAR_R_X = 31,
ADDR_SW_LINEAR_GENERAL = 32,
// Aliases used to represent blocks of identical size; each one reuses the
// value of an existing enumerator (ADDR_SW_VAR maps to the reserved value 29).
ADDR_SW_256B = ADDR_SW_256B_S,
ADDR_SW_4KB = ADDR_SW_4KB_S_X,
ADDR_SW_64KB = ADDR_SW_64KB_S_X,
ADDR_SW_VAR = ADDR_SW_RESERVED4,
ADDR_SW_MAX_TYPE = 33,
} AddrSwizzleMode;
/**
****************************************************************************************************
* @brief
* Neutral enums that define image type
* @note
* this is new for address library interface version 2
*
****************************************************************************************************
*/
typedef enum _AddrResourceType
{
ADDR_RSRC_TEX_1D = 0,
ADDR_RSRC_TEX_2D = 1,
ADDR_RSRC_TEX_3D = 2,
ADDR_RSRC_MAX_TYPE = 3,
} AddrResourceType;
/**
****************************************************************************************************
* @brief
* Neutral enums that define resource heap location
* @note
* this is new for address library interface version 2
*
****************************************************************************************************
*/
// NOTE: the "Resrouce" spelling in the tag and typedef names is part of the
// existing interface and is kept as-is.
typedef enum _AddrResrouceLocation
{
ADDR_RSRC_LOC_UNDEF = 0, // Resource heap is undefined/unknown
ADDR_RSRC_LOC_LOCAL = 1, // CPU visible and CPU invisible local heap
ADDR_RSRC_LOC_USWC = 2, // CPU write-combined non-cached nonlocal heap
ADDR_RSRC_LOC_CACHED = 3, // CPU cached nonlocal heap
ADDR_RSRC_LOC_INVIS = 4, // CPU invisible local heap only
ADDR_RSRC_LOC_MAX_TYPE = 5,
} AddrResrouceLocation;
/**
****************************************************************************************************
* @brief
* Neutral enums that define resource basic swizzle mode
* @note
* this is new for address library interface version 2
*
****************************************************************************************************
*/
typedef enum _AddrSwType
{
ADDR_SW_Z = 0, // Resource basic swizzle mode is ZOrder
ADDR_SW_S = 1, // Resource basic swizzle mode is Standard
ADDR_SW_D = 2, // Resource basic swizzle mode is Display
ADDR_SW_R = 3, // Resource basic swizzle mode is Rotated/Render optimized
ADDR_SW_L = 4, // Resource basic swizzle mode is Linear
ADDR_SW_MAX_SWTYPE // Implicitly 5: one past the last swizzle type above
} AddrSwType;
/**
****************************************************************************************************
* @brief
* Neutral enums that define mipmap major mode
* @note
* this is new for address library interface version 2
*
****************************************************************************************************
*/
typedef enum _AddrMajorMode
{
ADDR_MAJOR_X = 0,
ADDR_MAJOR_Y = 1,
ADDR_MAJOR_Z = 2,
ADDR_MAJOR_MAX_TYPE = 3,
} AddrMajorMode;
/**
****************************************************************************************************
* AddrFormat
*
* @brief
* Neutral enum for SurfaceFormat
*
****************************************************************************************************
*/
typedef enum _AddrFormat {
ADDR_FMT_INVALID = 0x00000000,
ADDR_FMT_8 = 0x00000001,
ADDR_FMT_4_4 = 0x00000002,
ADDR_FMT_3_3_2 = 0x00000003,
ADDR_FMT_RESERVED_4 = 0x00000004,
ADDR_FMT_16 = 0x00000005,
ADDR_FMT_16_FLOAT = ADDR_FMT_16,
ADDR_FMT_8_8 = 0x00000007,
ADDR_FMT_5_6_5 = 0x00000008,
ADDR_FMT_6_5_5 = 0x00000009,
ADDR_FMT_1_5_5_5 = 0x0000000a,
ADDR_FMT_4_4_4_4 = 0x0000000b,
ADDR_FMT_5_5_5_1 = 0x0000000c,
ADDR_FMT_32 = 0x0000000d,
ADDR_FMT_32_FLOAT = ADDR_FMT_32,
ADDR_FMT_16_16 = 0x0000000f,
ADDR_FMT_16_16_FLOAT = ADDR_FMT_16_16,
ADDR_FMT_8_24 = 0x00000011,
ADDR_FMT_8_24_FLOAT = ADDR_FMT_8_24,
ADDR_FMT_24_8 = 0x00000013,
ADDR_FMT_24_8_FLOAT = ADDR_FMT_24_8,
ADDR_FMT_10_11_11 = 0x00000015,
ADDR_FMT_10_11_11_FLOAT = ADDR_FMT_10_11_11,
ADDR_FMT_11_11_10 = 0x00000017,
ADDR_FMT_11_11_10_FLOAT = ADDR_FMT_11_11_10,
ADDR_FMT_2_10_10_10 = 0x00000019,
ADDR_FMT_8_8_8_8 = 0x0000001a,
ADDR_FMT_10_10_10_2 = 0x0000001b,
ADDR_FMT_X24_8_32_FLOAT = 0x0000001c,
ADDR_FMT_32_32 = 0x0000001d,
ADDR_FMT_32_32_FLOAT = ADDR_FMT_32_32,
ADDR_FMT_16_16_16_16 = 0x0000001f,
ADDR_FMT_16_16_16_16_FLOAT = ADDR_FMT_16_16_16_16,
ADDR_FMT_RESERVED_33 = 0x00000021,
ADDR_FMT_32_32_32_32 = 0x00000022,
ADDR_FMT_32_32_32_32_FLOAT = ADDR_FMT_32_32_32_32,
ADDR_FMT_RESERVED_36 = 0x00000024,
ADDR_FMT_1 = 0x00000025,
ADDR_FMT_1_REVERSED = 0x00000026,
ADDR_FMT_GB_GR = 0x00000027,
ADDR_FMT_BG_RG = 0x00000028,
ADDR_FMT_32_AS_8 = 0x00000029,
ADDR_FMT_32_AS_8_8 = 0x0000002a,
ADDR_FMT_5_9_9_9_SHAREDEXP = 0x0000002b,
ADDR_FMT_8_8_8 = 0x0000002c,
ADDR_FMT_16_16_16 = 0x0000002d,
ADDR_FMT_16_16_16_FLOAT = ADDR_FMT_16_16_16,
ADDR_FMT_32_32_32 = 0x0000002f,
ADDR_FMT_32_32_32_FLOAT = ADDR_FMT_32_32_32,
ADDR_FMT_BC1 = 0x00000031,
ADDR_FMT_BC2 = 0x00000032,
ADDR_FMT_BC3 = 0x00000033,
ADDR_FMT_BC4 = 0x00000034,
ADDR_FMT_BC5 = 0x00000035,
ADDR_FMT_BC6 = 0x00000036,
ADDR_FMT_BC7 = 0x00000037,
ADDR_FMT_32_AS_32_32_32_32 = 0x00000038,
ADDR_FMT_APC3 = 0x00000039,
ADDR_FMT_APC4 = 0x0000003a,
ADDR_FMT_APC5 = 0x0000003b,
ADDR_FMT_APC6 = 0x0000003c,
ADDR_FMT_APC7 = 0x0000003d,
ADDR_FMT_CTX1 = 0x0000003e,
ADDR_FMT_RESERVED_63 = 0x0000003f,
ADDR_FMT_ASTC_4x4 = 0x00000040,
ADDR_FMT_ASTC_5x4 = 0x00000041,
ADDR_FMT_ASTC_5x5 = 0x00000042,
ADDR_FMT_ASTC_6x5 = 0x00000043,
ADDR_FMT_ASTC_6x6 = 0x00000044,
ADDR_FMT_ASTC_8x5 = 0x00000045,
ADDR_FMT_ASTC_8x6 = 0x00000046,
ADDR_FMT_ASTC_8x8 = 0x00000047,
ADDR_FMT_ASTC_10x5 = 0x00000048,
ADDR_FMT_ASTC_10x6 = 0x00000049,
ADDR_FMT_ASTC_10x8 = 0x0000004a,
ADDR_FMT_ASTC_10x10 = 0x0000004b,
ADDR_FMT_ASTC_12x10 = 0x0000004c,
ADDR_FMT_ASTC_12x12 = 0x0000004d,
ADDR_FMT_ETC2_64BPP = 0x0000004e,
ADDR_FMT_ETC2_128BPP = 0x0000004f,
} AddrFormat;
/**
****************************************************************************************************
* AddrDepthFormat
*
* @brief
* Neutral enum for addrFlt32ToDepthPixel
*
****************************************************************************************************
*/
typedef enum _AddrDepthFormat
{
ADDR_DEPTH_INVALID = 0x00000000,
ADDR_DEPTH_16 = 0x00000001,
ADDR_DEPTH_X8_24 = 0x00000002,
ADDR_DEPTH_8_24 = 0x00000003,
ADDR_DEPTH_X8_24_FLOAT = 0x00000004,
ADDR_DEPTH_8_24_FLOAT = 0x00000005,
ADDR_DEPTH_32_FLOAT = 0x00000006,
ADDR_DEPTH_X24_8_32_FLOAT = 0x00000007,
} AddrDepthFormat;
/**
****************************************************************************************************
* AddrColorFormat
*
* @brief
* Neutral enum for ColorFormat
*
****************************************************************************************************
*/
typedef enum _AddrColorFormat
{
ADDR_COLOR_INVALID = 0x00000000,
ADDR_COLOR_8 = 0x00000001,
ADDR_COLOR_4_4 = 0x00000002,
ADDR_COLOR_3_3_2 = 0x00000003,
ADDR_COLOR_RESERVED_4 = 0x00000004,
ADDR_COLOR_16 = 0x00000005,
ADDR_COLOR_16_FLOAT = 0x00000006,
ADDR_COLOR_8_8 = 0x00000007,
ADDR_COLOR_5_6_5 = 0x00000008,
ADDR_COLOR_6_5_5 = 0x00000009,
ADDR_COLOR_1_5_5_5 = 0x0000000a,
ADDR_COLOR_4_4_4_4 = 0x0000000b,
ADDR_COLOR_5_5_5_1 = 0x0000000c,
ADDR_COLOR_32 = 0x0000000d,
ADDR_COLOR_32_FLOAT = 0x0000000e,
ADDR_COLOR_16_16 = 0x0000000f,
ADDR_COLOR_16_16_FLOAT = 0x00000010,
ADDR_COLOR_8_24 = 0x00000011,
ADDR_COLOR_8_24_FLOAT = 0x00000012,
ADDR_COLOR_24_8 = 0x00000013,
ADDR_COLOR_24_8_FLOAT = 0x00000014,
ADDR_COLOR_10_11_11 = 0x00000015,
ADDR_COLOR_10_11_11_FLOAT = 0x00000016,
ADDR_COLOR_11_11_10 = 0x00000017,
ADDR_COLOR_11_11_10_FLOAT = 0x00000018,
ADDR_COLOR_2_10_10_10 = 0x00000019,
ADDR_COLOR_8_8_8_8 = 0x0000001a,
ADDR_COLOR_10_10_10_2 = 0x0000001b,
ADDR_COLOR_X24_8_32_FLOAT = 0x0000001c,
ADDR_COLOR_32_32 = 0x0000001d,
ADDR_COLOR_32_32_FLOAT = 0x0000001e,
ADDR_COLOR_16_16_16_16 = 0x0000001f,
ADDR_COLOR_16_16_16_16_FLOAT = 0x00000020,
ADDR_COLOR_RESERVED_33 = 0x00000021,
ADDR_COLOR_32_32_32_32 = 0x00000022,
ADDR_COLOR_32_32_32_32_FLOAT = 0x00000023,
} AddrColorFormat;
/**
****************************************************************************************************
* AddrSurfaceNumber
*
* @brief
* Neutral enum for SurfaceNumber
*
****************************************************************************************************
*/
typedef enum _AddrSurfaceNumber {
ADDR_NUMBER_UNORM = 0x00000000,
ADDR_NUMBER_SNORM = 0x00000001,
ADDR_NUMBER_USCALED = 0x00000002,
ADDR_NUMBER_SSCALED = 0x00000003,
ADDR_NUMBER_UINT = 0x00000004,
ADDR_NUMBER_SINT = 0x00000005,
ADDR_NUMBER_SRGB = 0x00000006,
ADDR_NUMBER_FLOAT = 0x00000007,
} AddrSurfaceNumber;
/**
****************************************************************************************************
* AddrSurfaceSwap
*
* @brief
* Neutral enum for SurfaceSwap
*
****************************************************************************************************
*/
typedef enum _AddrSurfaceSwap {
ADDR_SWAP_STD = 0x00000000,
ADDR_SWAP_ALT = 0x00000001,
ADDR_SWAP_STD_REV = 0x00000002,
ADDR_SWAP_ALT_REV = 0x00000003,
} AddrSurfaceSwap;
/**
****************************************************************************************************
* AddrHtileBlockSize
*
* @brief
* Size of HTILE blocks, valid values are 4 or 8 for now
****************************************************************************************************
*/
typedef enum _AddrHtileBlockSize
{
ADDR_HTILE_BLOCKSIZE_4 = 4,
ADDR_HTILE_BLOCKSIZE_8 = 8,
} AddrHtileBlockSize;
/**
****************************************************************************************************
* AddrPipeCfg
*
* @brief
* The pipe configuration field specifies both the number of pipes and
* how pipes are interleaved on the surface.
* The expression of number of pipes, the shader engine tile size, and packer tile size
* is encoded in a PIPE_CONFIG register field.
* In general the number of pipes usually matches the number of memory channels of the
* hardware configuration.
* For hw configurations w/ non-pow2 memory number of memory channels, it usually matches
* the number of ROP units(? TODO: which registers??)
* The enum value = hw enum + 1 which is to reserve 0 for requesting default.
****************************************************************************************************
*/
// Pipe configuration identifiers. The numbering is not contiguous: values
// 2-4 and 16 have no enumerator in this list.
typedef enum _AddrPipeCfg
{
ADDR_PIPECFG_INVALID = 0,
ADDR_PIPECFG_P2 = 1, ///< 2 pipes
ADDR_PIPECFG_P4_8x16 = 5, ///< 4 pipes (this and the following P4_* entries)
ADDR_PIPECFG_P4_16x16 = 6,
ADDR_PIPECFG_P4_16x32 = 7,
ADDR_PIPECFG_P4_32x32 = 8,
ADDR_PIPECFG_P8_16x16_8x16 = 9, ///< 8 pipes (this and the following P8_* entries)
ADDR_PIPECFG_P8_16x32_8x16 = 10,
ADDR_PIPECFG_P8_32x32_8x16 = 11,
ADDR_PIPECFG_P8_16x32_16x16 = 12,
ADDR_PIPECFG_P8_32x32_16x16 = 13,
ADDR_PIPECFG_P8_32x32_16x32 = 14,
ADDR_PIPECFG_P8_32x64_32x32 = 15,
ADDR_PIPECFG_P16_32x32_8x16 = 17, ///< 16 pipes (this and the following P16_* entry)
ADDR_PIPECFG_P16_32x32_16x16 = 18,
ADDR_PIPECFG_UNUSED = 19,
ADDR_PIPECFG_MAX = 20,
} AddrPipeCfg;
/**
****************************************************************************************************
* AddrTileType
*
* @brief
* Neutral enums that specifies micro tile type (MICRO_TILE_MODE)
****************************************************************************************************
*/
typedef enum _AddrTileType
{
ADDR_DISPLAYABLE = 0, ///< Displayable tiling
ADDR_NON_DISPLAYABLE = 1, ///< Non-displayable tiling, a.k.a thin micro tiling
ADDR_DEPTH_SAMPLE_ORDER = 2, ///< Same as non-displayable plus depth-sample-order
ADDR_ROTATED = 3, ///< Rotated displayable tiling
ADDR_THICK = 4, ///< Thick micro-tiling, only valid for THICK and XTHICK
} AddrTileType;
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Type definitions: short system-independent names for address library types
//
////////////////////////////////////////////////////////////////////////////////////////////////////
#if !defined(__APPLE__) || defined(HAVE_TSERVER)
#ifndef BOOL_32 // no bool type in C
/// @brief Boolean type, since none is defined in C
/// @ingroup type
#define BOOL_32 int
#endif
#ifndef INT_32
#define INT_32 int
#endif
#ifndef UINT_32
#define UINT_32 unsigned int
#endif
#ifndef INT_16
#define INT_16 short
#endif
#ifndef UINT_16
#define UINT_16 unsigned short
#endif
#ifndef INT_8
#define INT_8 char
#endif
#ifndef UINT_8
#define UINT_8 unsigned char
#endif
#ifndef NULL
#define NULL 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
//
// 64-bit integer types depend on the compiler
//
#if defined( __GNUC__ ) || defined( __WATCOMC__ )
#define INT_64 long long
#define UINT_64 unsigned long long
#elif defined( _WIN32 )
#define INT_64 __int64
#define UINT_64 unsigned __int64
#else
#error Unsupported compiler and/or operating system for 64-bit integers
/// @brief 64-bit signed integer type (compiler dependent)
/// @ingroup type
///
/// The addrlib defines a 64-bit signed integer type for either
/// Gnu/Watcom compilers (which use the first syntax) or for
/// the Windows VCC compiler (which uses the second syntax).
#define INT_64 long long OR __int64
/// @brief 64-bit unsigned integer type (compiler dependent)
/// @ingroup type
///
/// The addrlib defines a 64-bit unsigned integer type for either
/// Gnu/Watcom compilers (which use the first syntax) or for
/// the Windows VCC compiler (which uses the second syntax).
///
#define UINT_64 unsigned long long OR unsigned __int64
#endif
#endif // #if !defined(__APPLE__) || defined(HAVE_TSERVER)
// ADDR64X is used to print addresses in hex form on both Windows and Linux
//
#if defined( __GNUC__ ) || defined( __WATCOMC__ )
#define ADDR64X "llx"
#define ADDR64D "lld"
#elif defined( _WIN32 )
#define ADDR64X "I64x"
#define ADDR64D "I64d"
#else
#error Unsupported compiler and/or operating system for 64-bit integers
/// @brief Addrlib device address 64-bit printf tag (compiler dependent)
/// @ingroup type
///
/// This allows printf to display an ADDR_64 for either the Windows VCC compiler
/// (which uses this value) or the Gnu/Watcom compilers (which use "llx").
/// An example of use is printf("addr 0x%"ADDR64X"\n", address);
///
#define ADDR64X "llx" OR "I64x"
#define ADDR64D "lld" OR "I64d"
#endif
/// @brief Union for storing a 32-bit float or 32-bit integer
/// @ingroup type
///
/// This union provides a simple way to convert between a 32-bit float
/// and a 32-bit integer. It also prevents the compiler from producing
/// code that alters NaN values when assigning or copying floats.
/// Therefore, all address library routines that pass or return 32-bit
/// floating point data do so by passing or returning an ADDR_FLT_32.
///
/// Members (all three overlay the same storage):
///   i - the value viewed as INT_32
///   u - the value viewed as UINT_32
///   f - the value viewed as float
///
typedef union {
INT_32 i;
UINT_32 u;
float f;
} ADDR_FLT_32;
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Macros for controlling linking and building on multiple systems
//
////////////////////////////////////////////////////////////////////////////////////////////////////
// Under MSVC, discard any va_copy definition that is already visible so that
// the memcpy-based fallback below is the one in effect.
#if defined(_MSC_VER)
#if defined(va_copy)
#undef va_copy //redefine va_copy to support VC2013
#endif
#endif
// Fallback va_copy: copies the va_list object byte-for-byte. The expansion
// calls memcpy, so a declaration of memcpy must be visible where it is used.
#if !defined(va_copy)
#define va_copy(dst, src) \
((void) memcpy(&(dst), &(src), sizeof(va_list)))
#endif
#endif // __ADDR_TYPES_H__
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
@@ -0,0 +1,148 @@
/*
* Copyright © 2017-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
#ifndef _AMDGPU_ASIC_ADDR_H
#define _AMDGPU_ASIC_ADDR_H
/* Vendor IDs that identify AMD hardware (the ATI ID and the AMD ID). */
#define ATI_VENDOR_ID 0x1002
#define AMD_VENDOR_ID 0x1022
/*
 * AMDGPU_VENDOR_IS_AMD(vendorId)
 *
 * Non-zero when vendorId equals ATI_VENDOR_ID or AMD_VENDOR_ID.
 * The argument is parenthesized so that expressions such as
 * `id & 0xFFFF` are compared as a whole rather than being split by the
 * higher precedence of `==`. Note that the argument is evaluated up to twice.
 */
#define AMDGPU_VENDOR_IS_AMD(v) (((v) == ATI_VENDOR_ID) || ((v) == AMD_VENDOR_ID))
/*
 * Chip family identifiers. FAMILY_VI and FAMILY_POLARIS share the value 0x82,
 * so FAMILY_IS_VI() and FAMILY_IS_POLARIS() are true for the same input.
 */
#define FAMILY_UNKNOWN 0x00
#define FAMILY_TN 0x69
#define FAMILY_SI 0x6E
#define FAMILY_CI 0x78
#define FAMILY_KV 0x7D
#define FAMILY_VI 0x82
#define FAMILY_POLARIS 0x82
#define FAMILY_CZ 0x87
#define FAMILY_AI 0x8D
#define FAMILY_RV 0x8E
#define FAMILY_NV 0x8F
/*
 * FAMILY_IS(familyId, familyName)
 *
 * Non-zero when familyId equals FAMILY_<familyName>. The id argument is
 * parenthesized so that an expression argument (e.g. `id & 0xFF`) is compared
 * as a whole instead of being split by operator precedence.
 */
#define FAMILY_IS(f, fn) ((f) == FAMILY_##fn)
/* Per-family convenience wrappers around FAMILY_IS(). */
#define FAMILY_IS_TN(f) FAMILY_IS(f, TN)
#define FAMILY_IS_SI(f) FAMILY_IS(f, SI)
#define FAMILY_IS_CI(f) FAMILY_IS(f, CI)
#define FAMILY_IS_KV(f) FAMILY_IS(f, KV)
#define FAMILY_IS_VI(f) FAMILY_IS(f, VI)
#define FAMILY_IS_POLARIS(f) FAMILY_IS(f, POLARIS)
#define FAMILY_IS_CZ(f) FAMILY_IS(f, CZ)
#define FAMILY_IS_AI(f) FAMILY_IS(f, AI)
#define FAMILY_IS_RV(f) FAMILY_IS(f, RV)
#define FAMILY_IS_NV(f) FAMILY_IS(f, NV)
#define AMDGPU_UNKNOWN 0xFF
// Revision-id ranges. Each macro expands to the two-element list "min, max"
// that ASICREV_IS() feeds to AMDGPU_RANGE_HELPER(), which tests
// min <= rev < max (upper bound excluded).
// The same numeric range is reused by several chips (e.g. BONAIRE, TONGA,
// VEGA12 and NAVI14 are all 0x14, 0x28), so a range check alone does not
// identify a chip. NOTE(review): callers presumably check the family first --
// confirm.
#define AMDGPU_TAHITI_RANGE 0x05, 0x14
#define AMDGPU_PITCAIRN_RANGE 0x15, 0x28
#define AMDGPU_CAPEVERDE_RANGE 0x29, 0x3C
#define AMDGPU_OLAND_RANGE 0x3C, 0x46
#define AMDGPU_HAINAN_RANGE 0x46, 0xFF
#define AMDGPU_BONAIRE_RANGE 0x14, 0x28
#define AMDGPU_HAWAII_RANGE 0x28, 0x3C
#define AMDGPU_SPECTRE_RANGE 0x01, 0x41
#define AMDGPU_SPOOKY_RANGE 0x41, 0x81
#define AMDGPU_KALINDI_RANGE 0x81, 0xA1
#define AMDGPU_GODAVARI_RANGE 0xA1, 0xFF
#define AMDGPU_ICELAND_RANGE 0x01, 0x14
#define AMDGPU_TONGA_RANGE 0x14, 0x28
#define AMDGPU_FIJI_RANGE 0x3C, 0x50
#define AMDGPU_POLARIS10_RANGE 0x50, 0x5A
#define AMDGPU_POLARIS11_RANGE 0x5A, 0x64
#define AMDGPU_POLARIS12_RANGE 0x64, 0x6E
#define AMDGPU_VEGAM_RANGE 0x6E, 0xFF
#define AMDGPU_CARRIZO_RANGE 0x01, 0x21
#define AMDGPU_STONEY_RANGE 0x61, 0xFF
#define AMDGPU_VEGA10_RANGE 0x01, 0x14
#define AMDGPU_VEGA12_RANGE 0x14, 0x28
#define AMDGPU_VEGA20_RANGE 0x28, 0x32
#define AMDGPU_ARCTURUS_RANGE 0x32, 0xFF
#define AMDGPU_RAVEN_RANGE 0x01, 0x81
#define AMDGPU_RAVEN2_RANGE 0x81, 0x91
#define AMDGPU_RENOIR_RANGE 0x91, 0xFF
#define AMDGPU_NAVI10_RANGE 0x01, 0x0A
#define AMDGPU_NAVI12_RANGE 0x0A, 0x14
#define AMDGPU_NAVI14_RANGE 0x14, 0x28
/*
 * AMDGPU_EXPAND_FIX(x): identity macro that forces one more expansion pass.
 * NOTE(review): presumably needed for preprocessors that hand __VA_ARGS__ on
 * as a single argument -- confirm.
 */
#define AMDGPU_EXPAND_FIX(x) x
/*
 * AMDGPU_RANGE_HELPER(val, min, max): non-zero when min <= val < max.
 * Every argument is parenthesized so that expression arguments
 * (e.g. `rev & 0xFF`) are compared as a whole; val is evaluated twice.
 */
#define AMDGPU_RANGE_HELPER(val, min, max) (((val) >= (min)) && ((val) < (max)))
/*
 * AMDGPU_IN_RANGE(val, min, max) or AMDGPU_IN_RANGE(val, <macro expanding to "min, max">).
 */
#define AMDGPU_IN_RANGE(val, ...) AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__))
// ASICREV_IS(eRevisionId, revisionName)
// Expands to AMDGPU_IN_RANGE(r, AMDGPU_<revisionName>_RANGE), i.e. a test of
// the revision id against the "min, max" pair defined for that chip name.
#define ASICREV_IS(r, rn) AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE)
// Per-chip convenience wrappers. Some names are aliases of the same test:
// ASICREV_IS_VEGA10_M / ASICREV_IS_VEGA10_P both check the VEGA10 range, and
// ASICREV_IS_VEGA12_P / ASICREV_IS_VEGA12_p both check the VEGA12 range.
#define ASICREV_IS_TAHITI_P(r) ASICREV_IS(r, TAHITI)
#define ASICREV_IS_PITCAIRN_PM(r) ASICREV_IS(r, PITCAIRN)
#define ASICREV_IS_CAPEVERDE_M(r) ASICREV_IS(r, CAPEVERDE)
#define ASICREV_IS_OLAND_M(r) ASICREV_IS(r, OLAND)
#define ASICREV_IS_HAINAN_V(r) ASICREV_IS(r, HAINAN)
#define ASICREV_IS_BONAIRE_M(r) ASICREV_IS(r, BONAIRE)
#define ASICREV_IS_HAWAII_P(r) ASICREV_IS(r, HAWAII)
#define ASICREV_IS_SPECTRE(r) ASICREV_IS(r, SPECTRE)
#define ASICREV_IS_SPOOKY(r) ASICREV_IS(r, SPOOKY)
#define ASICREV_IS_KALINDI(r) ASICREV_IS(r, KALINDI)
#define ASICREV_IS_KALINDI_GODAVARI(r) ASICREV_IS(r, GODAVARI)
#define ASICREV_IS_ICELAND_M(r) ASICREV_IS(r, ICELAND)
#define ASICREV_IS_TONGA_P(r) ASICREV_IS(r, TONGA)
#define ASICREV_IS_FIJI_P(r) ASICREV_IS(r, FIJI)
#define ASICREV_IS_POLARIS10_P(r) ASICREV_IS(r, POLARIS10)
#define ASICREV_IS_POLARIS11_M(r) ASICREV_IS(r, POLARIS11)
#define ASICREV_IS_POLARIS12_V(r) ASICREV_IS(r, POLARIS12)
#define ASICREV_IS_VEGAM_P(r) ASICREV_IS(r, VEGAM)
#define ASICREV_IS_CARRIZO(r) ASICREV_IS(r, CARRIZO)
#define ASICREV_IS_STONEY(r) ASICREV_IS(r, STONEY)
#define ASICREV_IS_VEGA10_M(r) ASICREV_IS(r, VEGA10)
#define ASICREV_IS_VEGA10_P(r) ASICREV_IS(r, VEGA10)
#define ASICREV_IS_VEGA12_P(r) ASICREV_IS(r, VEGA12)
#define ASICREV_IS_VEGA12_p(r) ASICREV_IS(r, VEGA12)
#define ASICREV_IS_VEGA20_P(r) ASICREV_IS(r, VEGA20)
#define ASICREV_IS_ARCTURUS(r) ASICREV_IS(r, ARCTURUS)
#define ASICREV_IS_RAVEN(r) ASICREV_IS(r, RAVEN)
#define ASICREV_IS_RAVEN2(r) ASICREV_IS(r, RAVEN2)
#define ASICREV_IS_RENOIR(r) ASICREV_IS(r, RENOIR)
#define ASICREV_IS_NAVI10_P(r) ASICREV_IS(r, NAVI10)
#define ASICREV_IS_NAVI12(r) ASICREV_IS(r, NAVI12)
#define ASICREV_IS_NAVI14(r) ASICREV_IS(r, NAVI14)
#endif // _AMDGPU_ASIC_ADDR_H
@@ -0,0 +1,61 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
#if !defined (__GFX10_GB_REG_H__)
#define __GFX10_GB_REG_H__
/*
* gfx10_gb_reg.h
*
* Register Spec Release: 1.0
*
*/
// GB_ADDR_CONFIG register value as described by gfx10_gb_reg.h.
// The same storage can be read through the named bit-fields
// (`bitfields` / `bits` are two names for the same struct) or as a whole
// word via u32All / i32All / f32All.
union GB_ADDR_CONFIG
{
struct
{
// The two variants list the same fields in opposite order; the widths add
// up to 32 bits (3 + 3 + 2 + 3 + 21). If neither LITTLEENDIAN_CPU nor
// BIGENDIAN_CPU is defined, the struct is left without members.
#if defined(LITTLEENDIAN_CPU)
unsigned int NUM_PIPES : 3;
unsigned int PIPE_INTERLEAVE_SIZE : 3;
unsigned int MAX_COMPRESSED_FRAGS : 2;
unsigned int NUM_PKRS : 3;
unsigned int : 21; // unnamed padding
#elif defined(BIGENDIAN_CPU)
unsigned int : 21; // unnamed padding
unsigned int NUM_PKRS : 3;
unsigned int MAX_COMPRESSED_FRAGS : 2;
unsigned int PIPE_INTERLEAVE_SIZE : 3;
unsigned int NUM_PIPES : 3;
#endif
} bitfields, bits;
unsigned int u32All; // whole value, unsigned view
int i32All; // whole value, signed view
float f32All; // whole value, float view
};
#endif
@@ -0,0 +1,79 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
#if !defined (__GFX9_GB_REG_H__)
#define __GFX9_GB_REG_H__
/*
* gfx9_gb_reg.h
*
* Register Spec Release: 1.0
*
*/
// GB_ADDR_CONFIG register value as described by gfx9_gb_reg.h.
// The same storage can be read through the named bit-fields
// (`bitfields` / `bits` are two names for the same struct) or as a whole
// word via u32All / i32All / f32All.
union GB_ADDR_CONFIG {
struct {
// The two variants list the same fields in opposite order; the widths add
// up to 32 bits. If neither LITTLEENDIAN_CPU nor BIGENDIAN_CPU is defined,
// the struct is left without members.
#if defined(LITTLEENDIAN_CPU)
unsigned int NUM_PIPES : 3;
unsigned int PIPE_INTERLEAVE_SIZE : 3;
unsigned int MAX_COMPRESSED_FRAGS : 2;
unsigned int BANK_INTERLEAVE_SIZE : 3;
unsigned int : 1; // unnamed padding
unsigned int NUM_BANKS : 3;
unsigned int : 1; // unnamed padding
unsigned int SHADER_ENGINE_TILE_SIZE : 3;
unsigned int NUM_SHADER_ENGINES : 2;
unsigned int NUM_GPUS : 3;
unsigned int MULTI_GPU_TILE_SIZE : 2;
unsigned int NUM_RB_PER_SE : 2;
unsigned int ROW_SIZE : 2;
unsigned int NUM_LOWER_PIPES : 1;
unsigned int SE_ENABLE : 1;
#elif defined(BIGENDIAN_CPU)
unsigned int SE_ENABLE : 1;
unsigned int NUM_LOWER_PIPES : 1;
unsigned int ROW_SIZE : 2;
unsigned int NUM_RB_PER_SE : 2;
unsigned int MULTI_GPU_TILE_SIZE : 2;
unsigned int NUM_GPUS : 3;
unsigned int NUM_SHADER_ENGINES : 2;
unsigned int SHADER_ENGINE_TILE_SIZE : 3;
unsigned int : 1; // unnamed padding
unsigned int NUM_BANKS : 3;
unsigned int : 1; // unnamed padding
unsigned int BANK_INTERLEAVE_SIZE : 3;
unsigned int MAX_COMPRESSED_FRAGS : 2;
unsigned int PIPE_INTERLEAVE_SIZE : 3;
unsigned int NUM_PIPES : 3;
#endif
} bitfields, bits;
unsigned int u32All; // whole value, unsigned view
signed int i32All; // whole value, signed view
float f32All; // whole value, float view
};
#endif
@@ -0,0 +1,154 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
#if !defined (__SI_GB_REG_H__)
#define __SI_GB_REG_H__
/*****************************************************************************************************************
*
* si_gb_reg.h
*
* Register Spec Release: Chip Spec 0.28
*
*****************************************************************************************************************/
/*
* GB_ADDR_CONFIG struct
*/
// Bit-field layout of GB_ADDR_CONFIG (si_gb_reg.h). The little- and
// big-endian variants list the same fields in opposite order; the widths add
// up to 32 bits. Unnamed members are padding.
// If neither LITTLEENDIAN_CPU nor BIGENDIAN_CPU is defined, GB_ADDR_CONFIG_T
// is not declared and the union below does not compile.
#if defined(LITTLEENDIAN_CPU)
typedef struct _GB_ADDR_CONFIG_T {
unsigned int num_pipes : 3;
unsigned int : 1;
unsigned int pipe_interleave_size : 3;
unsigned int : 1;
unsigned int bank_interleave_size : 3;
unsigned int : 1;
unsigned int num_shader_engines : 2;
unsigned int : 2;
unsigned int shader_engine_tile_size : 3;
unsigned int : 1;
unsigned int num_gpus : 3;
unsigned int : 1;
unsigned int multi_gpu_tile_size : 2;
unsigned int : 2;
unsigned int row_size : 2;
unsigned int num_lower_pipes : 1;
unsigned int : 1;
} GB_ADDR_CONFIG_T;
#elif defined(BIGENDIAN_CPU)
typedef struct _GB_ADDR_CONFIG_T {
unsigned int : 1;
unsigned int num_lower_pipes : 1;
unsigned int row_size : 2;
unsigned int : 2;
unsigned int multi_gpu_tile_size : 2;
unsigned int : 1;
unsigned int num_gpus : 3;
unsigned int : 1;
unsigned int shader_engine_tile_size : 3;
unsigned int : 2;
unsigned int num_shader_engines : 2;
unsigned int : 1;
unsigned int bank_interleave_size : 3;
unsigned int : 1;
unsigned int pipe_interleave_size : 3;
unsigned int : 1;
unsigned int num_pipes : 3;
} GB_ADDR_CONFIG_T;
#endif
// Whole-word (`val`) and per-field (`f`) views of the same value.
typedef union {
unsigned int val : 32;
GB_ADDR_CONFIG_T f;
} GB_ADDR_CONFIG;
// Bit-field layouts of GB_TILE_MODE and GB_MACROTILE_MODE (si_gb_reg.h).
// The little- and big-endian variants list the same fields in opposite
// order; each struct's widths add up to 32 bits. Unnamed members are padding.
// If neither LITTLEENDIAN_CPU nor BIGENDIAN_CPU is defined, the *_T types are
// not declared and the unions below do not compile.
#if defined(LITTLEENDIAN_CPU)
typedef struct _GB_TILE_MODE_T {
unsigned int micro_tile_mode : 2;
unsigned int array_mode : 4;
unsigned int pipe_config : 5;
unsigned int tile_split : 3;
unsigned int bank_width : 2;
unsigned int bank_height : 2;
unsigned int macro_tile_aspect : 2;
unsigned int num_banks : 2;
unsigned int micro_tile_mode_new : 3;
unsigned int sample_split : 2;
unsigned int : 5;
} GB_TILE_MODE_T;
typedef struct _GB_MACROTILE_MODE_T {
unsigned int bank_width : 2;
unsigned int bank_height : 2;
unsigned int macro_tile_aspect : 2;
unsigned int num_banks : 2;
unsigned int : 24;
} GB_MACROTILE_MODE_T;
#elif defined(BIGENDIAN_CPU)
typedef struct _GB_TILE_MODE_T {
unsigned int : 5;
unsigned int sample_split : 2;
unsigned int micro_tile_mode_new : 3;
unsigned int num_banks : 2;
unsigned int macro_tile_aspect : 2;
unsigned int bank_height : 2;
unsigned int bank_width : 2;
unsigned int tile_split : 3;
unsigned int pipe_config : 5;
unsigned int array_mode : 4;
unsigned int micro_tile_mode : 2;
} GB_TILE_MODE_T;
typedef struct _GB_MACROTILE_MODE_T {
unsigned int : 24;
unsigned int num_banks : 2;
unsigned int macro_tile_aspect : 2;
unsigned int bank_height : 2;
unsigned int bank_width : 2;
} GB_MACROTILE_MODE_T;
#endif
// Whole-word (`val`) and per-field (`f`) views of a tile-mode value.
typedef union {
unsigned int val : 32;
GB_TILE_MODE_T f;
} GB_TILE_MODE;
// Whole-word (`val`) and per-field (`f`) views of a macro-tile-mode value.
typedef union {
unsigned int val : 32;
GB_MACROTILE_MODE_T f;
} GB_MACROTILE_MODE;
#endif
@@ -0,0 +1,948 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/**
****************************************************************************************************
* @file addrcommon.h
* @brief Contains the helper function and constants.
****************************************************************************************************
*/
#ifndef __ADDR_COMMON_H__
#define __ADDR_COMMON_H__
#include "addrinterface.h"
// Derive DEBUG from NDEBUG unless the build has already defined it:
// NDEBUG defined -> DEBUG 0, otherwise -> DEBUG 1.
#if !defined(DEBUG)
#ifdef NDEBUG
#define DEBUG 0
#else
#define DEBUG 1
#endif
#endif
// ADDR_LNX_KERNEL_BUILD is for internal build
// Moved from addrinterface.h so __KERNEL__ is not needed any more
#if ADDR_LNX_KERNEL_BUILD // || (defined(__GNUC__) && defined(__KERNEL__))
#include <string.h>
#elif !defined(__APPLE__) || defined(HAVE_TSERVER)
#include <stdlib.h>
#include <string.h>
#endif
#include <assert.h>
#include "util/macros.h"
////////////////////////////////////////////////////////////////////////////////////////////////////
// Platform specific debug break defines
////////////////////////////////////////////////////////////////////////////////////////////////////
// ADDR_DBG_BREAK(): halts in debug builds and expands to an empty statement
// otherwise. The __GNUC__ test comes first, so the __APPLE__ branch is only
// reached by compilers that do not define __GNUC__.
#if DEBUG
#if defined(__GNUC__)
#define ADDR_DBG_BREAK() assert(false)
#elif defined(__APPLE__)
#define ADDR_DBG_BREAK() { IOPanic("");}
#else
#define ADDR_DBG_BREAK() { __debugbreak(); }
#endif
#else
#define ADDR_DBG_BREAK() do {} while(0)
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
// Debug assertions used in AddrLib
////////////////////////////////////////////////////////////////////////////////////////////////////
// ADDR_ANALYSIS_ASSUME(expr): hint for the MSVC static analyzer; on other
// toolchains it only evaluates expr and discards the result.
#if defined(_WIN32) && (_MSC_VER >= 1400)
#define ADDR_ANALYSIS_ASSUME(expr) __analysis_assume(expr)
#else
#define ADDR_ANALYSIS_ASSUME(expr) do { (void)(expr); } while (0)
#endif
// ADDR_ASSERT(e): forwards to the standard assert().
#define ADDR_ASSERT(__e) assert(__e)
// ADDR_ASSERT_ALWAYS(): unconditional debug break.
#define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK()
// Assertions that always fail; the string literal shows up in the assert message.
#define ADDR_UNHANDLED_CASE() ADDR_ASSERT(!"Unhandled case")
// No trailing semicolon in the expansion: the caller supplies it, which keeps
// `if (c) ADDR_NOT_IMPLEMENTED(); else ...` well-formed.
#define ADDR_NOT_IMPLEMENTED() ADDR_ASSERT(!"Not implemented")
////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////
// Debug print macro from legacy address library
////////////////////////////////////////////////////////////////////////////////////////////////////
#if DEBUG
// ADDR_PRNT(a): `a` is a parenthesized argument list that is appended to
// Object::DebugPrint, i.e. ADDR_PRNT(("x %d", 1)) calls Object::DebugPrint("x %d", 1).
#define ADDR_PRNT(a) Object::DebugPrint a
// The statement macros below are wrapped in do { ... } while (0) -- the same
// shape as their release-build counterparts -- so that they behave as a single
// statement and `if (c) ADDR_WARN(...); else ...` compiles in both builds.

/// @brief Macro for reporting informational messages
/// @ingroup util
///
/// This macro optionally prints an informational message to stdout.
/// The first parameter is a condition -- if it is true, nothing is done.
/// The second parameter MUST be a parenthesis-enclosed list of arguments,
/// starting with a string. This is passed to printf() or an equivalent
/// in order to format the informational message. For example,
/// ADDR_INFO(0, ("test %d",3) ); prints out "test 3".
///
#define ADDR_INFO(cond, a) \
    do { if (!(cond)) { ADDR_PRNT(a); } } while (0)
/// @brief Macro for reporting error warning messages
/// @ingroup util
///
/// This macro optionally prints an error warning message to stdout,
/// followed by the file name and line number where the macro was called.
/// The first parameter is a condition -- if it is true, nothing is done.
/// The second parameter MUST be a parenthesis-enclosed list of arguments,
/// starting with a string. This is passed to printf() or an equivalent
/// in order to format the informational message. For example,
/// ADDR_WARN(0, ("test %d",3) ); prints out "test 3" followed by
/// a second line with the file name and line number.
///
#define ADDR_WARN(cond, a) \
    do { if (!(cond)) \
        { ADDR_PRNT(a); \
          ADDR_PRNT((" WARNING in file %s, line %d\n", __FILE__, __LINE__)); \
        } } while (0)
/// @brief Macro for reporting fatal error conditions
/// @ingroup util
///
/// This macro optionally stops execution of the current routine
/// after printing an error warning message to stdout.
/// The first parameter is a condition -- if it is true, nothing is done.
/// The second parameter MUST be a parenthesis-enclosed list of arguments,
/// starting with a string. This is passed to printf() or an equivalent
/// in order to format the informational message. For example,
/// ADDR_EXIT(0, ("test %d",3) ); prints out "test 3", then calls
/// ADDR_DBG_BREAK().
///
#define ADDR_EXIT(cond, a) \
    do { if (!(cond)) \
        { ADDR_PRNT(a); ADDR_DBG_BREAK();\
        } } while (0)
#else // DEBUG
// Release build: every macro expands to an empty statement, so neither the
// condition nor the message arguments are evaluated.
#define ADDRDPF 1 ? (void)0 : (void)
#define ADDR_PRNT(a) do {} while(0)
#define ADDR_DBG_BREAK() do {} while(0)
#define ADDR_INFO(cond, a) do {} while(0)
#define ADDR_WARN(cond, a) do {} while(0)
#define ADDR_EXIT(cond, a) do {} while(0)
#endif // DEBUG
////////////////////////////////////////////////////////////////////////////////////////////////////
// Compile-time assertion; forwards to STATIC_ASSERT, which is not defined in
// this header (presumably supplied by "util/macros.h" -- confirm).
#define ADDR_C_ASSERT(__e) STATIC_ASSERT(__e)
namespace Addr
{
// Constants shared by the V1 address library code.
namespace V1
{
////////////////////////////////////////////////////////////////////////////////////////////////////
// Common constants
////////////////////////////////////////////////////////////////////////////////////////////////////
static const UINT_32 MicroTileWidth = 8; ///< Micro tile width, for 1D and 2D tiling
static const UINT_32 MicroTileHeight = 8; ///< Micro tile height, for 1D and 2D tiling
static const UINT_32 ThickTileThickness = 4; ///< Micro tile thickness, for THICK modes
static const UINT_32 XThickTileThickness = 8; ///< Extra thick tiling thickness
static const UINT_32 PowerSaveTileBytes = 64; ///< Number of bytes per tile for power save 64
static const UINT_32 CmaskCacheBits = 1024; ///< Number of bits for CMASK cache
static const UINT_32 CmaskElemBits = 4; ///< Number of bits for CMASK element
static const UINT_32 HtileCacheBits = 16384; ///< Number of bits for HTILE cache 512*32
static const UINT_32 MicroTilePixels = MicroTileWidth * MicroTileHeight; ///< Pixels per micro tile (width * height)
// The TILEINDEX_* values are not defined in this header
// (presumably they come from "addrinterface.h" -- confirm).
static const INT_32 TileIndexInvalid = TILEINDEX_INVALID;
static const INT_32 TileIndexLinearGeneral = TILEINDEX_LINEAR_GENERAL;
static const INT_32 TileIndexNoMacroIndex = -3;
} // V1
// Constants shared by the V2 address library code.
namespace V2
{
////////////////////////////////////////////////////////////////////////////////////////////////////
// Common constants
////////////////////////////////////////////////////////////////////////////////////////////////////
static const UINT_32 MaxSurfaceHeight = 16384; ///< Upper bound used for surface height
} // V2
////////////////////////////////////////////////////////////////////////////////////////////////////
// Common macros
////////////////////////////////////////////////////////////////////////////////////////////////////
#define BITS_PER_BYTE 8
// Number of bytes needed to hold x bits (rounds up to a whole byte).
#define BITS_TO_BYTES(x) ( ((x) + (BITS_PER_BYTE-1)) / BITS_PER_BYTE )
// Number of bits in x bytes.
#define BYTES_TO_BITS(x) ( (x) * BITS_PER_BYTE )
/// Helper macros to select a single bit from an int (undefined later in section)
// Evaluates to bit b of v, i.e. 0 or 1.
#define _BIT(v,b) (((v) >> (b) ) & 1)
/**
****************************************************************************************************
* @brief Enums to identify AddrLib type
****************************************************************************************************
*/
// Identifies which address library implementation is in use. The numeric
// values are fixed explicitly and are not contiguous.
enum LibClass
{
BASE_ADDRLIB = 0x0,
R600_ADDRLIB = 0x6,
R800_ADDRLIB = 0x8,
SI_ADDRLIB = 0xa,
CI_ADDRLIB = 0xb,
AI_ADDRLIB = 0xd,
};
/**
****************************************************************************************************
* ChipFamily
*
* @brief
* Neutral enums that specifies chip family.
*
****************************************************************************************************
*/
// Chip family identifiers. Enumerators take consecutive values starting at 0,
// so ADDR_CHIP_FAMILY_IVLD == 0 and the order below defines the numbering.
enum ChipFamily
{
ADDR_CHIP_FAMILY_IVLD, ///< Invalid family
ADDR_CHIP_FAMILY_R6XX,
ADDR_CHIP_FAMILY_R7XX,
ADDR_CHIP_FAMILY_R8XX,
ADDR_CHIP_FAMILY_NI,
ADDR_CHIP_FAMILY_SI,
ADDR_CHIP_FAMILY_CI,
ADDR_CHIP_FAMILY_VI,
ADDR_CHIP_FAMILY_AI,
ADDR_CHIP_FAMILY_NAVI,
};
/**
****************************************************************************************************
* ConfigFlags
*
* @brief
* This structure is used to set configuration flags.
****************************************************************************************************
*/
// Configuration flags, accessible either as individual 1-bit fields or as one
// UINT_32 through `value`. The thirteen flags plus the 19 reserved bits add
// up to 32 bits.
union ConfigFlags
{
struct
{
/// These flags are set up internally thru AddrLib::Create() based on ADDR_CREATE_FLAGS
UINT_32 optimalBankSwap : 1; ///< New bank tiling for RV770 only
UINT_32 noCubeMipSlicesPad : 1; ///< Disables faces padding for cubemap mipmaps
UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and
/// output structure
UINT_32 ignoreTileInfo : 1; ///< Don't use tile info structure
UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid
UINT_32 useCombinedSwizzle : 1; ///< Use combined swizzle
UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level
UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment
UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize
UINT_32 disableLinearOpt : 1; ///< Disallow tile modes to be optimized to linear
UINT_32 use32bppFor422Fmt : 1; ///< View 422 formats as 32 bits per pixel element
UINT_32 forceDccAndTcCompat : 1; ///< Force enable DCC and TC compatibility
UINT_32 nonPower2MemConfig : 1; ///< Physical video memory size is not power of 2
UINT_32 reserved : 19; ///< Reserved bits for future use
};
UINT_32 value; ///< All flags as a single 32-bit word
};
////////////////////////////////////////////////////////////////////////////////////////////////////
// Misc helper functions
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
****************************************************************************************************
*   XorReduce
*
*   @brief
*       XOR together the lowest numberOfBits bits of x.
*
*   @note
*       Bit 0 always contributes, so numberOfBits values of 0 and 1 give the same result.
*
*   @return
*       0 or 1
****************************************************************************************************
*/
static inline UINT_32 XorReduce(
    UINT_32 x,
    UINT_32 numberOfBits)
{
    UINT_32 parity = x & 1;   // bit 0 seeds the accumulator
    UINT_32 bit    = 1;

    while (bit < numberOfBits)
    {
        parity ^= (x >> bit) & 1;
        ++bit;
    }

    return parity;
}
/**
****************************************************************************************************
*   IsPow2
*
*   @brief
*       Tell whether a non-zero UINT_32 has exactly one bit set.
*
*   @return
*       1 if dim is a power of two, 0 otherwise
****************************************************************************************************
*/
static inline UINT_32 IsPow2(
    UINT_32 dim)        ///< [in] value to test; asserted to be non-zero
{
    ADDR_ASSERT(dim > 0);

    // Clearing the lowest set bit leaves nothing behind only for powers of two.
    const UINT_32 remainingBits = dim & (dim - 1);

    return (remainingBits == 0) ? 1 : 0;
}
/**
****************************************************************************************************
*   IsPow2
*
*   @brief
*       Tell whether a non-zero UINT_64 has exactly one bit set.
*
*   @return
*       1 if dim is a power of two, 0 otherwise
****************************************************************************************************
*/
static inline UINT_64 IsPow2(
    UINT_64 dim)        ///< [in] value to test; asserted to be non-zero
{
    ADDR_ASSERT(dim > 0);

    // Clearing the lowest set bit leaves nothing behind only for powers of two.
    const UINT_64 remainingBits = dim & (dim - 1);

    return (remainingBits == 0) ? 1 : 0;
}
/**
****************************************************************************************************
*   PowTwoAlign
*
*   @brief
*       Round UINT_32 "x" up to the next multiple of "align"; "align" must be a power of two.
****************************************************************************************************
*/
static inline UINT_32 PowTwoAlign(
    UINT_32 x,
    UINT_32 align)
{
    // The mask trick below is only valid when align is a power of two.
    ADDR_ASSERT(IsPow2(align));

    const UINT_32 lowMask = align - 1;

    return (x + lowMask) & ~lowMask;
}
/**
****************************************************************************************************
*   PowTwoAlign
*
*   @brief
*       Round UINT_64 "x" up to the next multiple of "align"; "align" must be a power of two.
****************************************************************************************************
*/
static inline UINT_64 PowTwoAlign(
    UINT_64 x,
    UINT_64 align)
{
    // The mask trick below is only valid when align is a power of two.
    ADDR_ASSERT(IsPow2(align));

    const UINT_64 lowMask = align - 1;

    return (x + lowMask) & ~lowMask;
}
/**
****************************************************************************************************
*   Min
*
*   @brief
*       Return the smaller of two unsigned values
****************************************************************************************************
*/
static inline UINT_32 Min(
    UINT_32 value1,
    UINT_32 value2)
{
    if (value1 < value2)
    {
        return value1;
    }

    return value2;
}
/**
****************************************************************************************************
*   Min
*
*   @brief
*       Return the smaller of two signed values
****************************************************************************************************
*/
static inline INT_32 Min(
    INT_32 value1,
    INT_32 value2)
{
    if (value1 < value2)
    {
        return value1;
    }

    return value2;
}
/**
****************************************************************************************************
*   Max
*
*   @brief
*       Return the larger of two unsigned values
****************************************************************************************************
*/
static inline UINT_32 Max(
    UINT_32 value1,
    UINT_32 value2)
{
    if (value1 > value2)
    {
        return value1;
    }

    return value2;
}
/**
****************************************************************************************************
*   Max
*
*   @brief
*       Return the larger of two signed values
****************************************************************************************************
*/
static inline INT_32 Max(
    INT_32 value1,
    INT_32 value2)
{
    if (value1 > value2)
    {
        return value1;
    }

    return value2;
}
/**
****************************************************************************************************
*   NextPow2
*
*   @brief
*       Return the smallest power of two that is not less than dim (1 for dim == 0).
*
*   @note
*       Inputs above 0x7fffffff trigger ADDR_ASSERT_ALWAYS() and yield 0x80000000.
****************************************************************************************************
*/
static inline UINT_32 NextPow2(
    UINT_32 dim)        ///< [in] dimension of miplevel
{
    // Out-of-range input: no larger 32-bit power of two exists.
    if (dim > 0x7fffffff)
    {
        ADDR_ASSERT_ALWAYS();
        return 0x80000000;
    }

    UINT_32 pow2 = 1;

    for (; pow2 < dim; pow2 <<= 1)
    {
        // keep doubling until dim is covered
    }

    return pow2;
}
/**
****************************************************************************************************
*   Log2NonPow2
*
*   @brief
*       Compute the base-2 logarithm rounded down; works for any input, power of two or not.
*       Returns 0 for both x == 0 and x == 1.
****************************************************************************************************
*/
static inline UINT_32 Log2NonPow2(
    UINT_32 x)      ///< [in] the value should calculate log based 2
{
    UINT_32 shifts = 0;

    // Count how many times the value can be halved before reaching 1 (or 0).
    for (UINT_32 rest = x; rest > 1; rest >>= 1)
    {
        ++shifts;
    }

    return shifts;
}
/**
****************************************************************************************************
*   Log2
*
*   @brief
*       Compute the base-2 logarithm of a value that is asserted to be a power of two.
****************************************************************************************************
*/
static inline UINT_32 Log2(
    UINT_32 x)      ///< [in] the value should calculate log based 2
{
    // The input is expected to be a power of two.
    ADDR_ASSERT(IsPow2(x));

    // Position of the highest set bit, found by shifting until at most 1 remains.
    UINT_32 exponent = 0;

    while ((x >> exponent) > 1)
    {
        ++exponent;
    }

    return exponent;
}
/**
****************************************************************************************************
*   QLog2
*
*   @brief
*       Base-2 logarithm for the values 1, 2, 4, 8 and 16 only.
*
*   @note
*       Any other input triggers ADDR_ASSERT_ALWAYS() and yields 0.
****************************************************************************************************
*/
static inline UINT_32 QLog2(
    UINT_32 x)      ///< [in] the value should calculate log based 2
{
    ADDR_ASSERT(x <= 16);

    // Try 2^0 .. 2^4 in turn.
    for (UINT_32 exponent = 0; exponent <= 4; ++exponent)
    {
        if (x == (1u << exponent))
        {
            return exponent;
        }
    }

    // Not one of the supported powers of two.
    ADDR_ASSERT_ALWAYS();
    return 0;
}
/**
****************************************************************************************************
*   SafeAssign
*
*   @brief
*       Store a UINT_32 through a pointer, doing nothing when the pointer is null
****************************************************************************************************
*/
static inline VOID SafeAssign(
    UINT_32*    pLVal,  ///< [in] Pointer to left val
    UINT_32     rVal)   ///< [in] Right value
{
    if (!pLVal)
    {
        return;     // nothing to write to
    }

    *pLVal = rVal;
}
/**
****************************************************************************************************
* SafeAssign
*
* @brief
* NULL pointer safe assignment for 64bit values
****************************************************************************************************
*/
static inline VOID SafeAssign(
    UINT_64*    pLVal,  ///< [out] destination; may be null, in which case nothing is written
    UINT_64     rVal)   ///< [in] 64-bit value to store
{
    // Skip the store when the caller did not supply a destination.
    if (!pLVal)
    {
        return;
    }

    *pLVal = rVal;
}
/**
****************************************************************************************************
* SafeAssign
*
* @brief
* NULL pointer safe assignment for AddrTileMode
****************************************************************************************************
*/
static inline VOID SafeAssign(
    AddrTileMode*   pLVal,  ///< [out] destination; may be null, in which case nothing is written
    AddrTileMode    rVal)   ///< [in] tile mode to store
{
    // Skip the store when the caller did not supply a destination.
    if (!pLVal)
    {
        return;
    }

    *pLVal = rVal;
}
/**
****************************************************************************************************
* RoundHalf
*
* @brief
* return (x + 1) / 2
****************************************************************************************************
*/
static inline UINT_32 RoundHalf(
    UINT_32 x)      ///< [in] input value, expected to be non-zero
{
    ADDR_ASSERT(x != 0);

    // Halve and round up. Adding the low bit after the shift, rather than adding 1
    // before it, keeps the result correct even when x is the largest 32-bit value.
    const UINT_32 half     = x >> 1;
    const UINT_32 roundBit = x & 1;

    return half + roundBit;
}
/**
****************************************************************************************************
* SumGeo
*
* @brief
* Calculate sum of a geometric progression whose ratio is 1/2
****************************************************************************************************
*/
static inline UINT_32 SumGeo(
    UINT_32 base,   ///< [in] First term in the geometric progression
    UINT_32 num)    ///< [in] Number of terms to be added into sum
{
    ADDR_ASSERT(base > 0);

    UINT_32 total = 0;
    UINT_32 taken = 0;

    // Accumulate terms, halving (rounding up) each time, until either all requested
    // terms are consumed or the term has shrunk to 1.
    while ((taken < num) && (base > 1))
    {
        total += base;
        base   = RoundHalf(base);
        ++taken;
    }

    // Every term still outstanding contributes exactly 1.
    return total + (num - taken);
}
/**
****************************************************************************************************
* GetBit
*
* @brief
* Extract bit N value (0 or 1) of a UINT32 value.
****************************************************************************************************
*/
static inline UINT_32 GetBit(
    UINT_32 u32,    ///< [in] value to read the bit from
    UINT_32 pos)    ///< [in] bit position counted from the LSB, valid range is [0..31]
{
    ADDR_ASSERT(pos <= 31);

    // Bring the requested bit down to position 0 and discard everything above it.
    const UINT_32 shifted = u32 >> pos;
    return shifted & 0x1;
}
/**
****************************************************************************************************
* GetBits
*
* @brief
* Copy 'bitsNum' bits from src start from srcStartPos into destination from dstStartPos
* srcStartPos: 0~31 for UINT_32
* bitsNum : 1~32 for UINT_32
* dstStartPos: 0~31 for UINT_32
* src start position
* |
* src : b[31] b[30] b[29] ... ... ... ... ... ... ... ... b[end]..b[beg] ... b[1] b[0]
* || Bits num || copy length || Bits num ||
* dst : b[31] b[30] b[29] ... b[end]..b[beg] ... ... ... ... ... ... ... ... b[1] b[0]
* |
* dst start position
****************************************************************************************************
*/
static inline UINT_32 GetBits(
    UINT_32 src,            ///< [in] value the bit field is taken from
    UINT_32 srcStartPos,    ///< [in] lowest bit of the field inside src
    UINT_32 bitsNum,        ///< [in] width of the field in bits (at least 1)
    UINT_32 dstStartPos)    ///< [in] lowest bit of the field inside the returned value
{
    ADDR_ASSERT((srcStartPos < 32) && (dstStartPos < 32) && (bitsNum > 0));
    ADDR_ASSERT((bitsNum + dstStartPos <= 32) && (bitsNum + srcStartPos <= 32));

    // Number of bits above the field once it is aligned to bit 0.
    const UINT_32 spare = 32 - bitsNum;

    // Left-justify the field so that everything above it falls off the top ...
    const UINT_32 leftJustified = (src >> srcStartPos) << spare;

    // ... then slide it down to its destination; the bits on both sides end up zero.
    return leftJustified >> (spare - dstStartPos);
}
/**
****************************************************************************************************
* MortonGen2d
*
* @brief
* Generate 2D Morton interleave code with num lowest bits in each channel
****************************************************************************************************
*/
static inline UINT_32 MortonGen2d(
    UINT_32 x,      ///< [in] First channel
    UINT_32 y,      ///< [in] Second channel
    UINT_32 num)    ///< [in] Number of bits extracted from each channel
{
    UINT_32 code = 0;

    // Bit k of y lands on even position 2k; bit k of x lands on the odd position above it.
    for (UINT_32 bit = 0; bit < num; ++bit)
    {
        const UINT_32 yPos = 2 * bit;

        code |= GetBit(y, bit) << yPos;
        code |= GetBit(x, bit) << (yPos + 1);
    }

    return code;
}
/**
****************************************************************************************************
* MortonGen3d
*
* @brief
* Generate 3D Morton interleave code with num lowest bits in each channel
****************************************************************************************************
*/
static inline UINT_32 MortonGen3d(
    UINT_32 x,      ///< [in] First channel
    UINT_32 y,      ///< [in] Second channel
    UINT_32 z,      ///< [in] Third channel
    UINT_32 num)    ///< [in] Number of bits extracted from each channel
{
    UINT_32 code = 0;

    // Each source bit index k owns a group of three output bits: z at 3k, y at 3k+1, x at 3k+2.
    for (UINT_32 bit = 0; bit < num; ++bit)
    {
        const UINT_32 zPos = 3 * bit;

        code |= GetBit(z, bit) << zPos;
        code |= GetBit(y, bit) << (zPos + 1);
        code |= GetBit(x, bit) << (zPos + 2);
    }

    return code;
}
/**
****************************************************************************************************
* ReverseBitVector
*
* @brief
* Return reversed lowest num bits of v: v[0]v[1]...v[num-2]v[num-1]
****************************************************************************************************
*/
static inline UINT_32 ReverseBitVector(
    UINT_32 v,      ///< [in] Reverse operation base value
    UINT_32 num)    ///< [in] Number of bits used in reverse operation
{
    UINT_32 flipped = 0;
    UINT_32 dstBit  = 0;

    // Output bit dstBit takes its value from the mirrored input bit (num - 1 - dstBit).
    while (dstBit < num)
    {
        const UINT_32 srcBit = num - 1 - dstBit;

        flipped |= GetBit(v, srcBit) << dstBit;
        ++dstBit;
    }

    return flipped;
}
/**
****************************************************************************************************
* FoldXor2d
*
* @brief
* Xor bit vector v[num-1]v[num-2]...v[1]v[0] with v[num]v[num+1]...v[2*num-2]v[2*num-1]
****************************************************************************************************
*/
static inline UINT_32 FoldXor2d(
    UINT_32 v,      ///< [in] Xor operation base value
    UINT_32 num)    ///< [in] Number of bits used in fold xor operation
{
    // Low half: the num least-significant bits of v, kept in place.
    const UINT_32 lowMask = (1 << num) - 1;
    const UINT_32 lowPart = v & lowMask;

    // High half: the next num bits of v, taken in reversed bit order.
    const UINT_32 highPartReversed = ReverseBitVector(v >> num, num);

    return lowPart ^ highPartReversed;
}
/**
****************************************************************************************************
* DeMort
*
* @brief
* Return v[0] | v[2] | v[4] | v[6]... | v[2*num - 2]
****************************************************************************************************
*/
static inline UINT_32 DeMort(
    UINT_32 v,      ///< [in] DeMort operation base value
    UINT_32 num)    ///< [in] Number of even-position bits to gather
{
    UINT_32 packed = 0;

    // Pick the bit at even position 2k of v and move it down by k, so it ends up at position k.
    for (UINT_32 k = 0; k < num; ++k)
    {
        const UINT_32 evenBit = v & (1 << (k << 1));

        packed |= (evenBit >> k);
    }

    return packed;
}
/**
****************************************************************************************************
* FoldXor3d
*
* @brief
* v[0]...v[num-1] ^ v[3*num-1]v[3*num-3]...v[num+2]v[num] ^ v[3*num-2]...v[num+1]v[num-1]
****************************************************************************************************
*/
static inline UINT_32 FoldXor3d(
    UINT_32 v,      ///< [in] Xor operation base value
    UINT_32 num)    ///< [in] Number of bits used in fold xor operation
{
    // The num least-significant bits of v, kept in place.
    const UINT_32 lowPart = v & ((1 << num) - 1);

    // Every second bit starting at bit num, gathered and then bit-reversed.
    const UINT_32 foldA = ReverseBitVector(DeMort(v >> num, num), num);

    // Every second bit starting at bit num + 1, gathered and then bit-reversed.
    const UINT_32 foldB = ReverseBitVector(DeMort(v >> (num + 1), num), num);

    return lowPart ^ foldA ^ foldB;
}
/**
****************************************************************************************************
* InitChannel
*
* @brief
* Set channel initialization value via a return value
****************************************************************************************************
*/
static inline ADDR_CHANNEL_SETTING InitChannel(
    UINT_32 valid,      ///< [in] valid setting
    UINT_32 channel,    ///< [in] channel setting
    UINT_32 index)      ///< [in] index setting
{
    // Build the setting field by field and hand it back by value.
    ADDR_CHANNEL_SETTING setting;

    setting.index   = index;
    setting.channel = channel;
    setting.valid   = valid;

    return setting;
}
/**
****************************************************************************************************
* InitChannel
*
* @brief
* Set channel initialization value via channel pointer
****************************************************************************************************
*/
static inline VOID InitChannel(
    UINT_32                 valid,      ///< [in] valid setting
    UINT_32                 channel,    ///< [in] channel setting
    UINT_32                 index,      ///< [in] index setting
    ADDR_CHANNEL_SETTING*   pChanSet)   ///< [out] channel setting to be initialized (must not be null)
{
    // Write the three fields straight into the caller's setting.
    ADDR_CHANNEL_SETTING& target = *pChanSet;

    target.valid   = valid;
    target.channel = channel;
    target.index   = index;
}
/**
****************************************************************************************************
* InitChannel
*
* @brief
* Set channel initialization value via another channel
****************************************************************************************************
*/
static inline VOID InitChannel(
    ADDR_CHANNEL_SETTING*   pChanDst,   ///< [out] channel setting to be initialized (must not be null)
    ADDR_CHANNEL_SETTING*   pChanSrc)   ///< [in] channel setting to be copied from (must not be null)
{
    // Field-wise copy of valid/channel/index from the source setting into the destination.
    ADDR_CHANNEL_SETTING&       dst = *pChanDst;
    const ADDR_CHANNEL_SETTING& src = *pChanSrc;

    dst.valid   = src.valid;
    dst.channel = src.channel;
    dst.index   = src.index;
}
/**
****************************************************************************************************
* GetMaxValidChannelIndex
*
* @brief
* Get max valid index for a specific channel
****************************************************************************************************
*/
static inline UINT_32 GetMaxValidChannelIndex(
    const ADDR_CHANNEL_SETTING* pChanSet,       ///< [in] array of channel settings to search
    UINT_32                     searchCount,    ///< [in] number of channel settings to be searched
    UINT_32                     channel)        ///< [in] channel to be searched
{
    // Largest index seen so far; stays 0 when no valid entry uses the requested channel.
    UINT_32 highest = 0;

    for (UINT_32 slot = 0; slot < searchCount; ++slot)
    {
        const ADDR_CHANNEL_SETTING& entry = pChanSet[slot];

        // Only valid entries on the requested channel take part in the maximum.
        if (!entry.valid || (entry.channel != channel))
        {
            continue;
        }

        highest = Max(highest, static_cast<UINT_32>(entry.index));
    }

    return highest;
}
/**
****************************************************************************************************
* GetCoordActiveMask
*
* @brief
* Get bit mask which indicates which positions in the equation match the target coord
****************************************************************************************************
*/
static inline UINT_32 GetCoordActiveMask(
    const ADDR_CHANNEL_SETTING* pChanSet,       ///< [in] array of channel settings to search
    UINT_32                     searchCount,    ///< [in] number of channel settings to be searched
    UINT_32                     channel,        ///< [in] channel to be searched
    UINT_32                     index)          ///< [in] index to be searched
{
    UINT_32 activeMask = 0;

    // Bit `slot` of the result is set when entry `slot` is valid and matches both the
    // requested channel and the requested index.
    for (UINT_32 slot = 0; slot < searchCount; ++slot)
    {
        const ADDR_CHANNEL_SETTING& entry = pChanSet[slot];

        if (entry.valid != TRUE)
        {
            continue;
        }

        if ((entry.channel == channel) && (entry.index == index))
        {
            activeMask |= (1 << slot);
        }
    }

    return activeMask;
}
/**
****************************************************************************************************
* ShiftCeil
*
* @brief
* Apply right-shift with ceiling
****************************************************************************************************
*/
static inline UINT_32 ShiftCeil(
    UINT_32 a,      ///< [in] value to be right-shifted
    UINT_32 b)      ///< [in] number of bits to shift
{
    // Plain (truncating) shift result.
    const UINT_32 truncated = a >> b;

    // The bits that the shift throws away; any set bit here means the result must round up.
    const UINT_32 discarded = a & ((1 << b) - 1);

    return (discarded != 0) ? (truncated + 1) : truncated;
}
} // Addr
#endif // __ADDR_COMMON_H__
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
@@ -0,0 +1,279 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/**
****************************************************************************************************
* @file addrelemlib.h
* @brief Contains the class for element/pixel related functions.
****************************************************************************************************
*/
#ifndef __ELEM_LIB_H__
#define __ELEM_LIB_H__
#include "addrinterface.h"
#include "addrobject.h"
#include "addrcommon.h"
namespace Addr
{
class Lib;
// The masks for property bits within the Properties INT_32
// Per-format component property flags. The named one-bit fields and `value` share the same
// storage, so the whole flag set can be read or written at once through `value`.
union ComponentFlags
{
struct
{
UINT_32 byteAligned : 1; ///< all components are byte aligned
UINT_32 exportNorm : 1; ///< components support R6xx NORM compression
UINT_32 floatComp : 1; ///< there is at least one floating point component
};
UINT_32 value; ///< all flag bits viewed as a single 32-bit word
};
// Copy from legacy lib's NumberType
// Numeric interpretation of a single pixel component.
// The enumerators are grouped into ranges and several comments below state that other code
// depends on which enumerator comes first/last in a group, so the order must not be changed.
enum NumberType
{
// The following number types have the range [-1..1]
ADDR_NO_NUMBER, // This component doesn't exist and has no default value
ADDR_EPSILON, // Force component value to integer 0x00000001
ADDR_ZERO, // Force component value to integer 0x00000000
ADDR_ONE, // Force component value to floating point 1.0
// Above values don't have any bits per component (keep ADDR_ONE the last of these)
ADDR_UNORM, // Unsigned normalized (repeating fraction) full precision
ADDR_SNORM, // Signed normalized (repeating fraction) full precision
ADDR_GAMMA, // Gamma-corrected, full precision
ADDR_UNORM_R5XXRB, // Unsigned normalized (repeating fraction) for r5xx RB
ADDR_SNORM_R5XXRB, // Signed normalized (repeating fraction) for r5xx RB
ADDR_GAMMA_R5XXRB, // Gamma-corrected for r5xx RB (note: unnormalized value)
ADDR_UNORM_R5XXBC, // Unsigned normalized (repeating fraction) for r5xx BC
ADDR_SNORM_R5XXBC, // Signed normalized (repeating fraction) for r5xx BC
ADDR_GAMMA_R5XXBC, // Gamma-corrected for r5xx BC (note: unnormalized value)
ADDR_UNORM_R6XX, // Unsigned normalized (repeating fraction) for R6xx
ADDR_UNORM_R6XXDB, // Unorms for 24-bit depth: one value differs from ADDR_UNORM_R6XX
ADDR_SNORM_R6XX, // Signed normalized (repeating fraction) for R6xx
ADDR_GAMMA8_R6XX, // Gamma-corrected for r6xx
ADDR_GAMMA8_R7XX_TP, // Gamma-corrected for r7xx TP 12bit unorm 8.4.
ADDR_U4FLOATC, // Unsigned float: 4-bit exponent, bias=15, no NaN, clamp [0..1]
ADDR_GAMMA_4SEG, // Gamma-corrected, four segment approximation
ADDR_U0FIXED, // Unsigned 0.N-bit fixed point
// The following number types have large ranges (LEAVE ADDR_USCALED first or fix Finish routine)
ADDR_USCALED, // Unsigned integer converted to/from floating point
ADDR_SSCALED, // Signed integer converted to/from floating point
ADDR_USCALED_R5XXRB, // Unsigned integer to/from floating point for r5xx RB
ADDR_SSCALED_R5XXRB, // Signed integer to/from floating point for r5xx RB
ADDR_UINT_BITS, // Keep in unsigned integer form, clamped to specified range
ADDR_SINT_BITS, // Keep in signed integer form, clamped to specified range
ADDR_UINTBITS, // @@ remove Keep in unsigned integer form, use modulus to reduce bits
ADDR_SINTBITS, // @@ remove Keep in signed integer form, use modulus to reduce bits
// The following number types and ADDR_U4FLOATC have exponents
// (LEAVE ADDR_S8FLOAT first or fix Finish routine)
ADDR_S8FLOAT, // Signed floating point with 8-bit exponent, bias=127
ADDR_S8FLOAT32, // 32-bit IEEE float, passes through NaN values
ADDR_S5FLOAT, // Signed floating point with 5-bit exponent, bias=15
ADDR_S5FLOATM, // Signed floating point with 5-bit exponent, bias=15, no NaN/Inf
ADDR_U5FLOAT, // Floating point with 5-bit exponent, bias=15
              // NOTE(review): the original comment said "Signed", but the U prefix suggests
              // unsigned (compare ADDR_S5FLOAT) - confirm
ADDR_U3FLOATM, // Unsigned floating point with 3-bit exponent, bias=3
ADDR_S5FIXED, // Signed 5.N-bit fixed point, with rounding
ADDR_END_NUMBER // Used for range comparisons
};
// Copy from legacy lib's AddrElement
// How the elements of a format are stored/packed. The enumerators are grouped by capability
// (see the group comments below); ADDR_END_ELEMENT is a sentinel for range comparisons.
enum ElemMode
{
// These formats allow both packing and unpacking
ADDR_ROUND_BY_HALF, // add 1/2 and truncate when packing this element
ADDR_ROUND_TRUNCATE, // truncate toward 0 for sign/mag, else toward neg
ADDR_ROUND_DITHER, // Pack by dithering -- requires (x,y) position
// These formats only allow unpacking, no packing
ADDR_UNCOMPRESSED, // Elements are not compressed: one data element per pixel/texel
ADDR_EXPANDED, // Elements are split up and stored in multiple data elements
ADDR_PACKED_STD, // Elements are compressed into ExpandX by ExpandY data elements
ADDR_PACKED_REV, // Like ADDR_PACKED, but X order of pixels is reversed
ADDR_PACKED_GBGR, // Elements are compressed 4:2:2 in G1B_G0R order (high to low)
ADDR_PACKED_BGRG, // Elements are compressed 4:2:2 in BG1_RG0 order (high to low)
ADDR_PACKED_BC1, // Each data element is uncompressed to a 4x4 pixel/texel array
ADDR_PACKED_BC2, // Each data element is uncompressed to a 4x4 pixel/texel array
ADDR_PACKED_BC3, // Each data element is uncompressed to a 4x4 pixel/texel array
ADDR_PACKED_BC4, // Each data element is uncompressed to a 4x4 pixel/texel array
ADDR_PACKED_BC5, // Each data element is uncompressed to a 4x4 pixel/texel array
ADDR_PACKED_ETC2_64BPP, // ETC2 formats that use 64bpp to represent each 4x4 block
ADDR_PACKED_ETC2_128BPP, // ETC2 formats that use 128bpp to represent each 4x4 block
ADDR_PACKED_ASTC, // Various ASTC formats, all are 128bpp with varying block sizes
// These formats provide various kinds of compression
ADDR_ZPLANE_R5XX, // Compressed Zplane using r5xx architecture format
ADDR_ZPLANE_R6XX, // Compressed Zplane using r6xx architecture format
//@@ Fill in the compression modes
ADDR_END_ELEMENT // Used for range comparisons
};
// Describes how depth (z) and stencil data are laid out relative to each other.
// ElemLib::IsDepthStencilTilePlanar() reports TRUE only for ADDR_DEPTH_PLANAR_R600.
enum DepthPlanarType
{
ADDR_DEPTH_PLANAR_NONE = 0, // No planar z/stencil
ADDR_DEPTH_PLANAR_R600 = 1, // R600 z and stencil planes are stored within a tile
ADDR_DEPTH_PLANAR_R800 = 2, // R800 has separate z and stencil planes
};
/**
****************************************************************************************************
* PixelFormatInfo
*
* @brief
* Per component info
*
****************************************************************************************************
*/
// Description of up to four components of a pixel format plus the format's element mode.
// NOTE(review): the per-member meanings below are inferred from the member names and from
// the ElemLib helpers that take a PixelFormatInfo* - confirm against their implementations.
struct PixelFormatInfo
{
UINT_32 compBit[4]; ///< presumably the bit width of each component
NumberType numType[4]; ///< number type of each component
UINT_32 compStart[4]; ///< presumably the starting bit position of each component
ElemMode elemMode; ///< element mode of the format
UINT_32 comps; ///< Number of components
};
/**
****************************************************************************************************
* @brief This class contains ASIC-independent element-related attributes and operations
****************************************************************************************************
*/
// Element (pixel format) helper owned by an Addr::Lib instance. Instances are obtained
// through the static Create() factory; the constructor is protected.
class ElemLib : public Object
{
protected:
// Protected so that objects are only built through Create() (or by derived classes).
ElemLib(Lib* pAddrLib);
public:
/// Makes this class virtual
virtual ~ElemLib();
/// Factory: builds an ElemLib for the given parent address library.
static ElemLib* Create(
const Lib* pAddrLib);
/// The implementation is only for R6xx/R7xx, so make it virtual in case we need for R8xx
/// NOTE(review): despite the sentence above, this method is currently declared non-virtual.
BOOL_32 PixGetExportNorm(
AddrColorFormat colorFmt,
AddrSurfaceNumber numberFmt, AddrSurfaceSwap swap) const;
/// Below method are asic independent, so make them just static.
/// Remove static if we need different operation in hwl.
/// NOTE(review): several of the methods below are const members rather than static.
/// Converts two float components (per the depth format) into a packed pixel at pPixel.
VOID Flt32ToDepthPixel(
AddrDepthFormat format, const ADDR_FLT_32 comps[2], UINT_8 *pPixel) const;
/// Converts four float components (per colour format/number type/swap) into a packed pixel.
VOID Flt32ToColorPixel(
AddrColorFormat format, AddrSurfaceNumber surfNum, AddrSurfaceSwap surfSwap,
const ADDR_FLT_32 comps[4], UINT_8 *pPixel) const;
/// Converts one float value to an integer of `bits` bits according to numberType.
static VOID Flt32sToInt32s(
ADDR_FLT_32 value, UINT_32 bits, NumberType numberType, UINT_32* pResult);
/// Packs numComps integer components into the pixel bytes at pPixel.
static VOID Int32sToPixel(
UINT_32 numComps, UINT_32* pComps, UINT_32* pCompBits, UINT_32* pCompStart,
ComponentFlags properties, UINT_32 resultBits, UINT_8* pPixel);
/// Fills pInfo with per-component information for a colour format.
VOID PixGetColorCompInfo(
AddrColorFormat format, AddrSurfaceNumber number, AddrSurfaceSwap swap,
PixelFormatInfo* pInfo) const;
/// Fills pInfo with per-component information for a depth format.
VOID PixGetDepthCompInfo(
AddrDepthFormat format, PixelFormatInfo* pInfo) const;
/// Returns the bits per pixel of a format; every output pointer is optional (defaults to NULL).
UINT_32 GetBitsPerPixel(
AddrFormat format, ElemMode* pElemMode = NULL,
UINT_32* pExpandX = NULL, UINT_32* pExpandY = NULL, UINT_32* pBitsUnused = NULL);
/// Sets the four clear components in comps according to the clearColor/float32 flags.
static VOID SetClearComps(
ADDR_FLT_32 comps[4], BOOL_32 clearColor, BOOL_32 float32);
/// Adjusts bpp/pitch/width/height in place for the given element mode and expand factors.
VOID AdjustSurfaceInfo(
ElemMode elemMode, UINT_32 expandX, UINT_32 expandY,
UINT_32* pBpp, UINT_32* pBasePitch, UINT_32* pWidth, UINT_32* pHeight);
/// Counterpart of AdjustSurfaceInfo: restores bpp/width/height in place.
VOID RestoreSurfaceInfo(
ElemMode elemMode, UINT_32 expandX, UINT_32 expandY,
UINT_32* pBpp, UINT_32* pWidth, UINT_32* pHeight);
/// Checks if depth and stencil are planar inside a tile
/// (TRUE only when m_depthPlanarType is ADDR_DEPTH_PLANAR_R600).
BOOL_32 IsDepthStencilTilePlanar()
{
return (m_depthPlanarType == ADDR_DEPTH_PLANAR_R600) ? TRUE : FALSE;
}
/// Sets m_configFlags, copied from AddrLib
VOID SetConfigFlags(ConfigFlags flags)
{
m_configFlags = flags;
}
/// Format classification predicates; each takes only the format and needs no instance.
static BOOL_32 IsCompressed(AddrFormat format);
static BOOL_32 IsBlockCompressed(AddrFormat format);
static BOOL_32 IsExpand3x(AddrFormat format);
static BOOL_32 IsMacroPixelPacked(AddrFormat format);
protected:
/// Helpers that populate a PixelFormatInfo.
/// Records the four component values c0..c3 and the element mode in pInfo.
static VOID GetCompBits(
UINT_32 c0, UINT_32 c1, UINT_32 c2, UINT_32 c3,
PixelFormatInfo* pInfo,
ElemMode elemMode = ADDR_ROUND_BY_HALF);
/// Fills the component number types in pInfo for the given format and surface number type.
static VOID GetCompType(
AddrColorFormat format, AddrSurfaceNumber numType,
PixelFormatInfo* pInfo);
/// Applies the given surface swap mode to the components in pInfo.
static VOID GetCompSwap(
AddrSurfaceSwap swap, PixelFormatInfo* pInfo);
/// Exchanges components c0 and c1 inside pInfo.
static VOID SwapComps(
UINT_32 c0, UINT_32 c1, PixelFormatInfo* pInfo);
private:
UINT_32 m_fp16ExportNorm; ///< If allow FP16 to be reported as EXPORT_NORM
DepthPlanarType m_depthPlanarType; ///< Depth/stencil layout; read by IsDepthStencilTilePlanar()
ConfigFlags m_configFlags; ///< Copy of AddrLib's configFlags
Addr::Lib* const m_pAddrLib; ///< Pointer to parent addrlib instance
};
} //Addr
#endif
@@ -0,0 +1,660 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/**
****************************************************************************************************
* @file addrlib.cpp
* @brief Contains the implementation for the Addr::Lib class.
****************************************************************************************************
*/
#include "addrinterface.h"
#include "addrlib.h"
#include "addrcommon.h"
#if defined(__APPLE__)
// Software 64-by-32-bit division using shift-and-subtract.
// Returns the remainder of n / base, truncated to 32 bits. The quotient is accumulated in
// `res` but never reaches the caller: `n` is passed by value, so the final `n = res` has no
// effect outside this function.
// base is expected to be non-zero (a zero base would divide by zero in the high-word step).
UINT_32 div64_32(UINT_64 n, UINT_32 base)
{
UINT_64 rem = n;
UINT_64 b = base;
UINT_64 res, d = 1;
UINT_32 high = rem >> 32;
res = 0;
// Reduce the upper 32 bits first so the remaining dividend is smaller than base << 32.
if (high >= base)
{
high /= base;
res = (UINT_64) high << 32;
rem -= (UINT_64) (high * base) << 32;
}
// Scale the divisor (b) and its matching quotient weight (d) up by powers of two until b
// reaches rem or its top bit becomes set (the signed compare stops before overflow).
while (((INT_64)b > 0) && (b < rem))
{
b = b + b;
d = d + d;
}
// Classic restoring division: subtract the scaled divisor wherever it fits, then halve it.
do
{
if (rem >= b)
{
rem -= b;
res += d;
}
b >>= 1;
d >>= 1;
} while (d);
// Quotient is stored into the by-value parameter only; it is not visible to the caller.
n = res;
return rem;
}
// C-linkage definition of __umoddi3 (only compiled when __APPLE__ is defined): forwards to
// div64_32 and returns its result, i.e. the remainder of n / base.
// NOTE(review): the compiler-runtime routine of this name conventionally takes and returns
// 64-bit operands; here the divisor and the result are 32-bit - confirm this is intended.
extern "C"
UINT_32 __umoddi3(UINT_64 n, UINT_32 base)
{
return div64_32(n, base);
}
#endif // __APPLE__
namespace Addr
{
////////////////////////////////////////////////////////////////////////////////////////////////////
// Constructor/Destructor
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
****************************************************************************************************
* Lib::Lib
*
* @brief
* Constructor for the AddrLib class
*
****************************************************************************************************
*/
Lib::Lib() :
    m_class(BASE_ADDRLIB),
    m_chipFamily(ADDR_CHIP_FAMILY_IVLD),
    m_chipRevision(0),
    m_version(ADDRLIB_VERSION),
    m_pipes(0),
    m_banks(0),
    m_pipeInterleaveBytes(0),
    m_rowSize(0),
    m_minPitchAlignPixels(1),
    m_maxSamples(8),
    m_pElemLib(NULL)
{
    // Start with every configuration flag cleared.
    m_configFlags.value = 0;

    // GetMaxAlignments()/GetMaxMetaAlignments() treat 0 as "not computed yet", so make sure
    // the cached alignments really are 0 until SetMaxAlignments() fills them in. They are
    // assigned in the body rather than the initializer list so that this does not depend on
    // the member declaration order.
    m_maxBaseAlign     = 0;
    m_maxMetaBaseAlign = 0;
}
/**
****************************************************************************************************
* Lib::Lib
*
* @brief
* Constructor for the AddrLib class with hClient as parameter
*
****************************************************************************************************
*/
Lib::Lib(const Client* pClient) :
    Object(pClient),
    m_class(BASE_ADDRLIB),
    m_chipFamily(ADDR_CHIP_FAMILY_IVLD),
    m_chipRevision(0),
    m_version(ADDRLIB_VERSION),
    m_pipes(0),
    m_banks(0),
    m_pipeInterleaveBytes(0),
    m_rowSize(0),
    m_minPitchAlignPixels(1),
    m_maxSamples(8),
    m_pElemLib(NULL)
{
    // Start with every configuration flag cleared.
    m_configFlags.value = 0;

    // GetMaxAlignments()/GetMaxMetaAlignments() treat 0 as "not computed yet", so make sure
    // the cached alignments really are 0 until SetMaxAlignments() fills them in. They are
    // assigned in the body rather than the initializer list so that this does not depend on
    // the member declaration order.
    m_maxBaseAlign     = 0;
    m_maxMetaBaseAlign = 0;
}
/**
****************************************************************************************************
* Lib::~Lib
*
* @brief
* Destructor for the AddrLib class
*
****************************************************************************************************
*/
Lib::~Lib()
{
    // Nothing to release when no element library was ever attached.
    if (m_pElemLib == NULL)
    {
        return;
    }

    // The element library is owned by this object; destroy it and clear the pointer.
    delete m_pElemLib;
    m_pElemLib = NULL;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Initialization/Helper
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
****************************************************************************************************
* Lib::Create
*
* @brief
* Creates and initializes AddrLib object.
*
* @return
* ADDR_E_RETURNCODE
****************************************************************************************************
*/
// Factory for the address library.
// Steps: (1) optionally validate the size fields of both structures, (2) pick the hardware
// layer from chipEngine/chipFamily, (3) copy the create flags and chip identification into
// the new object, (4) let the hardware layer initialise its global parameters, (5) create
// the element library, (6) publish the handle, equation table and max alignments.
// On any failure the partially built object is deleted and pCreateOut->hLib is set to NULL.
// Both pointers are dereferenced unconditionally, so neither may be NULL.
ADDR_E_RETURNCODE Lib::Create(
const ADDR_CREATE_INPUT* pCreateIn, ///< [in] pointer to ADDR_CREATE_INPUT
ADDR_CREATE_OUTPUT* pCreateOut) ///< [out] pointer to ADDR_CREATE_OUTPUT
{
Lib* pLib = NULL;
ADDR_E_RETURNCODE returnCode = ADDR_OK;
// The size fields are only checked when the client asked for it via fillSizeFields.
if (pCreateIn->createFlags.fillSizeFields == TRUE)
{
if ((pCreateIn->size != sizeof(ADDR_CREATE_INPUT)) ||
(pCreateOut->size != sizeof(ADDR_CREATE_OUTPUT)))
{
returnCode = ADDR_PARAMSIZEMISMATCH;
}
}
// Both memory callbacks are mandatory; without them no library object is created and the
// function ends up returning ADDR_ERROR (unless an earlier error code is already set).
if ((returnCode == ADDR_OK) &&
(pCreateIn->callbacks.allocSysMem != NULL) &&
(pCreateIn->callbacks.freeSysMem != NULL))
{
Client client = {
pCreateIn->hClient,
pCreateIn->callbacks
};
// Select the hardware layer. Unknown engines/families assert and leave pLib NULL.
switch (pCreateIn->chipEngine)
{
case CIASICIDGFXENGINE_SOUTHERNISLAND:
switch (pCreateIn->chipFamily)
{
case FAMILY_SI:
pLib = SiHwlInit(&client);
break;
case FAMILY_VI:
case FAMILY_CZ:
case FAMILY_CI:
case FAMILY_KV: // CI based fusion
pLib = CiHwlInit(&client);
break;
default:
ADDR_ASSERT_ALWAYS();
break;
}
break;
case CIASICIDGFXENGINE_ARCTICISLAND:
switch (pCreateIn->chipFamily)
{
case FAMILY_AI:
case FAMILY_RV:
pLib = Gfx9HwlInit(&client);
break;
case FAMILY_NV:
pLib = Gfx10HwlInit(&client);
break;
default:
ADDR_ASSERT_ALWAYS();
break;
}
break;
default:
ADDR_ASSERT_ALWAYS();
break;
}
}
if (pLib != NULL)
{
BOOL_32 initValid;
// Pass createFlags to configFlags first since these flags may be overwritten
pLib->m_configFlags.noCubeMipSlicesPad = pCreateIn->createFlags.noCubeMipSlicesPad;
pLib->m_configFlags.fillSizeFields = pCreateIn->createFlags.fillSizeFields;
pLib->m_configFlags.useTileIndex = pCreateIn->createFlags.useTileIndex;
pLib->m_configFlags.useCombinedSwizzle = pCreateIn->createFlags.useCombinedSwizzle;
pLib->m_configFlags.checkLast2DLevel = pCreateIn->createFlags.checkLast2DLevel;
pLib->m_configFlags.useHtileSliceAlign = pCreateIn->createFlags.useHtileSliceAlign;
pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile;
pLib->m_configFlags.forceDccAndTcCompat = pCreateIn->createFlags.forceDccAndTcCompat;
pLib->m_configFlags.nonPower2MemConfig = pCreateIn->createFlags.nonPower2MemConfig;
pLib->m_configFlags.disableLinearOpt = FALSE;
pLib->SetChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision);
pLib->SetMinPitchAlignPixels(pCreateIn->minPitchAlignPixels);
// Global parameters initialized and remaining configFlags bits are set as well
initValid = pLib->HwlInitGlobalParams(pCreateIn);
if (initValid)
{
pLib->m_pElemLib = ElemLib::Create(pLib);
}
else
{
pLib->m_pElemLib = NULL; // Don't go on allocating element lib
returnCode = ADDR_INVALIDGBREGVALUES;
}
// A missing element library (failed init or failed ElemLib::Create) aborts creation.
if (pLib->m_pElemLib == NULL)
{
delete pLib;
pLib = NULL;
ADDR_ASSERT_ALWAYS();
}
else
{
// Hand the final flags to the element library, which keeps its own copy.
pLib->m_pElemLib->SetConfigFlags(pLib->m_configFlags);
}
}
// The handle is always written: the new library on success, NULL on failure.
pCreateOut->hLib = pLib;
if ((pLib != NULL) &&
(returnCode == ADDR_OK))
{
pCreateOut->numEquations =
pLib->HwlGetEquationTableInfo(&pCreateOut->pEquationTable);
pLib->SetMaxAlignments();
}
else if ((pLib == NULL) &&
(returnCode == ADDR_OK))
{
// Unknown failures, we return the general error code
returnCode = ADDR_ERROR;
}
return returnCode;
}
/**
****************************************************************************************************
* Lib::SetChipFamily
*
* @brief
* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
* @return
* N/A
****************************************************************************************************
*/
VOID Lib::SetChipFamily(
    UINT_32 uChipFamily,        ///< [in] chip family defined in atiid.h
    UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
{
    // The hardware layer translates the external family/revision pair into a ChipFamily.
    const ChipFamily convertedFamily = HwlConvertChipFamily(uChipFamily, uChipRevision);

    // An invalid family means the hardware layer did not recognise the chip.
    ADDR_ASSERT(convertedFamily != ADDR_CHIP_FAMILY_IVLD);

    m_chipRevision = uChipRevision;
    m_chipFamily   = convertedFamily;
}
/**
****************************************************************************************************
* Lib::SetMinPitchAlignPixels
*
* @brief
* Set m_minPitchAlignPixels with input param
*
* @return
* N/A
****************************************************************************************************
*/
VOID Lib::SetMinPitchAlignPixels(
    UINT_32 minPitchAlignPixels)    ///< [in] minimum pitch alignment in pixels; 0 is treated as 1
{
    // Never store 0: substitute an alignment of one pixel.
    if (minPitchAlignPixels == 0)
    {
        m_minPitchAlignPixels = 1;
    }
    else
    {
        m_minPitchAlignPixels = minPitchAlignPixels;
    }
}
/**
****************************************************************************************************
* Lib::SetMaxAlignments
*
* @brief
* Set max alignments
*
* @return
* N/A
****************************************************************************************************
*/
// Caches the maximum base alignments reported by the hardware layer. The cached values are
// what GetMaxAlignments() and GetMaxMetaAlignments() later hand out (0 means "not available").
VOID Lib::SetMaxAlignments()
{
// Maximum base alignment for data surfaces.
m_maxBaseAlign = HwlComputeMaxBaseAlignments();
// Maximum base alignment for metadata.
m_maxMetaBaseAlign = HwlComputeMaxMetaBaseAlignments();
}
/**
****************************************************************************************************
* Lib::GetLib
*
* @brief
* Get AddrLib pointer
*
* @return
* An AddrLib class pointer
****************************************************************************************************
*/
Lib* Lib::GetLib(
    ADDR_HANDLE hLib)   ///< [in] opaque library handle
{
    // The handle is simply the Lib pointer stored by Create(); cast it back.
    Addr::Lib* const pLib = static_cast<Addr::Lib*>(hLib);
    return pLib;
}
/**
****************************************************************************************************
* Lib::GetMaxAlignments
*
* @brief
* Gets maximum alignments for data surface (include FMask)
*
* @return
* ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::GetMaxAlignments(
    ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut    ///< [out] output structure
    ) const
{
    // The size field is only validated when the library was created with fillSizeFields.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        (pOut->size != sizeof(ADDR_GET_MAX_ALIGNMENTS_OUTPUT)))
    {
        return ADDR_PARAMSIZEMISMATCH;
    }

    // A cached value of 0 means no maximum alignment is available.
    if (m_maxBaseAlign == 0)
    {
        return ADDR_NOTIMPLEMENTED;
    }

    pOut->baseAlign = m_maxBaseAlign;
    return ADDR_OK;
}
/**
****************************************************************************************************
* Lib::GetMaxMetaAlignments
*
* @brief
* Gets maximum alignments for metadata (CMask, DCC and HTile)
*
* @return
* ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::GetMaxMetaAlignments(
    ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut    ///< [out] output structure
    ) const
{
    // The size field is only validated when the library was created with fillSizeFields.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        (pOut->size != sizeof(ADDR_GET_MAX_ALIGNMENTS_OUTPUT)))
    {
        return ADDR_PARAMSIZEMISMATCH;
    }

    // A cached value of 0 means no maximum metadata alignment is available.
    if (m_maxMetaBaseAlign == 0)
    {
        return ADDR_NOTIMPLEMENTED;
    }

    pOut->baseAlign = m_maxMetaBaseAlign;
    return ADDR_OK;
}
/**
****************************************************************************************************
* Lib::Bits2Number
*
* @brief
* Concatenate an array of binary bits into a number
*
* @return
* The number combined with the array of bits
****************************************************************************************************
*/
UINT_32 Lib::Bits2Number(
    UINT_32 bitNum,     ///< [in] how many bits
    ...)                ///< [in] variable bits value starting from MSB
{
    UINT_32 number = 0;
    va_list bits_ptr;

    va_start(bits_ptr, bitNum);

    // Make room for the next bit first, then append it. Shifting before the OR (instead of
    // OR-then-shift followed by a final right shift) gives the same result for fewer than
    // 32 bits and keeps the most significant bit when exactly 32 bits are supplied.
    for (UINT_32 i = 0; i < bitNum; i++)
    {
        number = (number << 1) | va_arg(bits_ptr, UINT_32);
    }

    va_end(bits_ptr);

    return number;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Element lib
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
****************************************************************************************************
* Lib::Flt32ToDepthPixel
*
* @brief
* Convert a FLT_32 value to a depth/stencil pixel value
* @return
* ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::Flt32ToDepthPixel(
    const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,
    ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const
{
    // Both size fields are checked only when the fillSizeFields flag is set.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ELEM_FLT32TODEPTHPIXEL_INPUT)) ||
         (pOut->size != sizeof(ELEM_FLT32TODEPTHPIXEL_OUTPUT))))
    {
        return ADDR_PARAMSIZEMISMATCH;
    }

    // The pixel conversion itself is delegated to the element library.
    GetElemLib()->Flt32ToDepthPixel(pIn->format, pIn->comps, pOut->pPixel);

    UINT_32 depthBaseUnits = 0;   // scaled by 64 before being reported
    UINT_32 numDepthBits   = 0;
    UINT_32 numStencilBits = 0;

    switch (pIn->format)
    {
        case ADDR_DEPTH_16:
            numDepthBits = 16;
            break;

        case ADDR_DEPTH_32_FLOAT:
            numDepthBits = 32;
            break;

        case ADDR_DEPTH_X8_24:
        case ADDR_DEPTH_8_24:
        case ADDR_DEPTH_X8_24_FLOAT:
        case ADDR_DEPTH_8_24_FLOAT:
            depthBaseUnits = 8;
            numDepthBits   = 24;
            numStencilBits = 8;
            break;

        case ADDR_DEPTH_X24_8_32_FLOAT:
            depthBaseUnits = 8;
            numDepthBits   = 32;
            numStencilBits = 8;
            break;

        default:
            // Unlisted formats report zero for every field.
            break;
    }

    // Overwrite base since R800 has no "tileBase"
    const bool isPlanar = (GetElemLib()->IsDepthStencilTilePlanar() != FALSE);

    // The stencil base is zero for every format handled here.
    pOut->stencilBase = 0;
    pOut->depthBase   = isPlanar ? (depthBaseUnits * 64) : 0;
    pOut->depthBits   = numDepthBits;
    pOut->stencilBits = numStencilBits;

    return ADDR_OK;
}
/**
****************************************************************************************************
* Lib::Flt32ToColorPixel
*
* @brief
* Convert a FLT_32 value to a red/green/blue/alpha pixel value
* @return
* ADDR_E_RETURNCODE
****************************************************************************************************
*/
ADDR_E_RETURNCODE Lib::Flt32ToColorPixel(
    const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,
    ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const
{
    // Both size fields are checked only when the fillSizeFields flag is set.
    const bool sizeMismatch =
        (GetFillSizeFieldsFlags() == TRUE) &&
        ((pIn->size != sizeof(ELEM_FLT32TOCOLORPIXEL_INPUT)) ||
         (pOut->size != sizeof(ELEM_FLT32TOCOLORPIXEL_OUTPUT)));

    if (sizeMismatch)
    {
        return ADDR_PARAMSIZEMISMATCH;
    }

    // The conversion is performed by the element library, writing into pOut->pPixel.
    GetElemLib()->Flt32ToColorPixel(pIn->format,
                                    pIn->surfNum,
                                    pIn->surfSwap,
                                    pIn->comps,
                                    pOut->pPixel);

    return ADDR_OK;
}
/**
****************************************************************************************************
* Lib::GetExportNorm
*
* @brief
* Check whether a format can use EXPORT_NORM
* @return
* TRUE if EXPORT_NORM can be used
****************************************************************************************************
*/
BOOL_32 Lib::GetExportNorm(
    const ELEM_GETEXPORTNORM_INPUT* pIn) const
{
    // With the fillSizeFields flag set, a wrong input size yields FALSE without a query.
    if ((GetFillSizeFieldsFlags() == TRUE) &&
        (pIn->size != sizeof(ELEM_GETEXPORTNORM_INPUT)))
    {
        return FALSE;
    }

    // The answer comes from the element library.
    return GetElemLib()->PixGetExportNorm(pIn->format, pIn->num, pIn->swap);
}
/**
****************************************************************************************************
* Lib::GetBpe
*
* @brief
* Get bits-per-element for specified format
* @return
* bits-per-element of specified format
****************************************************************************************************
*/
UINT_32 Lib::GetBpe(AddrFormat format) const
{
    // Forwarded to the element library's bits-per-pixel lookup.
    const UINT_32 bitsPerElement = GetElemLib()->GetBitsPerPixel(format);

    return bitsPerElement;
}
} // Addr
@@ -0,0 +1,415 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/**
****************************************************************************************************
* @file addrlib.h
* @brief Contains the Addr::Lib base class definition.
****************************************************************************************************
*/
#ifndef __ADDR_LIB_H__
#define __ADDR_LIB_H__
#include "addrinterface.h"
#include "addrobject.h"
#include "addrelemlib.h"
#include "amdgpu_asic_addr.h"
// Fallback graphics-engine ID values. Each macro is defined here only when it has not
// already been defined before this point, so an existing definition always wins.
#ifndef CIASICIDGFXENGINE_R600
#define CIASICIDGFXENGINE_R600 0x00000006
#endif
#ifndef CIASICIDGFXENGINE_R800
#define CIASICIDGFXENGINE_R800 0x00000008
#endif
#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
#endif
#ifndef CIASICIDGFXENGINE_ARCTICISLAND
#define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D
#endif
namespace Addr
{
/**
****************************************************************************************************
* @brief Neutral enums that define pipeinterleave
****************************************************************************************************
*/
enum PipeInterleave
{
    ADDR_PIPEINTERLEAVE_256B = 1 << 8,    ///< 256
    ADDR_PIPEINTERLEAVE_512B = 1 << 9,    ///< 512
    ADDR_PIPEINTERLEAVE_1KB  = 1 << 10,   ///< 1024
    ADDR_PIPEINTERLEAVE_2KB  = 1 << 11,   ///< 2048
};
/**
****************************************************************************************************
* @brief Neutral enums that define DRAM row size
****************************************************************************************************
*/
enum RowSize
{
    ADDR_ROWSIZE_1KB = 1 << 10,   ///< 1024
    ADDR_ROWSIZE_2KB = 1 << 11,   ///< 2048
    ADDR_ROWSIZE_4KB = 1 << 12,   ///< 4096
    ADDR_ROWSIZE_8KB = 1 << 13,   ///< 8192
};
/**
****************************************************************************************************
* @brief Neutral enums that define bank interleave
****************************************************************************************************
*/
enum BankInterleave
{
    ADDR_BANKINTERLEAVE_1 = 1 << 0,   ///< 1
    ADDR_BANKINTERLEAVE_2 = 1 << 1,   ///< 2
    ADDR_BANKINTERLEAVE_4 = 1 << 2,   ///< 4
    ADDR_BANKINTERLEAVE_8 = 1 << 3,   ///< 8
};
/**
****************************************************************************************************
* @brief Neutral enums that define shader engine tile size
****************************************************************************************************
*/
enum ShaderEngineTileSize
{
    ADDR_SE_TILESIZE_16 = 1 << 4,   ///< 16
    ADDR_SE_TILESIZE_32 = 1 << 5,   ///< 32
};
/**
****************************************************************************************************
* @brief Neutral enums that define bank swap size
****************************************************************************************************
*/
enum BankSwapSize
{
    ADDR_BANKSWAP_128B = 1 << 7,    ///< 128
    ADDR_BANKSWAP_256B = 1 << 8,    ///< 256
    ADDR_BANKSWAP_512B = 1 << 9,    ///< 512
    ADDR_BANKSWAP_1KB  = 1 << 10,   ///< 1024
};
/**
****************************************************************************************************
* @brief Enums that define max compressed fragments config
****************************************************************************************************
*/
enum NumMaxCompressedFragmentsConfig
{
    ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS = 0,   ///< Code 0: 1 fragment
    ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS = 1,   ///< Code 1: 2 fragments
    ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS = 2,   ///< Code 2: 4 fragments
    ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS = 3,   ///< Code 3: 8 fragments
};
/**
****************************************************************************************************
* @brief Enums that define num pipes config
****************************************************************************************************
*/
enum NumPipesConfig
{
    ADDR_CONFIG_1_PIPE  = 0,   ///< Code 0: 1 pipe
    ADDR_CONFIG_2_PIPE  = 1,   ///< Code 1: 2 pipes
    ADDR_CONFIG_4_PIPE  = 2,   ///< Code 2: 4 pipes
    ADDR_CONFIG_8_PIPE  = 3,   ///< Code 3: 8 pipes
    ADDR_CONFIG_16_PIPE = 4,   ///< Code 4: 16 pipes
    ADDR_CONFIG_32_PIPE = 5,   ///< Code 5: 32 pipes
    ADDR_CONFIG_64_PIPE = 6,   ///< Code 6: 64 pipes
};
/**
****************************************************************************************************
* @brief Enums that define num banks config
****************************************************************************************************
*/
enum NumBanksConfig
{
    ADDR_CONFIG_1_BANK  = 0,   ///< Code 0: 1 bank
    ADDR_CONFIG_2_BANK  = 1,   ///< Code 1: 2 banks
    ADDR_CONFIG_4_BANK  = 2,   ///< Code 2: 4 banks
    ADDR_CONFIG_8_BANK  = 3,   ///< Code 3: 8 banks
    ADDR_CONFIG_16_BANK = 4,   ///< Code 4: 16 banks
};
/**
****************************************************************************************************
* @brief Enums that define num rb per shader engine config
****************************************************************************************************
*/
enum NumRbPerShaderEngineConfig
{
    ADDR_CONFIG_1_RB_PER_SHADER_ENGINE = 0,   ///< Code 0: 1 RB per shader engine
    ADDR_CONFIG_2_RB_PER_SHADER_ENGINE = 1,   ///< Code 1: 2 RBs per shader engine
    ADDR_CONFIG_4_RB_PER_SHADER_ENGINE = 2,   ///< Code 2: 4 RBs per shader engine
};
/**
****************************************************************************************************
* @brief Enums that define num shader engines config
****************************************************************************************************
*/
enum NumShaderEnginesConfig
{
    ADDR_CONFIG_1_SHADER_ENGINE = 0,   ///< Code 0: 1 shader engine
    ADDR_CONFIG_2_SHADER_ENGINE = 1,   ///< Code 1: 2 shader engines
    ADDR_CONFIG_4_SHADER_ENGINE = 2,   ///< Code 2: 4 shader engines
    ADDR_CONFIG_8_SHADER_ENGINE = 3,   ///< Code 3: 8 shader engines
};
/**
****************************************************************************************************
* @brief Enums that define pipe interleave size config
****************************************************************************************************
*/
enum PipeInterleaveSizeConfig
{
    ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0,   ///< Code 0: 256B interleave
    ADDR_CONFIG_PIPE_INTERLEAVE_512B = 1,   ///< Code 1: 512B interleave
    ADDR_CONFIG_PIPE_INTERLEAVE_1KB  = 2,   ///< Code 2: 1KB interleave
    ADDR_CONFIG_PIPE_INTERLEAVE_2KB  = 3,   ///< Code 3: 2KB interleave
};
/**
****************************************************************************************************
* @brief Enums that define row size config
****************************************************************************************************
*/
enum RowSizeConfig
{
    ADDR_CONFIG_1KB_ROW = 0,   ///< Code 0: 1KB row
    ADDR_CONFIG_2KB_ROW = 1,   ///< Code 1: 2KB row
    ADDR_CONFIG_4KB_ROW = 2,   ///< Code 2: 4KB row
};
/**
****************************************************************************************************
* @brief Enums that define bank interleave size config
****************************************************************************************************
*/
enum BankInterleaveSizeConfig
{
    ADDR_CONFIG_BANK_INTERLEAVE_1 = 0,   ///< Code 0: bank interleave 1
    ADDR_CONFIG_BANK_INTERLEAVE_2 = 1,   ///< Code 1: bank interleave 2
    ADDR_CONFIG_BANK_INTERLEAVE_4 = 2,   ///< Code 2: bank interleave 4
    ADDR_CONFIG_BANK_INTERLEAVE_8 = 3,   ///< Code 3: bank interleave 8
};
/**
****************************************************************************************************
* @brief Enums that define shader engine tile size config
****************************************************************************************************
*/
enum ShaderEngineTileSizeConfig
{
    ADDR_CONFIG_SE_TILE_16 = 0,   ///< Code 0: SE tile size 16
    ADDR_CONFIG_SE_TILE_32 = 1,   ///< Code 1: SE tile size 32
};
/**
****************************************************************************************************
* @brief This class contains asic independent address lib functionalities
****************************************************************************************************
*/
// Base class of the address library, derived from Object. It declares the creation and
// destruction entry points, the element-conversion and max-alignment queries, the virtual
// Hwl* hooks that hardware layers implement or override, and the global configuration
// members shared with derived classes.
class Lib : public Object
{
public:
virtual ~Lib();
/// Static creation entry point; takes the creation input and a creation output structure
static ADDR_E_RETURNCODE Create(
const ADDR_CREATE_INPUT* pCreateInfo, ADDR_CREATE_OUTPUT* pCreateOut);
/// Pair of Create; destroys this object with `delete this`
VOID Destroy()
{
delete this;
}
/// Returns the Lib pointer for the given handle (definition is not in this header)
static Lib* GetLib(ADDR_HANDLE hLib);
/// Returns AddrLib version (from the compiled binary instead of the include file)
UINT_32 GetVersion()
{
return m_version;
}
/// Returns asic chip family name defined by AddrLib
ChipFamily GetChipFamily()
{
return m_chipFamily;
}
ADDR_E_RETURNCODE Flt32ToDepthPixel(
const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,
ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const;
ADDR_E_RETURNCODE Flt32ToColorPixel(
const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,
ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const;
BOOL_32 GetExportNorm(const ELEM_GETEXPORTNORM_INPUT* pIn) const;
ADDR_E_RETURNCODE GetMaxAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const;
ADDR_E_RETURNCODE GetMaxMetaAlignments(ADDR_GET_MAX_ALIGNMENTS_OUTPUT* pOut) const;
UINT_32 GetBpe(AddrFormat format) const;
protected:
Lib(); // Constructor is protected
Lib(const Client* pClient);
/// Pure virtual function to get max base alignments
virtual UINT_32 HwlComputeMaxBaseAlignments() const = 0;
/// Gets maximum alignments for metadata; this default invokes ADDR_NOT_IMPLEMENTED() and
/// returns 0
virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const
{
ADDR_NOT_IMPLEMENTED();
return 0;
}
/// When DEBUG is non-zero, asserts that alignment does not exceed m_maxBaseAlign;
/// otherwise the body is empty
VOID ValidBaseAlignments(UINT_32 alignment) const
{
#if DEBUG
ADDR_ASSERT(alignment <= m_maxBaseAlign);
#endif
}
/// When DEBUG is non-zero, asserts that metaAlignment does not exceed m_maxMetaBaseAlign;
/// otherwise the body is empty
VOID ValidMetaBaseAlignments(UINT_32 metaAlignment) const
{
#if DEBUG
ADDR_ASSERT(metaAlignment <= m_maxMetaBaseAlign);
#endif
}
//
// Initialization
//
/// Pure Virtual function for Hwl computing internal global parameters from h/w registers
virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn) = 0;
/// Pure Virtual function for Hwl converting chip family
virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision) = 0;
/// Get equation table pointer and number of equations; this default stores NULL in
/// *ppEquationTable and returns 0
virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const
{
*ppEquationTable = NULL;
return 0;
}
//
// Misc helper
//
/// Combines bitNum variadic bit values into one number
static UINT_32 Bits2Number(UINT_32 bitNum, ...);
/// Returns numFrags when it is non-zero, otherwise the larger of 1 and numSamples
static UINT_32 GetNumFragments(UINT_32 numSamples, UINT_32 numFrags)
{
return (numFrags != 0) ? numFrags : Max(1u, numSamples);
}
/// Returns pointer of ElemLib
ElemLib* GetElemLib() const
{
return m_pElemLib;
}
/// Returns fillSizeFields flag
UINT_32 GetFillSizeFieldsFlags() const
{
return m_configFlags.fillSizeFields;
}
private:
// Disallow the copy constructor
Lib(const Lib& a);
// Disallow the assignment operator
Lib& operator=(const Lib& a);
VOID SetChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);
VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels);
VOID SetMaxAlignments();
protected:
LibClass m_class; ///< Store class type (HWL type)
ChipFamily m_chipFamily; ///< Chip family translated from the one in atiid.h
UINT_32 m_chipRevision; ///< Revision id from xxx_id.h
UINT_32 m_version; ///< Current version
//
// Global parameters
//
ConfigFlags m_configFlags; ///< Global configuration flags. Note this is setup by
/// AddrLib instead of Client except forceLinearAligned
UINT_32 m_pipes; ///< Number of pipes
UINT_32 m_banks; ///< Number of banks
/// For r800 this is MC_ARB_RAMCFG.NOOFBANK
/// Keep it here to do default parameter calculation
UINT_32 m_pipeInterleaveBytes;
///< Specifies the size of contiguous address space
/// within each tiling pipe when making linear
/// accesses. (Formerly Group Size)
UINT_32 m_rowSize; ///< DRAM row size, in bytes
UINT_32 m_minPitchAlignPixels; ///< Minimum pitch alignment in pixels
UINT_32 m_maxSamples; ///< Max numSamples
UINT_32 m_maxBaseAlign; ///< Max base alignment for data surface
UINT_32 m_maxMetaBaseAlign; ///< Max base alignment for metadata
private:
ElemLib* m_pElemLib; ///< Element Lib pointer
};
// Declarations of the HWL init functions; each takes the Client pointer and returns a Lib*.
// Their definitions are not in this header.
// NOTE(review): presumably one factory per hardware generation (SI, CI, GFX9, GFX10) - confirm.
Lib* SiHwlInit (const Client* pClient);
Lib* CiHwlInit (const Client* pClient);
Lib* Gfx9HwlInit (const Client* pClient);
Lib* Gfx10HwlInit(const Client* pClient);
} // Addr
#endif
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
@@ -0,0 +1,544 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/**
****************************************************************************************************
* @file addrlib1.h
* @brief Contains the Addr::V1::Lib class definition.
****************************************************************************************************
*/
#ifndef __ADDR_LIB1_H__
#define __ADDR_LIB1_H__
#include "addrlib.h"
namespace Addr
{
namespace V1
{
/**
****************************************************************************************************
* @brief Neutral enums that define sample split size
****************************************************************************************************
*/
enum SampleSplitSize
{
    ADDR_SAMPLESPLIT_1KB = 1 << 10,   ///< 1024
    ADDR_SAMPLESPLIT_2KB = 1 << 11,   ///< 2048
    ADDR_SAMPLESPLIT_4KB = 1 << 12,   ///< 4096
    ADDR_SAMPLESPLIT_8KB = 1 << 13,   ///< 8192
};
/**
****************************************************************************************************
* @brief Flags for AddrTileMode
****************************************************************************************************
*/
// Bit-field record: a 4-bit thickness followed by seven 1-bit flags (11 bits in total).
// V1::Lib declares a static table of these, ModeFlags[ADDR_TM_COUNT].
// NOTE(review): the table is presumably indexed by AddrTileMode - confirm where it is defined.
struct TileModeFlags
{
UINT_32 thickness : 4;
UINT_32 isLinear : 1;
UINT_32 isMicro : 1;
UINT_32 isMacro : 1;
UINT_32 isMacro3d : 1;
UINT_32 isPrt : 1;
UINT_32 isPrtNoRotation : 1;
UINT_32 isBankSwapped : 1;
};
static const UINT_32 Block64K    = 64u * 1024u;   ///< 64 * 1024 = 0x10000
static const UINT_32 PrtTileSize = Block64K;      ///< PRT tile size is one 64K block
/**
****************************************************************************************************
* @brief This class contains asic independent address lib functionalities
****************************************************************************************************
*/
class Lib : public Addr::Lib
{
public:
virtual ~Lib();
static Lib* GetLib(
ADDR_HANDLE hLib);
/// Returns tileIndex support
BOOL_32 UseTileIndex(INT_32 index) const
{
return m_configFlags.useTileIndex && (index != TileIndexInvalid);
}
/// Returns combined swizzle support
BOOL_32 UseCombinedSwizzle() const
{
return m_configFlags.useCombinedSwizzle;
}
//
// Interface stubs
//
ADDR_E_RETURNCODE ComputeSurfaceInfo(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord(
const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr(
const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeSliceTileSwizzle(
const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn,
ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ExtractBankPipeSwizzle(
const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn,
ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const;
ADDR_E_RETURNCODE CombineBankPipeSwizzle(
const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn,
ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeBaseSwizzle(
const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn,
ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeFmaskInfo(
const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord(
const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr(
const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ConvertTileInfoToHW(
const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ConvertTileIndex(
const ADDR_CONVERT_TILEINDEX_INPUT* pIn,
ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const;
ADDR_E_RETURNCODE GetMacroModeIndex(
const ADDR_GET_MACROMODEINDEX_INPUT* pIn,
ADDR_GET_MACROMODEINDEX_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ConvertTileIndex1(
const ADDR_CONVERT_TILEINDEX1_INPUT* pIn,
ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const;
ADDR_E_RETURNCODE GetTileIndex(
const ADDR_GET_TILEINDEX_INPUT* pIn,
ADDR_GET_TILEINDEX_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeHtileInfo(
const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn,
ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeCmaskInfo(
const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn,
ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeDccInfo(
const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeHtileAddrFromCoord(
const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord(
const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeHtileCoordFromAddr(
const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr(
const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn,
ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputePrtInfo(
const ADDR_PRT_INFO_INPUT* pIn,
ADDR_PRT_INFO_OUTPUT* pOut) const;
protected:
Lib(); // Constructor is protected
Lib(const Client* pClient);
/// Pure Virtual function for Hwl computing surface info
virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
/// Pure Virtual function for Hwl computing surface address from coord
virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord(
const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const = 0;
/// Pure Virtual function for Hwl computing surface coord from address
virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr(
const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const = 0;
/// Pure Virtual function for Hwl computing surface tile swizzle
virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle(
const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn,
ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const = 0;
/// Pure Virtual function for Hwl extracting bank/pipe swizzle from base256b
virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle(
const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn,
ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const = 0;
/// Pure Virtual function for Hwl combining bank/pipe swizzle
virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle(
UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO* pTileInfo,
UINT_64 baseAddr, UINT_32* pTileSwizzle) const = 0;
/// Pure Virtual function for Hwl computing base swizzle
virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle(
const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn,
ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const = 0;
/// Pure Virtual function for Hwl computing HTILE base align
virtual UINT_32 HwlComputeHtileBaseAlign(
BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const = 0;
/// Pure Virtual function for Hwl computing HTILE bpp
virtual UINT_32 HwlComputeHtileBpp(
BOOL_32 isWidth8, BOOL_32 isHeight8) const = 0;
/// Pure Virtual function for Hwl computing HTILE bytes
virtual UINT_64 HwlComputeHtileBytes(
UINT_32 pitch, UINT_32 height, UINT_32 bpp,
BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const = 0;
/// Pure Virtual function for Hwl computing FMASK info
virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo(
const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) = 0;
/// Pure Virtual function for Hwl FMASK address from coord
virtual ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord(
const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const = 0;
/// Pure Virtual function for Hwl FMASK coord from address
virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr(
const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const = 0;
/// Pure Virtual function for Hwl convert tile info from real value to HW value
virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW(
const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const = 0;
/// Pure Virtual function for Hwl compute mipmap info
virtual BOOL_32 HwlComputeMipLevel(
ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0;
/// Pure Virtual function for Hwl compute max cmask blockMax value
virtual BOOL_32 HwlGetMaxCmaskBlockMax() const = 0;
/// Pure Virtual function for Hwl compute fmask bits
virtual UINT_32 HwlComputeFmaskBits(
const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
UINT_32* pNumSamples) const = 0;
/// Virtual function to get index (not pure, so there is no need to implement this in all hwls)
virtual ADDR_E_RETURNCODE HwlGetTileIndex(
const ADDR_GET_TILEINDEX_INPUT* pIn,
ADDR_GET_TILEINDEX_OUTPUT* pOut) const
{
return ADDR_NOTSUPPORTED;
}
/// Virtual function for Hwl to compute Dcc info
virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const
{
return ADDR_NOTSUPPORTED;
}
/// Virtual function to get cmask address for tc compatible cmask
virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const
{
return ADDR_NOTSUPPORTED;
}
/// Virtual function to get htile address for tc compatible htile
virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord(
const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const
{
return ADDR_NOTSUPPORTED;
}
// Compute attributes
// HTILE
UINT_32 ComputeHtileInfo(
ADDR_HTILE_FLAGS flags,
UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices,
BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
ADDR_TILEINFO* pTileInfo,
UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pHtileBytes,
UINT_32* pMacroWidth = NULL, UINT_32* pMacroHeight = NULL,
UINT_64* pSliceSize = NULL, UINT_32* pBaseAlign = NULL) const;
// CMASK
ADDR_E_RETURNCODE ComputeCmaskInfo(
ADDR_CMASK_FLAGS flags,
UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices, BOOL_32 isLinear,
ADDR_TILEINFO* pTileInfo, UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pCmaskBytes,
UINT_32* pMacroWidth, UINT_32* pMacroHeight, UINT_64* pSliceSize = NULL,
UINT_32* pBaseAlign = NULL, UINT_32* pBlockMax = NULL) const;
virtual VOID HwlComputeTileDataWidthAndHeightLinear(
UINT_32* pMacroWidth, UINT_32* pMacroHeight,
UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const;
// CMASK & HTILE addressing
virtual UINT_64 HwlComputeXmaskAddrFromCoord(
UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice,
UINT_32 numSlices, UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8,
BOOL_32 isHeight8, ADDR_TILEINFO* pTileInfo,
UINT_32* bitPosition) const;
virtual VOID HwlComputeXmaskCoordFromAddr(
UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const;
// Surface mipmap
VOID ComputeMipLevel(
ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
/// Pure Virtual function for Hwl to get macro tiled alignment info
virtual BOOL_32 HwlGetAlignmentInfoMacroTiled(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
UINT_32* pPitchAlign, UINT_32* pHeightAlign, UINT_32* pSizeAlign) const = 0;
virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const
{
// not supported in hwl layer
}
virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const
{
// not supported in hwl layer
}
virtual VOID HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const
{
// not supported in hwl layer
}
AddrTileMode DegradeLargeThickTile(AddrTileMode tileMode, UINT_32 bpp) const;
VOID PadDimensions(
AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel,
UINT_32* pPitch, UINT_32* pPitchAlign, UINT_32* pHeight, UINT_32 heightAlign,
UINT_32* pSlices, UINT_32 sliceAlign) const;
virtual VOID HwlPadDimensions(
AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 mipLevel,
UINT_32* pPitch, UINT_32* pPitchAlign, UINT_32 height, UINT_32 heightAlign) const
{
}
//
// Addressing shared for linear/1D tiling
//
UINT_64 ComputeSurfaceAddrFromCoordLinear(
UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample,
UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
UINT_32* pBitPosition) const;
VOID ComputeSurfaceCoordFromAddrLinear(
UINT_64 addr, UINT_32 bitPosition, UINT_32 bpp,
UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const;
VOID ComputeSurfaceCoordFromAddrMicroTiled(
UINT_64 addr, UINT_32 bitPosition,
UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample,
AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const;
ADDR_E_RETURNCODE ComputeMicroTileEquation(
UINT_32 bpp, AddrTileMode tileMode,
AddrTileType microTileType, ADDR_EQUATION* pEquation) const;
UINT_32 ComputePixelIndexWithinMicroTile(
UINT_32 x, UINT_32 y, UINT_32 z,
UINT_32 bpp, AddrTileMode tileMode, AddrTileType microTileType) const;
/// Pure Virtual function for Hwl computing coord from offset inside micro tile
virtual VOID HwlComputePixelCoordFromOffset(
UINT_32 offset, UINT_32 bpp, UINT_32 numSamples,
AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample,
AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const = 0;
//
// Addressing shared by all
//
virtual UINT_32 HwlGetPipes(
const ADDR_TILEINFO* pTileInfo) const;
UINT_32 ComputePipeFromAddr(
UINT_64 addr, UINT_32 numPipes) const;
virtual ADDR_E_RETURNCODE ComputePipeEquation(
UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const
{
return ADDR_NOTSUPPORTED;
}
/// Pure Virtual function for Hwl computing pipe from coord
virtual UINT_32 ComputePipeFromCoord(
UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode,
UINT_32 pipeSwizzle, BOOL_32 flags, ADDR_TILEINFO* pTileInfo) const = 0;
/// Pure Virtual function for Hwl computing coord Y for 8 pipe cmask/htile
virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe(
UINT_32 pipe, UINT_32 x) const = 0;
//
// Misc helper
//
static const TileModeFlags ModeFlags[ADDR_TM_COUNT];
static UINT_32 Thickness(
AddrTileMode tileMode);
// Checking tile mode
static BOOL_32 IsMacroTiled(AddrTileMode tileMode);
static BOOL_32 IsMacro3dTiled(AddrTileMode tileMode);
static BOOL_32 IsLinear(AddrTileMode tileMode);
static BOOL_32 IsMicroTiled(AddrTileMode tileMode);
static BOOL_32 IsPrtTileMode(AddrTileMode tileMode);
static BOOL_32 IsPrtNoRotationTileMode(AddrTileMode tileMode);
/// Return TRUE if tile info is needed
BOOL_32 UseTileInfo() const
{
    // Tile info is consulted unless the ignoreTileInfo config flag is set.
    const BOOL_32 ignoreRequested = m_configFlags.ignoreTileInfo;
    return !ignoreRequested;
}
/// Adjusts pitch alignment for flipping surface
VOID AdjustPitchAlignment(
ADDR_SURFACE_FLAGS flags, UINT_32* pPitchAlign) const;
/// Overwrite tile config according to tile index
virtual ADDR_E_RETURNCODE HwlSetupTileCfg(
UINT_32 bpp, INT_32 index, INT_32 macroModeIndex,
ADDR_TILEINFO* pInfo, AddrTileMode* mode = NULL, AddrTileType* type = NULL) const;
/// Overwrite macro tile config according to tile index
virtual INT_32 HwlComputeMacroModeIndex(
INT_32 index, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples,
ADDR_TILEINFO* pTileInfo, AddrTileMode *pTileMode = NULL, AddrTileType *pTileType = NULL
) const
{
// Default implementation: every argument is ignored, no output pointer is
// written, and the TileIndexNoMacroIndex sentinel is returned.
return TileIndexNoMacroIndex;
}
/// Pre-handler of 3x pitch (96 bit) adjustment
virtual UINT_32 HwlPreHandleBaseLvl3xPitch(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
/// Post-handler of 3x pitch adjustment
virtual UINT_32 HwlPostHandleBaseLvl3xPitch(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
/// Check miplevel after surface adjustment
ADDR_E_RETURNCODE PostComputeMipLevel(
ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
/// Quad buffer stereo support, has its implementation in ind. layer
VOID ComputeQbStereoInfo(
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
/// Pure virtual function to compute stereo bank swizzle for right eye
virtual UINT_32 HwlComputeQbStereoRightSwizzle(
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
VOID OptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const;
/// Overwrite tile setting to PRT
virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const
{
// Default implementation is a no-op: pInOut is left unchanged.
}
static BOOL_32 DegradeTo1D(
UINT_32 width, UINT_32 height,
UINT_32 macroTilePitchAlign, UINT_32 macroTileHeightAlign);
private:
// Disallow the copy constructor
Lib(const Lib& a);
// Disallow the assignment operator
Lib& operator=(const Lib& a);
UINT_32 ComputeCmaskBaseAlign(
ADDR_CMASK_FLAGS flags, ADDR_TILEINFO* pTileInfo) const;
UINT_64 ComputeCmaskBytes(
UINT_32 pitch, UINT_32 height, UINT_32 numSlices) const;
//
// CMASK/HTILE shared methods
//
VOID ComputeTileDataWidthAndHeight(
UINT_32 bpp, UINT_32 cacheBits, ADDR_TILEINFO* pTileInfo,
UINT_32* pMacroWidth, UINT_32* pMacroHeight) const;
UINT_32 ComputeXmaskCoordYFromPipe(
UINT_32 pipe, UINT_32 x) const;
};
} // V1
} // Addr
#endif
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
@@ -0,0 +1,868 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/**
************************************************************************************************************************
* @file addrlib2.h
* @brief Contains the Addr::V2::Lib class definition.
************************************************************************************************************************
*/
#ifndef __ADDR2_LIB2_H__
#define __ADDR2_LIB2_H__
#include "addrlib.h"
namespace Addr
{
namespace V2
{
/**
************************************************************************************************************************
* @brief Flags for SwizzleModeTable
************************************************************************************************************************
*/
// One entry of the per-swizzle-mode property table (see m_swizzleModeTable).
// The twelve single-bit flags plus the 20 reserved bits add up to 32 bits;
// Lib::IsValidSwMode() copies sizeof(UINT_32) bytes of an entry and treats an
// all-zero value as "mode not valid".
struct SwizzleModeFlags
{
// Swizzle mode
UINT_32 isLinear : 1; // Linear
// Block size
UINT_32 is256b : 1; // Block size is 256B
UINT_32 is4kb : 1; // Block size is 4KB
UINT_32 is64kb : 1; // Block size is 64KB
UINT_32 isVar : 1; // Block size is variable
UINT_32 isZ : 1; // Z order swizzle mode
UINT_32 isStd : 1; // Standard swizzle mode
UINT_32 isDisp : 1; // Display swizzle mode
UINT_32 isRot : 1; // Rotate swizzle mode
// XOR mode
UINT_32 isXor : 1; // XOR after swizzle if set
UINT_32 isT : 1; // T mode
UINT_32 isRtOpt : 1; // mode opt for render target
UINT_32 reserved : 20; // Reserved bits
};
// Two-dimensional extent (w, h).
struct Dim2d
{
UINT_32 w;
UINT_32 h;
};
// Three-dimensional extent. Lib::ComputePadSize() pairs w with a width,
// h with a height and d with a slice count.
struct Dim3d
{
UINT_32 w;
UINT_32 h;
UINT_32 d;
};
// Macro define resource block type
enum AddrBlockType
{
AddrBlockMicro = 0, // Resource uses 256B block
AddrBlockThin4KB = 1, // Resource uses thin 4KB block
AddrBlockThick4KB = 2, // Resource uses thick 4KB block
AddrBlockThin64KB = 3, // Resource uses thin 64KB block
AddrBlockThick64KB = 4, // Resource uses thick 64KB block
AddrBlockVar = 5, // Resource uses var block, only valid for GFX9
AddrBlockLinear = 6, // Resource uses linear swizzle mode
// Count of the tiled (non-linear) block types above. Evaluates to 6, i.e. it
// shares its numeric value with AddrBlockLinear.
AddrBlockMaxTiledType = AddrBlockVar + 1,
};
// Bit-mask form of the swizzle types: each value is a single bit at the
// position given by the corresponding ADDR_SW_* constant, and AddrSwSetAll is
// the union of the four.
enum AddrSwSet
{
AddrSwSetZ = 1 << ADDR_SW_Z,
AddrSwSetS = 1 << ADDR_SW_S,
AddrSwSetD = 1 << ADDR_SW_D,
AddrSwSetR = 1 << ADDR_SW_R,
AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR,
};
// Block sizes in bytes for the fixed-size swizzle blocks.
const UINT_32 Size256 = 256u;
const UINT_32 Size4K = 4096u;
const UINT_32 Size64K = 65536u;
// Base-2 logarithms of the sizes above (2^8 = 256, 2^12 = 4096, 2^16 = 65536).
const UINT_32 Log2Size256 = 8u;
const UINT_32 Log2Size4K = 12u;
const UINT_32 Log2Size64K = 16u;
/**
************************************************************************************************************************
* @brief This class contains asic independent address lib functionalities
************************************************************************************************************************
*/
class Lib : public Addr::Lib
{
public:
virtual ~Lib();
static Lib* GetLib(
ADDR_HANDLE hLib);
//
// Interface stubs
//
// For data surface
ADDR_E_RETURNCODE ComputeSurfaceInfo(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord(
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr(
const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
// For HTile
ADDR_E_RETURNCODE ComputeHtileInfo(
const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,
ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeHtileAddrFromCoord(
const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut);
ADDR_E_RETURNCODE ComputeHtileCoordFromAddr(
const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut);
// For CMask
ADDR_E_RETURNCODE ComputeCmaskInfo(
const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,
ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord(
const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut);
ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr(
const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn,
ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const;
// For FMask
ADDR_E_RETURNCODE ComputeFmaskInfo(
const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn,
ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut);
ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord(
const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr(
const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;
// For DCC key
ADDR_E_RETURNCODE ComputeDccInfo(
const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,
ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeDccAddrFromCoord(
const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut);
// Misc
ADDR_E_RETURNCODE ComputePipeBankXor(
const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut);
ADDR_E_RETURNCODE ComputeSlicePipeBankXor(
const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut);
ADDR_E_RETURNCODE ComputeSubResourceOffsetForSwizzlePattern(
const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut);
ADDR_E_RETURNCODE Addr2GetPreferredSurfaceSetting(
const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const;
virtual BOOL_32 IsValidDisplaySwizzleMode(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
{
    // Base-class stub for hardware layers that do not provide this query.
    // The function returns a boolean, so it must not return an
    // ADDR_E_RETURNCODE: ADDR_NOTIMPLEMENTED is non-zero and would read as
    // TRUE to callers. Report FALSE, matching the other boolean Hwl stubs
    // in this class.
    ADDR_NOT_IMPLEMENTED();
    return FALSE;
}
protected:
Lib(); // Constructor is protected
Lib(const Client* pClient);
static const UINT_32 MaxNumOfBpp = 5;
static const UINT_32 MaxNumOfAA = 4;
static const Dim2d Block256_2d[MaxNumOfBpp];
static const Dim3d Block1K_3d[MaxNumOfBpp];
static const UINT_32 PrtAlignment = 64 * 1024;
static const UINT_32 MaxMacroBits = 20;
static const UINT_32 MaxMipLevels = 16;
BOOL_32 IsValidSwMode(AddrSwizzleMode swizzleMode) const
{
    // Read the table entry's flag bits through memcpy rather than through a
    // reinterpret_cast'ed pointer, which would violate strict-aliasing rules.
    UINT_32 flagBits;
    memcpy(&flagBits, &m_swizzleModeTable[swizzleMode], sizeof(flagBits));

    // A mode is valid when at least one flag bit is set in its entry.
    const BOOL_32 anyFlagSet = (flagBits != 0);
    return anyFlagSet;
}
// Checking block size
// Each predicate below returns the matching block-size flag of the
// m_swizzleModeTable entry indexed by swizzleMode. The index is used as-is;
// no range check is performed here.
BOOL_32 IsBlock256b(AddrSwizzleMode swizzleMode) const
{
return m_swizzleModeTable[swizzleMode].is256b;
}
BOOL_32 IsBlock4kb(AddrSwizzleMode swizzleMode) const
{
return m_swizzleModeTable[swizzleMode].is4kb;
}
BOOL_32 IsBlock64kb(AddrSwizzleMode swizzleMode) const
{
return m_swizzleModeTable[swizzleMode].is64kb;
}
BOOL_32 IsBlockVariable(AddrSwizzleMode swizzleMode) const
{
return m_swizzleModeTable[swizzleMode].isVar;
}
// Checking swizzle mode
// Each predicate below returns one swizzle-kind flag of the
// m_swizzleModeTable entry indexed by swizzleMode (no range check).
BOOL_32 IsLinear(AddrSwizzleMode swizzleMode) const
{
return m_swizzleModeTable[swizzleMode].isLinear;
}
BOOL_32 IsRtOptSwizzle(AddrSwizzleMode swizzleMode) const
{
return m_swizzleModeTable[swizzleMode].isRtOpt;
}
BOOL_32 IsZOrderSwizzle(AddrSwizzleMode swizzleMode) const
{
return m_swizzleModeTable[swizzleMode].isZ;
}
// Table-only variant; the two-argument overload defers to the hardware layer.
BOOL_32 IsStandardSwizzle(AddrSwizzleMode swizzleMode) const
{
return m_swizzleModeTable[swizzleMode].isStd;
}
// Table-only variant; the two-argument overload defers to the hardware layer.
BOOL_32 IsDisplaySwizzle(AddrSwizzleMode swizzleMode) const
{
return m_swizzleModeTable[swizzleMode].isDisp;
}
BOOL_32 IsRotateSwizzle(AddrSwizzleMode swizzleMode) const
{
return m_swizzleModeTable[swizzleMode].isRot;
}
// Resource-type-aware variant: forwards to the virtual HwlIsStandardSwizzle().
BOOL_32 IsStandardSwizzle(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const
{
return HwlIsStandardSwizzle(resourceType, swizzleMode);
}
// Resource-type-aware variant: forwards to the virtual HwlIsDisplaySwizzle().
BOOL_32 IsDisplaySwizzle(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const
{
return HwlIsDisplaySwizzle(resourceType, swizzleMode);
}
// Returns the isXor flag of the table entry for swizzleMode.
BOOL_32 IsXor(AddrSwizzleMode swizzleMode) const
{
return m_swizzleModeTable[swizzleMode].isXor;
}
// Returns the isT ("T mode") flag of the table entry for swizzleMode.
BOOL_32 IsPrt(AddrSwizzleMode swizzleMode) const
{
return m_swizzleModeTable[swizzleMode].isT;
}
BOOL_32 IsNonPrtXor(AddrSwizzleMode swizzleMode) const
{
    // TRUE only for XOR swizzle modes whose T (PRT) flag is clear.
    if (IsPrt(swizzleMode))
    {
        return FALSE;
    }

    return IsXor(swizzleMode);
}
// Checking resource type
// TRUE when resourceType is ADDR_RSRC_TEX_1D.
static BOOL_32 IsTex1d(AddrResourceType resourceType)
{
return (resourceType == ADDR_RSRC_TEX_1D);
}
// TRUE when resourceType is ADDR_RSRC_TEX_2D.
static BOOL_32 IsTex2d(AddrResourceType resourceType)
{
return (resourceType == ADDR_RSRC_TEX_2D);
}
// TRUE when resourceType is ADDR_RSRC_TEX_3D.
static BOOL_32 IsTex3d(AddrResourceType resourceType)
{
return (resourceType == ADDR_RSRC_TEX_3D);
}
// Forwards to the virtual HwlIsThick(); the answer is decided by the hardware layer.
BOOL_32 IsThick(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const
{
return HwlIsThick(resourceType, swizzleMode);
}
// Forwards to the virtual HwlIsThin(); the answer is decided by the hardware layer.
BOOL_32 IsThin(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const
{
return HwlIsThin(resourceType, swizzleMode);
}
UINT_32 GetBlockSizeLog2(AddrSwizzleMode swizzleMode) const
{
    // Linear modes are reported with the same size as 256B blocks.
    if (IsBlock256b(swizzleMode) || IsLinear(swizzleMode))
    {
        return 8;   // log2(256)
    }

    if (IsBlock4kb(swizzleMode))
    {
        return 12;  // log2(4096)
    }

    if (IsBlock64kb(swizzleMode))
    {
        return 16;  // log2(65536)
    }

    // Variable-size blocks are only usable once their size has been configured.
    if (IsBlockVariable(swizzleMode) && (m_blockVarSizeLog2 != 0))
    {
        return m_blockVarSizeLog2;
    }

    // No block-size flag matched: assert and report 0.
    ADDR_ASSERT_ALWAYS();
    return 0;
}
UINT_32 GetBlockSize(AddrSwizzleMode swizzleMode) const
{
    // Block size in bytes, i.e. 2^GetBlockSizeLog2(swizzleMode).
    // Shift an unsigned literal so the arithmetic is done in the unsigned
    // return type instead of shifting a signed int and converting afterwards.
    return (1u << GetBlockSizeLog2(swizzleMode));
}
static UINT_32 GetFmaskBpp(UINT_32 sample, UINT_32 frag)
{
    // A sample count of 0 is treated as 1; a fragment count of 0 defaults to
    // the (normalized) sample count.
    if (sample == 0)
    {
        sample = 1;
    }
    if (frag == 0)
    {
        frag = sample;
    }

    // Bits per sample: log2 of the fragment count, plus one when there are
    // more samples than fragments.
    UINT_32 bitsPerSample = QLog2(frag);
    if (sample > frag)
    {
        bitsPerSample += 1;
    }

    // A 3-bit index is widened to 4 bits.
    if (bitsPerSample == 3)
    {
        bitsPerSample = 4;
    }

    // The result is never smaller than 8 bits.
    return Max(8u, bitsPerSample * sample);
}
// Default hardware-layer hooks for the swizzle/thickness queries. Each base
// version invokes ADDR_NOT_IMPLEMENTED() and answers FALSE; the arguments are
// not inspected.
// NOTE(review): ADDR_NOT_IMPLEMENTED is presumably a debug-time diagnostic - confirm.
virtual BOOL_32 HwlIsStandardSwizzle(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const
{
ADDR_NOT_IMPLEMENTED();
return FALSE;
}
virtual BOOL_32 HwlIsDisplaySwizzle(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const
{
ADDR_NOT_IMPLEMENTED();
return FALSE;
}
virtual BOOL_32 HwlIsThin(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const
{
ADDR_NOT_IMPLEMENTED();
return FALSE;
}
virtual BOOL_32 HwlIsThick(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const
{
ADDR_NOT_IMPLEMENTED();
return FALSE;
}
// Default hardware-layer hooks for the HTile / CMask / DCC queries. Every base
// version below invokes ADDR_NOT_IMPLEMENTED(), leaves pOut untouched and
// returns ADDR_NOTSUPPORTED. The info queries are const; the address/coordinate
// conversions are declared non-const.
virtual ADDR_E_RETURNCODE HwlComputeHtileInfo(
const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,
ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo(
const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,
ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,
ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord(
const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut)
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut)
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord(
const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut)
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr(
const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut)
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
// Default hardware-layer hooks for equation generation. Each base version
// invokes ADDR_NOT_IMPLEMENTED(), does not write pEquation and returns
// ADDR_NOTSUPPORTED.
virtual ADDR_E_RETURNCODE HwlComputeBlock256Equation(
AddrResourceType rsrcType,
AddrSwizzleMode swMode,
UINT_32 elementBytesLog2,
ADDR_EQUATION* pEquation) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeThinEquation(
AddrResourceType rsrcType,
AddrSwizzleMode swMode,
UINT_32 elementBytesLog2,
ADDR_EQUATION* pEquation) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeThickEquation(
AddrResourceType rsrcType,
AddrSwizzleMode swMode,
UINT_32 elementBytesLog2,
ADDR_EQUATION* pEquation) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
// Default equation-index hook: invokes ADDR_NOT_IMPLEMENTED() and returns
// ADDR_INVALID_EQUATION_INDEX without touching pOut.
virtual UINT_32 HwlGetEquationIndex(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_INVALID_EQUATION_INDEX;
}
// Non-virtual entry point that simply forwards to HwlGetEquationIndex().
UINT_32 GetEquationIndex(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const
{
return HwlGetEquationIndex(pIn, pOut);
}
// Default hardware-layer hooks for pipe/bank XOR, preferred-setting and
// surface-info computation. Each base version invokes ADDR_NOT_IMPLEMENTED()
// and leaves its output structure untouched. Note the two different return
// codes: the first five stubs report ADDR_NOTSUPPORTED, while the last three
// (surface info tiled/linear, address-from-coord tiled) report
// ADDR_NOTIMPLEMENTED.
virtual ADDR_E_RETURNCODE HwlComputePipeBankXor(
const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor(
const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern(
const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting(
const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTSUPPORTED;
}
virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoTiled(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTIMPLEMENTED;
}
virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTIMPLEMENTED;
}
virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled(
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const
{
ADDR_NOT_IMPLEMENTED();
return ADDR_NOTIMPLEMENTED;
}
ADDR_E_RETURNCODE ComputeBlock256Equation(
AddrResourceType rsrcType,
AddrSwizzleMode swMode,
UINT_32 elementBytesLog2,
ADDR_EQUATION* pEquation) const;
ADDR_E_RETURNCODE ComputeThinEquation(
AddrResourceType rsrcType,
AddrSwizzleMode swMode,
UINT_32 elementBytesLog2,
ADDR_EQUATION* pEquation) const;
ADDR_E_RETURNCODE ComputeThickEquation(
AddrResourceType rsrcType,
AddrSwizzleMode swMode,
UINT_32 elementBytesLog2,
ADDR_EQUATION* pEquation) const;
ADDR_E_RETURNCODE ComputeSurfaceInfoSanityCheck(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
ADDR_E_RETURNCODE ComputeSurfaceInfoLinear(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeSurfaceInfoTiled(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordLinear(
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordTiled(
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddrLinear(
const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddrTiled(
const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
UINT_32 ComputeSurface2DMicroBlockOffset(
const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const;
UINT_32 ComputeSurface3DMicroBlockOffset(
const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const;
// Misc
ADDR_E_RETURNCODE ComputeBlockDimensionForSurf(
UINT_32* pWidth,
UINT_32* pHeight,
UINT_32* pDepth,
UINT_32 bpp,
UINT_32 numSamples,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const;
ADDR_E_RETURNCODE ComputeBlockDimension(
UINT_32* pWidth,
UINT_32* pHeight,
UINT_32* pDepth,
UINT_32 bpp,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const;
virtual VOID ComputeThinBlockDimension(
UINT_32* pWidth,
UINT_32* pHeight,
UINT_32* pDepth,
UINT_32 bpp,
UINT_32 numSamples,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const;
VOID ComputeThickBlockDimension(
UINT_32* pWidth,
UINT_32* pHeight,
UINT_32* pDepth,
UINT_32 bpp,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const;
static UINT_64 ComputePadSize(
    const Dim3d* pBlkDim,
    UINT_32      width,
    UINT_32      height,
    UINT_32      numSlices,
    Dim3d*       pPadDim)
{
    // Round every dimension up to the matching block dimension.
    const UINT_32 paddedWidth  = PowTwoAlign(width,     pBlkDim->w);
    const UINT_32 paddedHeight = PowTwoAlign(height,    pBlkDim->h);
    const UINT_32 paddedDepth  = PowTwoAlign(numSlices, pBlkDim->d);

    pPadDim->w = paddedWidth;
    pPadDim->h = paddedHeight;
    pPadDim->d = paddedDepth;

    // Element count of the padded volume, accumulated in 64 bits.
    return static_cast<UINT_64>(paddedWidth) * paddedHeight * paddedDepth;
}
static ADDR_E_RETURNCODE ExtractPipeBankXor(
UINT_32 pipeBankXor,
UINT_32 bankBits,
UINT_32 pipeBits,
UINT_32* pBankX,
UINT_32* pPipeX);
static BOOL_32 Valid3DMipSliceIdConstraint(
    UINT_32 numSlices,
    UINT_32 mipId,
    UINT_32 slice)
{
    // Slice count at this mip level: halved once per level, never below one.
    const UINT_32 slicesAtMip = Max((numSlices >> mipId), 1u);

    // The slice index must fall inside that range.
    return (slice < slicesAtMip);
}
Dim3d GetMipTailDim(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode,
UINT_32 blockWidth,
UINT_32 blockHeight,
UINT_32 blockDepth) const;
/// TRUE for the two local locations: ADDR_RSRC_LOC_LOCAL and ADDR_RSRC_LOC_INVIS.
static BOOL_32 IsLocalHeap(AddrResrouceLocation resourceType)
{
    const BOOL_32 isLocal     = (resourceType == ADDR_RSRC_LOC_LOCAL);
    const BOOL_32 isInvisible = (resourceType == ADDR_RSRC_LOC_INVIS);
    return (isLocal || isInvisible);
}
/// TRUE only for ADDR_RSRC_LOC_INVIS.
static BOOL_32 IsInvisibleHeap(AddrResrouceLocation resourceType)
{
    const BOOL_32 isInvisible = (resourceType == ADDR_RSRC_LOC_INVIS);
    return isInvisible;
}
/// TRUE for the two non-local locations: ADDR_RSRC_LOC_USWC and ADDR_RSRC_LOC_CACHED.
static BOOL_32 IsNonlocalHeap(AddrResrouceLocation resourceType)
{
    const BOOL_32 isUswc   = (resourceType == ADDR_RSRC_LOC_USWC);
    const BOOL_32 isCached = (resourceType == ADDR_RSRC_LOC_CACHED);
    return (isUswc || isCached);
}
UINT_32 GetPipeLog2ForMetaAddressing(BOOL_32 pipeAligned, AddrSwizzleMode swizzleMode) const
{
    // Without pipe alignment no pipe bits are used.
    UINT_32 pipeLog2 = 0;

    if (pipeAligned)
    {
        // Pipes across all shader engines, capped at 2^5.
        pipeLog2 = Min(m_pipesLog2 + m_seLog2, 5u);
    }

    if (IsXor(swizzleMode))
    {
        // For XOR modes, additionally cap by the block-size bits above the pipe interleave.
        const UINT_32 pipeLog2Limit = GetBlockSizeLog2(swizzleMode) - m_pipeInterleaveLog2;
        pipeLog2 = Min(pipeLog2, pipeLog2Limit);
    }

    return pipeLog2;
}
UINT_32 GetPipeNumForMetaAddressing(BOOL_32 pipeAligned, AddrSwizzleMode swizzleMode) const
{
    // Pipe count is 2^GetPipeLog2ForMetaAddressing(...). Shift an unsigned
    // literal so the arithmetic matches the unsigned return type rather than
    // shifting a signed int and converting the result.
    return (1u << GetPipeLog2ForMetaAddressing(pipeAligned, swizzleMode));
}
// Debug-only sanity check: asserts that pIn->numMipLevels does not exceed the
// level count implied by the largest relevant dimension
// (Log2NonPow2(dimension) + 1). Compiles to an empty body unless DEBUG is set.
VOID VerifyMipLevelInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
{
#if DEBUG
if (pIn->numMipLevels > 1)
{
UINT_32 actualMipLevels = 1;
// The cases deliberately cascade: 3D considers slices, height and width;
// 2D considers height and width; 1D considers width only.
switch (pIn->resourceType)
{
case ADDR_RSRC_TEX_3D:
actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->numSlices) + 1);
// Intentional fall through to share the 2D case
case ADDR_RSRC_TEX_2D:
actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->height) + 1);
// Intentional fall through to share the 1D case
case ADDR_RSRC_TEX_1D:
// Base 1D case
actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->width) + 1);
break;
default:
ADDR_ASSERT_ALWAYS();
break;
}
// If the client passes more mip levels than the dimensions allow, the results
// computed by addrlib will be wrong. It is an open question whether this call
// should fail outright instead of only asserting here.
ADDR_ASSERT(actualMipLevels >= pIn->numMipLevels);
}
#endif
}
ADDR_E_RETURNCODE ApplyCustomerPipeBankXor(
    AddrSwizzleMode swizzleMode,
    UINT_32         pipeBankXor,
    UINT_32         bankBits,
    UINT_32         pipeBits,
    UINT_32*        pBlockOffset) const
{
    // Only XOR swizzle modes carry a client-supplied pipe/bank XOR.
    if (!IsXor(swizzleMode))
    {
        return ADDR_OK;
    }

    // Split the combined value into its bank and pipe parts.
    UINT_32 bankXor = 0;
    UINT_32 pipeXor = 0;
    const ADDR_E_RETURNCODE status =
        ExtractPipeBankXor(pipeBankXor, bankBits, pipeBits, &bankXor, &pipeXor);

    // Pipe bits sit directly above the pipe interleave; bank bits above the pipe bits.
    const UINT_32 pipeShift = m_pipeInterleaveLog2;
    const UINT_32 bankShift = m_pipeInterleaveLog2 + pipeBits;

    *pBlockOffset ^= (pipeXor << pipeShift);
    *pBlockOffset ^= (bankXor << bankShift);

    return status;
}
UINT_32 GetPipeXorBits(UINT_32 macroBlockBits) const;
ADDR_E_RETURNCODE ApplyCustomizedPitchHeight(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
UINT_32 elementBytes,
UINT_32 pitchAlignInElement,
UINT_32* pPitch,
UINT_32* pHeight) const;
VOID ComputeQbStereoInfo(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
VOID FilterInvalidEqSwizzleMode(
ADDR2_SWMODE_SET& allowedSwModeSet,
AddrResourceType resourceType,
UINT_32 elemLog2) const;
UINT_32 m_se; ///< Number of shader engine
UINT_32 m_rbPerSe; ///< Number of render backend per shader engine
UINT_32 m_maxCompFrag; ///< Number of max compressed fragment
UINT_32 m_banksLog2; ///< Number of bank Log2
UINT_32 m_pipesLog2; ///< Number of pipe per shader engine Log2
UINT_32 m_seLog2; ///< Number of shader engine Log2
UINT_32 m_rbPerSeLog2; ///< Number of render backend per shader engine Log2
UINT_32 m_maxCompFragLog2; ///< Number of max compressed fragment Log2
UINT_32 m_pipeInterleaveLog2; ///< Log2 of pipe interleave bytes
UINT_32 m_blockVarSizeLog2; ///< Log2 of block var size
SwizzleModeFlags m_swizzleModeTable[ADDR_SW_MAX_TYPE]; ///< Swizzle mode table
// Max number of swizzle mode supported for equation
static const UINT_32 MaxSwModeType = 32;
// Max number of resource type (2D/3D) supported for equation
static const UINT_32 MaxRsrcType = 2;
// Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
static const UINT_32 MaxElementBytesLog2 = 5;
// Almost all swizzle mode + resource type support equation
static const UINT_32 EquationTableSize = MaxElementBytesLog2 * MaxSwModeType * MaxRsrcType;
// Equation table
ADDR_EQUATION m_equationTable[EquationTableSize];
// Number of equation entries in the table
UINT_32 m_numEquations;
// Equation lookup table according to bpp and tile index
UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwModeType][MaxElementBytesLog2];
private:
// Disallow the copy constructor
Lib(const Lib& a);
// Disallow the assignment operator
Lib& operator=(const Lib& a);
};
} // V2
} // Addr
#endif
@@ -0,0 +1,237 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/**
****************************************************************************************************
* @file addrobject.cpp
* @brief Contains the Object base class implementation.
****************************************************************************************************
*/
#include "addrinterface.h"
#include "addrobject.h"
namespace Addr
{
/**
****************************************************************************************************
* Object::Object
*
* @brief
* Constructor for the Object class.
****************************************************************************************************
*/
Object::Object()
{
    // No client is attached yet: clear every callback and the client handle.
    ADDR_CALLBACKS& callbacks = m_client.callbacks;

    callbacks.allocSysMem = NULL;
    callbacks.freeSysMem  = NULL;
    callbacks.debugPrint  = NULL;

    m_client.handle = NULL;
}
/**
****************************************************************************************************
* Object::Object
*
* @brief
* Constructor for the Object class.
****************************************************************************************************
*/
Object::Object(const Client* pClient)
{
// Take a by-value copy of the caller's client data (handle and callbacks).
// pClient is dereferenced without a null check, so it must be non-NULL.
m_client = *pClient;
}
/**
****************************************************************************************************
* Object::~Object
*
* @brief
* Destructor for the Object class.
****************************************************************************************************
*/
Object::~Object()
{
// Nothing to do here; the memory itself is returned in operator delete.
}
/**
****************************************************************************************************
* Object::ClientAlloc
*
* @brief
* Calls instanced allocSysMem inside Client
****************************************************************************************************
*/
VOID* Object::ClientAlloc(
    size_t         objSize,   ///< [in] Size to allocate
    const Client*  pClient)   ///< [in] Client pointer
{
    // Without an allocation callback there is no way to obtain memory.
    if (pClient->callbacks.allocSysMem == NULL)
    {
        return NULL;
    }

    // Describe the request and hand it to the client's allocator.
    ADDR_ALLOCSYSMEM_INPUT request = {0};

    request.size        = sizeof(request);
    request.flags.value = 0;
    request.sizeInBytes = static_cast<UINT_32>(objSize);
    request.hClient     = pClient->handle;

    return pClient->callbacks.allocSysMem(&request);
}
/**
****************************************************************************************************
* Object::Alloc
*
* @brief
* A wrapper of ClientAlloc
****************************************************************************************************
*/
VOID* Object::Alloc(
    size_t objSize      ///< [in] Size to allocate
    ) const
{
    // Allocate through this object's own client callbacks.
    // Returns whatever ClientAlloc returns (NULL when no allocator is set).
    return ClientAlloc(objSize, &m_client);
}
/**
****************************************************************************************************
* Object::ClientFree
*
* @brief
* Calls freeSysMem inside Client
****************************************************************************************************
*/
VOID Object::ClientFree(
    VOID*          pObjMem,   ///< [in] User virtual address to free.
    const Client*  pClient)   ///< [in] Client pointer
{
    // Nothing to do without a free callback or without memory to release.
    const BOOL_32 hasFreeCallback = (pClient->callbacks.freeSysMem != NULL);

    if (!hasFreeCallback || (pObjMem == NULL))
    {
        return;
    }

    // Describe the release request and hand it to the client's deallocator.
    ADDR_FREESYSMEM_INPUT request = {0};

    request.size      = sizeof(request);
    request.hClient   = pClient->handle;
    request.pVirtAddr = pObjMem;

    pClient->callbacks.freeSysMem(&request);
}
/**
****************************************************************************************************
* Object::Free
*
* @brief
* A wrapper of ClientFree
****************************************************************************************************
*/
VOID Object::Free(
VOID* pObjMem ///< [in] User virtual address to free.
) const
{
// Release through this object's own client callbacks; ClientFree ignores
// a NULL pObjMem and a missing freeSysMem callback.
ClientFree(pObjMem, &m_client);
}
/**
****************************************************************************************************
* Object::operator new
*
* @brief
* Placement new operator. (with pre-allocated memory pointer)
*
* @return
* Returns pre-allocated memory pointer.
****************************************************************************************************
*/
VOID* Object::operator new(
size_t objSize, ///< [in] Size to allocate
VOID* pMem) ///< [in] Pre-allocated pointer
{
// Placement form: no allocation happens here and objSize is not used; the
// caller-provided storage is returned as-is.
return pMem;
}
/**
****************************************************************************************************
* Object::operator delete
*
* @brief
* Frees Object object memory.
****************************************************************************************************
*/
VOID Object::operator delete(
    VOID* pObjMem)      ///< [in] User virtual address to free.
{
    // The language does not guarantee that a deallocation function is never
    // called with a null pointer. Without this guard a NULL pObjMem would be
    // turned into &pObj->m_client and dereferenced inside ClientFree.
    if (pObjMem != NULL)
    {
        // The client callbacks are stored inside the object being released,
        // so read them from its storage to route the free to the right client.
        Object* pObj = static_cast<Object*>(pObjMem);
        ClientFree(pObjMem, &pObj->m_client);
    }
}
/**
****************************************************************************************************
* Object::DebugPrint
*
* @brief
* Print debug message
*
* @return
* N/A
****************************************************************************************************
*/
VOID Object::DebugPrint(
    const CHAR* pDebugString,     ///< [in] Debug string
    ...
    ) const
{
    // Forwards a printf-style message to the client's debugPrint callback.
    // Compiled out entirely unless DEBUG is set; does nothing when the client
    // registered no callback.
#if DEBUG
    if (m_client.callbacks.debugPrint != NULL)
    {
        va_list ap;

        va_start(ap, pDebugString);

        ADDR_DEBUGPRINT_INPUT debugPrintInput = {0};

        debugPrintInput.size         = sizeof(ADDR_DEBUGPRINT_INPUT);
        debugPrintInput.pDebugString = const_cast<CHAR*>(pDebugString);
        debugPrintInput.hClient      = m_client.handle;
        va_copy(debugPrintInput.ap, ap);

        m_client.callbacks.debugPrint(&debugPrintInput);

        // Every va_copy must be paired with a va_end on the copy, in the same
        // function, just like va_start on the original list.
        va_end(debugPrintInput.ap);
        va_end(ap);
    }
#endif
}
} // Addr
@@ -0,0 +1,95 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/**
****************************************************************************************************
* @file addrobject.h
* @brief Contains the Object base class definition.
****************************************************************************************************
*/
#ifndef __ADDR_OBJECT_H__
#define __ADDR_OBJECT_H__
#include "addrtypes.h"
#include "addrcommon.h"
namespace Addr
{
/**
****************************************************************************************************
* @brief This structure contains client specific data: the client's handle and the set of
* callbacks the library invokes on the client's behalf.
****************************************************************************************************
*/
struct Client
{
ADDR_CLIENT_HANDLE handle; ///< Client handle; passed back to the client as hClient in callback inputs
ADDR_CALLBACKS callbacks; ///< Client-supplied callbacks (e.g. debugPrint)
};
/**
****************************************************************************************************
* @brief This class is the base class for all ADDR class objects. It stores the client data
* (m_client) and routes object memory management through the ClientAlloc / ClientFree helpers.
****************************************************************************************************
*/
class Object
{
public:
Object();
Object(const Client* pClient);
virtual ~Object();
/// Placement new: constructs the object in caller-provided memory pMem.
VOID* operator new(size_t size, VOID* pMem);
/// Releases the object's memory via ClientFree, using the client data stored in the object.
VOID operator delete(VOID* pObj);
/// Microsoft compiler requires a matching delete implementation, which seems to be called when
/// bad_alloc is thrown. But currently C++ exception isn't allowed so a dummy implementation is
/// added to eliminate the warning.
VOID operator delete(VOID* pObj, VOID* pMem) { ADDR_ASSERT_ALWAYS(); }
/// NOTE(review): presumably the allocation counterpart of Free(); definition not visible here.
VOID* Alloc(size_t size) const;
/// Frees pObj through ClientFree with this object's client data.
VOID Free(VOID* pObj) const;
/// Forwards a printf-style message to the client's debugPrint callback (DEBUG builds only).
VOID DebugPrint(const CHAR* pDebugString, ...) const;
/// Returns a pointer to the client data stored in this object.
const Client* GetClient() const {return &m_client;}
protected:
Client m_client; ///< Client handle and callbacks associated with this object
static VOID* ClientAlloc(size_t size, const Client* pClient);
static VOID ClientFree(VOID* pObj, const Client* pClient);
private:
// disallow the copy constructor
Object(const Object& a);
// disallow the assignment operator
Object& operator=(const Object& a);
};
} // Addr
#endif
@@ -0,0 +1,715 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
// Coordinate class implementation
#include "addrcommon.h"
#include "coord.h"
namespace Addr
{
namespace V2
{
// Default coordinate: bit 0 of the 'x' dimension.
Coordinate::Coordinate()
    : dim('x'),
      ord(0)
{
}
// Builds a coordinate for dimension character c and bit index n (n is narrowed to INT_8).
Coordinate::Coordinate(INT_8 c, INT_32 n)
    : dim(c),
      ord(static_cast<INT_8>(n))
{
}
// Re-targets this coordinate to dimension character c and bit index n (n is narrowed to INT_8).
VOID Coordinate::set(INT_8 c, INT_32 n)
{
    const INT_8 bitIndex = static_cast<INT_8>(n);

    ord = bitIndex;
    dim = c;
}
// Returns 1 when bit `ord` of the input selected by `dim` is set, otherwise 0.
// A `dim` other than 'm', 's', 'x', 'y' or 'z' always yields 0.
UINT_32 Coordinate::ison(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const
{
    // Select the input value that belongs to this coordinate's dimension.
    UINT_32 source = 0;

    switch (dim)
    {
        case 'x':
            source = x;
            break;
        case 'y':
            source = y;
            break;
        case 'z':
            source = z;
            break;
        case 's':
            source = s;
            break;
        case 'm':
            source = m;
            break;
    }

    const UINT_32 mask = static_cast<UINT_32>(1ull << static_cast<UINT_32>(ord));

    return ((source & mask) != 0) ? 1 : 0;
}
// Returns the dimension character of this coordinate (e.g. 'x', 'y', 'z', 's' or 'm').
INT_8 Coordinate::getdim()
{
return dim;
}
// Returns the bit index (order) of this coordinate within its dimension.
INT_8 Coordinate::getord()
{
return ord;
}
// Two coordinates are equal when both the dimension and the bit index match.
BOOL_32 Coordinate::operator==(const Coordinate& b)
{
    const BOOL_32 sameDim = (dim == b.dim);
    const BOOL_32 sameOrd = (ord == b.ord);

    return sameDim && sameOrd;
}
// Ordering used to keep coordinate lists sorted:
//  - within one dimension, lower bit index first;
//  - 's' sorts before every other dimension and 'm' after every other dimension;
//  - otherwise lower bit index first, with the dimension character as tie-break.
BOOL_32 Coordinate::operator<(const Coordinate& b)
{
    if (dim == b.dim)
    {
        return ord < b.ord;
    }

    if ((dim == 's') || (b.dim == 'm'))
    {
        return TRUE;
    }

    if ((b.dim == 's') || (dim == 'm'))
    {
        return FALSE;
    }

    return (ord == b.ord) ? (dim < b.dim) : (ord < b.ord);
}
// Greater-than is defined as "neither less than nor equal to".
BOOL_32 Coordinate::operator>(const Coordinate& b)
{
    return !(*this < b) && !(*this == b);
}
// Less-or-equal: operator< or operator== holds.
BOOL_32 Coordinate::operator<=(const Coordinate& b)
{
    const BOOL_32 isLess = (*this < b);

    return isLess || (*this == b);
}
// Greater-or-equal: the negation of operator<.
BOOL_32 Coordinate::operator>=(const Coordinate& b)
{
    const BOOL_32 isLess = (*this < b);

    return !isLess;
}
// Inequality: the negation of operator==.
BOOL_32 Coordinate::operator!=(const Coordinate& b)
{
    const BOOL_32 isEqual = (*this == b);

    return !isEqual;
}
// Postfix-form increment (the INT_32 parameter is the postfix marker). Advances the bit index
// by one and, unlike a conventional postfix operator, returns a reference to the already
// incremented object rather than a copy of the old value.
Coordinate& Coordinate::operator++(INT_32)
{
ord++;
return *this;
}
// CoordTerm
// Starts out as an empty term (no coordinates).
CoordTerm::CoordTerm()
    : num_coords(0)
{
}
// Empties the term by resetting the element count; the stored Coordinate slots are not touched.
VOID CoordTerm::Clear()
{
num_coords = 0;
}
// Adds a coordinate into the list.
// The same coordinate is never stored twice, and the list is kept ordered from smallest to
// largest (according to Coordinate::operator<).
// If the term already holds MaxCoords entries and co is not among them, the insertion is
// rejected (with an assert) instead of writing past the end of m_coord.
VOID CoordTerm::add(Coordinate& co)
{
    // Locate the first entry that is not smaller than co; that is where co belongs.
    UINT_32 pos = 0;

    while ((pos < num_coords) && (m_coord[pos] < co))
    {
        pos++;
    }

    const BOOL_32 alreadyPresent = (pos < num_coords) && (m_coord[pos] == co);

    if (!alreadyPresent)
    {
        if (num_coords >= MaxCoords)
        {
            // No room left: inserting would overflow m_coord[MaxCoords].
            ADDR_ASSERT_ALWAYS();
        }
        else
        {
            // Open a gap at pos by moving the tail up one slot, then insert.
            for (UINT_32 j = num_coords; j > pos; j--)
            {
                m_coord[j] = m_coord[j - 1];
            }

            m_coord[pos] = co;
            num_coords++;
        }
    }
}
// Merges every coordinate of cl into this term, one at a time, through add(Coordinate&).
VOID CoordTerm::add(CoordTerm& cl)
{
    UINT_32 srcIdx = 0;

    while (srcIdx < cl.num_coords)
    {
        add(cl.m_coord[srcIdx]);
        srcIdx++;
    }
}
// Removes co from the term, if present, and closes the gap so the list stays contiguous and
// ordered. Returns TRUE when a coordinate was removed, FALSE when co was not found.
//
// The tail is shifted only across the remaining valid entries; the slot just past the old end
// of the list is never read, so removing the last entry of a full term stays inside m_coord.
BOOL_32 CoordTerm::remove(Coordinate& co)
{
    BOOL_32 removed = FALSE;

    for (UINT_32 i = 0; i < num_coords; i++)
    {
        if (m_coord[i] == co)
        {
            // Pull every following entry down by one slot.
            for (UINT_32 j = i; (j + 1) < num_coords; j++)
            {
                m_coord[j] = m_coord[j + 1];
            }

            num_coords--;
            removed = TRUE;
            // add() never stores duplicates, so there is nothing further to look for.
            break;
        }
    }

    return removed;
}
// Returns TRUE when co is one of the coordinates stored in this term, FALSE otherwise.
BOOL_32 CoordTerm::Exists(Coordinate& co)
{
    for (UINT_32 idx = 0; idx < num_coords; idx++)
    {
        if (m_coord[idx] == co)
        {
            return TRUE;
        }
    }

    return FALSE;
}
// Copies this term into cl: the element count plus the first num_coords coordinates.
// Slots of cl beyond the copied count are left as they were.
VOID CoordTerm::copyto(CoordTerm& cl)
{
    const UINT_32 count = num_coords;

    cl.num_coords = count;

    UINT_32 idx = 0;
    while (idx < count)
    {
        cl.m_coord[idx] = m_coord[idx];
        idx++;
    }
}
// Returns the number of coordinates currently stored in this term.
UINT_32 CoordTerm::getsize()
{
return num_coords;
}
// Evaluates the term for the given inputs: the XOR of the selected bit (Coordinate::ison) of
// every coordinate in the term. An empty term evaluates to 0.
UINT_32 CoordTerm::getxor(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const
{
    UINT_32 parity = 0;
    UINT_32 idx    = 0;

    while (idx < num_coords)
    {
        parity ^= m_coord[idx].ison(x, y, z, s, m);
        idx++;
    }

    return parity;
}
// Writes the first stored coordinate into co. Because add() keeps the list sorted, slot 0 is
// the smallest entry. Slot 0 is read regardless of num_coords.
// NOTE(review): callers presumably only call this on a non-empty term -- confirm.
VOID CoordTerm::getsmallest(Coordinate& co)
{
co = m_coord[0];
}
// Removes, starting at index `start`, every coordinate that stands in relation f ('<', '>' or
// '=') to co. When axis is not '\0', only coordinates whose dimension equals axis are
// candidates for removal. Returns the number of coordinates left in the term.
UINT_32 CoordTerm::Filter(INT_8 f, Coordinate& co, UINT_32 start, INT_8 axis)
{
    UINT_32 idx = start;

    while (idx < num_coords)
    {
        Coordinate& cur = m_coord[idx];

        // Does the requested relation hold between the current entry and co?
        BOOL_32 relationHolds = FALSE;
        switch (f)
        {
            case '<':
                relationHolds = (cur < co);
                break;
            case '>':
                relationHolds = (cur > co);
                break;
            case '=':
                relationHolds = (cur == co);
                break;
            default:
                break;
        }

        const BOOL_32 axisMatches = (axis == '\0') || (axis == cur.getdim());

        if (relationHolds && axisMatches)
        {
            // Drop the entry by shifting the tail down; idx now refers to the next entry.
            for (UINT_32 k = idx + 1; k < num_coords; k++)
            {
                m_coord[k - 1] = m_coord[k];
            }
            num_coords--;
        }
        else
        {
            idx++;
        }
    }

    return num_coords;
}
// Returns a mutable reference to the i-th stored coordinate. The index is not range-checked.
Coordinate& CoordTerm::operator[](UINT_32 i)
{
return m_coord[i];
}
// Two terms are equal when they hold the same number of coordinates and the coordinates match
// position by position.
BOOL_32 CoordTerm::operator==(const CoordTerm& b)
{
    if (num_coords != b.num_coords)
    {
        return FALSE;
    }

    // Both lists are always kept in sorted order, so an element-wise comparison is sufficient.
    for (UINT_32 idx = 0; idx < num_coords; idx++)
    {
        if (m_coord[idx] != b.m_coord[idx])
        {
            return FALSE;
        }
    }

    return TRUE;
}
// Inequality: the negation of CoordTerm::operator==.
BOOL_32 CoordTerm::operator!=(const CoordTerm& b)
{
return !(*this == b);
}
// Walks the coordinates in order and evaluates, for each one, (1 << ord) <= range, where range
// is the argument matching the coordinate's dimension ('m' and unknown dimensions use 0; an
// unknown dimension also asserts). Stops at the first coordinate for which the test is true
// and returns the result of the last test performed (FALSE for an empty term).
BOOL_32 CoordTerm::exceedRange(UINT_32 xRange, UINT_32 yRange, UINT_32 zRange, UINT_32 sRange)
{
    BOOL_32 exceed = FALSE;
    UINT_32 idx    = 0;

    while ((exceed == FALSE) && (idx < num_coords))
    {
        Coordinate& cur   = m_coord[idx];
        UINT_32     limit = 0;

        switch (cur.getdim())
        {
            case 'x':
                limit = xRange;
                break;
            case 'y':
                limit = yRange;
                break;
            case 'z':
                limit = zRange;
                break;
            case 's':
                limit = sRange;
                break;
            case 'm':
                // No range is supplied for 'm'; the limit stays 0.
                break;
            default:
                // Invalid input!
                ADDR_ASSERT_ALWAYS();
                break;
        }

        exceed = ((1u << cur.getord()) <= limit);
        idx++;
    }

    return exceed;
}
// coordeq
// Starts out as an empty equation (zero address bits).
CoordEq::CoordEq()
    : m_numBits(0)
{
}
// Removes co from every bit term of the equation.
VOID CoordEq::remove(Coordinate& co)
{
    UINT_32 bitIdx = 0;

    while (bitIdx < m_numBits)
    {
        m_eq[bitIdx].remove(co);
        bitIdx++;
    }
}
// Returns TRUE when co appears in at least one bit term of the equation, FALSE otherwise.
BOOL_32 CoordEq::Exists(Coordinate& co)
{
    BOOL_32 found = FALSE;

    for (UINT_32 bitIdx = 0; (bitIdx < m_numBits) && (found == FALSE); bitIdx++)
    {
        if (m_eq[bitIdx].Exists(co))
        {
            found = TRUE;
        }
    }

    return found;
}
// Sets the number of address bits to n. When the equation grows, the newly exposed terms
// (indices [old size, n)) are cleared; when it shrinks, the terms are simply cut off.
VOID CoordEq::resize(UINT_32 n)
{
    // The loop body runs only when n is larger than the current size.
    for (UINT_32 bitIdx = m_numBits; bitIdx < n; bitIdx++)
    {
        m_eq[bitIdx].Clear();
    }

    m_numBits = n;
}
// Returns the number of address bits (terms) in this equation.
UINT_32 CoordEq::getsize()
{
return m_numBits;
}
// Evaluates the equation for the given inputs: bit i of the result is the XOR value
// (CoordTerm::getxor) of term i.
UINT_64 CoordEq::solve(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const
{
    UINT_64 result = 0;
    UINT_32 bitIdx = 0;

    while (bitIdx < m_numBits)
    {
        const UINT_64 termBit = (m_eq[bitIdx].getxor(x, y, z, s, m) != 0) ? 1ULL : 0ULL;

        result |= (termBit << bitIdx);
        bitIdx++;
    }

    return result;
}
// Inverse of solve(): recovers the coordinates x, y, z, s, m from an address.
//
//  addr     [in]  address whose bit i is the value of equation term i
//  sliceInM [in]  when non-zero (and multi-coordinate terms exist), z is taken as m / sliceInM
//                 and all z bits are then treated as known
//  x,y,z,s,m [out] recovered coordinates; all are reset to 0 first
//
// Pass 1 resolves every term that consists of a single coordinate: that coordinate bit equals
// the address bit directly. If terms with several coordinates remain, the do/while loop
// repeatedly (a) resolves terms that have shrunk to one coordinate and (b) folds already-known
// x/y/z bits out of the multi-coordinate terms (XOR-ing their value into the address bit and
// dropping them from the term) until no multi-coordinate term is left.
// NOTE(review): the loop repeats while any multi-coordinate term remains; this relies on every
// pass making progress -- confirm that all equations passed in are reducible this way.
//
// All bit manipulation is done on unsigned values, and the contribution of a known coordinate
// bit is shifted as a 64-bit value, because the address bit index i can be as large as
// MaxEqBits - 1 (63); a 32-bit shift by i would be undefined for i >= 32.
VOID CoordEq::solveAddr(
    UINT_64 addr, UINT_32 sliceInM,
    UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const
{
    // Bit masks of the coordinate bits whose value has been determined so far.
    UINT_32 xBitsValid = 0;
    UINT_32 yBitsValid = 0;
    UINT_32 zBitsValid = 0;
    UINT_32 sBitsValid = 0;
    UINT_32 mBitsValid = 0;

    // Work on a copy: terms are cleared / reduced as they get resolved.
    CoordEq temp = *this;

    x = y = z = s = m = 0;

    UINT_32 bitsLeft = 0;

    // Pass 1: single-coordinate terms map an address bit straight onto a coordinate bit.
    for (UINT_32 i = 0; i < temp.m_numBits; i++)
    {
        UINT_32 termSize = temp.m_eq[i].getsize();

        if (termSize == 1)
        {
            const UINT_32 bit = static_cast<UINT_32>((addr >> i) & 1);
            INT_8 dim = temp.m_eq[i][0].getdim();
            INT_8 ord = temp.m_eq[i][0].getord();

            ADDR_ASSERT((ord < 32) || (bit == 0));

            switch (dim)
            {
                case 'x':
                    xBitsValid |= (1u << ord);
                    x |= (bit << ord);
                    break;
                case 'y':
                    yBitsValid |= (1u << ord);
                    y |= (bit << ord);
                    break;
                case 'z':
                    zBitsValid |= (1u << ord);
                    z |= (bit << ord);
                    break;
                case 's':
                    sBitsValid |= (1u << ord);
                    s |= (bit << ord);
                    break;
                case 'm':
                    mBitsValid |= (1u << ord);
                    m |= (bit << ord);
                    break;
                default:
                    break;
            }

            temp.m_eq[i].Clear();
        }
        else if (termSize > 1)
        {
            bitsLeft++;
        }
    }

    if (bitsLeft > 0)
    {
        if (sliceInM != 0)
        {
            z = m / sliceInM;
            zBitsValid = 0xffffffff;
        }

        do
        {
            bitsLeft = 0;

            for (UINT_32 i = 0; i < temp.m_numBits; i++)
            {
                UINT_32 termSize = temp.m_eq[i].getsize();

                if (termSize == 1)
                {
                    // The term has been reduced to one unknown: read it off the address bit.
                    const UINT_32 bit = static_cast<UINT_32>((addr >> i) & 1);
                    INT_8 dim = temp.m_eq[i][0].getdim();
                    INT_8 ord = temp.m_eq[i][0].getord();

                    ADDR_ASSERT((ord < 32) || (bit == 0));

                    switch (dim)
                    {
                        case 'x':
                            xBitsValid |= (1u << ord);
                            x |= (bit << ord);
                            break;
                        case 'y':
                            yBitsValid |= (1u << ord);
                            y |= (bit << ord);
                            break;
                        case 'z':
                            zBitsValid |= (1u << ord);
                            z |= (bit << ord);
                            break;
                        case 's':
                            ADDR_ASSERT_ALWAYS();
                            break;
                        case 'm':
                            ADDR_ASSERT_ALWAYS();
                            break;
                        default:
                            break;
                    }

                    temp.m_eq[i].Clear();
                }
                else if (termSize > 1)
                {
                    // Reduce a copy of the term while iterating over the unmodified original.
                    CoordTerm tmpTerm = temp.m_eq[i];

                    for (UINT_32 j = 0; j < termSize; j++)
                    {
                        INT_8 dim = temp.m_eq[i][j].getdim();
                        INT_8 ord = temp.m_eq[i][j].getord();

                        switch (dim)
                        {
                            case 'x':
                                if (xBitsValid & (1u << ord))
                                {
                                    // Fold the known bit into address bit i (64-bit shift).
                                    addr ^= (static_cast<UINT_64>((x >> ord) & 1) << i);
                                    tmpTerm.remove(temp.m_eq[i][j]);
                                }
                                break;
                            case 'y':
                                if (yBitsValid & (1u << ord))
                                {
                                    addr ^= (static_cast<UINT_64>((y >> ord) & 1) << i);
                                    tmpTerm.remove(temp.m_eq[i][j]);
                                }
                                break;
                            case 'z':
                                if (zBitsValid & (1u << ord))
                                {
                                    addr ^= (static_cast<UINT_64>((z >> ord) & 1) << i);
                                    tmpTerm.remove(temp.m_eq[i][j]);
                                }
                                break;
                            case 's':
                                ADDR_ASSERT_ALWAYS();
                                break;
                            case 'm':
                                ADDR_ASSERT_ALWAYS();
                                break;
                            default:
                                break;
                        }
                    }

                    temp.m_eq[i] = tmpTerm;
                    bitsLeft++;
                }
            }
        } while (bitsLeft > 0);
    }
}
// Copies terms [start, start + count) of this equation into terms [0, count) of o and sets o's
// size to count. count is num, or this equation's full size when num is 0xFFFFFFFF.
VOID CoordEq::copy(CoordEq& o, UINT_32 start, UINT_32 num)
{
    const UINT_32 count = (num == 0xFFFFFFFF) ? m_numBits : num;

    o.m_numBits = count;

    UINT_32 dstIdx = 0;
    while (dstIdx < count)
    {
        m_eq[start + dstIdx].copyto(o.m_eq[dstIdx]);
        dstIdx++;
    }
}
// Reverses the order of n terms beginning at index start, where n is num, or the full size
// of the equation when num is 0xFFFFFFFF. Terms are exchanged pairwise from both ends.
VOID CoordEq::reverse(UINT_32 start, UINT_32 num)
{
    const UINT_32 n         = (num == 0xFFFFFFFF) ? m_numBits : num;
    const UINT_32 swapCount = n / 2;

    for (UINT_32 k = 0; k < swapCount; k++)
    {
        CoordTerm& front = m_eq[start + k];
        CoordTerm& back  = m_eq[start + n - 1 - k];

        CoordTerm saved;
        front.copyto(saved);
        back.copyto(front);
        saved.copyto(back);
    }
}
// Merges equation x into this one: term i of x is added to term (start + i) of this equation.
// The number of merged terms is the smaller of x's size and (m_numBits - start).
VOID CoordEq::xorin(CoordEq& x, UINT_32 start)
{
    const UINT_32 room  = m_numBits - start;
    const UINT_32 count = (x.m_numBits < room) ? x.m_numBits : room;

    UINT_32 idx = 0;
    while (idx < count)
    {
        m_eq[start + idx].add(x.m_eq[idx]);
        idx++;
    }
}
// Applies CoordTerm::Filter(f, co, 0, axis) to every term from index start onward. A term that
// ends up empty is deleted from the equation (later terms move down one position). Returns the
// resulting number of bits.
UINT_32 CoordEq::Filter(INT_8 f, Coordinate& co, UINT_32 start, INT_8 axis)
{
    UINT_32 bitIdx = start;

    while (bitIdx < m_numBits)
    {
        const UINT_32 remaining = m_eq[bitIdx].Filter(f, co, 0, axis);

        if (remaining != 0)
        {
            bitIdx++;
            continue;
        }

        // The term is empty now: close the gap; bitIdx then refers to the next term.
        for (UINT_32 k = bitIdx + 1; k < m_numBits; k++)
        {
            m_eq[k - 1] = m_eq[k];
        }
        m_numBits--;
    }

    return m_numBits;
}
// Shifts the terms in the index range [start, m_numBits) by `amount` positions.
// A positive amount moves terms toward higher indices, a negative amount toward lower indices.
// Positions whose source would lie outside [start, m_numBits) are cleared. Terms below start
// are not touched, and the size of the equation does not change.
VOID CoordEq::shift(INT_32 amount, INT_32 start)
{
if (amount != 0)
{
INT_32 numBits = static_cast<INT_32>(m_numBits);
// After negation, `amount` is the offset from a destination index to its source index.
amount = -amount;
// Walk in the direction that reads every source term before it gets overwritten:
// top-down when sources are at lower indices, bottom-up when they are at higher indices.
INT_32 inc = (amount < 0) ? -1 : 1;
INT_32 i = (amount < 0) ? numBits - 1 : start;
INT_32 end = (amount < 0) ? start - 1 : numBits;
for (; (inc > 0) ? i < end : i > end; i += inc)
{
if ((i + amount < start) || (i + amount >= numBits))
{
// No source term inside the shifted range: the destination becomes empty.
m_eq[i].Clear();
}
else
{
m_eq[i + amount].copyto(m_eq[i]);
}
}
}
}
// Returns a mutable reference to the term of address bit i. The index is not range-checked.
CoordTerm& CoordEq::operator[](UINT_32 i)
{
return m_eq[i];
}
// Interleaves two coordinates over the address bits [start, end]: the bits alternately receive
// c0 and c1 (c0 first), and the coordinate that was just used is advanced to its next bit
// index. c0 and c1 are modified. end == 0 means "up to the last bit of the equation".
VOID CoordEq::mort2d(Coordinate& c0, Coordinate& c1, UINT_32 start, UINT_32 end)
{
    if (end == 0)
    {
        ADDR_ASSERT(m_numBits > 0);
        end = m_numBits - 1;
    }

    Coordinate* const sources[2] = { &c0, &c1 };

    for (UINT_32 bitIdx = start; bitIdx <= end; bitIdx++)
    {
        Coordinate* pSrc = sources[(bitIdx - start) % 2];

        m_eq[bitIdx].add(*pSrc);
        (*pSrc)++;
    }
}
// Interleaves three coordinates over the address bits [start, end]: the bits receive c0, c1,
// c2 in rotation (c0 first), and the coordinate that was just used is advanced to its next bit
// index. c0, c1 and c2 are modified. end == 0 means "up to the last bit of the equation".
VOID CoordEq::mort3d(Coordinate& c0, Coordinate& c1, Coordinate& c2, UINT_32 start, UINT_32 end)
{
    if (end == 0)
    {
        ADDR_ASSERT(m_numBits > 0);
        end = m_numBits - 1;
    }

    Coordinate* const sources[3] = { &c0, &c1, &c2 };

    for (UINT_32 bitIdx = start; bitIdx <= end; bitIdx++)
    {
        Coordinate* pSrc = sources[(bitIdx - start) % 3];

        m_eq[bitIdx].add(*pSrc);
        (*pSrc)++;
    }
}
// Two equations are equal when they have the same number of bits and every pair of
// corresponding terms compares equal.
BOOL_32 CoordEq::operator==(const CoordEq& b)
{
    if (m_numBits != b.m_numBits)
    {
        return FALSE;
    }

    for (UINT_32 bitIdx = 0; bitIdx < m_numBits; bitIdx++)
    {
        if (m_eq[bitIdx] != b.m_eq[bitIdx])
        {
            return FALSE;
        }
    }

    return TRUE;
}
// Inequality: the negation of CoordEq::operator==.
BOOL_32 CoordEq::operator!=(const CoordEq& b)
{
return !(*this == b);
}
} // V2
} // Addr
@@ -0,0 +1,122 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
// Class used to define a coordinate bit
#ifndef __COORD_H
#define __COORD_H
namespace Addr
{
namespace V2
{
// A single coordinate bit: a dimension character (dim) plus a bit index within that
// dimension (ord). The implementation recognizes the dimensions 'x', 'y', 'z', 's' and 'm'.
class Coordinate
{
public:
Coordinate(); // defaults to dimension 'x', bit 0
Coordinate(INT_8 c, INT_32 n);
VOID set(INT_8 c, INT_32 n); // n is narrowed to INT_8
// Returns 1 if bit `ord` of the input matching `dim` is set, otherwise 0.
UINT_32 ison(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const;
INT_8 getdim();
INT_8 getord();
BOOL_32 operator==(const Coordinate& b);
BOOL_32 operator<(const Coordinate& b);
BOOL_32 operator>(const Coordinate& b);
BOOL_32 operator<=(const Coordinate& b);
BOOL_32 operator>=(const Coordinate& b);
BOOL_32 operator!=(const Coordinate& b);
// Postfix-form increment of the bit index; returns a reference to the incremented object.
Coordinate& operator++(INT_32);
private:
INT_8 dim; // dimension character
INT_8 ord; // bit index within the dimension
};
// A term of an address equation: a sorted, duplicate-free list of up to MaxCoords coordinate
// bits whose values are XOR-ed together (see getxor).
class CoordTerm
{
public:
CoordTerm();
VOID Clear(); // resets the element count to 0
VOID add(Coordinate& co); // sorted insert; an already present coordinate is not added again
VOID add(CoordTerm& cl); // adds every coordinate of cl
BOOL_32 remove(Coordinate& co); // TRUE if co was found and removed
BOOL_32 Exists(Coordinate& co);
VOID copyto(CoordTerm& cl); // copies count and coordinates into cl
UINT_32 getsize(); // number of stored coordinates
UINT_32 getxor(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const;
VOID getsmallest(Coordinate& co); // copies the first stored coordinate into co
// Removes coordinates related to co by f ('<', '>' or '='), optionally restricted to one axis;
// returns the number of coordinates left.
UINT_32 Filter(INT_8 f, Coordinate& co, UINT_32 start = 0, INT_8 axis = '\0');
Coordinate& operator[](UINT_32 i); // unchecked element access
BOOL_32 operator==(const CoordTerm& b);
BOOL_32 operator!=(const CoordTerm& b);
BOOL_32 exceedRange(UINT_32 xRange, UINT_32 yRange = 0, UINT_32 zRange = 0, UINT_32 sRange = 0);
private:
static const UINT_32 MaxCoords = 8; // capacity of m_coord
UINT_32 num_coords; // number of valid entries in m_coord
Coordinate m_coord[MaxCoords];
};
// An address equation: one CoordTerm per address bit, for up to MaxEqBits bits.
class CoordEq
{
public:
CoordEq();
VOID remove(Coordinate& co); // removes co from every term
BOOL_32 Exists(Coordinate& co); // TRUE if any term contains co
VOID resize(UINT_32 n); // sets the bit count; newly added terms are cleared
UINT_32 getsize(); // number of address bits
// Evaluates the equation: bit i of the result is the XOR value of term i.
virtual UINT_64 solve(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const;
// Inverse of solve(): recovers the coordinates from an address.
virtual VOID solveAddr(UINT_64 addr, UINT_32 sliceInM,
UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const;
// For copy/reverse, num == 0xFFFFFFFF selects the full size of this equation.
VOID copy(CoordEq& o, UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF);
VOID reverse(UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF);
VOID xorin(CoordEq& x, UINT_32 start = 0); // adds the terms of x into this equation from bit start
UINT_32 Filter(INT_8 f, Coordinate& co, UINT_32 start = 0, INT_8 axis = '\0');
VOID shift(INT_32 amount, INT_32 start = 0);
virtual CoordTerm& operator[](UINT_32 i); // unchecked term access
// For mort2d/mort3d, end == 0 selects the last bit of the equation; the coordinates passed in
// are advanced as they are consumed.
VOID mort2d(Coordinate& c0, Coordinate& c1, UINT_32 start = 0, UINT_32 end = 0);
VOID mort3d(Coordinate& c0, Coordinate& c1, Coordinate& c2, UINT_32 start = 0, UINT_32 end = 0);
BOOL_32 operator==(const CoordEq& b);
BOOL_32 operator!=(const CoordEq& b);
private:
static const UINT_32 MaxEqBits = 64; // capacity of m_eq
UINT_32 m_numBits; // number of valid terms in m_eq
CoordTerm m_eq[MaxEqBits];
};
} // V2
} // Addr
#endif
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
@@ -0,0 +1,583 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/**
************************************************************************************************************************
* @file gfx10addrlib.h
* @brief Contains the Gfx10Lib class definition.
************************************************************************************************************************
*/
#ifndef __GFX10_ADDR_LIB_H__
#define __GFX10_ADDR_LIB_H__
#include "addrlib2.h"
#include "coord.h"
#include "gfx10SwizzlePattern.h"
namespace Addr
{
namespace V2
{
/**
************************************************************************************************************************
* @brief GFX10 specific settings structure. All settings are bit-fields packed into two 32-bit words:
* the first word is entirely reserved, the second holds four 1-bit flags plus 28 reserved bits.
************************************************************************************************************************
*/
struct Gfx10ChipSettings
{
struct
{
UINT_32 reserved1 : 32;
// Misc configuration bits
UINT_32 isDcn2 : 1;
UINT_32 supportRbPlus : 1; // consulted by Gfx10Lib::GetEffectiveNumPipes()
UINT_32 dsMipmapHtileFix : 1;
UINT_32 dccUnsup3DSwDis : 1;
UINT_32 reserved2 : 28;
};
};
/**
************************************************************************************************************************
* @brief GFX10 data surface type. Passed to the meta-data helpers of Gfx10Lib (e.g. GetMetaElementSizeLog2,
* GetMetaBlkSize) to select the kind of surface being described.
************************************************************************************************************************
*/
enum Gfx10DataType
{
Gfx10DataColor,
Gfx10DataDepthStencil,
Gfx10DataFmask
};
// Swizzle-mode bit masks. In every mask below, bit N is set when the swizzle mode whose
// ADDR_SW_* enumerator has the value N belongs to the named group.

// Groups by block size.
const UINT_32 Gfx10LinearSwModeMask = (1u << ADDR_SW_LINEAR);
const UINT_32 Gfx10Blk256BSwModeMask = (1u << ADDR_SW_256B_S) |
(1u << ADDR_SW_256B_D);
const UINT_32 Gfx10Blk4KBSwModeMask = (1u << ADDR_SW_4KB_S) |
(1u << ADDR_SW_4KB_D) |
(1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_4KB_D_X);
const UINT_32 Gfx10Blk64KBSwModeMask = (1u << ADDR_SW_64KB_S) |
(1u << ADDR_SW_64KB_D) |
(1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_64KB_D_T) |
(1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X);
const UINT_32 Gfx10BlkVarSwModeMask = (1u << ADDR_SW_VAR_Z_X) |
(1u << ADDR_SW_VAR_R_X);
// Groups by swizzle type (the _Z, _S, _D and _R mode families).
const UINT_32 Gfx10ZSwModeMask = (1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_VAR_Z_X);
const UINT_32 Gfx10StandardSwModeMask = (1u << ADDR_SW_256B_S) |
(1u << ADDR_SW_4KB_S) |
(1u << ADDR_SW_64KB_S) |
(1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_64KB_S_X);
const UINT_32 Gfx10DisplaySwModeMask = (1u << ADDR_SW_256B_D) |
(1u << ADDR_SW_4KB_D) |
(1u << ADDR_SW_64KB_D) |
(1u << ADDR_SW_64KB_D_T) |
(1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_64KB_D_X);
const UINT_32 Gfx10RenderSwModeMask = (1u << ADDR_SW_64KB_R_X) |
(1u << ADDR_SW_VAR_R_X);
// Groups by suffix: the _X modes (including both variable-size modes), the _T modes, and
// their union.
const UINT_32 Gfx10XSwModeMask = (1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X) |
Gfx10BlkVarSwModeMask;
const UINT_32 Gfx10TSwModeMask = (1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_64KB_D_T);
const UINT_32 Gfx10XorSwModeMask = Gfx10XSwModeMask |
Gfx10TSwModeMask;
// Mode sets per resource dimension (1D / 2D / 3D).
const UINT_32 Gfx10Rsrc1dSwModeMask = Gfx10LinearSwModeMask |
Gfx10RenderSwModeMask |
Gfx10ZSwModeMask;
const UINT_32 Gfx10Rsrc2dSwModeMask = Gfx10LinearSwModeMask |
Gfx10Blk256BSwModeMask |
Gfx10Blk4KBSwModeMask |
Gfx10Blk64KBSwModeMask |
Gfx10BlkVarSwModeMask;
const UINT_32 Gfx10Rsrc3dSwModeMask = (1u << ADDR_SW_LINEAR) |
(1u << ADDR_SW_4KB_S) |
(1u << ADDR_SW_64KB_S) |
(1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X) |
Gfx10BlkVarSwModeMask;
// PRT sets: the 4KB/64KB modes without the _X modes; the 3D PRT set additionally excludes the
// display modes.
const UINT_32 Gfx10Rsrc2dPrtSwModeMask = (Gfx10Blk4KBSwModeMask | Gfx10Blk64KBSwModeMask) & ~Gfx10XSwModeMask;
const UINT_32 Gfx10Rsrc3dPrtSwModeMask = Gfx10Rsrc2dPrtSwModeMask & ~Gfx10DisplaySwModeMask;
// 3D thin / thick split: thick is every 3D mode that is neither thin nor linear, further
// divided by block size.
const UINT_32 Gfx10Rsrc3dThin64KBSwModeMask = (1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_R_X);
const UINT_32 Gfx10Rsrc3dThinSwModeMask = Gfx10Rsrc3dThin64KBSwModeMask | Gfx10BlkVarSwModeMask;
const UINT_32 Gfx10Rsrc3dThickSwModeMask = Gfx10Rsrc3dSwModeMask & ~(Gfx10Rsrc3dThinSwModeMask | Gfx10LinearSwModeMask);
const UINT_32 Gfx10Rsrc3dThick4KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10Blk4KBSwModeMask;
const UINT_32 Gfx10Rsrc3dThick64KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10Blk64KBSwModeMask;
// MSAA set: the Z and render modes.
const UINT_32 Gfx10MsaaSwModeMask = Gfx10ZSwModeMask |
Gfx10RenderSwModeMask;
// DCN2 sets: the 64-bpp set is a superset of the non-64-bpp set, adding the display (_D) modes.
const UINT_32 Dcn2NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR) |
(1u << ADDR_SW_4KB_S) |
(1u << ADDR_SW_64KB_S) |
(1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_64KB_R_X);
const UINT_32 Dcn2Bpp64SwModeMask = (1u << ADDR_SW_4KB_D) |
(1u << ADDR_SW_64KB_D) |
(1u << ADDR_SW_64KB_D_T) |
(1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_64KB_D_X) |
Dcn2NonBpp64SwModeMask;
/**
************************************************************************************************************************
* @brief This class is the GFX10 specific address library
* function set.
************************************************************************************************************************
*/
class Gfx10Lib : public Lib
{
public:
/// Creates Gfx10Lib object: allocates the storage through the client allocator and constructs
/// the object in place. Returns NULL when the allocation fails.
static Addr::Lib* CreateObj(const Client* pClient)
{
    Addr::Lib* pLib    = NULL;
    VOID*      pObjMem = Object::ClientAlloc(sizeof(Gfx10Lib), pClient);

    if (pObjMem != NULL)
    {
        pLib = new (pObjMem) Gfx10Lib(pClient);
    }

    return pLib;
}
protected:
Gfx10Lib(const Client* pClient);
virtual ~Gfx10Lib();
// Returns the isStd flag of the swizzle mode's entry in m_swizzleModeTable.
// resourceType is not used by this implementation.
virtual BOOL_32 HwlIsStandardSwizzle(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const
{
return m_swizzleModeTable[swizzleMode].isStd;
}
// Returns the isDisp flag of the swizzle mode's entry in m_swizzleModeTable.
// resourceType is not used by this implementation.
virtual BOOL_32 HwlIsDisplaySwizzle(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const
{
return m_swizzleModeTable[swizzleMode].isDisp;
}
// A resource is thin when it is 1D or 2D, or when it is 3D and its swizzle mode is neither a
// standard nor a display mode. The checks are made in that order and stop at the first hit.
virtual BOOL_32 HwlIsThin(
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    BOOL_32 thin = (IsTex1d(resourceType) == TRUE);

    if (!thin)
    {
        thin = (IsTex2d(resourceType) == TRUE);
    }

    if (!thin)
    {
        thin = ((IsTex3d(resourceType) == TRUE) &&
                (m_swizzleModeTable[swizzleMode].isStd == FALSE) &&
                (m_swizzleModeTable[swizzleMode].isDisp == FALSE));
    }

    return thin;
}
// A resource is thick when it is 3D and its swizzle mode is a standard or a display mode.
// The swizzle mode table is consulted only for 3D resources.
virtual BOOL_32 HwlIsThick(
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    BOOL_32 thick = FALSE;

    if (IsTex3d(resourceType) == TRUE)
    {
        thick = (m_swizzleModeTable[swizzleMode].isStd || m_swizzleModeTable[swizzleMode].isDisp);
    }

    return thick;
}
virtual ADDR_E_RETURNCODE HwlComputeHtileInfo(
const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,
ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo(
const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,
ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,
ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut);
virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord(
const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut);
virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr(
const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut);
virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord(
const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut);
virtual UINT_32 HwlGetEquationIndex(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
// Exposes the equation table: stores m_equationTable in *ppEquationTable and returns
// m_numEquations. ppEquationTable is dereferenced without a NULL check.
virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const
{
*ppEquationTable = m_equationTable;
return m_numEquations;
}
virtual ADDR_E_RETURNCODE HwlComputePipeBankXor(
const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor(
const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern(
const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting(
const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoTiled(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled(
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
virtual UINT_32 HwlComputeMaxBaseAlignments() const;
virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;
virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn);
virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);
// Initialize equation table
VOID InitEquationTable();
ADDR_E_RETURNCODE ComputeSurfaceInfoMacroTiled(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeSurfaceInfoMicroTiled(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordMacroTiled(
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordMicroTiled(
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
private:
UINT_32 ComputeOffsetFromSwizzlePattern(
const UINT_64* pPattern,
UINT_32 numBits,
UINT_32 x,
UINT_32 y,
UINT_32 z,
UINT_32 s) const;
UINT_32 ComputeOffsetFromEquation(
const ADDR_EQUATION* pEq,
UINT_32 x,
UINT_32 y,
UINT_32 z) const;
ADDR_E_RETURNCODE ComputeStereoInfo(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
UINT_32 blkHeight,
UINT_32* pAlignY,
UINT_32* pRightXor) const;
// Returns the DCC compress block dimensions for the given resource type, swizzle mode and bpp.
// The table row is selected by Log2(bpp >> 3). Thin resources take width and height from
// Block256_2d with a depth of 1; all other resources take the whole Block256_3d row.
Dim3d GetDccCompressBlk(
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode,
    UINT_32          bpp) const
{
    const UINT_32 row = Log2(bpp >> 3);
    Dim3d         blk;

    if (!IsThin(resourceType, swizzleMode))
    {
        blk = Block256_3d[row];
    }
    else
    {
        blk.w = Block256_2d[row].w;
        blk.h = Block256_2d[row].h;
        blk.d = 1;
    }

    return blk;
}
// Computes the dimensions of mip level mipId from the mip 0 dimensions. Each dimension is
// clamped to at least 1 and then passed through ShiftCeil with mipId. The depth is computed
// only when pMipDepth is not NULL; pMipWidth and pMipHeight must always be valid.
static void GetMipSize(
    UINT_32  mip0Width,
    UINT_32  mip0Height,
    UINT_32  mip0Depth,
    UINT_32  mipId,
    UINT_32* pMipWidth,
    UINT_32* pMipHeight,
    UINT_32* pMipDepth = NULL)
{
    const UINT_32 baseWidth  = Max(mip0Width, 1u);
    *pMipWidth               = ShiftCeil(baseWidth, mipId);

    const UINT_32 baseHeight = Max(mip0Height, 1u);
    *pMipHeight              = ShiftCeil(baseHeight, mipId);

    if (pMipDepth == NULL)
    {
        return;
    }

    const UINT_32 baseDepth = Max(mip0Depth, 1u);
    *pMipDepth              = ShiftCeil(baseDepth, mipId);
}
const ADDR_SW_PATINFO* GetSwizzlePatternInfo(
AddrSwizzleMode swizzleMode,
AddrResourceType resourceType,
UINT_32 log2Elem,
UINT_32 numFrag) const;
// Assembles the 20-entry swizzle pattern described by pPatInfo. Four table rows, selected by
// the nibble indices in pPatInfo, are copied into pSwizzle at offsets 0, 8, 12 and 16; the
// amount copied from each table is the size of one row of that table.
// NOTE(review): presumably the rows hold 8, 4, 4 and 4 entries so that the copies exactly fill
// the 20-entry array -- confirm against gfx10SwizzlePattern.h.
VOID GetSwizzlePatternFromPatternInfo(
const ADDR_SW_PATINFO* pPatInfo,
ADDR_BIT_SETTING (&pSwizzle)[20]) const
{
// Entries starting at index 0: row from the NIBBLE01 table.
memcpy(pSwizzle,
GFX10_SW_PATTERN_NIBBLE01[pPatInfo->nibble01Idx],
sizeof(GFX10_SW_PATTERN_NIBBLE01[pPatInfo->nibble01Idx]));
// Entries starting at index 8: row from the NIBBLE2 table.
memcpy(&pSwizzle[8],
GFX10_SW_PATTERN_NIBBLE2[pPatInfo->nibble2Idx],
sizeof(GFX10_SW_PATTERN_NIBBLE2[pPatInfo->nibble2Idx]));
// Entries starting at index 12: row from the NIBBLE3 table.
memcpy(&pSwizzle[12],
GFX10_SW_PATTERN_NIBBLE3[pPatInfo->nibble3Idx],
sizeof(GFX10_SW_PATTERN_NIBBLE3[pPatInfo->nibble3Idx]));
// Entries starting at index 16: row from the NIBBLE4 table.
memcpy(&pSwizzle[16],
GFX10_SW_PATTERN_NIBBLE4[pPatInfo->nibble4Idx],
sizeof(GFX10_SW_PATTERN_NIBBLE4[pPatInfo->nibble4Idx]));
}
// The members below are declarations only; their bodies are not part of this header section,
// so the notes describe signatures rather than behavior.

// Fills *pEquation from a swizzle pattern entry (pPatInfo) for the given element size,
// resource type and swizzle mode.
VOID ConvertSwizzlePatternToEquation(
UINT_32 elemLog2,
AddrResourceType rsrcType,
AddrSwizzleMode swMode,
const ADDR_SW_PATINFO* pPatInfo,
ADDR_EQUATION* pEquation) const;
// Static per-data-type queries; both return a signed log2 value.
static INT_32 GetMetaElementSizeLog2(Gfx10DataType dataType);
static INT_32 GetMetaCacheSizeLog2(Gfx10DataType dataType);
// Writes a block size to *pBlock.
// NOTE(review): the name suggests the Dim3d fields hold log2 values - confirm in the definition.
void GetBlk256SizeLog2(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode,
UINT_32 elemLog2,
UINT_32 numSamplesLog2,
Dim3d* pBlock) const;
// Same output convention as GetBlk256SizeLog2, with the meta data type as an extra input.
void GetCompressedBlockSizeLog2(
Gfx10DataType dataType,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode,
UINT_32 elemLog2,
UINT_32 numSamplesLog2,
Dim3d* pBlock) const;
// Returns a signed log2 overlap value for the given surface description.
INT_32 GetMetaOverlapLog2(
Gfx10DataType dataType,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode,
UINT_32 elemLog2,
UINT_32 numSamplesLog2) const;
// Variant of the overlap query that takes no data type or sample count.
INT_32 Get3DMetaOverlapLog2(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode,
UINT_32 elemLog2) const;
// Returns a meta block size and also reports block dimensions through *pBlock.
// NOTE(review): units of the return value and of *pBlock are not visible here - confirm.
UINT_32 GetMetaBlkSize(
Gfx10DataType dataType,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode,
UINT_32 elemLog2,
UINT_32 numSamplesLog2,
BOOL_32 pipeAlign,
Dim3d* pBlock) const;
// Returns a signed pipe rotation amount for the resource type / swizzle mode pair.
INT_32 GetPipeRotateAmount(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const;
/// Returns m_pipesLog2, capped to (m_numSaLog2 + 1) when RB+ is supported.
/// Despite the name, the operands are the *Log2 members, so the result is presumably a
/// log2 count as well - confirm against callers.
INT_32 GetEffectiveNumPipes() const
{
    // The cap applies only with RB+ support and only when it is strictly below m_pipesLog2.
    const bool capBySa = (m_settings.supportRbPlus != FALSE) &&
                         ((m_numSaLog2 + 1) < m_pipesLog2);

    if (capBySa)
    {
        return m_numSaLog2 + 1;
    }

    return m_pipesLog2;
}
/// Tells whether a resource type / swizzle mode pair is RB aligned:
///   - 2D resources with an RT-opt or Z-order swizzle, or
///   - 3D resources with a display swizzle.
BOOL_32 IsRbAligned(
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    // Classify the swizzle mode up front, in the same order as before.
    const BOOL_32 rtOptSwizzle   = IsRtOptSwizzle(swizzleMode);
    const BOOL_32 zOrderSwizzle  = IsZOrderSwizzle(swizzleMode);
    const BOOL_32 displaySwizzle = IsDisplaySwizzle(swizzleMode);

    const bool aligned2d = IsTex2d(resourceType) && (rtOptSwizzle || zOrderSwizzle);
    if (aligned2d)
    {
        return TRUE;
    }

    return (IsTex3d(resourceType) && displaySwizzle);
}
// Predicate over a surface-info input; declaration only, criteria are defined elsewhere.
BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
// Returns a mip count for a block of 2^blockSizeLog2 bytes (declaration only).
// NOTE(review): presumably the maximum number of mips that fit in the mip tail - confirm.
UINT_32 GetMaxNumMipsInTail(UINT_32 blockSizeLog2, BOOL_32 isThin) const;
/// Converts a set of allowed swizzle modes into the set of allowed block types.
/// A block flag is TRUE when at least one swizzle mode of that block type is present.
/// micro, linear and var are resource independent; the macro flags use different masks
/// for 3D and non-3D resources.
static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet, AddrResourceType rsrcType)
{
    ADDR2_BLOCK_SET blockSet = {};

    blockSet.micro  = ((allowedSwModeSet.value & Gfx10Blk256BSwModeMask) != 0) ? TRUE : FALSE;
    blockSet.linear = ((allowedSwModeSet.value & Gfx10LinearSwModeMask) != 0) ? TRUE : FALSE;
    blockSet.var    = ((allowedSwModeSet.value & Gfx10BlkVarSwModeMask) != 0) ? TRUE : FALSE;

    if (rsrcType != ADDR_RSRC_TEX_3D)
    {
        // Non-3D resources only ever get thin macro blocks.
        blockSet.macroThin4KB  = ((allowedSwModeSet.value & Gfx10Blk4KBSwModeMask) != 0) ? TRUE : FALSE;
        blockSet.macroThin64KB = ((allowedSwModeSet.value & Gfx10Blk64KBSwModeMask) != 0) ? TRUE : FALSE;
    }
    else
    {
        // 3D resources: macroThin4KB is never set on this path.
        blockSet.macroThick4KB  = ((allowedSwModeSet.value & Gfx10Rsrc3dThick4KBSwModeMask) != 0) ? TRUE : FALSE;
        blockSet.macroThin64KB  = ((allowedSwModeSet.value & Gfx10Rsrc3dThin64KBSwModeMask) != 0) ? TRUE : FALSE;
        blockSet.macroThick64KB = ((allowedSwModeSet.value & Gfx10Rsrc3dThick64KBSwModeMask) != 0) ? TRUE : FALSE;
    }

    return blockSet;
}
/// Converts a set of allowed swizzle modes into the set of allowed swizzle types
/// (Z, S, D, R). A type flag is TRUE when any swizzle mode of that type is present.
static ADDR2_SWTYPE_SET GetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet)
{
    ADDR2_SWTYPE_SET swTypeSet = {};

    swTypeSet.sw_Z = ((allowedSwModeSet.value & Gfx10ZSwModeMask) != 0) ? TRUE : FALSE;
    swTypeSet.sw_S = ((allowedSwModeSet.value & Gfx10StandardSwModeMask) != 0) ? TRUE : FALSE;
    swTypeSet.sw_D = ((allowedSwModeSet.value & Gfx10DisplaySwModeMask) != 0) ? TRUE : FALSE;
    swTypeSet.sw_R = ((allowedSwModeSet.value & Gfx10RenderSwModeMask) != 0) ? TRUE : FALSE;

    return swTypeSet;
}
/// Tells whether a mip level belongs to the mip tail: its width and height must both fit
/// within mipTailDim, and numMipsToTheEnd must not exceed maxNumMipsInTail.
BOOL_32 IsInMipTail(
    Dim3d   mipTailDim,
    UINT_32 maxNumMipsInTail,
    UINT_32 mipWidth,
    UINT_32 mipHeight,
    UINT_32 numMipsToTheEnd) const
{
    if (mipWidth > mipTailDim.w)
    {
        return FALSE;
    }

    if (mipHeight > mipTailDim.h)
    {
        return FALSE;
    }

    return (numMipsToTheEnd <= maxNumMipsInTail);
}
/// Returns the number of bank XOR bits for a block of 2^blockBits bytes.
/// The bits left above (pipe interleave + pipes + ColumnBits) are used, limited to BankBits;
/// zero is returned when the block has no bits above that boundary.
UINT_32 GetBankXorBits(UINT_32 blockBits) const
{
    // Number of low address bits consumed before any bank bit.
    const UINT_32 lowBits = m_pipeInterleaveLog2 + m_pipesLog2 + ColumnBits;

    if (blockBits <= lowBits)
    {
        return 0;
    }

    return Min(blockBits - lowBits, BankBits);
}
// Input validation predicates over a surface-info request (declarations only).
BOOL_32 ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
BOOL_32 ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
// Bit counts used by GetBankXorBits(): bits reserved below the bank bits, and the
// upper limit on the number of bank XOR bits.
static const UINT_32 ColumnBits = 2;
static const UINT_32 BankBits = 4;
// NOTE(review): not referenced in the visible part of this header; meaning to be confirmed.
static const UINT_32 UnalignedDccType = 3;
// Lookup tables with one entry per supported bpp (MaxNumOfBpp entries); defined out of line.
// Block256_3d is read by GetDccCompressBlk() for non-thin surfaces.
static const Dim3d Block256_3d[MaxNumOfBpp];
static const Dim3d Block64K_Log2_3d[MaxNumOfBpp];
static const Dim3d Block4K_Log2_3d[MaxNumOfBpp];
// One flags entry per swizzle mode (ADDR_SW_MAX_TYPE entries); defined out of line.
static const SwizzleModeFlags SwizzleModeTable[ADDR_SW_MAX_TYPE];
// Log2 of the number of packers
UINT_32 m_numPkrLog2;
// Log2 of the number of shader arrays; read by GetEffectiveNumPipes()
UINT_32 m_numSaLog2;
// Chip settings; GetEffectiveNumPipes() reads supportRbPlus from here
Gfx10ChipSettings m_settings;
// Base indices for color / xmask / DCC.
// NOTE(review): what these index into is not visible in this section - confirm.
UINT_32 m_colorBaseIndex;
UINT_32 m_xmaskBaseIndex;
UINT_32 m_dccBaseIndex;
};
} // V2
} // Addr
#endif
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
@@ -0,0 +1,640 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/**
************************************************************************************************************************
* @file gfx9addrlib.h
* @brief Contains the Gfx9Lib class definition.
************************************************************************************************************************
*/
#ifndef __GFX9_ADDR_LIB_H__
#define __GFX9_ADDR_LIB_H__
#include "addrlib2.h"
#include "coord.h"
namespace Addr
{
namespace V2
{
/**
************************************************************************************************************************
* @brief GFX9 specific settings structure.
************************************************************************************************************************
*/
// Single-bit chip identification and workaround flags for GFX9.
// The flags live in an anonymous inner struct, so they are accessed directly as members of
// Gfx9ChipSettings. Each of the three groups declares 32 bits in total: the named flags
// plus a reserved filler field.
struct Gfx9ChipSettings
{
struct
{
// Asic/Generation name (5 flags + 27 reserved bits)
UINT_32 isArcticIsland : 1;
UINT_32 isVega10 : 1;
UINT_32 isRaven : 1;
UINT_32 isVega12 : 1;
UINT_32 isVega20 : 1;
UINT_32 reserved0 : 27;
// Display engine IP version name (2 flags + 30 reserved bits)
UINT_32 isDce12 : 1;
UINT_32 isDcn1 : 1;
UINT_32 reserved1 : 30;
// Misc configuration bits (5 flags + 27 reserved bits)
UINT_32 metaBaseAlignFix : 1;
UINT_32 depthPipeXorDisable : 1;
UINT_32 htileAlignFix : 1;
UINT_32 applyAliasFix : 1;
UINT_32 htileCacheRbConflict: 1;
UINT_32 reserved2 : 27;
};
};
/**
************************************************************************************************************************
* @brief GFX9 data surface type.
************************************************************************************************************************
*/
// Kind of data surface a GFX9 computation refers to.
// Values are spelled out; they match the implicit numbering of the enumerators.
enum Gfx9DataType
{
    Gfx9DataColor        = 0,  // color surface
    Gfx9DataDepthStencil = 1,  // depth/stencil surface
    Gfx9DataFmask        = 2   // fmask surface
};
// Swizzle mode bit masks. Bit N of a mask corresponds to the AddrSwizzleMode enumerator with
// value N (each term is 1u << ADDR_SW_*), so masks can be combined and tested with bit ops.

// --- Masks grouped by block size ---
// Linear mode only.
const UINT_32 Gfx9LinearSwModeMask = (1u << ADDR_SW_LINEAR);
// All ADDR_SW_256B_* modes.
const UINT_32 Gfx9Blk256BSwModeMask = (1u << ADDR_SW_256B_S) |
(1u << ADDR_SW_256B_D) |
(1u << ADDR_SW_256B_R);
// All ADDR_SW_4KB_* modes (plain and _X).
const UINT_32 Gfx9Blk4KBSwModeMask = (1u << ADDR_SW_4KB_Z) |
(1u << ADDR_SW_4KB_S) |
(1u << ADDR_SW_4KB_D) |
(1u << ADDR_SW_4KB_R) |
(1u << ADDR_SW_4KB_Z_X) |
(1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_4KB_R_X);
// All ADDR_SW_64KB_* modes (plain, _T and _X).
const UINT_32 Gfx9Blk64KBSwModeMask = (1u << ADDR_SW_64KB_Z) |
(1u << ADDR_SW_64KB_S) |
(1u << ADDR_SW_64KB_D) |
(1u << ADDR_SW_64KB_R) |
(1u << ADDR_SW_64KB_Z_T) |
(1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_64KB_D_T) |
(1u << ADDR_SW_64KB_R_T) |
(1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X);
// --- Masks grouped by swizzle type (Z / S / D / R) ---
// All *_Z* modes.
const UINT_32 Gfx9ZSwModeMask = (1u << ADDR_SW_4KB_Z) |
(1u << ADDR_SW_64KB_Z) |
(1u << ADDR_SW_64KB_Z_T) |
(1u << ADDR_SW_4KB_Z_X) |
(1u << ADDR_SW_64KB_Z_X);
// All *_S* modes.
const UINT_32 Gfx9StandardSwModeMask = (1u << ADDR_SW_256B_S) |
(1u << ADDR_SW_4KB_S) |
(1u << ADDR_SW_64KB_S) |
(1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_64KB_S_X);
// All *_D* modes.
const UINT_32 Gfx9DisplaySwModeMask = (1u << ADDR_SW_256B_D) |
(1u << ADDR_SW_4KB_D) |
(1u << ADDR_SW_64KB_D) |
(1u << ADDR_SW_64KB_D_T) |
(1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_64KB_D_X);
// All *_R* modes.
const UINT_32 Gfx9RotateSwModeMask = (1u << ADDR_SW_256B_R) |
(1u << ADDR_SW_4KB_R) |
(1u << ADDR_SW_64KB_R) |
(1u << ADDR_SW_64KB_R_T) |
(1u << ADDR_SW_4KB_R_X) |
(1u << ADDR_SW_64KB_R_X);
// --- Masks grouped by suffix ---
// All *_X modes.
const UINT_32 Gfx9XSwModeMask = (1u << ADDR_SW_4KB_Z_X) |
(1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_4KB_R_X) |
(1u << ADDR_SW_64KB_Z_X) |
(1u << ADDR_SW_64KB_S_X) |
(1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X);
// All *_T modes.
const UINT_32 Gfx9TSwModeMask = (1u << ADDR_SW_64KB_Z_T) |
(1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_64KB_D_T) |
(1u << ADDR_SW_64KB_R_T);
// Union of the _X and _T modes.
const UINT_32 Gfx9XorSwModeMask = Gfx9XSwModeMask |
Gfx9TSwModeMask;
// Union of linear and the four swizzle type masks.
const UINT_32 Gfx9AllSwModeMask = Gfx9LinearSwModeMask |
Gfx9ZSwModeMask |
Gfx9StandardSwModeMask |
Gfx9DisplaySwModeMask |
Gfx9RotateSwModeMask;
// --- Masks derived per resource kind ---
// 1D: linear only. 2D: everything. 3D: everything except 256B and rotate modes.
const UINT_32 Gfx9Rsrc1dSwModeMask = Gfx9LinearSwModeMask;
const UINT_32 Gfx9Rsrc2dSwModeMask = Gfx9AllSwModeMask;
const UINT_32 Gfx9Rsrc3dSwModeMask = Gfx9AllSwModeMask & ~Gfx9Blk256BSwModeMask & ~Gfx9RotateSwModeMask;
// PRT: 4KB/64KB modes without the _X modes; the 3D variant also drops rotate and display modes.
const UINT_32 Gfx9Rsrc2dPrtSwModeMask = (Gfx9Blk4KBSwModeMask | Gfx9Blk64KBSwModeMask) & ~Gfx9XSwModeMask;
const UINT_32 Gfx9Rsrc3dPrtSwModeMask = Gfx9Rsrc2dPrtSwModeMask & ~Gfx9RotateSwModeMask & ~Gfx9DisplaySwModeMask;
// 3D thin: display modes other than 256B, then split by block size.
const UINT_32 Gfx9Rsrc3dThinSwModeMask = Gfx9DisplaySwModeMask & ~Gfx9Blk256BSwModeMask;
const UINT_32 Gfx9Rsrc3dThin4KBSwModeMask = Gfx9Rsrc3dThinSwModeMask & Gfx9Blk4KBSwModeMask;
const UINT_32 Gfx9Rsrc3dThin64KBSwModeMask = Gfx9Rsrc3dThinSwModeMask & Gfx9Blk64KBSwModeMask;
// 3D thick: the 3D modes that are neither thin nor linear, then split by block size.
const UINT_32 Gfx9Rsrc3dThickSwModeMask = Gfx9Rsrc3dSwModeMask & ~(Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask);
const UINT_32 Gfx9Rsrc3dThick4KBSwModeMask = Gfx9Rsrc3dThickSwModeMask & Gfx9Blk4KBSwModeMask;
const UINT_32 Gfx9Rsrc3dThick64KBSwModeMask = Gfx9Rsrc3dThickSwModeMask & Gfx9Blk64KBSwModeMask;
// MSAA: everything except 256B and linear modes.
const UINT_32 Gfx9MsaaSwModeMask = Gfx9AllSwModeMask & ~Gfx9Blk256BSwModeMask & ~Gfx9LinearSwModeMask;
// --- Masks named after display engine versions (DCE12 / DCN1) and bpp ---
// NOTE(review): how these are applied is decided in the .cpp (see IsValidDisplaySwizzleMode
// declared in Gfx9Lib); confirm there before changing any bit.
const UINT_32 Dce12NonBpp32SwModeMask = (1u << ADDR_SW_LINEAR) |
(1u << ADDR_SW_4KB_D) |
(1u << ADDR_SW_4KB_R) |
(1u << ADDR_SW_64KB_D) |
(1u << ADDR_SW_64KB_R) |
(1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_4KB_R_X) |
(1u << ADDR_SW_64KB_D_X) |
(1u << ADDR_SW_64KB_R_X);
// Dce12NonBpp32SwModeMask plus the 256B display and rotate modes.
const UINT_32 Dce12Bpp32SwModeMask = (1u << ADDR_SW_256B_D) |
(1u << ADDR_SW_256B_R) |
Dce12NonBpp32SwModeMask;
const UINT_32 Dcn1NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR) |
(1u << ADDR_SW_4KB_S) |
(1u << ADDR_SW_64KB_S) |
(1u << ADDR_SW_64KB_S_T) |
(1u << ADDR_SW_4KB_S_X) |
(1u << ADDR_SW_64KB_S_X);
// Dcn1NonBpp64SwModeMask plus the 4KB/64KB display modes.
const UINT_32 Dcn1Bpp64SwModeMask = (1u << ADDR_SW_4KB_D) |
(1u << ADDR_SW_64KB_D) |
(1u << ADDR_SW_64KB_D_T) |
(1u << ADDR_SW_4KB_D_X) |
(1u << ADDR_SW_64KB_D_X) |
Dcn1NonBpp64SwModeMask;
/**
************************************************************************************************************************
* @brief GFX9 meta equation parameters
************************************************************************************************************************
*/
// Full parameter set describing one meta equation request. It is the argument type of
// Gfx9Lib::GetMetaEquation() and the element type of Gfx9Lib::m_cachedMetaEqKey, and its
// fields mirror the parameter list of Gfx9Lib::GenMetaEquation().
// NOTE(review): how keys are compared is not visible here; if it is a raw memory compare,
// adding fields or padding would affect cache hits - confirm before changing the layout.
struct MetaEqParams
{
UINT_32 maxMip;
UINT_32 elementBytesLog2;
UINT_32 numSamplesLog2;
ADDR2_META_FLAGS metaFlag;
Gfx9DataType dataSurfaceType;
AddrSwizzleMode swizzleMode;
AddrResourceType resourceType;
// Meta block dimensions, log2
UINT_32 metaBlkWidthLog2;
UINT_32 metaBlkHeightLog2;
UINT_32 metaBlkDepthLog2;
// Compress block dimensions, log2
UINT_32 compBlkWidthLog2;
UINT_32 compBlkHeightLog2;
UINT_32 compBlkDepthLog2;
};
/**
************************************************************************************************************************
* @brief This class is the GFX9 specific address library
* function set.
************************************************************************************************************************
*/
class Gfx9Lib : public Lib
{
public:
/// Factory for Gfx9Lib: obtains storage through Object::ClientAlloc and constructs the
/// object in place. Returns NULL when the allocation fails.
static Addr::Lib* CreateObj(const Client* pClient)
{
    Addr::Lib* pLib = NULL;
    VOID*      pMem = Object::ClientAlloc(sizeof(Gfx9Lib), pClient);

    if (pMem != NULL)
    {
        // Placement new: the storage comes from the client allocator, not operator new.
        pLib = new (pMem) Gfx9Lib(pClient);
    }

    return pLib;
}
protected:
// Constructor and destructor are protected: within this header, instances are created
// through the static CreateObj() factory.
Gfx9Lib(const Client* pClient);
virtual ~Gfx9Lib();
/// A swizzle mode counts as "standard" when its table entry is flagged isStd, or when the
/// resource is 3D and the entry is flagged isDisp.
virtual BOOL_32 HwlIsStandardSwizzle(
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    if (m_swizzleModeTable[swizzleMode].isStd)
    {
        return TRUE;
    }

    // Display swizzles are treated as standard for 3D resources only.
    return (IsTex3d(resourceType) && m_swizzleModeTable[swizzleMode].isDisp);
}
/// A swizzle mode counts as "display" only for 2D resources whose table entry is flagged isDisp.
virtual BOOL_32 HwlIsDisplaySwizzle(
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    BOOL_32 isDisplay = FALSE;

    if (IsTex2d(resourceType))
    {
        isDisplay = (m_swizzleModeTable[swizzleMode].isDisp != 0);
    }

    return isDisplay;
}
/// Thin surfaces are all 2D resources, plus 3D resources whose swizzle mode is flagged
/// neither isZ nor isStd.
virtual BOOL_32 HwlIsThin(
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    if (IsTex2d(resourceType) == TRUE)
    {
        return TRUE;
    }

    if (IsTex3d(resourceType) != TRUE)
    {
        // Neither 2D nor 3D: never thin.
        return FALSE;
    }

    return ((m_swizzleModeTable[swizzleMode].isZ == FALSE) &&
            (m_swizzleModeTable[swizzleMode].isStd == FALSE));
}
/// Thick surfaces are 3D resources whose swizzle mode is flagged isZ or isStd.
virtual BOOL_32 HwlIsThick(
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode) const
{
    if (!IsTex3d(resourceType))
    {
        return FALSE;
    }

    return (m_swizzleModeTable[swizzleMode].isZ || m_swizzleModeTable[swizzleMode].isStd);
}
// Virtual Hwl* entry points, declared here and defined out of line. Each takes an input
// structure (pIn) and fills an output structure (pOut), returning an ADDR_E_RETURNCODE.

// Metadata surface info queries (htile / cmask / DCC); const.
virtual ADDR_E_RETURNCODE HwlComputeHtileInfo(
const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,
ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo(
const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,
ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,
ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const;
// Metadata address <-> coordinate conversions. Unlike the info queries above these are
// NOT const. NOTE(review): presumably because they go through GetMetaEquation(), which is
// also non-const and sits next to the m_cachedMetaEq cache members - confirm in the .cpp.
virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut);
virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord(
const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut);
virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr(
const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut);
virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord(
const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut);
// Returns an equation index for the surface described by pIn; pOut is non-const, so the
// implementation may also write to it.
virtual UINT_32 HwlGetEquationIndex(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
// Equation builders: fill *pEquation for one resource type / swizzle mode / element size.
// The three variants share one signature (256B block, thin, thick).
virtual ADDR_E_RETURNCODE HwlComputeBlock256Equation(
AddrResourceType rsrcType,
AddrSwizzleMode swMode,
UINT_32 elementBytesLog2,
ADDR_EQUATION* pEquation) const;
virtual ADDR_E_RETURNCODE HwlComputeThinEquation(
AddrResourceType rsrcType,
AddrSwizzleMode swMode,
UINT_32 elementBytesLog2,
ADDR_EQUATION* pEquation) const;
virtual ADDR_E_RETURNCODE HwlComputeThickEquation(
AddrResourceType rsrcType,
AddrSwizzleMode swMode,
UINT_32 elementBytesLog2,
ADDR_EQUATION* pEquation) const;
/// Exposes the equation table: stores its address in *ppEquationTable and returns the
/// number of equations (m_numEquations).
virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const
{
    const UINT_32 numEquations = m_numEquations;

    *ppEquationTable = m_equationTable;

    return numEquations;
}
// Virtual members declared here and defined out of line; notes describe signatures only.

// Predicate over a resource type / swizzle mode / element size combination.
virtual BOOL_32 IsEquationSupported(
AddrResourceType rsrcType,
AddrSwizzleMode swMode,
UINT_32 elementBytesLog2) const;
// Pipe/bank XOR computations (whole surface, per slice, and sub-resource offset variant).
virtual ADDR_E_RETURNCODE HwlComputePipeBankXor(
const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,
ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor(
const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,
ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern(
const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,
ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const;
// Fills pOut with a preferred surface setting for the request in pIn.
virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting(
const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn,
ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const;
// Surface info: input sanity check, then separate tiled and linear computations.
virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoTiled(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
// Address from coordinate for tiled surfaces.
virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled(
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
// Maximum base alignment queries (data and metadata).
virtual UINT_32 HwlComputeMaxBaseAlignments() const;
virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;
// Initialization hooks; both are non-const and take creation-time inputs.
virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn);
virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);
// Writes thin block dimensions to *pWidth / *pHeight / *pDepth.
virtual VOID ComputeThinBlockDimension(
UINT_32* pWidth,
UINT_32* pHeight,
UINT_32* pDepth,
UINT_32 bpp,
UINT_32 numSamples,
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode) const;
private:
// Private helpers, declared here and defined out of line.

// Equation builders: each writes its result into the CoordEq passed as first argument.
VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const;
VOID GetDataEquation(CoordEq* pDataEq, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
UINT_32 elementBytesLog2, UINT_32 numSamplesLog2) const;
// pDataEq is a non-const pointer, so this may modify the data equation as well.
VOID GetPipeEquation(CoordEq* pPipeEq, CoordEq* pDataEq,
UINT_32 pipeInterleaveLog2, UINT_32 numPipesLog2,
UINT_32 numSamplesLog2, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType) const;
// Parameters after pMetaEq match the fields of MetaEqParams, in the same order.
VOID GenMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip,
UINT_32 elementBytesLog2, UINT_32 numSamplesLog2,
ADDR2_META_FLAGS metaFlag, Gfx9DataType dataSurfaceType,
AddrSwizzleMode swizzleMode, AddrResourceType resourceType,
UINT_32 metaBlkWidthLog2, UINT_32 metaBlkHeightLog2,
UINT_32 metaBlkDepthLog2, UINT_32 compBlkWidthLog2,
UINT_32 compBlkHeightLog2, UINT_32 compBlkDepthLog2) const;
// Non-const. NOTE(review): presumably looks up / refreshes m_cachedMetaEq using
// m_cachedMetaEqKey - confirm in the definition.
const CoordEq* GetMetaEquation(const MetaEqParams& metaEqParams);
// Fills pInfo (per-mip meta info) and reports the meta block counts per axis through
// pNumMetaBlkX / pNumMetaBlkY / pNumMetaBlkZ. pMetaBlkDim is non-const.
VOID GetMetaMipInfo(UINT_32 numMipLevels, Dim3d* pMetaBlkDim,
BOOL_32 dataThick, ADDR2_META_MIP_INFO* pInfo,
UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth,
UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const;
// Predicate over a surface-info request.
BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
// Computes padded dimensions for a linear surface; pMipInfo is optional (defaults to NULL).
ADDR_E_RETURNCODE ComputeSurfaceLinearPadding(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
UINT_32* pMipmap0PaddedWidth,
UINT_32* pSlice0PaddedHeight,
ADDR2_MIP_INFO* pMipInfo = NULL) const;
/// Converts a set of allowed swizzle modes into the set of allowed block types.
/// A block flag is TRUE when at least one swizzle mode of that block type is present.
/// micro and linear are resource independent; macro flags use the 3D thin/thick masks for
/// 3D resources and the plain 4KB/64KB masks otherwise.
static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet, AddrResourceType rsrcType)
{
    ADDR2_BLOCK_SET blockSet = {};

    blockSet.micro  = ((allowedSwModeSet.value & Gfx9Blk256BSwModeMask) != 0) ? TRUE : FALSE;
    blockSet.linear = ((allowedSwModeSet.value & Gfx9LinearSwModeMask) != 0) ? TRUE : FALSE;

    if (rsrcType != ADDR_RSRC_TEX_3D)
    {
        // Non-3D resources only ever get thin macro blocks.
        blockSet.macroThin4KB  = ((allowedSwModeSet.value & Gfx9Blk4KBSwModeMask) != 0) ? TRUE : FALSE;
        blockSet.macroThin64KB = ((allowedSwModeSet.value & Gfx9Blk64KBSwModeMask) != 0) ? TRUE : FALSE;
    }
    else
    {
        blockSet.macroThin4KB   = ((allowedSwModeSet.value & Gfx9Rsrc3dThin4KBSwModeMask) != 0) ? TRUE : FALSE;
        blockSet.macroThick4KB  = ((allowedSwModeSet.value & Gfx9Rsrc3dThick4KBSwModeMask) != 0) ? TRUE : FALSE;
        blockSet.macroThin64KB  = ((allowedSwModeSet.value & Gfx9Rsrc3dThin64KBSwModeMask) != 0) ? TRUE : FALSE;
        blockSet.macroThick64KB = ((allowedSwModeSet.value & Gfx9Rsrc3dThick64KBSwModeMask) != 0) ? TRUE : FALSE;
    }

    return blockSet;
}
/// Converts a set of allowed swizzle modes into the set of allowed swizzle types
/// (Z, S, D, R). A type flag is TRUE when any swizzle mode of that type is present;
/// sw_R is driven by the rotate mask.
static ADDR2_SWTYPE_SET GetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet)
{
    ADDR2_SWTYPE_SET swTypeSet = {};

    swTypeSet.sw_Z = ((allowedSwModeSet.value & Gfx9ZSwModeMask) != 0) ? TRUE : FALSE;
    swTypeSet.sw_S = ((allowedSwModeSet.value & Gfx9StandardSwModeMask) != 0) ? TRUE : FALSE;
    swTypeSet.sw_D = ((allowedSwModeSet.value & Gfx9DisplaySwModeMask) != 0) ? TRUE : FALSE;
    swTypeSet.sw_R = ((allowedSwModeSet.value & Gfx9RotateSwModeMask) != 0) ? TRUE : FALSE;

    return swTypeSet;
}
/// Tells whether a mip of the given size fits in the mip tail: width and height must fit
/// within mipTailDim, and for non-thin surfaces the depth must fit as well.
BOOL_32 IsInMipTail(
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode,
    Dim3d            mipTailDim,
    UINT_32          width,
    UINT_32          height,
    UINT_32          depth) const
{
    if (width > mipTailDim.w)
    {
        return FALSE;
    }

    if (height > mipTailDim.h)
    {
        return FALSE;
    }

    if (IsThin(resourceType, swizzleMode))
    {
        // Depth is not considered for thin surfaces.
        return TRUE;
    }

    return (depth <= mipTailDim.d);
}
// Input validation predicates over a surface-info request (declarations only).
BOOL_32 ValidateNonSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
BOOL_32 ValidateSwModeParams(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
/// Returns the number of bank XOR bits for a macro block of 2^macroBlockBits bytes: the
/// bits left after the pipe XOR bits and the pipe interleave bits, limited to m_banksLog2.
UINT_32 GetBankXorBits(UINT_32 macroBlockBits) const
{
    const UINT_32 pipeXorBits = GetPipeXorBits(macroBlockBits);

    // Bits of the macro block not consumed by pipe XOR or pipe interleave.
    const UINT_32 remainingBits = macroBlockBits - pipeXorBits - m_pipeInterleaveLog2;

    return Min(remainingBits, m_banksLog2);
}
/// Returns the base alignment for a tiled surface: the full block size for XOR swizzle
/// modes, 256 for every other mode.
UINT_32 ComputeSurfaceBaseAlignTiled(AddrSwizzleMode swizzleMode) const
{
    if (IsXor(swizzleMode))
    {
        return GetBlockSize(swizzleMode);
    }

    return 256;
}
// Private helpers, declared here and defined out of line.

// Initializes the equation table (non-const).
VOID InitEquationTable();
// Stereo surface handling: may write to pOut and reports a height alignment
// through pHeightAlign.
ADDR_E_RETURNCODE ComputeStereoInfo(
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
UINT_32* pHeightAlign) const;
// Fills pMipInfo for a mip chain described by mip 0 dimensions, block dimensions and
// number of levels. NOTE(review): meaning of the UINT_32 return value is not visible here.
UINT_32 GetMipChainInfo(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode,
UINT_32 bpp,
UINT_32 mip0Width,
UINT_32 mip0Height,
UINT_32 mip0Depth,
UINT_32 blockWidth,
UINT_32 blockHeight,
UINT_32 blockDepth,
UINT_32 numMipLevel,
ADDR2_MIP_INFO* pMipInfo) const;
// Fills pInfo for mips located in the mip tail; pMetaBlkDim is non-const.
VOID GetMetaMiptailInfo(
ADDR2_META_MIP_INFO* pInfo,
Dim3d mipCoord,
UINT_32 numMipInTail,
Dim3d* pMetaBlkDim) const;
// Returns the start position of mip mipId as a Dim3d and also reports a byte offset
// through pMipTailBytesOffset.
Dim3d GetMipStartPos(
AddrResourceType resourceType,
AddrSwizzleMode swizzleMode,
UINT_32 width,
UINT_32 height,
UINT_32 depth,
UINT_32 blockWidth,
UINT_32 blockHeight,
UINT_32 blockDepth,
UINT_32 mipId,
UINT_32 log2ElementBytes,
UINT_32* pMipTailBytesOffset) const;
/// Picks the major axis from the mip 0 size in blocks.
///   - Width >= height selects X, otherwise Y.
///   - For thick surfaces, the selected axis must also be >= the depth in blocks;
///     otherwise Z is returned.
AddrMajorMode GetMajorMode(
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode,
    UINT_32          mip0WidthInBlk,
    UINT_32          mip0HeightInBlk,
    UINT_32          mip0DepthInBlk) const
{
    // Depth only competes with width/height on thick surfaces.
    const BOOL_32 depthCompetes = IsThick(resourceType, swizzleMode);

    if (mip0WidthInBlk >= mip0HeightInBlk)
    {
        // X is the candidate axis.
        if (depthCompetes && (mip0WidthInBlk < mip0DepthInBlk))
        {
            return ADDR_MAJOR_Z;
        }
        return ADDR_MAJOR_X;
    }

    // Y is the candidate axis.
    if (depthCompetes && (mip0HeightInBlk < mip0DepthInBlk))
    {
        return ADDR_MAJOR_Z;
    }
    return ADDR_MAJOR_Y;
}
/// Returns the DCC compress block dimensions for a resource type / swizzle mode / bpp.
/// Thin surfaces use the 2D 256B block table with depth forced to 1; other surfaces use
/// Block256_3dS for standard swizzles and Block256_3dZ otherwise.
Dim3d GetDccCompressBlk(
    AddrResourceType resourceType,
    AddrSwizzleMode  swizzleMode,
    UINT_32          bpp) const
{
    // All tables are indexed by Log2(bpp / 8).
    const UINT_32 bppIdx = Log2(bpp >> 3);

    if (IsThin(resourceType, swizzleMode))
    {
        Dim3d thinBlk;
        thinBlk.w = Block256_2d[bppIdx].w;
        thinBlk.h = Block256_2d[bppIdx].h;
        thinBlk.d = 1;
        return thinBlk;
    }

    if (IsStandardSwizzle(resourceType, swizzleMode))
    {
        return Block256_3dS[bppIdx];
    }

    return Block256_3dZ[bppIdx];
}
// Upper bounds, as log2 values.
// NOTE(review): presumably for shader engines and RBs per shader engine - confirm.
static const UINT_32 MaxSeLog2 = 3;
static const UINT_32 MaxRbPerSeLog2 = 2;
// Per-bpp lookup tables (MaxNumOfBpp entries), defined out of line. GetDccCompressBlk()
// reads the S table for standard swizzles and the Z table otherwise.
static const Dim3d Block256_3dS[MaxNumOfBpp];
static const Dim3d Block256_3dZ[MaxNumOfBpp];
// Unsized array; its length is fixed by the out-of-line definition.
static const UINT_32 MipTailOffset256B[];
// One flags entry per swizzle mode (ADDR_SW_MAX_TYPE entries); defined out of line.
static const SwizzleModeFlags SwizzleModeTable[ADDR_SW_MAX_TYPE];
// Number of entries in the two parallel meta equation arrays below.
static const UINT_32 MaxCachedMetaEq = 2;
// Chip settings flags
Gfx9ChipSettings m_settings;
// Meta equations and their MetaEqParams, stored as parallel arrays of the same length.
CoordEq m_cachedMetaEq[MaxCachedMetaEq];
MetaEqParams m_cachedMetaEqKey[MaxCachedMetaEq];
// NOTE(review): presumably the next m_cachedMetaEq slot to overwrite - confirm in the .cpp.
UINT_32 m_metaEqOverrideIndex;
};
} // V2
} // Addr
#endif
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
@@ -0,0 +1,205 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/**
****************************************************************************************************
* @file ciaddrlib.h
* @brief Contains the CiLib class definition.
****************************************************************************************************
*/
#ifndef __CI_ADDR_LIB_H__
#define __CI_ADDR_LIB_H__
#include "addrlib1.h"
#include "siaddrlib.h"
namespace Addr
{
namespace V1
{
/**
****************************************************************************************************
* @brief This class is the CI specific address library
* function set.
****************************************************************************************************
*/
class CiLib : public SiLib
{
public:
/// Factory for CiLib: obtains storage through Object::ClientAlloc and constructs the
/// object in place. Returns NULL when the allocation fails.
static Addr::Lib* CreateObj(const Client* pClient)
{
    Addr::Lib* pLib = NULL;
    VOID*      pMem = Object::ClientAlloc(sizeof(CiLib), pClient);

    if (pMem != NULL)
    {
        // Placement new: the storage comes from the client allocator, not operator new.
        pLib = new (pMem) CiLib(pClient);
    }

    return pLib;
}
private:
// Constructor and destructor are private: within this header, instances are created
// through the static CreateObj() factory.
CiLib(const Client* pClient);
virtual ~CiLib();
protected:
// Hwl interface - defined in AddrLib1
// All members in this section are virtual declarations; bodies are defined out of line.

// Surface and fmask info. HwlComputeFmaskInfo is the only non-const member of the pair.
virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo(
const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
// Initialization hooks (non-const).
virtual ChipFamily HwlConvertChipFamily(
UINT_32 uChipFamily, UINT_32 uChipRevision);
virtual BOOL_32 HwlInitGlobalParams(
const ADDR_CREATE_INPUT* pCreateIn);
// Fills pInfo for a tile index / macro mode index; pMode and pType are optional outputs
// (both default to a null pointer).
virtual ADDR_E_RETURNCODE HwlSetupTileCfg(
UINT_32 bpp, INT_32 index, INT_32 macroModeIndex, ADDR_TILEINFO* pInfo,
AddrTileMode* pMode = 0, AddrTileType* pType = 0) const;
// Reports macro tile width/height for linear data through pMacroWidth / pMacroHeight.
virtual VOID HwlComputeTileDataWidthAndHeightLinear(
UINT_32* pMacroWidth, UINT_32* pMacroHeight,
UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const;
// Returns a macro mode index; pTileMode and pTileType are optional outputs (default NULL).
virtual INT_32 HwlComputeMacroModeIndex(
INT_32 tileIndex, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples,
ADDR_TILEINFO* pTileInfo, AddrTileMode* pTileMode = NULL, AddrTileType* pTileType = NULL
) const;
// Sub-hwl interface - defined in EgBasedLib
// Produces outputTileInfo from inputTileInfo and the surface parameters; pOut is non-const.
virtual VOID HwlSetupTileInfo(
AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags,
UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo,
AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
// Returns a tile index; curIndex defaults to TileIndexInvalid.
virtual INT_32 HwlPostCheckTileIndex(
const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type,
INT curIndex = TileIndexInvalid) const;
// Fmask <-> surface info thunks: the "pre" variant fills the surface structures from the
// fmask ones, the "post" variant fills the fmask output from the surface output
// (direction taken from which parameters are const).
virtual VOID HwlFmaskPreThunkSurfInfo(
const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn,
const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut,
ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const;
virtual VOID HwlFmaskPostThunkSurfInfo(
const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const;
// Returns a tile mode derived from baseTileMode; pBytesPerTile is an in/out or output pointer.
virtual AddrTileMode HwlDegradeThickTileMode(
AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const;
// Tile mode adjusters: each modifies the surface-info input in place (pInOut).
virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const;
virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const;
virtual VOID HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const;
/// Overwrite tile setting to PRT
virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const;
// Metadata queries: DCC info, and cmask / htile address from coordinate.
virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord(
const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const;
// Maximum base alignment queries (data and metadata).
virtual UINT_32 HwlComputeMaxBaseAlignments() const;
virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;
// Pads dimensions. pPitch and PitchAlign are pointers (may be written); height and
// heightAlign are passed by value.
virtual VOID HwlPadDimensions(
AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 mipLevel,
UINT_32* pPitch, UINT_32 *PitchAlign, UINT_32 height, UINT_32 heightAlign) const;
// Computes alignments for macro tiled surfaces into pOut.
virtual VOID HwlComputeSurfaceAlignmentsMacroTiled(
AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
private:
// Private helpers, declared here and defined out of line.

// Decode a raw register value into a tile config / macro tile config structure.
VOID ReadGbTileMode(
UINT_32 regValue, TileConfig* pCfg) const;
VOID ReadGbMacroTileCfg(
UINT_32 regValue, ADDR_TILEINFO* pCfg) const;
private:
// Table initializers: take an array of noOfEntries UINT_32 settings (non-const).
BOOL_32 InitTileSettingTable(
const UINT_32 *pSetting, UINT_32 noOfEntries);
BOOL_32 InitMacroTileCfgTable(
const UINT_32 *pSetting, UINT_32 noOfEntries);
// Returns a 64-bit metadata address computed from the data address, the data and metadata
// base addresses, and the sizing / pipe / bank parameters.
// NOTE(review): per the name the result is in nibble units - confirm in the definition.
UINT_64 HwlComputeMetadataNibbleAddress(
UINT_64 uncompressedDataByteAddress,
UINT_64 dataBaseByteAddress,
UINT_64 metadataBaseByteAddress,
UINT_32 metadataBitSize,
UINT_32 elementBitSize,
UINT_32 blockByteSize,
UINT_32 pipeInterleaveBytes,
UINT_32 numOfPipes,
UINT_32 numOfBanks,
UINT_32 numOfSamplesPerSplit) const;
// Predicate; pOut is non-const, so the implementation may also update it.
BOOL_32 DepthStencilTileCfgMatch(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
// Predicate over a tile configuration; all pointer inputs are const.
BOOL_32 CheckTcCompatibility(const ADDR_TILEINFO* pTileInfo, UINT_32 bpp, AddrTileMode tileMode,
AddrTileType tileType, const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
/// DCC and TC compatibility are supported on Volcanic Islands chips, or on any chip when
/// the forceDccAndTcCompat config flag is set.
BOOL_32 SupportDccAndTcCompatibility() const
{
    if (m_settings.isVolcanicIslands == TRUE)
    {
        return TRUE;
    }

    return (m_configFlags.forceDccAndTcCompat == TRUE);
}
// Capacity of m_macroTileTable.
static const UINT_32 MacroTileTableSize = 16;
// Half of the table size.
// NOTE(review): presumably the index offset of the PRT macro mode entries - confirm.
static const UINT_32 PrtMacroModeOffset = MacroTileTableSize / 2;
// Tile index constants for depth surfaces.
// NOTE(review): presumably a [min, max] index range for 2D thin depth plus a single
// index for 1D thin depth - confirm against the tile setting table.
static const INT_32 MinDepth2DThinIndex = 0;
static const INT_32 MaxDepth2DThinIndex = 4;
static const INT_32 Depth1DThinIndex = 5;
// Macro tile configurations and the number of entries.
// NOTE(review): presumably the count of valid entries in m_macroTileTable - confirm.
ADDR_TILEINFO m_macroTileTable[MacroTileTableSize];
UINT_32 m_noOfMacroEntries;
// NOTE(review): per the name, allows thick tile modes for non-display surfaces; where it
// is set and read is not visible in this header.
BOOL_32 m_allowNonDispThickModes;
};
} // V1
} // Addr
#endif
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
@@ -0,0 +1,430 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/**
****************************************************************************************************
* @file egbaddrlib.h
* @brief Contains the EgBasedLib class definition.
****************************************************************************************************
*/
#ifndef __EG_BASED_ADDR_LIB_H__
#define __EG_BASED_ADDR_LIB_H__
#include "addrlib1.h"
namespace Addr
{
namespace V1
{
/// Structures for functions
// Bit-field bundle of x/y coordinate bits: a 3-bit x group, a 4-bit y group, and
// individual single-bit fields for x bits 3-5 and y bits 3-6 (14 bits declared in total).
// NOTE(review): per the name these are coordinate bits derived from bank/pipe values;
// the producing code is not visible in this header section.
struct CoordFromBankPipe
{
UINT_32 xBits : 3;
UINT_32 yBits : 4;
UINT_32 xBit3 : 1;
UINT_32 xBit4 : 1;
UINT_32 xBit5 : 1;
UINT_32 yBit3 : 1;
UINT_32 yBit4 : 1;
UINT_32 yBit5 : 1;
UINT_32 yBit6 : 1;
};
/**
****************************************************************************************************
* @brief This class is the Evergreen based address library
* @note Abstract class
****************************************************************************************************
*/
class EgBasedLib : public Lib
{
protected:
// Constructor and destructor are protected, and the class declares pure virtual hooks below,
// so instances can only exist as part of a derived class.
EgBasedLib(const Client* pClient);
virtual ~EgBasedLib();
public:
/// Surface info functions
// NOTE: DispatchComputeSurfaceInfo using TileInfo takes both an input and an output.
// On input:
// One or more fields may be 0 to be calculated/defaulted - pre-SI h/w.
// H/W using tile mode index only accepts none or all 0's - SI and newer h/w.
// It then returns the actual tiling configuration used.
// Other methods' TileInfo must be valid on entry
BOOL_32 DispatchComputeSurfaceInfo(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
// Non-const, unlike DispatchComputeSurfaceInfo above.
ADDR_E_RETURNCODE DispatchComputeFmaskInfo(
const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
protected:
// Hwl interface
// The virtuals in this group are declared without "= 0"; their definitions are not part of
// this header.
virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord(
const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr(
const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle(
const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn,
ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle(
const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn,
ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle(
UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO* pTileInfo,
UINT_64 baseAddr, UINT_32* pTileSwizzle) const;
virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle(
const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn,
ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW(
const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const;
virtual UINT_32 HwlComputeHtileBpp(
BOOL_32 isWidth8, BOOL_32 isHeight8) const;
virtual UINT_32 HwlComputeHtileBaseAlign(
BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const;
// Non-const, matching the public DispatchComputeFmaskInfo entry point.
virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo(
const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
virtual ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord(
const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr(
const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;
virtual BOOL_32 HwlGetAlignmentInfoMacroTiled(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
UINT_32* pPitchAlign, UINT_32* pHeightAlign, UINT_32* pSizeAlign) const;
virtual UINT_32 HwlComputeQbStereoRightSwizzle(
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pInfo) const;
virtual VOID HwlComputePixelCoordFromOffset(
UINT_32 offset, UINT_32 bpp, UINT_32 numSamples,
AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample,
AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const;
/// Return Cmask block max
// NOTE(review): the return type is BOOL_32 although the value returned is a 14-bit maximum
// (0x3FFF), not a boolean; an unsigned integer type looks intended. Check every override
// before changing the signature.
virtual BOOL_32 HwlGetMaxCmaskBlockMax() const
{
return 0x3FFF; // 14 bits, 0n16383
}
// Sub-hwl interface
// Hooks marked "= 0" below must be implemented by a concrete derived class; the remaining
// ones are declared here without an in-class body unless shown inline.
/// Pure virtual function to setup tile info (indices) if client requests to do so
virtual VOID HwlSetupTileInfo(
AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags,
UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo,
AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
/// Pure virtual function to get pitch alignment for linear modes
virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const = 0;
/// Pure virtual function to get size adjustment for linear modes
virtual UINT_64 HwlGetSizeAdjustmentLinear(
AddrTileMode tileMode,
UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign,
UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const = 0;
/// Micro-tiled counterparts of the two linear hooks above; these are not pure virtual
virtual UINT_32 HwlGetPitchAlignmentMicroTiled(
AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples) const;
virtual UINT_64 HwlGetSizeAdjustmentMicroTiled(
UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
UINT_32 baseAlign, UINT_32 pitchAlign,
UINT_32 *pPitch, UINT_32 *pHeight) const;
/// Pure virtual function to do extra sanity check
virtual BOOL_32 HwlSanityCheckMacroTiled(
ADDR_TILEINFO* pTileInfo) const = 0;
/// Pure virtual function to check current level to be the last macro tiled one
virtual VOID HwlCheckLastMacroTiledLvl(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
/// Adjusts bank before bank is modified by rotation
virtual UINT_32 HwlPreAdjustBank(
UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO* pTileInfo) const = 0;
/// Pure virtual hook; note that x and y are passed by pointer here, whereas the non-virtual
/// ComputeSurfaceCoord2DFromBankPipe below takes them by value and fills a CoordFromBankPipe
virtual VOID HwlComputeSurfaceCoord2DFromBankPipe(
AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice,
UINT_32 bank, UINT_32 pipe,
UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices,
BOOL_32 ignoreSE,
ADDR_TILEINFO* pTileInfo) const = 0;
virtual BOOL_32 HwlTileInfoEqual(
const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const;
virtual AddrTileMode HwlDegradeThickTileMode(
AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const;
/// Default implementation ignores its arguments and returns TileIndexInvalid
virtual INT_32 HwlPostCheckTileIndex(
const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type,
INT curIndex = TileIndexInvalid) const
{
return TileIndexInvalid;
}
/// Default implementation is a no-op
virtual VOID HwlFmaskPreThunkSurfInfo(
const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn,
const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut,
ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const
{
}
/// Default implementation is a no-op
virtual VOID HwlFmaskPostThunkSurfInfo(
const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const
{
}
virtual UINT_32 HwlStereoCheckRightOffsetPadding(ADDR_TILEINFO* pTileInfo) const;
virtual BOOL_32 HwlReduceBankWidthHeight(
UINT_32 tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
UINT_32 bankHeightAlign, UINT_32 pipes,
ADDR_TILEINFO* pTileInfo) const;
// Protected non-virtual functions
/// Mip level functions
AddrTileMode ComputeSurfaceMipLevelTileMode(
AddrTileMode baseTileMode, UINT_32 bpp,
UINT_32 pitch, UINT_32 height, UINT_32 numSlices, UINT_32 numSamples,
UINT_32 pitchAlign, UINT_32 heightAlign,
ADDR_TILEINFO* pTileInfo) const;
/// Swizzle functions
VOID ExtractBankPipeSwizzle(
UINT_32 base256b, ADDR_TILEINFO* pTileInfo,
UINT_32* pBankSwizzle, UINT_32* pPipeSwizzle) const;
UINT_32 GetBankPipeSwizzle(
UINT_32 bankSwizzle, UINT_32 pipeSwizzle,
UINT_64 baseAddr, ADDR_TILEINFO* pTileInfo) const;
UINT_32 ComputeSliceTileSwizzle(
AddrTileMode tileMode, UINT_32 baseSwizzle, UINT_32 slice, UINT_64 baseAddr,
ADDR_TILEINFO* pTileInfo) const;
/// Addressing functions
// ComputeBankEquation is virtual despite sitting in the non-virtual section; the default
// implementation reports ADDR_NOTSUPPORTED.
virtual ADDR_E_RETURNCODE ComputeBankEquation(
UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY,
ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const
{
return ADDR_NOTSUPPORTED;
}
// NOTE(review): `tileSpitSlice` is presumably a misspelling of `tileSplitSlice` -- confirm
// against the definition before renaming.
UINT_32 ComputeBankFromCoord(
UINT_32 x, UINT_32 y, UINT_32 slice,
AddrTileMode tileMode, UINT_32 bankSwizzle, UINT_32 tileSpitSlice,
ADDR_TILEINFO* pTileInfo) const;
UINT_32 ComputeBankFromAddr(
UINT_64 addr, UINT_32 numBanks, UINT_32 numPipes) const;
UINT_32 ComputePipeRotation(
AddrTileMode tileMode, UINT_32 numPipes) const;
UINT_32 ComputeBankRotation(
AddrTileMode tileMode, UINT_32 numBanks,
UINT_32 numPipes) const;
// Takes x/y by value and reports its result through the CoordFromBankPipe bit-field struct.
VOID ComputeSurfaceCoord2DFromBankPipe(
AddrTileMode tileMode, UINT_32 x, UINT_32 y, UINT_32 slice,
UINT_32 bank, UINT_32 pipe,
UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices,
ADDR_TILEINFO* pTileInfo,
CoordFromBankPipe *pOutput) const;
/// Htile/Cmask functions
UINT_64 ComputeHtileBytes(
UINT_32 pitch, UINT_32 height, UINT_32 bpp,
BOOL_32 isLinear, UINT_32 numSlices, UINT_64* sliceBytes, UINT_32 baseAlign) const;
ADDR_E_RETURNCODE ComputeMacroTileEquation(
UINT_32 log2BytesPP, AddrTileMode tileMode, AddrTileType microTileType,
ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const;
// Static functions
static BOOL_32 IsTileInfoAllZero(const ADDR_TILEINFO* pTileInfo);
static UINT_32 ComputeFmaskNumPlanesFromNumSamples(UINT_32 numSamples);
static UINT_32 ComputeFmaskResolvedBppFromNumSamples(UINT_32 numSamples);
// Not a static function despite the heading above: a virtual hook whose default body is empty.
virtual VOID HwlComputeSurfaceAlignmentsMacroTiled(
AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const
{
}
private:
// Private helpers, grouped by linear / micro-tiled / macro-tiled variants.
/// Surface info helpers
BOOL_32 ComputeSurfaceInfoLinear(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
UINT_32 padDims) const;
BOOL_32 ComputeSurfaceInfoMicroTiled(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
UINT_32 padDims,
AddrTileMode expTileMode) const;
BOOL_32 ComputeSurfaceInfoMacroTiled(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
UINT_32 padDims,
AddrTileMode expTileMode) const;
/// Surface alignment helpers; the macro-tiled variant writes into pOut instead of three
/// separate alignment pointers
BOOL_32 ComputeSurfaceAlignmentsLinear(
AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const;
BOOL_32 ComputeSurfaceAlignmentsMicroTiled(
AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
UINT_32 mipLevel, UINT_32 numSamples,
UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const;
BOOL_32 ComputeSurfaceAlignmentsMacroTiled(
AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
UINT_32 mipLevel, UINT_32 numSamples,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
/// Surface addressing functions
UINT_64 DispatchComputeSurfaceAddrFromCoord(
const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
VOID DispatchComputeSurfaceCoordFromAddr(
const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
UINT_64 ComputeSurfaceAddrFromCoordMicroTiled(
UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample,
UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
AddrTileMode tileMode,
AddrTileType microTileType, BOOL_32 isDepthSampleOrder,
UINT_32* pBitPosition) const;
UINT_64 ComputeSurfaceAddrFromCoordMacroTiled(
UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample,
UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
AddrTileMode tileMode,
AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder,
UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
ADDR_TILEINFO* pTileInfo,
UINT_32* pBitPosition) const;
VOID ComputeSurfaceCoordFromAddrMacroTiled(
UINT_64 addr, UINT_32 bitPosition,
UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder,
UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
ADDR_TILEINFO* pTileInfo,
UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const;
/// Fmask functions
UINT_64 DispatchComputeFmaskAddrFromCoord(
const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
VOID DispatchComputeFmaskCoordFromAddr(
const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;
// FMASK related methods - private
UINT_64 ComputeFmaskAddrFromCoordMicroTiled(
UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane,
UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode,
BOOL_32 resolved, UINT_32* pBitPosition) const;
VOID ComputeFmaskCoordFromAddrMicroTiled(
UINT_64 addr, UINT_32 bitPosition,
UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
AddrTileMode tileMode, BOOL_32 resolved,
UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const;
VOID ComputeFmaskCoordFromAddrMacroTiled(
UINT_64 addr, UINT_32 bitPosition,
UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode,
UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
BOOL_32 ignoreSE,
ADDR_TILEINFO* pTileInfo,
BOOL_32 resolved,
UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const;
UINT_64 ComputeFmaskAddrFromCoordMacroTiled(
UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane,
UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
AddrTileMode tileMode, UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
BOOL_32 ignoreSE,
ADDR_TILEINFO* pTileInfo,
BOOL_32 resolved,
UINT_32* pBitPosition) const;
/// Sanity check functions
// Non-virtual counterpart of the pure virtual HwlSanityCheckMacroTiled hook declared above.
BOOL_32 SanityCheckMacroTiled(
ADDR_TILEINFO* pTileInfo) const;
protected:
// Configuration values readable by derived classes.
UINT_32 m_ranks; ///< Number of ranks - MC_ARB_RAMCFG.NOOFRANK
UINT_32 m_logicalBanks; ///< Logical banks = m_banks * m_ranks if m_banks != 16
UINT_32 m_bankInterleave; ///< Bank interleave, as a multiple of pipe interleave size
};
} // V1
} // Addr
#endif
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
@@ -0,0 +1,346 @@
/*
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/**
****************************************************************************************************
* @file siaddrlib.h
* @brief Contains the SiLib class definition.
****************************************************************************************************
*/
#ifndef __SI_ADDR_LIB_H__
#define __SI_ADDR_LIB_H__
#include "addrlib1.h"
#include "egbaddrlib.h"
namespace Addr
{
namespace V1
{
/**
****************************************************************************************************
* @brief Describes the information in tile mode table
****************************************************************************************************
*/
/// One tile mode table entry. SiLib keeps an array of these in m_tileTable.
struct TileConfig
{
AddrTileMode mode; ///< Tile mode of the entry
AddrTileType type; ///< Tile type of the entry
ADDR_TILEINFO info; ///< Tile info of the entry
};
/**
****************************************************************************************************
* @brief SI specific settings structure.
****************************************************************************************************
*/
/// Set of one-bit flags, one per chip family or chip, stored by SiLib in m_settings.
/// Every field is a single-bit UINT_32 bit-field (22 flags in total); declaration order
/// determines the bit layout, so do not reorder.
// NOTE(review): the flags are presumably filled in during chip-family conversion -- that code
// is not visible in this header.
struct SiChipSettings
{
// SI
UINT_32 isSouthernIsland : 1;
UINT_32 isTahiti : 1;
UINT_32 isPitCairn : 1;
UINT_32 isCapeVerde : 1;
// Oland/Hainan are of GFXIP 6.0, similar with SI
UINT_32 isOland : 1;
UINT_32 isHainan : 1;
// CI
UINT_32 isSeaIsland : 1;
UINT_32 isBonaire : 1;
UINT_32 isKaveri : 1;
UINT_32 isSpectre : 1;
UINT_32 isSpooky : 1;
UINT_32 isKalindi : 1;
UINT_32 isHawaii : 1;
// VI
UINT_32 isVolcanicIslands : 1;
UINT_32 isIceland : 1;
UINT_32 isTonga : 1;
UINT_32 isFiji : 1;
UINT_32 isPolaris10 : 1;
UINT_32 isPolaris11 : 1;
UINT_32 isPolaris12 : 1;
UINT_32 isVegaM : 1;
UINT_32 isCarrizo : 1;
};
/**
****************************************************************************************************
* @brief This class is the SI specific address library
* function set.
****************************************************************************************************
*/
class SiLib : public EgBasedLib
{
public:
/// Creates SiLib object
/// Storage comes from Object::ClientAlloc and the object is built in it with placement new.
/// Returns NULL when the allocation returns NULL.
static Addr::Lib* CreateObj(const Client* pClient)
{
VOID* pMem = Object::ClientAlloc(sizeof(SiLib), pClient);
return (pMem != NULL) ? new (pMem) SiLib(pClient) : NULL;
}
protected:
// Constructor is protected; within this header the only construction site is CreateObj above.
SiLib(const Client* pClient);
virtual ~SiLib();
// Hwl interface - defined in AddrLib1
virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW(
const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const;
virtual UINT_64 HwlComputeXmaskAddrFromCoord(
UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 numSlices,
UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
ADDR_TILEINFO* pTileInfo, UINT_32* pBitPosition) const;
virtual VOID HwlComputeXmaskCoordFromAddr(
UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const;
virtual ADDR_E_RETURNCODE HwlGetTileIndex(
const ADDR_GET_TILEINDEX_INPUT* pIn,
ADDR_GET_TILEINDEX_OUTPUT* pOut) const;
virtual BOOL_32 HwlComputeMipLevel(
ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
// The next two hooks are non-const.
virtual ChipFamily HwlConvertChipFamily(
UINT_32 uChipFamily, UINT_32 uChipRevision);
virtual BOOL_32 HwlInitGlobalParams(
const ADDR_CREATE_INPUT* pCreateIn);
// pMode and pType default to 0 (null), so callers may omit them.
virtual ADDR_E_RETURNCODE HwlSetupTileCfg(
UINT_32 bpp, INT_32 index, INT_32 macroModeIndex,
ADDR_TILEINFO* pInfo, AddrTileMode* pMode = 0, AddrTileType* pType = 0) const;
virtual VOID HwlComputeTileDataWidthAndHeightLinear(
UINT_32* pMacroWidth, UINT_32* pMacroHeight,
UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const;
virtual UINT_64 HwlComputeHtileBytes(
UINT_32 pitch, UINT_32 height, UINT_32 bpp,
BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const;
// Same signature as the EgBasedLib::ComputeBankEquation default, which returns
// ADDR_NOTSUPPORTED.
virtual ADDR_E_RETURNCODE ComputeBankEquation(
UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY,
ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const;
virtual ADDR_E_RETURNCODE ComputePipeEquation(
UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY,
ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const;
virtual UINT_32 ComputePipeFromCoord(
UINT_32 x, UINT_32 y, UINT_32 slice,
AddrTileMode tileMode, UINT_32 pipeSwizzle, BOOL_32 ignoreSE,
ADDR_TILEINFO* pTileInfo) const;
virtual UINT_32 HwlGetPipes(const ADDR_TILEINFO* pTileInfo) const;
/// Pre-handler of 3x pitch (96 bit) adjustment
virtual UINT_32 HwlPreHandleBaseLvl3xPitch(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
/// Post-handler of 3x pitch adjustment
virtual UINT_32 HwlPostHandleBaseLvl3xPitch(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
/// Dummy function to finalize the inheritance
virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe(
UINT_32 pipe, UINT_32 x) const;
// Sub-hwl interface - defined in EgBasedLib
// Several of the declarations below match hooks that EgBasedLib declares with "= 0"
// (HwlSetupTileInfo, HwlCheckLastMacroTiledLvl, HwlSanityCheckMacroTiled,
// HwlGetPitchAlignmentLinear, HwlGetSizeAdjustmentLinear,
// HwlComputeSurfaceCoord2DFromBankPipe, HwlPreAdjustBank).
virtual VOID HwlSetupTileInfo(
AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags,
UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo,
AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
virtual UINT_32 HwlGetPitchAlignmentMicroTiled(
AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples) const;
virtual UINT_64 HwlGetSizeAdjustmentMicroTiled(
UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
UINT_32 baseAlign, UINT_32 pitchAlign,
UINT_32 *pPitch, UINT_32 *pHeight) const;
virtual VOID HwlCheckLastMacroTiledLvl(
const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
virtual BOOL_32 HwlTileInfoEqual(
const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const;
virtual AddrTileMode HwlDegradeThickTileMode(
AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const;
virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const;
virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const;
virtual VOID HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const;
/// Overwrite tile setting to PRT
virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const;
/// Extra macro-tiled sanity check: this implementation accepts every tile info (always TRUE)
virtual BOOL_32 HwlSanityCheckMacroTiled(
ADDR_TILEINFO* pTileInfo) const
{
return TRUE;
}
virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const;
virtual UINT_64 HwlGetSizeAdjustmentLinear(
AddrTileMode tileMode,
UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign,
UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const;
virtual VOID HwlComputeSurfaceCoord2DFromBankPipe(
AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice,
UINT_32 bank, UINT_32 pipe,
UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices,
BOOL_32 ignoreSE,
ADDR_TILEINFO* pTileInfo) const;
virtual UINT_32 HwlPreAdjustBank(
UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO* pTileInfo) const;
// The three hooks below have inline default bodies in EgBasedLib (return TileIndexInvalid /
// no-op); here they are only declared, with definitions outside this header.
virtual INT_32 HwlPostCheckTileIndex(
const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type,
INT curIndex = TileIndexInvalid) const;
virtual VOID HwlFmaskPreThunkSurfInfo(
const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn,
const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut,
ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const;
virtual VOID HwlFmaskPostThunkSurfInfo(
const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const;
virtual UINT_32 HwlComputeFmaskBits(
const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
UINT_32* pNumSamples) const;
/// This implementation leaves pTileInfo untouched and always returns TRUE
virtual BOOL_32 HwlReduceBankWidthHeight(
UINT_32 tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
UINT_32 bankHeightAlign, UINT_32 pipes,
ADDR_TILEINFO* pTileInfo) const
{
return TRUE;
}
virtual UINT_32 HwlComputeMaxBaseAlignments() const;
virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const;
virtual VOID HwlComputeSurfaceAlignmentsMacroTiled(
AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
// Get equation table pointer and number of equations
// Stores the address of m_equationTable through ppEquationTable and returns m_numEquations.
// ppEquationTable is dereferenced without a null check, so callers must pass a valid pointer.
virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const
{
*ppEquationTable = m_equationTable;
return m_numEquations;
}
// Check if it is supported for given bpp and tile config to generate an equation
BOOL_32 IsEquationSupported(
UINT_32 bpp, TileConfig tileConfig, INT_32 tileIndex, UINT_32 elementBytesLog2) const;
// Protected non-virtual functions
VOID ComputeTileCoordFromPipeAndElemIdx(
UINT_32 elemIdx, UINT_32 pipe, AddrPipeCfg pipeCfg, UINT_32 pitchInMacroTile,
UINT_32 x, UINT_32 y, UINT_32* pX, UINT_32* pY) const;
UINT_32 TileCoordToMaskElementIndex(
UINT_32 tx, UINT_32 ty, AddrPipeCfg pipeConfig,
UINT_32 *macroShift, UINT_32 *elemIdxBits) const;
BOOL_32 DecodeGbRegs(
const ADDR_REGISTER_VALUE* pRegValue);
const TileConfig* GetTileSetting(
UINT_32 index) const;
// Initialize equation table
VOID InitEquationTable();
UINT_32 GetPipePerSurf(AddrPipeCfg pipeConfig) const;
// Tile mode table: fixed capacity of 32 entries.
// NOTE(review): m_noOfEntries is presumably the number of valid entries in m_tileTable --
// confirm against InitTileSettingTable.
static const UINT_32 TileTableSize = 32;
TileConfig m_tileTable[TileTableSize];
UINT_32 m_noOfEntries;
// Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp)
static const UINT_32 MaxNumElementBytes = 5;
// Static per-(tile index, element size) table; declared here, defined outside this header.
static const BOOL_32 m_EquationSupport[TileTableSize][MaxNumElementBytes];
// Prt tile mode index mask
// Bits set: 3, 5, 6, 7, 21, 22, 23, 24, 25 and 30.
static const UINT_32 SiPrtTileIndexMask = ((1 << 3) | (1 << 5) | (1 << 6) | (1 << 7) |
(1 << 21) | (1 << 22) | (1 << 23) | (1 << 24) |
(1 << 25) | (1 << 30));
// More than half slots in tile mode table can't support equation
// (5 * 32) / 2 = 80 entries.
static const UINT_32 EquationTableSize = (MaxNumElementBytes * TileTableSize) / 2;
// Equation table
// The four arrays that follow have the same length as m_equationTable.
ADDR_EQUATION m_equationTable[EquationTableSize];
UINT_32 m_numMacroBits[EquationTableSize];
UINT_32 m_blockWidth[EquationTableSize];
UINT_32 m_blockHeight[EquationTableSize];
UINT_32 m_blockSlices[EquationTableSize];
// Number of equation entries in the table
UINT_32 m_numEquations;
// Equation lookup table according to bpp and tile index
UINT_32 m_equationLookupTable[MaxNumElementBytes][TileTableSize];
UINT_32 m_uncompressDepthEqIndex;
// Per-chip feature flags (see SiChipSettings).
SiChipSettings m_settings;
private:
// Helpers for filling the tile mode table from register values.
VOID ReadGbTileMode(UINT_32 regValue, TileConfig* pCfg) const;
BOOL_32 InitTileSettingTable(const UINT_32 *pSetting, UINT_32 noOfEntries);
};
} // V1
} // Addr
#endif
@@ -0,0 +1,328 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef UTIL_MACROS_H
#define UTIL_MACROS_H
#include <assert.h>
/* Compute the size of an array */
#if !defined(ARRAY_SIZE)
/* Element count of a true array (not a pointer); an earlier definition takes precedence. */
#  define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
/* Toolchains without __has_builtin() get a stub that answers 0 to every query. */
#if !defined(__has_builtin)
#  define __has_builtin(x) 0
#endif
/**
* __builtin_expect macros
*/
/* Without compiler support, __builtin_expect(x, y) degrades to plain (x). */
#ifndef HAVE___BUILTIN_EXPECT
#  define __builtin_expect(x, y) (x)
#endif
/* Branch-prediction hints; skipped entirely when `likely` already exists. */
#if !defined(likely)
#  if !defined(HAVE___BUILTIN_EXPECT)
     /* No hint available: pass the condition through unchanged. */
#    define likely(x) (x)
#    define unlikely(x) (x)
#  else
     /* !! normalises the condition to 0/1 before it is compared with the expected value. */
#    define likely(x) __builtin_expect(!!(x), 1)
#    define unlikely(x) __builtin_expect(!!(x), 0)
#  endif
#endif
/**
* Static (compile-time) assertion.
*/
/* Wrapped in do { } while (0) so it is used like a statement. The loop condition is the
 * literal 0 (as in the other macros of this header) rather than `false`, which is not
 * available in C before C23 without <stdbool.h>.
 */
#define STATIC_ASSERT(COND) do { \
   static_assert(COND, "Addrlib legacy static_assert failure."); \
} while (0)
/**
* Unreachable macro. Useful for suppressing "control reaches end of non-void
* function" warnings.
*/
/* Three variants, chosen in this order: compiler builtin, MSVC __assume(0), assert only.
 * In every variant assert(!str) comes first; `str` is pasted after `!` without parentheses.
 * NOTE(review): presumably `str` is always a string literal, which makes !str false so the
 * assert fires and prints the message -- confirm at call sites.
 */
#if defined(HAVE___BUILTIN_UNREACHABLE) || __has_builtin(__builtin_unreachable)
/* Assert, then tell the compiler control never gets past this point. */
#define unreachable(str) \
do { \
assert(!str); \
__builtin_unreachable(); \
} while (0)
#elif defined (_MSC_VER)
/* MSVC: same idea, using __assume(0) as the optimizer hint. */
#define unreachable(str) \
do { \
assert(!str); \
__assume(0); \
} while (0)
#else
/* No optimizer hint available: only the assert remains (an expression, not a do/while). */
#define unreachable(str) assert(!str)
#endif
/**
* Assume macro. Useful for expressing our assumptions to the compiler,
* typically for purposes of silencing warnings.
*/
/* Four variants, tried in order. They are not uniform:
 *  - the __builtin_assume variant is a do/while statement and expands `expr` twice;
 *  - the __builtin_unreachable variant is a conditional expression;
 *  - the MSVC variant passes `expr` to __assume without asserting it;
 *  - the fallback is a plain assert.
 * NOTE(review): because `expr` can be expanded more than once, it should presumably be free of
 * side effects -- confirm at call sites.
 */
#if __has_builtin(__builtin_assume)
#define assume(expr) \
do { \
assert(expr); \
__builtin_assume(expr); \
} while (0)
#elif defined HAVE___BUILTIN_UNREACHABLE
/* On failure: assert with a fixed message, then mark the path unreachable. */
#define assume(expr) ((expr) ? ((void) 0) \
: (assert(!"assumption failed"), \
__builtin_unreachable()))
#elif defined (_MSC_VER)
/* No assert in this branch: the condition is only handed to the optimizer. */
#define assume(expr) __assume(expr)
#else
#define assume(expr) assert(expr)
#endif
/* Attribute const is used for functions that have no effects other than their
* return value, and only rely on the argument values to compute the return
* value. As a result, calls to it can be CSEed. Note that using memory
* pointed to by the arguments is not allowed for const functions.
*/
/* Each HAVE_FUNC_ATTRIBUTE_* macro tested below is not defined in this header, so it has to be
 * supplied from outside (for example by the build configuration). When it is absent, the
 * corresponding wrapper expands to nothing.
 */
#ifdef HAVE_FUNC_ATTRIBUTE_CONST
#define ATTRIBUTE_CONST __attribute__((__const__))
#else
#define ATTRIBUTE_CONST
#endif
/* FLATTEN: wraps the `flatten` function attribute. */
#ifdef HAVE_FUNC_ATTRIBUTE_FLATTEN
#define FLATTEN __attribute__((__flatten__))
#else
#define FLATTEN
#endif
/* PRINTFLIKE(f, a): wraps format(__printf__, f, a); f and a are forwarded unchanged as the
 * attribute's two index arguments.
 */
#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT
#define PRINTFLIKE(f, a) __attribute__ ((format(__printf__, f, a)))
#else
#define PRINTFLIKE(f, a)
#endif
/* MALLOCLIKE: wraps the `malloc` function attribute. */
#ifdef HAVE_FUNC_ATTRIBUTE_MALLOC
#define MALLOCLIKE __attribute__((__malloc__))
#else
#define MALLOCLIKE
#endif
/* Forced function inlining */
/* Note: Clang also sets __GNUC__ (see other cases below) */
/* Unlike the wrappers above, this one keys off the compiler macros directly and always
 * contains `inline` (or __forceinline on MSVC). A pre-existing ALWAYS_INLINE is kept.
 */
#ifndef ALWAYS_INLINE
# if defined(__GNUC__)
# define ALWAYS_INLINE inline __attribute__((always_inline))
# elif defined(_MSC_VER)
# define ALWAYS_INLINE __forceinline
# else
# define ALWAYS_INLINE inline
# endif
#endif
/* Used to optionally mark structures with misaligned elements or size as
 * packed, to trade off performance for space.
 */
#ifdef HAVE_FUNC_ATTRIBUTE_PACKED
#define PACKED __attribute__((__packed__))
#else
#define PACKED
#endif
/* Attribute pure is used for functions that have no effects other than their
 * return value. As a result, calls to it can be dead code eliminated.
 */
#ifdef HAVE_FUNC_ATTRIBUTE_PURE
#define ATTRIBUTE_PURE __attribute__((__pure__))
#else
#define ATTRIBUTE_PURE
#endif
/* ATTRIBUTE_RETURNS_NONNULL: wraps the `returns_nonnull` function attribute. */
#ifdef HAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL
#define ATTRIBUTE_RETURNS_NONNULL __attribute__((__returns_nonnull__))
#else
#define ATTRIBUTE_RETURNS_NONNULL
#endif
/* NORETURN: MSVC is checked first and uses __declspec; other compilers need
 * HAVE_FUNC_ATTRIBUTE_NORETURN. A pre-existing NORETURN is kept.
 */
#ifndef NORETURN
# ifdef _MSC_VER
# define NORETURN __declspec(noreturn)
# elif defined HAVE_FUNC_ATTRIBUTE_NORETURN
# define NORETURN __attribute__((__noreturn__))
# else
# define NORETURN
# endif
#endif
/* C++ only: nothing in this block is defined when compiling as C. */
#ifdef __cplusplus
/**
 * Macro function that evaluates to true if T is a trivially
 * destructible type -- that is, if its (non-virtual) destructor
 * performs no action and all member variables and base classes are
 * trivially destructible themselves.
 */
/* Clang is tested before GCC; on Clang the macro is defined here only if the feature probe
 * succeeds, otherwise the fallback at the end applies.
 */
# if (defined(__clang__) && defined(__has_feature))
# if __has_feature(has_trivial_destructor)
# define HAS_TRIVIAL_DESTRUCTOR(T) __has_trivial_destructor(T)
# endif
# elif defined(__GNUC__)
/* GCC: only for version 4.3 and newer. */
# if ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3)))
# define HAS_TRIVIAL_DESTRUCTOR(T) __has_trivial_destructor(T)
# endif
# elif defined(_MSC_VER) && !defined(__INTEL_COMPILER)
# define HAS_TRIVIAL_DESTRUCTOR(T) __has_trivial_destructor(T)
# endif
# ifndef HAS_TRIVIAL_DESTRUCTOR
/* It's always safe (if inefficient) to assume that a
 * destructor is non-trivial.
 */
# define HAS_TRIVIAL_DESTRUCTOR(T) (false)
# endif
#endif
/**
* PUBLIC/USED macros
*
* If we build the library with gcc's -fvisibility=hidden flag, we'll
* use the PUBLIC macro to mark functions that are to be exported.
*
* We also need to define a USED attribute, so the optimizer doesn't
* inline a static function that we later use in an alias. - ajax
*/
/* PUBLIC: export marker. A definition supplied before this header is kept. */
#ifndef PUBLIC
#  if defined(__GNUC__)
#    define PUBLIC __attribute__((visibility("default")))
#  elif defined(_MSC_VER)
#    define PUBLIC __declspec(dllexport)
#  else
#    define PUBLIC
#  endif
#endif
/* USED: has its own guard so it is still defined when PUBLIC was supplied externally
 * (previously both lived under the PUBLIC guard, which left USED undefined in that case).
 * Only the GNU-compatible branch has an attribute; every other compiler gets an empty macro.
 */
#ifndef USED
#  if defined(__GNUC__)
#    define USED __attribute__((used))
#  else
#    define USED
#  endif
#endif
/**
* UNUSED marks variables (or sometimes functions) that have to be defined,
* but are sometimes (or always) unused beyond that. A common case is for
* a function parameter to be used in some build configurations but not others.
* Another case is fallback vfuncs that don't do anything with their params.
*
* Note that this should not be used for identifiers used in `assert()`;
* see ASSERTED below.
*/
#if !defined(HAVE_FUNC_ATTRIBUTE_UNUSED)
#  define UNUSED
#else
#  define UNUSED __attribute__((unused))
#endif
/**
 * ASSERTED marks an identifier that is referenced only from inside `assert()`.
 * It becomes UNUSED when NDEBUG is defined and expands to nothing otherwise, so
 * assert-free builds do not report "unused variable" warnings.
 */
#if !defined(NDEBUG)
#  define ASSERTED
#else
#  define ASSERTED UNUSED
#endif
/* MUST_CHECK: wraps warn_unused_result when the build reports support for it. */
#if !defined(HAVE_FUNC_ATTRIBUTE_WARN_UNUSED_RESULT)
#  define MUST_CHECK
#else
#  define MUST_CHECK __attribute__((warn_unused_result))
#endif
/* ATTRIBUTE_NOINLINE: keyed off __GNUC__ directly rather than a HAVE_* probe. */
#if !defined(__GNUC__)
#  define ATTRIBUTE_NOINLINE
#else
#  define ATTRIBUTE_NOINLINE __attribute__((noinline))
#endif
/**
* Check that STRUCT::FIELD can hold MAXVAL. We use a lot of bitfields
* in Mesa/gallium. We have to be sure they're of sufficient size to
* hold the largest expected value.
* Note that with MSVC, enums are signed and enum bitfields need one extra
* high bit (always zero) to ensure the max value is handled correctly.
* This macro will detect that with MSVC, but not GCC.
*
* The check stores MAXVAL into a temporary STRUCT and asserts that reading
* the field back (cast to int) yields MAXVAL again. The temporary is marked
* ASSERTED because it is only read inside assert(). The do/while(0) wrapper
* makes the macro usable as a single statement.
*/
#define ASSERT_BITFIELD_SIZE(STRUCT, FIELD, MAXVAL) \
do { \
ASSERTED STRUCT s; \
s.FIELD = (MAXVAL); \
assert((int) s.FIELD == (MAXVAL) && "Insufficient bitfield size!"); \
} while (0)
/*
* Arithmetic helper macros. All of them may expand an argument more than
* once, so arguments must not have side effects (e.g. MIN2(i++, j)).
*/
/**
* Compute ceiling of integer quotient of A divided by B.
* Evaluates (A) + (B) - 1 first, so the sum must fit in the operand type.
*/
#define DIV_ROUND_UP( A, B ) ( ((A) + (B) - 1) / (B) )
/**
* Clamp X to [MIN,MAX]. Turn NaN into MIN, arbitrarily
* (a NaN X fails the (X)>(MIN) test, so MIN is selected).
*/
#define CLAMP( X, MIN, MAX ) ( (X)>(MIN) ? ((X)>(MAX) ? (MAX) : (X)) : (MIN) )
/** Minimum of two values: */
#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) )
/** Maximum of two values: */
#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) )
/** Minimum and maximum of three values: */
#define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C))
#define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C))
/**
* Round x up to the next multiple of pot_align.
* The mask arithmetic is only correct when pot_align is a power of two.
*/
#define ALIGN_POT(x, pot_align) (((x) + (pot_align) - 1) & ~((pot_align) - 1))
/**
* Macro for declaring an explicit conversion operator. Defaults to an
* implicit conversion if C++11 is not supported.
*
* When the file is not compiled as C++ at all (no __cplusplus), neither
* branch is taken and EXPLICIT_CONVERSION stays undefined.
*/
#if __cplusplus >= 201103L
#define EXPLICIT_CONVERSION explicit
#elif defined(__cplusplus)
#define EXPLICIT_CONVERSION
#endif
/** 32-bit value with only bit b set. */
#define BITFIELD_BIT(b) (1u << (b))
/**
* 32-bit mask with bits 0 .. b-1 set (bit b itself is excluded).
* b == 32 is handled explicitly and yields all ones; the "% 32" keeps the
* shift count of the other arm below the type width.
*/
#define BITFIELD_MASK(b) \
((b) == 32 ? (~0u) : BITFIELD_BIT((b) % 32) - 1)
/** 32-bit mask with `count` consecutive bits set, starting at bit b. */
#define BITFIELD_RANGE(b, count) \
(BITFIELD_MASK((b) + (count)) & ~BITFIELD_MASK(b))
/** 64-bit value with only bit b set (b in 0..63). */
#define BITFIELD64_BIT(b) (1ull << (b))
/**
 * 64-bit mask with bits 0 .. b-1 set (bit b itself is excluded).
 * b == 64 is handled explicitly and yields all ones. The "& 63" keeps the
 * shift count of the non-selected arm below the type width, mirroring the
 * "% 32" guard in BITFIELD_MASK; without it a constant b == 64 still places
 * a 64-bit shift in the expression and triggers shift-count warnings.
 * For b in 0..63 the result is unchanged.
 */
#define BITFIELD64_MASK(b) \
   ((b) == 64 ? (~0ull) : BITFIELD64_BIT((b) & 63) - 1)
/** 64-bit mask with `count` consecutive bits set, starting at bit b. */
#define BITFIELD64_RANGE(b, count) \
   (BITFIELD64_MASK((b) + (count)) & ~BITFIELD64_MASK(b))
#endif /* UTIL_MACROS_H */
@@ -0,0 +1,29 @@
#include "inc/hsa.h"
#include "inc/hsa_api_trace.h"
#include "core/inc/hsa_table_interface.h"
#ifdef __cplusplus
extern "C" {
#endif
// Exported library entry points.
void HSA_API_EXPORT Load(const ::HsaApiTable* table);
void HSA_API_EXPORT Unload();

// Optional hook invoked from Unload(). Set by the finalizer or image library
// when needed; stays NULL otherwise.
void (*UnloadCallback)() = NULL;

// Hands the API table to the table interface so that calls made through it
// bypass the runtime intercept layer.
void Load(const ::HsaApiTable* table) {
  hsa_table_interface_init(table);
}

// Runs the per-library unload hook, if one was registered.
void Unload() {
  if (UnloadCallback == NULL) {
    return;
  }
  UnloadCallback();
}
#ifdef __cplusplus
}
#endif
@@ -0,0 +1,975 @@
#include "blit_kernel.h"
#if (defined(WIN32) || defined(_WIN32))
#define NOMINMAX
#endif
#include <algorithm>
#include <atomic>
#include <sstream>
#include <string>
#include "image_manager.h"
#include "image_runtime.h"
#include "util.h"
#undef HSA_ARGUMENT_ALIGN_BYTES
#define HSA_ARGUMENT_ALIGN_BYTES 16
#include "core/inc/hsa_table_interface.h"
// Blit kernel code objects, defined outside this file.
// NOTE(review): the three fixed-size gfx7xx/gfx8xx/gfx9xx arrays are not
// referenced in the part of this file visible here -- confirm they are still
// needed.
extern uint8_t blit_object_gfx7xx[14608];
extern uint8_t blit_object_gfx8xx[15424];
extern uint8_t blit_object_gfx9xx[15432];
// Per-target code objects. GetPatchedBlitObject() selects one of these from
// the number that follows "gfx" in the agent name.
extern uint8_t ocl_blit_object_gfx700[];
extern uint8_t ocl_blit_object_gfx701[];
extern uint8_t ocl_blit_object_gfx702[];
extern uint8_t ocl_blit_object_gfx801[];
extern uint8_t ocl_blit_object_gfx802[];
extern uint8_t ocl_blit_object_gfx803[];
extern uint8_t ocl_blit_object_gfx900[];
extern uint8_t ocl_blit_object_gfx902[];
extern uint8_t ocl_blit_object_gfx904[];
extern uint8_t ocl_blit_object_gfx906[];
extern uint8_t ocl_blit_object_gfx908[];
extern uint8_t ocl_blit_object_gfx1010[];
extern uint8_t ocl_blit_object_gfx1011[];
extern uint8_t ocl_blit_object_gfx1012[];
namespace amd {
// Arguments inserted by OCL compiler, all zero here.
// This struct is the last member of every KernelArgs block in this file;
// the blocks are memset to zero and these fields are never written afterwards.
struct OCLHiddenArgs {
// Three 64-bit offsets (x, y, z).
uint64_t offset_x;
uint64_t offset_y;
uint64_t offset_z;
// Pointer-sized slots; left NULL by the blit code.
void* printf_buffer;
void* enqueue;
void* enqueue2;
void* multi_grid;
};
// Allocates `size` bytes from the image runtime's kernarg memory pool and
// grants `agent` access to the allocation.
//
// Returns the allocation, or NULL when either the allocation or the access
// grant fails (the allocation is released again in the latter case). Debug
// builds additionally assert on both failures. The caller frees the result
// with hsa_amd_memory_pool_free().
static void* Allocate(hsa_agent_t agent, size_t size) {
  const hsa_amd_memory_pool_t kernarg_pool =
      ext_image::ImageRuntime::instance()->kernarg_pool();

  void* buffer = NULL;
  const hsa_status_t alloc_status =
      hsa_amd_memory_pool_allocate(kernarg_pool, size, 0, &buffer);
  assert(alloc_status == HSA_STATUS_SUCCESS);
  if (alloc_status != HSA_STATUS_SUCCESS) {
    return NULL;
  }

  const hsa_status_t access_status =
      hsa_amd_agents_allow_access(1, &agent, NULL, buffer);
  assert(access_status == HSA_STATUS_SUCCESS);
  if (access_status == HSA_STATUS_SUCCESS) {
    return buffer;
  }

  // Access could not be granted: do not hand out memory the agent cannot use.
  hsa_amd_memory_pool_free(buffer);
  return NULL;
}
// Construction performs no work of its own.
BlitKernel::BlitKernel() = default;

// Destruction performs no work of its own; executables are destroyed by
// Cleanup().
BlitKernel::~BlitKernel() = default;

// Nothing to set up; always reports success.
hsa_status_t BlitKernel::Initialize() {
  return HSA_STATUS_SUCCESS;
}
// Destroys every cached executable and empties both the executable and the
// code-object caches. The results of the destroy calls are ignored; the
// function always returns HSA_STATUS_SUCCESS.
hsa_status_t BlitKernel::Cleanup() {
  for (auto it = code_executable_map_.begin();
       it != code_executable_map_.end(); ++it) {
    hsa_executable_destroy(it->second);
  }

  code_executable_map_.clear();
  code_object_map_.clear();

  return HSA_STATUS_SUCCESS;
}
// Fills `blit_code_catalog` with the blit kernels usable by `agent`.
//
// An executable already cached for an ISA compatible with the agent's ISA is
// reused. Otherwise the code object matching the agent name is loaded into a
// new executable, which is frozen and cached under the agent's ISA handle.
//
// The new executable is cached only after it has been loaded and frozen
// successfully. If either step fails it is destroyed, so a later call never
// picks up a half-built executable from the cache.
//
// Returns HSA_STATUS_SUCCESS, or the status of the first failing HSA call.
hsa_status_t BlitKernel::BuildBlitCode(
    hsa_agent_t agent, std::vector<BlitCodeInfo>& blit_code_catalog) {
  // Find existing kernels in the list that have compatible ISA.
  hsa_isa_t agent_isa = {0};
  hsa_status_t status =
      hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_isa);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }

  std::lock_guard<std::mutex> lock(lock_);

  for (const auto& entry : code_executable_map_) {
    bool isa_compatible = false;
    hsa_isa_t code_isa = {entry.first};
    status = hsa_isa_compatible(code_isa, agent_isa, &isa_compatible);
    if (HSA_STATUS_SUCCESS != status) {
      return status;
    }
    if (isa_compatible) {
      return PopulateKernelCode(agent, entry.second, blit_code_catalog);
    }
  }

  // No existing compatible kernels. Build new kernels.
  hsa_code_object_t code_object = {0};

  // Get the target name.
  char agent_name[64] = {0};
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, &agent_name);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }

  // Get the patched code object.
  uint8_t* patched_code_object;
  status = BlitKernel::GetPatchedBlitObject(agent_name, &patched_code_object);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }

  // Pass the patched code object.
  code_object.handle = reinterpret_cast<uint64_t>(patched_code_object);

  // Create executable.
  hsa_executable_t executable = {0};
  status = hsa_executable_create(
      HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, "", &executable);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }

  // Load code object, then freeze the executable.
  status = hsa_executable_load_code_object(executable, agent, code_object, "");
  if (HSA_STATUS_SUCCESS == status) {
    status = hsa_executable_freeze(executable, "");
  }
  if (HSA_STATUS_SUCCESS != status) {
    // Do not leave an unusable executable behind (neither leaked nor cached).
    hsa_executable_destroy(executable);
    return status;
  }

  // Only a fully built executable is published to the caches.
  code_object_map_[agent_isa.handle] = code_object;
  code_executable_map_[agent_isa.handle] = executable;

  return PopulateKernelCode(agent, executable, blit_code_catalog);
}
// Copies pixel data from a linear buffer into a region of `dst_image`.
//
// blit_queue         queue the copy kernel is submitted to.
// blit_code_catalog  kernel table indexed by KernelOp.
// src_memory         start of the source data.
// src_row_pitch      source row pitch in bytes.
// src_slice_pitch    source slice pitch in bytes.
// dst_image          destination image.
// image_region       destination offset and extent.
//
// 1DB images are copied as a linear byte range of dst_image.data with
// hsa_memory_copy(); no kernel is dispatched for them. For all other
// geometries the call blocks until the copy kernel has completed.
//
// Returns the status of the failing step, HSA_STATUS_ERROR_OUT_OF_RESOURCES
// when the kernel argument block cannot be allocated, or the launch status.
hsa_status_t BlitKernel::CopyBufferToImage(
    BlitQueue& blit_queue, const std::vector<BlitCodeInfo>& blit_code_catalog,
    const void* src_memory, size_t src_row_pitch, size_t src_slice_pitch,
    const Image& dst_image, const hsa_ext_image_region_t& image_region) {
  if (dst_image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    ImageManager* manager =
        ext_image::ImageRuntime::instance()->image_manager(dst_image.component);
    const uint32_t element_size =
        manager->GetImageProperty(dst_image.component, dst_image.desc.format,
                                  dst_image.desc.geometry).element_size;
    const size_t dst_origin = image_region.offset.x * element_size;
    char* dst_memory = reinterpret_cast<char*>(dst_image.data) + dst_origin;
    const size_t size = image_region.range.x * element_size;
    return hsa_memory_copy(dst_memory, src_memory, size);
  }

  // The view is either &dst_image itself or a newly created image that must
  // be destroyed before returning.
  const Image* dst_image_view = NULL;
  hsa_status_t status = ConvertImage(dst_image, &dst_image_view);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }
  assert(dst_image_view != NULL);

  hsa_kernel_dispatch_packet_t packet = {0};
  const BlitCodeInfo& blit_code =
      blit_code_catalog.at(KERNEL_OP_COPY_BUFFER_TO_IMAGE);
  packet.kernel_object = blit_code.code_handle_;
  packet.group_segment_size = blit_code.group_segment_size_;
  packet.private_segment_size = blit_code.private_segment_size_;

  // Kernel argument layout (must stay in this order):
  //   buffer       start of the source data
  //   image        image handle, replicated into all five slots
  //   pixelOrigin  destination offset (x, y, z); fourth entry stays zero
  //   format[0]    number of channels
  //   format[1]    size of one channel in bytes
  //   format[2]    max(dwords per pixel, 1)
  //   format[3]    image geometry
  //   pitch        source row pitch in pixels
  //   slice_pitch  source slice pitch in pixels
  struct KernelArgs {
    const void* buffer;
    uint64_t image[5];
    int32_t pixelOrigin[4];
    uint32_t format[4];
    uint64_t pitch;
    uint64_t slice_pitch;
    OCLHiddenArgs ocl;
  };

  KernelArgs* args = static_cast<KernelArgs*>(
      Allocate(dst_image_view->component, sizeof(KernelArgs)));
  if (args == NULL) {
    // Allocation failure must not reach memset() in release builds, and the
    // temporary view must not leak.
    if (&dst_image != dst_image_view) {
      Image::Destroy(dst_image_view);
    }
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }
  memset(args, 0, sizeof(KernelArgs));

  args->buffer = src_memory;
  for (auto& img : args->image) {
    img = dst_image_view->Convert();
  }
  args->pixelOrigin[0] = image_region.offset.x;
  args->pixelOrigin[1] = image_region.offset.y;
  args->pixelOrigin[2] = image_region.offset.z;

  ImageManager* manager = ext_image::ImageRuntime::instance()->image_manager(
      dst_image_view->component);
  const uint32_t element_size =
      manager->GetImageProperty(dst_image_view->component,
                                dst_image_view->desc.format,
                                dst_image_view->desc.geometry).element_size;

  // Try to minimize the read operation to buffer by reading the buffer
  // up to one DWORD at a time.
  uint32_t buffer_read_count = element_size / sizeof(uint32_t);
  buffer_read_count = (buffer_read_count == 0) ? 1 : buffer_read_count;

  const uint32_t num_channel = GetNumChannel(*dst_image_view);
  const uint32_t size_per_channel = element_size / num_channel;
  args->format[0] = num_channel;
  args->format[1] = size_per_channel;
  args->format[2] = buffer_read_count;
  args->format[3] = dst_image_view->desc.geometry;

  unsigned long buffer_pitch[2] = {0, 0};
  CalcBufferRowSlicePitchesInPixel(dst_image_view->desc.geometry, element_size,
                                   image_region.range, src_row_pitch,
                                   src_slice_pitch, buffer_pitch);
  args->pitch = buffer_pitch[0];
  args->slice_pitch = buffer_pitch[1];
  packet.kernarg_address = args;

  // Setup packet dimension and working size.
  CalcWorkingSize(*dst_image_view, image_region.range, packet);

  status = LaunchKernel(blit_queue, packet);

  if (&dst_image != dst_image_view) {
    Image::Destroy(dst_image_view);
  }
  hsa_amd_memory_pool_free(args);
  return status;
}
// Copies a region of `src_image` into a linear buffer.
//
// blit_queue         queue the copy kernel is submitted to.
// blit_code_catalog  kernel table indexed by KernelOp.
// src_image          source image.
// dst_memory         start of the destination buffer.
// dst_row_pitch      destination row pitch in bytes.
// dst_slice_pitch    destination slice pitch in bytes.
// image_region       source offset and extent.
//
// 1DB images are copied as a linear byte range of src_image.data with
// hsa_memory_copy(); no kernel is dispatched for them. For all other
// geometries the call blocks until the copy kernel has completed.
//
// Returns the status of the failing step, HSA_STATUS_ERROR_OUT_OF_RESOURCES
// when the kernel argument block cannot be allocated, or the launch status.
hsa_status_t BlitKernel::CopyImageToBuffer(
    BlitQueue& blit_queue, const std::vector<BlitCodeInfo>& blit_code_catalog,
    const Image& src_image, void* dst_memory, size_t dst_row_pitch,
    size_t dst_slice_pitch, const hsa_ext_image_region_t& image_region) {
  if (src_image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    ImageManager* manager =
        ext_image::ImageRuntime::instance()->image_manager(src_image.component);
    const uint32_t element_size =
        manager->GetImageProperty(src_image.component, src_image.desc.format,
                                  src_image.desc.geometry).element_size;
    const size_t src_origin = image_region.offset.x * element_size;
    const char* src_memory =
        reinterpret_cast<const char*>(src_image.data) + src_origin;
    const size_t size = image_region.range.x * element_size;
    return hsa_memory_copy(dst_memory, src_memory, size);
  }

  // The view is either &src_image itself or a newly created image that must
  // be destroyed before returning.
  const Image* src_image_view = NULL;
  hsa_status_t status = ConvertImage(src_image, &src_image_view);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }
  assert(src_image_view != NULL);

  hsa_kernel_dispatch_packet_t packet = {0};
  const BlitCodeInfo& blit_code =
      blit_code_catalog.at(KERNEL_OP_COPY_IMAGE_TO_BUFFER);
  packet.kernel_object = blit_code.code_handle_;
  packet.group_segment_size = blit_code.group_segment_size_;
  packet.private_segment_size = blit_code.private_segment_size_;

  // Kernel argument layout (must stay in this order):
  //   image        image handle, replicated into all five slots
  //   buffer       start of the destination buffer
  //   pixelOrigin  source offset (x, y, z); fourth entry stays zero
  //   format[0]    number of channels
  //   format[1]    size of one channel in bytes
  //   format[2]    max(dwords per pixel, 1)
  //   format[3]    image geometry
  //   pitch        destination row pitch in pixels
  //   slice_pitch  destination slice pitch in pixels
  struct KernelArgs {
    uint64_t image[5];
    void* buffer;
    int32_t pixelOrigin[4];
    uint32_t format[4];
    uint64_t pitch;
    uint64_t slice_pitch;
    OCLHiddenArgs ocl;
  };

  KernelArgs* args = static_cast<KernelArgs*>(
      Allocate(src_image_view->component, sizeof(KernelArgs)));
  if (args == NULL) {
    // Allocation failure must not reach memset() in release builds, and the
    // temporary view must not leak.
    if (&src_image != src_image_view) {
      Image::Destroy(src_image_view);
    }
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }
  memset(args, 0, sizeof(KernelArgs));

  for (auto& img : args->image) {
    img = src_image_view->Convert();
  }
  args->buffer = dst_memory;
  args->pixelOrigin[0] = image_region.offset.x;
  args->pixelOrigin[1] = image_region.offset.y;
  args->pixelOrigin[2] = image_region.offset.z;

  ImageManager* manager = ext_image::ImageRuntime::instance()->image_manager(
      src_image_view->component);
  const uint32_t element_size =
      manager->GetImageProperty(src_image_view->component,
                                src_image_view->desc.format,
                                src_image_view->desc.geometry).element_size;

  // Try to minimize the write operation to buffer by writing the buffer
  // up to one DWORD at a time.
  uint32_t buffer_write_count = element_size / sizeof(uint32_t);
  buffer_write_count = (buffer_write_count == 0) ? 1 : buffer_write_count;

  const uint32_t num_channel = GetNumChannel(*src_image_view);
  const uint32_t size_per_channel = element_size / num_channel;
  args->format[0] = num_channel;
  args->format[1] = size_per_channel;
  args->format[2] = buffer_write_count;
  args->format[3] = src_image_view->desc.geometry;

  unsigned long buffer_pitch[2] = {0, 0};
  CalcBufferRowSlicePitchesInPixel(src_image_view->desc.geometry, element_size,
                                   image_region.range, dst_row_pitch,
                                   dst_slice_pitch, buffer_pitch);
  args->pitch = buffer_pitch[0];
  args->slice_pitch = buffer_pitch[1];
  packet.kernarg_address = args;

  // Setup packet dimension and working size.
  CalcWorkingSize(*src_image_view, image_region.range, packet);

  status = LaunchKernel(blit_queue, packet);

  if (&src_image != src_image_view) {
    Image::Destroy(src_image_view);
  }
  hsa_amd_memory_pool_free(args);
  return status;
}
// Copies a `size` region from `src_image` (at src_origin) to `dst_image`
// (at dst_origin). Both images must belong to the same component.
//
// For KERNEL_OP_COPY_IMAGE_DEFAULT both images are first passed through
// ConvertImage() and the kernel is chosen from the 1DB / non-1DB combination
// of the two geometries. For any other `copy_type` the images are used as
// they are and `copy_type` selects the kernel directly.
//
// Temporary views created by ConvertImage() are destroyed on every exit
// path, including the failure paths. Blocks until the kernel has completed.
//
// Returns the status of the failing step, HSA_STATUS_ERROR_OUT_OF_RESOURCES
// when the kernel argument block cannot be allocated, or the launch status.
hsa_status_t BlitKernel::CopyImage(
    BlitQueue& blit_queue, const std::vector<BlitCodeInfo>& blit_code_catalog,
    const Image& dst_image, const Image& src_image,
    const hsa_dim3_t& dst_origin, const hsa_dim3_t& src_origin,
    const hsa_dim3_t size, KernelOp copy_type) {
  assert(src_image.component.handle == dst_image.component.handle);

  const Image* src_image_view = &src_image;
  const Image* dst_image_view = &dst_image;

  // Destroys whichever views are temporaries (i.e. not the caller's images).
  auto release_views = [&]() {
    if (&src_image != src_image_view) {
      Image::Destroy(src_image_view);
    }
    if (&dst_image != dst_image_view) {
      Image::Destroy(dst_image_view);
    }
  };

  const BlitCodeInfo* blit_code = NULL;
  if (copy_type == KERNEL_OP_COPY_IMAGE_DEFAULT) {
    // Linear to linear image copy.
    hsa_status_t status = ConvertImage(src_image, &src_image_view);
    if (HSA_STATUS_SUCCESS != status) {
      return status;
    }
    assert(src_image_view != NULL);

    status = ConvertImage(dst_image, &dst_image_view);
    if (HSA_STATUS_SUCCESS != status) {
      // The source view may already be a temporary; do not leak it.
      release_views();
      return status;
    }
    assert(dst_image_view != NULL);

    const hsa_ext_image_geometry_t src_geometry = src_image_view->desc.geometry;
    const hsa_ext_image_geometry_t dst_geometry = dst_image_view->desc.geometry;
    if (src_geometry != HSA_EXT_IMAGE_GEOMETRY_1DB &&
        dst_geometry != HSA_EXT_IMAGE_GEOMETRY_1DB) {
      blit_code = &blit_code_catalog.at(KERNEL_OP_COPY_IMAGE_DEFAULT);
    } else if (src_geometry == HSA_EXT_IMAGE_GEOMETRY_1DB &&
               dst_geometry != HSA_EXT_IMAGE_GEOMETRY_1DB) {
      blit_code = &blit_code_catalog.at(KERNEL_OP_COPY_IMAGE_1DB_TO_REG);
    } else if (src_geometry != HSA_EXT_IMAGE_GEOMETRY_1DB &&
               dst_geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
      blit_code = &blit_code_catalog.at(KERNEL_OP_COPY_IMAGE_REG_TO_1DB);
    } else {
      blit_code = &blit_code_catalog.at(KERNEL_OP_COPY_IMAGE_1DB);
    }
  } else {
    blit_code = &blit_code_catalog.at(copy_type);
  }

  hsa_kernel_dispatch_packet_t packet = {0};
  packet.kernel_object = blit_code->code_handle_;
  packet.group_segment_size = blit_code->group_segment_size_;
  packet.private_segment_size = blit_code->private_segment_size_;

  // Kernel argument layout (must stay in this order). The image handles are
  // replicated into all five slots; the fourth origin entry stays zero; the
  // two format fields carry the image geometries.
  struct KernelArgs {
    uint64_t src[5];
    uint64_t dst[5];
    int32_t srcOrigin[4];
    int32_t dstOrigin[4];
    int32_t srcFormat;
    int32_t dstFormat;
    OCLHiddenArgs ocl;
  };

  KernelArgs* args = static_cast<KernelArgs*>(
      Allocate(dst_image_view->component, sizeof(KernelArgs)));
  if (args == NULL) {
    // Allocation failure must not reach memset() in release builds.
    release_views();
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }
  memset(args, 0, sizeof(KernelArgs));

  for (auto& img : args->src) {
    img = src_image_view->Convert();
  }
  args->srcFormat = src_image_view->desc.geometry;
  args->srcOrigin[0] = src_origin.x;
  args->srcOrigin[1] = src_origin.y;
  args->srcOrigin[2] = src_origin.z;

  for (auto& img : args->dst) {
    img = dst_image_view->Convert();
  }
  args->dstFormat = dst_image_view->desc.geometry;
  args->dstOrigin[0] = dst_origin.x;
  args->dstOrigin[1] = dst_origin.y;
  args->dstOrigin[2] = dst_origin.z;

  packet.kernarg_address = args;

  // Setup packet dimension and working size.
  CalcWorkingSize(*src_image_view, *dst_image_view, size, packet);

  hsa_status_t status = LaunchKernel(blit_queue, packet);

  release_views();
  hsa_amd_memory_pool_free(args);
  return status;
}
// Fills `region` of `image` with a constant value.
//
// blit_queue         queue the clear kernel is submitted to.
// blit_code_catalog  kernel table indexed by KernelOp.
// image              image to fill.
// pattern            fill value; four 32-bit words are read from it.
// region             offset and extent of the area to fill.
//
// 1DB images use KERNEL_OP_CLEAR_IMAGE_1DB, all other geometries use
// KERNEL_OP_CLEAR_IMAGE. Blocks until the kernel has completed.
//
// Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES when the kernel argument block
// cannot be allocated, otherwise the launch status.
hsa_status_t BlitKernel::FillImage(
    BlitQueue& blit_queue, const std::vector<BlitCodeInfo>& blit_code_catalog,
    const Image& image, const void* pattern,
    const hsa_ext_image_region_t& region) {
  hsa_kernel_dispatch_packet_t packet = {0};

  const BlitCodeInfo& blit_code =
      (image.desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)
          ? blit_code_catalog.at(KERNEL_OP_CLEAR_IMAGE)
          : blit_code_catalog.at(KERNEL_OP_CLEAR_IMAGE_1DB);
  packet.kernel_object = blit_code.code_handle_;
  packet.group_segment_size = blit_code.group_segment_size_;
  packet.private_segment_size = blit_code.private_segment_size_;

  // Kernel argument layout (must stay in this order):
  //   image   image handle, replicated into all five slots
  //   format  image geometry
  //   type    access type from GetImageAccessType()
  //   data    the four pattern words
  //   origin  region offset (x, y, z); fourth entry stays zero
  struct KernelArgs {
    uint64_t image[5];
    int32_t format;
    uint32_t type;
    uint32_t data[4];
    int32_t origin[4];
    OCLHiddenArgs ocl;
  };

  KernelArgs* args =
      static_cast<KernelArgs*>(Allocate(image.component, sizeof(KernelArgs)));
  if (args == NULL) {
    // Allocation failure must not reach memset() in release builds.
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }
  memset(args, 0, sizeof(KernelArgs));

  for (auto& img : args->image) {
    img = image.Convert();
  }
  args->format = image.desc.geometry;

  const uint32_t* pattern_words = static_cast<const uint32_t*>(pattern);
  for (int i = 0; i < 4; i++) {
    args->data[i] = pattern_words[i];
  }

  args->origin[0] = region.offset.x;
  args->origin[1] = region.offset.y;
  args->origin[2] = region.offset.z;
  args->type = GetImageAccessType(image);
  packet.kernarg_address = args;

  // Setup packet dimension and working size.
  CalcWorkingSize(image, region.range, packet);

  hsa_status_t status = LaunchKernel(blit_queue, packet);

  hsa_amd_memory_pool_free(args);
  return status;
}
// Kernel symbol names with a leading '&', one per KernelOp.
// NOTE(review): this table is not read anywhere in the part of the file
// visible here -- confirm whether it is still used.
const char *BlitKernel::kernel_name_[KERNEL_OP_COUNT] = {
"&__copy_image_to_buffer_kernel",
"&__copy_buffer_to_image_kernel",
"&__copy_image_default_kernel",
"&__copy_image_linear_to_standard_kernel",
"&__copy_image_standard_to_linear_kernel",
"&__copy_image_1db_kernel",
"&__copy_image_1db_to_reg_kernel",
"&__copy_image_reg_to_1db_kernel",
"&__clear_image_kernel",
"&__clear_image_1db_kernel"};
// Symbol names looked up by PopulateKernelCode(). Entry i is stored at index i
// of the blit code catalog, which callers index with KernelOp values, so the
// order here must match the KernelOp enumeration.
const char *BlitKernel::ocl_kernel_name_[KERNEL_OP_COUNT] = {
"copy_image_to_buffer.kd",
"copy_buffer_to_image.kd",
"copy_image_default.kd",
"copy_image_linear_to_standard.kd",
"copy_image_standard_to_linear.kd",
"copy_image_1db.kd",
"copy_image_1db_to_reg.kd",
"copy_image_reg_to_1db.kd",
"clear_image.kd",
"clear_image_1db.kd"};
// Rebuilds `blit_code_catalog` from `executable`: for every KernelOp the
// symbol named in ocl_kernel_name_ is looked up for `agent`, and its kernel
// object handle plus group and private segment sizes are recorded.
//
// On the first failing query the catalog is left empty and the failing
// status is returned; on success the catalog holds KERNEL_OP_COUNT entries.
hsa_status_t BlitKernel::PopulateKernelCode(
    hsa_agent_t agent, hsa_executable_t executable,
    std::vector<BlitCodeInfo>& blit_code_catalog) {
  blit_code_catalog.clear();

  hsa_status_t result = HSA_STATUS_SUCCESS;
  for (int op = 0; op < KERNEL_OP_COUNT; ++op) {
    // Resolve the kernel symbol for this operation.
    hsa_executable_symbol_t symbol = {0};
    result = hsa_executable_get_symbol_by_name(executable,
                                               ocl_kernel_name_[op], &agent,
                                               &symbol);
    if (result != HSA_STATUS_SUCCESS) {
      break;
    }

    // Query the three attributes in order, stopping at the first failure.
    BlitCodeInfo info = {0};
    result = hsa_executable_symbol_get_info(
        symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &info.code_handle_);
    if (result == HSA_STATUS_SUCCESS) {
      result = hsa_executable_symbol_get_info(
          symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
          &info.group_segment_size_);
    }
    if (result == HSA_STATUS_SUCCESS) {
      result = hsa_executable_symbol_get_info(
          symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
          &info.private_segment_size_);
    }
    if (result != HSA_STATUS_SUCCESS) {
      break;
    }

    blit_code_catalog.push_back(info);
  }

  if (result != HSA_STATUS_SUCCESS) {
    // Never hand back a partially filled catalog.
    blit_code_catalog.clear();
    return result;
  }

  assert(blit_code_catalog.size() == KERNEL_OP_COUNT);
  return HSA_STATUS_SUCCESS;
}
// Converts buffer row and slice pitches from bytes to pixels.
//
// out_pitch_pixel[0] receives the row pitch: the larger of the copy width
// and in_row_pitch_byte / element_size.
// out_pitch_pixel[1] receives the slice pitch: for 1DA images it equals the
// row pitch; otherwise it is the larger of (row pitch * copy height) and
// in_slice_pitch_byte / element_size.
void BlitKernel::CalcBufferRowSlicePitchesInPixel(
    hsa_ext_image_geometry_t geometry, uint32_t element_size,
    const hsa_dim3_t& copy_size, size_t in_row_pitch_byte,
    size_t in_slice_pitch_byte, unsigned long* out_pitch_pixel) {
  const unsigned long width_pixels = static_cast<unsigned long>(copy_size.x);
  const unsigned long row_pitch_pixels =
      static_cast<unsigned long>(in_row_pitch_byte / element_size);
  out_pitch_pixel[0] = std::max(width_pixels, row_pitch_pixels);

  if (geometry == HSA_EXT_IMAGE_GEOMETRY_1DA) {
    out_pitch_pixel[1] = out_pitch_pixel[0];
  } else {
    const unsigned long packed_slice_pixels =
        static_cast<unsigned long>(out_pitch_pixel[0] * copy_size.y);
    const unsigned long slice_pitch_pixels =
        static_cast<unsigned long>(in_slice_pitch_byte / element_size);
    out_pitch_pixel[1] = std::max(packed_slice_pixels, slice_pitch_pixels);
  }

  assert((out_pitch_pixel[0] <= out_pitch_pixel[1]));
}
uint32_t BlitKernel::GetDimSize(const Image& image) {
static const uint32_t kDimSizeTable[] = {
1, // HSA_EXT_IMAGE_GEOMETRY_1D
2, // HSA_EXT_IMAGE_GEOMETRY_2D
3, // HSA_EXT_IMAGE_GEOMETRY_3D
2, // HSA_EXT_IMAGE_GEOMETRY_1DA
3, // HSA_EXT_IMAGE_GEOMETRY_2DA
1, // HSA_EXT_IMAGE_GEOMETRY_1DB
2, // HSA_EXT_IMAGE_GEOMETRY_2DDEPTH
3, // HSA_EXT_IMAGE_GEOMETRY_2DADEPTH
};
return kDimSizeTable[image.desc.geometry];
}
// Returns the channel count for the image's channel order, taken from a
// table indexed by the channel order value. The order is not range-checked.
uint32_t BlitKernel::GetNumChannel(const Image& image) {
  static const uint32_t kChannelsByOrder[] = {
      1,  // HSA_EXT_IMAGE_CHANNEL_ORDER_A
      1,  // HSA_EXT_IMAGE_CHANNEL_ORDER_R
      1,  // HSA_EXT_IMAGE_CHANNEL_ORDER_RX
      2,  // HSA_EXT_IMAGE_CHANNEL_ORDER_RG
      2,  // HSA_EXT_IMAGE_CHANNEL_ORDER_RGX
      2,  // HSA_EXT_IMAGE_CHANNEL_ORDER_RA
      3,  // HSA_EXT_IMAGE_CHANNEL_ORDER_RGB
      3,  // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX
      4,  // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA
      4,  // HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA
      4,  // HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB
      4,  // HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR
      3,  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB
      3,  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX
      4,  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA
      4,  // HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA
      1,  // HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY
      1,  // HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE
      1,  // HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH
      1,  // HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL
  };

  const uint32_t order = image.desc.format.channel_order;
  return kChannelsByOrder[order];
}
// Maps the image's channel type to the access class passed to the clear
// kernel: 0 for float access, 1 for signed integer access and 2 for unsigned
// integer access. The channel type is not range-checked.
uint32_t BlitKernel::GetImageAccessType(const Image& image) {
  enum AccessType {
    ACCESS_TYPE_F = 0,
    ACCESS_TYPE_I = 1,
    ACCESS_TYPE_UI = 2,
  };

  static const uint32_t kAccessByChannelType[] = {
      ACCESS_TYPE_F,   // HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8
      ACCESS_TYPE_F,   // HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16
      ACCESS_TYPE_F,   // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8
      ACCESS_TYPE_F,   // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16
      ACCESS_TYPE_F,   // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24
      ACCESS_TYPE_F,   // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555
      ACCESS_TYPE_F,   // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565
      ACCESS_TYPE_F,   // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010
      ACCESS_TYPE_I,   // HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8
      ACCESS_TYPE_I,   // HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16
      ACCESS_TYPE_I,   // HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32
      ACCESS_TYPE_UI,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8
      ACCESS_TYPE_UI,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16
      ACCESS_TYPE_UI,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32
      ACCESS_TYPE_F,   // HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT
      ACCESS_TYPE_F    // HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT
  };

  const uint32_t channel_type = image.desc.format.channel_type;
  return kAccessByChannelType[channel_type];
}
// Fills the dispatch dimensions, grid size and workgroup size of `packet`
// for a blit over `range` of `image`:
//   1D / 1DB / 1DA            setup 2, grid (x, y, 1), workgroup 64 x 1 x 1
//   2D / 2DDEPTH / 2DA(DEPTH) setup 3, grid (x, y, z), workgroup 8 x 8 x 1
//   3D                        setup 3, grid (x, y, z), workgroup 4 x 4 x 4
// The packet is left untouched for any other geometry value.
void BlitKernel::CalcWorkingSize(const Image& image, const hsa_dim3_t& range,
                                 hsa_kernel_dispatch_packet_t& packet) {
  uint16_t dimensions = 0;
  uint16_t group_x = 0;
  uint16_t group_y = 0;
  uint16_t group_z = 0;
  uint32_t grid_depth = 0;

  switch (image.desc.geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1D:
    case HSA_EXT_IMAGE_GEOMETRY_1DB:
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
      dimensions = 2;
      group_x = 64;
      group_y = 1;
      group_z = 1;
      grid_depth = 1;
      break;
    case HSA_EXT_IMAGE_GEOMETRY_2D:
    case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_2DA:
      dimensions = 3;
      group_x = 8;
      group_y = 8;
      group_z = 1;
      grid_depth = range.z;
      break;
    case HSA_EXT_IMAGE_GEOMETRY_3D:
      dimensions = 3;
      group_x = 4;
      group_y = 4;
      group_z = 4;
      grid_depth = range.z;
      break;
    default:
      // Unknown geometry: leave the packet as it is.
      return;
  }

  packet.setup = dimensions;
  packet.grid_size_x = range.x;
  packet.grid_size_y = range.y;
  packet.grid_size_z = grid_depth;
  packet.workgroup_size_x = group_x;
  packet.workgroup_size_y = group_y;
  packet.workgroup_size_z = group_z;
}
void BlitKernel::CalcWorkingSize(const Image& src_image, const Image& dst_image,
const hsa_dim3_t& range,
hsa_kernel_dispatch_packet_t& packet) {
if (GetDimSize(src_image) < GetDimSize(dst_image)) {
CalcWorkingSize(src_image, range, packet);
} else {
CalcWorkingSize(dst_image, range, packet);
}
}
// Produces the view of `original_image` that the blit kernels operate on.
//
// If the channel type and channel order already equal their converted
// values, *new_image is set to &original_image and nothing is allocated.
// Otherwise a new Image is created as a copy of the original, its geometry
// is mapped from the depth variants to 2D / 2DA, and its SRD is modified for
// the converted format. The caller owns that image and must release it with
// Image::Destroy() when *new_image != &original_image.
//
// On failure *new_image is left untouched and no image is leaked. Returns
// HSA_STATUS_ERROR_OUT_OF_RESOURCES if the image cannot be created, or the
// status reported by ModifyImageSrd().
hsa_status_t BlitKernel::ConvertImage(const Image& original_image,
                                      const Image** new_image) {
  // To simplify the kernel, some particular image channel types are converted
  // to a new channel type, while preserving the actual per pixel size.
  // E.g.: a UNORM SIGNED INT8 is converted into UNSIGNED INT8. This way the
  // kernel can just use read_imageui on all images.
  static const uint32_t kTypeConvertTable[] = {
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8,   // HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,  // HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8,   // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24,     // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8,   // HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,  // HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32,  // HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8,   // HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32,  // HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,  // HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT
      HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32   // HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT
  };

  // To simplify the kernel, some particular image channel orders are converted
  // to a new channel order, while preserving the actual per pixel size.
  // E.g.: a CHANNEL ORDER A is converted into CHANNEL ORDER R. This way the
  // kernel can just read the first components of vector4 on all images.
  static const uint32_t kOrderConvertTable[] = {
      HSA_EXT_IMAGE_CHANNEL_ORDER_R,     // HSA_EXT_IMAGE_CHANNEL_ORDER_A
      HSA_EXT_IMAGE_CHANNEL_ORDER_R,     // HSA_EXT_IMAGE_CHANNEL_ORDER_R
      HSA_EXT_IMAGE_CHANNEL_ORDER_R,     // HSA_EXT_IMAGE_CHANNEL_ORDER_RX
      HSA_EXT_IMAGE_CHANNEL_ORDER_RG,    // HSA_EXT_IMAGE_CHANNEL_ORDER_RG
      HSA_EXT_IMAGE_CHANNEL_ORDER_RG,    // HSA_EXT_IMAGE_CHANNEL_ORDER_RGX
      HSA_EXT_IMAGE_CHANNEL_ORDER_RG,    // HSA_EXT_IMAGE_CHANNEL_ORDER_RA
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGB,   // HSA_EXT_IMAGE_CHANNEL_ORDER_RGB
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGB,   // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA
      HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,  // HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA
      HSA_EXT_IMAGE_CHANNEL_ORDER_R,     // HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY
      HSA_EXT_IMAGE_CHANNEL_ORDER_R,     // HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE
      HSA_EXT_IMAGE_CHANNEL_ORDER_R,     // HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH
      HSA_EXT_IMAGE_CHANNEL_ORDER_RG     // HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL
  };

  const uint32_t current_type = original_image.desc.format.channel_type;
  uint32_t converted_type = kTypeConvertTable[current_type];
  const uint32_t current_order = original_image.desc.format.channel_order;
  uint32_t converted_order = kOrderConvertTable[current_order];

  // Nothing to convert: hand back the original image itself.
  if ((current_type == converted_type) && (current_order == converted_order)) {
    *new_image = &original_image;
    return HSA_STATUS_SUCCESS;
  }

  // Handle formats that drop channels on conversion, only usable with RGB(X)
  if ((current_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555) ||
      (current_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565) ||
      (current_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010)) {
    converted_order = HSA_EXT_IMAGE_CHANNEL_ORDER_R;
  }

  // For internal book keeping, depth isn't a HW type.
  const hsa_ext_image_geometry_t current_geometry =
      original_image.desc.geometry;
  hsa_ext_image_geometry_t converted_geometry = current_geometry;
  if (converted_geometry == HSA_EXT_IMAGE_GEOMETRY_2DDEPTH) {
    converted_geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
  } else if (converted_geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH) {
    converted_geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
  }

  hsa_ext_image_format_t new_format = {
      static_cast<hsa_ext_image_channel_type_t>(converted_type),
      static_cast<hsa_ext_image_channel_order_t>(converted_order)};

  amd::Image* new_image_handle = amd::Image::Create(original_image.component);
  if (new_image_handle == NULL) {
    // Creation failed; report it instead of dereferencing a null handle.
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  *new_image_handle = original_image;
  new_image_handle->desc.geometry = converted_geometry;

  hsa_status_t status = ext_image::ImageRuntime::instance()
                            ->image_manager(new_image_handle->component)
                            ->ModifyImageSrd(*new_image_handle, new_format);
  if (status != HSA_STATUS_SUCCESS) {
    // The caller never sees this handle, so it has to be released here.
    amd::Image::Destroy(new_image_handle);
    return status;
  }

  *new_image = new_image_handle;
  return HSA_STATUS_SUCCESS;
}
// Submits `packet` to blit_queue.queue_ as a kernel dispatch and blocks until
// the kernel has completed.
//
// packet.header and packet.completion_signal are overwritten; every other
// field must already be filled in by the caller.
//
// Returns the hsa_signal_create() status if the completion signal cannot be
// created, HSA_STATUS_ERROR if the signal wait returns a non-zero value, and
// HSA_STATUS_SUCCESS otherwise. The completion signal is destroyed before
// returning in both of the latter cases.
hsa_status_t BlitKernel::LaunchKernel(BlitQueue& blit_queue,
hsa_kernel_dispatch_packet_t& packet) {
static const uint16_t kInvalidPacketHeader = HSA_PACKET_TYPE_INVALID;
// Kernel dispatch, barrier bit clear, system-scope acquire and release fences.
static const uint16_t kDispatchPacketHeader =
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
(0 << HSA_PACKET_HEADER_BARRIER) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE);
// Copying the packet content to the queue buffer is not atomic, so it is
// possible that the packet has a valid packet type but invalid content.
// To make sure the packet processor does not read an invalid packet, we first
// initialize the packet type to invalid.
packet.header = kInvalidPacketHeader;
// Setup completion signal (initial value 1; the wait below expects it to
// drop below 1).
hsa_signal_t kernel_signal = {0};
hsa_status_t status = hsa_signal_create(1, 0, NULL, &kernel_signal);
if (HSA_STATUS_SUCCESS != status) {
return status;
}
packet.completion_signal = kernel_signal;
// Populate the queue.
hsa_queue_t* queue = blit_queue.queue_;
// Mask used to wrap the write index into the ring buffer; this relies on
// queue->size being a power of two.
const uint32_t bitmask = queue->size - 1;
// Reserve write index.
uint64_t write_index = hsa_queue_add_write_index_acq_rel(queue, 1);
// Busy-wait until the reserved slot is less than one queue length ahead of
// the read index.
while (true) {
// Wait until we have room in the queue;
const uint64_t read_index = hsa_queue_load_read_index_relaxed(queue);
if ((write_index - read_index) < queue->size) {
break;
}
}
// Populate queue buffer with AQL packet.
hsa_kernel_dispatch_packet_t* queue_buffer =
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(queue->base_address);
queue_buffer[write_index & bitmask] = packet;
// Order the packet body before the header store that makes it valid.
std::atomic_thread_fence(std::memory_order_release);
// Enable packet.
queue_buffer[write_index & bitmask].header = kDispatchPacketHeader;
// Update doorbell register.
hsa_signal_store_release(queue->doorbell_signal, write_index);
// Wait for the packet to finish.
if (hsa_signal_wait_acquire(kernel_signal, HSA_SIGNAL_CONDITION_LT, 1,
uint64_t(-1), HSA_WAIT_STATE_ACTIVE) != 0) {
status = hsa_signal_destroy(kernel_signal);
assert(status == HSA_STATUS_SUCCESS);
// Signal wait returned unexpected value.
return HSA_STATUS_ERROR;
}
// Cleanup
status = hsa_signal_destroy(kernel_signal);
assert(status == HSA_STATUS_SUCCESS);
return HSA_STATUS_SUCCESS;
}
// Selects the embedded blit code object that matches a "gfxNNN" agent name.
//
// agent_name        Name that must begin with "gfx"; the characters after the
//                   prefix are parsed as a decimal number with atoi().
// blit_code_object  On success receives the address of the matching
//                   ocl_blit_object_gfxNNN array.
//
// Returns HSA_STATUS_SUCCESS when the parsed number is one of the targets
// listed below, HSA_STATUS_ERROR_INVALID_ISA_NAME otherwise (missing "gfx"
// prefix or unknown target number).
hsa_status_t BlitKernel::GetPatchedBlitObject(const char* agent_name,
                                              uint8_t** blit_code_object) {
  // One row per supported target: gfx number -> embedded code object.
  struct TargetBlob {
    uint64_t gfx_target;
    uint8_t* code_object;
  };
  static const TargetBlob kKnownTargets[] = {
      {700, ocl_blit_object_gfx700},   {701, ocl_blit_object_gfx701},
      {702, ocl_blit_object_gfx702},   {801, ocl_blit_object_gfx801},
      {802, ocl_blit_object_gfx802},   {803, ocl_blit_object_gfx803},
      {900, ocl_blit_object_gfx900},   {902, ocl_blit_object_gfx902},
      {904, ocl_blit_object_gfx904},   {906, ocl_blit_object_gfx906},
      {908, ocl_blit_object_gfx908},   {1010, ocl_blit_object_gfx1010},
      {1011, ocl_blit_object_gfx1011}, {1012, ocl_blit_object_gfx1012},
  };

  const bool has_gfx_prefix = (strncmp(agent_name, "gfx", 3) == 0);
  if (!has_gfx_prefix) {
    return HSA_STATUS_ERROR_INVALID_ISA_NAME;
  }

  // atoi() stops at the first non-digit, so only the leading number counts.
  const uint64_t target_name = atoi(agent_name + 3);

  for (const TargetBlob& candidate : kKnownTargets) {
    if (candidate.gfx_target == target_name) {
      *blit_code_object = candidate.code_object;
      return HSA_STATUS_SUCCESS;
    }
  }
  return HSA_STATUS_ERROR_INVALID_ISA_NAME;
}
} // namespace amd
#undef HSA_ARGUMENT_ALIGN_BYTES
@@ -0,0 +1,123 @@
#ifndef HSA_RUNTIME_EXT_IMAGE_BLIT_KERNEL_H
#define HSA_RUNTIME_EXT_IMAGE_BLIT_KERNEL_H
#include <assert.h>
#include <atomic>
#include <mutex>
#include <unordered_map>
#include <vector>
#include "inc/hsa.h"
#include "resource.h"
namespace amd {
// Bundles the HSA queue used for blit dispatches with a cached 64-bit index.
// NOTE(review): how cached_index_ is consumed is not visible in this header —
// confirm against the implementation file.
struct BlitQueue {
  // Queue that blit dispatch packets are written to.
  hsa_queue_t* queue_;
  // Atomically accessed index value associated with queue_.
  volatile std::atomic<uint64_t> cached_index_;
};
// Per-kernel launch information stored in a blit code catalog.
struct BlitCodeInfo {
  // Handle of the kernel code.
  uint64_t code_handle_;
  // Group segment size recorded for the kernel.
  uint32_t group_segment_size_;
  // Private segment size recorded for the kernel.
  uint32_t private_segment_size_;
};
// Dispatches the image blit kernels (copy between images and buffers, image to
// image copy, and image fill) on a BlitQueue.
class BlitKernel {
public:
// Identifies one blit kernel. The enumerator names mirror the kernel entry
// points of the OpenCL blit source (copy_image_to_buffer, clear_image, ...).
// KERNEL_OP_COUNT is the number of kernels and sizes the name tables below.
typedef enum KernelOp {
KERNEL_OP_COPY_IMAGE_TO_BUFFER = 0,
KERNEL_OP_COPY_BUFFER_TO_IMAGE = 1,
KERNEL_OP_COPY_IMAGE_DEFAULT = 2,
KERNEL_OP_COPY_IMAGE_LINEAR_TO_STANDARD = 3,
KERNEL_OP_COPY_IMAGE_STANDARD_TO_LINEAR = 4,
KERNEL_OP_COPY_IMAGE_1DB = 5,
KERNEL_OP_COPY_IMAGE_1DB_TO_REG = 6,
KERNEL_OP_COPY_IMAGE_REG_TO_1DB = 7,
KERNEL_OP_CLEAR_IMAGE = 8,
KERNEL_OP_CLEAR_IMAGE_1DB = 9,
KERNEL_OP_COUNT = 10
} KernelOp;
explicit BlitKernel();
~BlitKernel();
// Set-up and tear-down entry points; both report an hsa_status_t.
hsa_status_t Initialize();
hsa_status_t Cleanup();
// Fills blit_code_catalog with the blit kernel information for `agent`.
// NOTE(review): presumably one BlitCodeInfo per KernelOp — confirm in the
// implementation file.
hsa_status_t BuildBlitCode(hsa_agent_t agent,
std::vector<BlitCodeInfo>& blit_code_catalog);
// Copies from linear memory (src_memory, with the given row/slice pitches)
// into image_region of dst_image, using blit_queue for the dispatch.
hsa_status_t CopyBufferToImage(
BlitQueue& blit_queue,
const std::vector<BlitCodeInfo>& blit_code_catalog,
const void* src_memory, size_t src_row_pitch, size_t src_slice_pitch,
const Image& dst_image, const hsa_ext_image_region_t& image_region);
// Copies image_region of src_image into linear memory (dst_memory, with the
// given row/slice pitches), using blit_queue for the dispatch.
hsa_status_t CopyImageToBuffer(
BlitQueue& blit_queue,
const std::vector<BlitCodeInfo>& blit_code_catalog,
const Image& src_image, void* dst_memory, size_t dst_row_pitch,
size_t dst_slice_pitch, const hsa_ext_image_region_t& image_region);
// Copies a `size` region from src_image at src_origin to dst_image at
// dst_origin; copy_type names the kernel to use.
hsa_status_t CopyImage(BlitQueue& blit_queue,
const std::vector<BlitCodeInfo>& blit_code_catalog,
const Image& dst_image, const Image& src_image,
const hsa_dim3_t& dst_origin,
const hsa_dim3_t& src_origin, const hsa_dim3_t size,
KernelOp copy_type);
// Fills `region` of `image` with `pattern`.
// NOTE(review): the layout expected behind `pattern` is not visible here.
hsa_status_t FillImage(BlitQueue& blit_queue,
const std::vector<BlitCodeInfo>& blit_code_catalog,
const Image& image, const void* pattern,
const hsa_ext_image_region_t& region);
private:
// Helpers used by the public operations; their definitions live in the
// implementation file.
hsa_status_t PopulateKernelCode(
hsa_agent_t agent, hsa_executable_t executable,
std::vector<BlitCodeInfo>& blit_code_catalog);
// Converts byte pitches into pixel pitches written to out_pitch_pixel.
// NOTE(review): the number of elements written to out_pitch_pixel is not
// visible in this header — confirm before sizing the output array.
inline void CalcBufferRowSlicePitchesInPixel(
hsa_ext_image_geometry_t geometry, uint32_t element_size,
const hsa_dim3_t& copy_size, size_t in_row_pitch_byte,
size_t in_slice_pitch_byte, unsigned long* out_pitch_pixel);
inline uint32_t GetDimSize(const Image& image);
inline uint32_t GetNumChannel(const Image& image);
inline uint32_t GetImageAccessType(const Image& image);
// Two overloads that fill in the dispatch dimensions of `packet` for a
// single image or for a source/destination pair.
void CalcWorkingSize(const Image& image, const hsa_dim3_t& range,
hsa_kernel_dispatch_packet_t& packet);
void CalcWorkingSize(const Image& src_image, const Image& dst_image,
const hsa_dim3_t& range,
hsa_kernel_dispatch_packet_t& packet);
// Produces an alternative image description for original_image through
// new_image.
// NOTE(review): ownership of *new_image is not visible here — confirm.
hsa_status_t ConvertImage(const Image& original_image,
const Image** new_image);
// Submits `packet` on `queue` and reports the outcome as an hsa_status_t.
hsa_status_t LaunchKernel(BlitQueue& queue,
hsa_kernel_dispatch_packet_t& packet);
// The kernels' names, indexed by KernelOp.
static const char* kernel_name_[KERNEL_OP_COUNT];
static const char* ocl_kernel_name_[KERNEL_OP_COUNT];
// Mapping of ISA and kernel object.
std::unordered_map<uint64_t, hsa_code_object_t> code_object_map_;
// Mapping of ISA and kernel executable.
std::unordered_map<uint64_t, hsa_executable_t> code_executable_map_;
// NOTE(review): presumably guards the two maps above — confirm.
std::mutex lock_;
DISALLOW_COPY_AND_ASSIGN(BlitKernel);
// Selects the embedded blit code object for a "gfxNNN" agent name and returns
// it through code_object_handle; unknown names yield
// HSA_STATUS_ERROR_INVALID_ISA_NAME.
hsa_status_t GetPatchedBlitObject(const char* agent_name, uint8_t** code_object_handle);
};
} // namespace amd
#endif // HSA_RUNTIME_EXT_IMAGE_BLIT_KERNEL_H
@@ -0,0 +1,448 @@
/// Kernel code for HSA image import/export/copy/clear in OpenCL C form.
/// Copies one texel per work-item from `src` into linear memory.
///
/// The three destination pointers are typed views used for 1-, 2- and 4-byte
/// stores. format.x is the component count, format.y the component size in
/// bytes and format.z the scale applied to the texel index. pitch.x / pitch.y
/// are the row / slice pitches used to linearise the work-item position, and
/// dstOrigin.x is added to the resulting index.
__kernel void copy_image_to_buffer(
    __read_only image2d_array_t src,
    __global uint* dstUInt,
    __global ushort* dstUShort,
    __global uchar* dstUChar,
    int4 srcOrigin,
    ulong4 dstOrigin,
    int4 size,
    uint4 format,
    ulong4 pitch)
{
    // Position of this work-item inside the copy region.
    int4 coordsSrc = (int4)((int)get_global_id(0),
                            (int)get_global_id(1),
                            (int)get_global_id(2),
                            0);

    // Work-items that fall outside the region do nothing.
    const bool insideRegion = (coordsSrc.x < size.x) &&
                              (coordsSrc.y < size.y) &&
                              (coordsSrc.z < size.z);
    if (!insideRegion) {
        return;
    }

    // Destination index is computed from the region-relative position,
    // before the source origin is applied.
    const ulong texelIndex = coordsSrc.z * pitch.y + coordsSrc.y * pitch.x +
                             coordsSrc.x;
    const ulong idxDst = texelIndex * format.z + dstOrigin.x;

    coordsSrc.xyz += srcOrigin.xyz;
    const uint4 texel = read_imageui(src, coordsSrc);

    const uint components = format.x;
    const uint elemSize = format.y;

    if (components == 1) {
        if (elemSize == 1) {
            dstUChar[idxDst] = (uchar)texel.x;
        } else if (elemSize == 2) {
            dstUShort[idxDst] = (ushort)texel.x;
        } else if (elemSize == 4) {
            dstUInt[idxDst] = texel.x;
        }
    } else if (components == 2) {
        if (elemSize == 1) {
            // Two 8-bit components packed into one 16-bit store.
            dstUShort[idxDst] = (ushort)texel.x |
                                ((ushort)texel.y << 8);
        } else if (elemSize == 2) {
            // Two 16-bit components packed into one 32-bit store.
            dstUInt[idxDst] = texel.x | (texel.y << 16);
        } else if (elemSize == 4) {
            dstUInt[idxDst] = texel.x;
            dstUInt[idxDst + 1] = texel.y;
        }
    } else if (components == 4) {
        if (elemSize == 1) {
            // Four 8-bit components packed into one 32-bit store.
            dstUInt[idxDst] = (uint)texel.x |
                              (texel.y << 8) |
                              (texel.z << 16) |
                              (texel.w << 24);
        } else if (elemSize == 2) {
            dstUInt[idxDst] = texel.x | (texel.y << 16);
            dstUInt[idxDst + 1] = texel.z | (texel.w << 16);
        } else if (elemSize == 4) {
            dstUInt[idxDst] = texel.x;
            dstUInt[idxDst + 1] = texel.y;
            dstUInt[idxDst + 2] = texel.z;
            dstUInt[idxDst + 3] = texel.w;
        }
    }
}
/// Copies one texel per work-item from linear memory `src` into `dst`.
///
/// format.x is the component count, format.y the component size in bytes
/// (any value other than 1 or 2 is handled as 4-byte data) and format.z the
/// scale applied to the texel index. pitch.x / pitch.y are the row / slice
/// pitches and srcOrigin.x is added to the resulting index. Components that
/// the selected format does not provide are left unset in the written texel.
__kernel void copy_buffer_to_image(
    __global uint* src,
    __write_only image2d_array_t dst,
    ulong4 srcOrigin,
    int4 dstOrigin,
    int4 size,
    uint4 format,
    ulong4 pitch)
{
    // Typed views of the source buffer for narrower loads.
    __global ushort* srcUShort = (__global ushort*)src;
    __global uchar* srcUChar = (__global uchar*)src;

    // Position of this work-item inside the copy region.
    int4 coordsDst = (int4)((int)get_global_id(0),
                            (int)get_global_id(1),
                            (int)get_global_id(2),
                            0);

    const bool insideRegion = (coordsDst.x < size.x) &&
                              (coordsDst.y < size.y) &&
                              (coordsDst.z < size.z);
    if (!insideRegion) {
        return;
    }

    // Source index is computed from the region-relative position, before the
    // destination origin is applied.
    const ulong texelIndex = coordsDst.z * pitch.y +
                             coordsDst.y * pitch.x + coordsDst.x;
    const ulong idxSrc = texelIndex * format.z + srcOrigin.x;

    coordsDst.xyz += dstOrigin.xyz;

    uint4 pixel;
    const uint elemSize = format.y;

    if (format.x == 1) {
        switch (elemSize) {
        case 1:
            pixel.x = (uint)srcUChar[idxSrc];
            break;
        case 2:
            pixel.x = (uint)srcUShort[idxSrc];
            break;
        default:
            pixel.x = src[idxSrc];
            break;
        }
    } else if (format.x == 2) {
        switch (elemSize) {
        case 1: {
            // Two 8-bit components share one 16-bit word.
            const ushort packed16 = srcUShort[idxSrc];
            pixel.x = (uint)(packed16 & 0xff);
            pixel.y = (uint)(packed16 >> 8);
            break;
        }
        case 2: {
            // Two 16-bit components share one 32-bit word.
            const uint packed32 = src[idxSrc];
            pixel.x = (packed32 & 0xffff);
            pixel.y = (packed32 >> 16);
            break;
        }
        default:
            pixel.x = src[idxSrc];
            pixel.y = src[idxSrc + 1];
            break;
        }
    } else if (format.x == 4) {
        switch (elemSize) {
        case 1: {
            // Four 8-bit components share one 32-bit word.
            const uint packed32 = src[idxSrc];
            pixel.x = packed32 & 0xff;
            pixel.y = (packed32 >> 8) & 0xff;
            pixel.z = (packed32 >> 16) & 0xff;
            pixel.w = (packed32 >> 24) & 0xff;
            break;
        }
        case 2: {
            // Four 16-bit components spread over two 32-bit words.
            const uint low = src[idxSrc];
            const uint high = src[idxSrc + 1];
            pixel.x = low & 0xffff;
            pixel.y = (low >> 16);
            pixel.z = high & 0xffff;
            pixel.w = (high >> 16);
            break;
        }
        default:
            pixel.x = src[idxSrc];
            pixel.y = src[idxSrc + 1];
            pixel.z = src[idxSrc + 2];
            pixel.w = src[idxSrc + 3];
            break;
        }
    }

    // Write the final pixel
    write_imageui(dst, coordsDst, pixel);
}
/// Copies one texel per work-item from `src` (at srcOrigin) to `dst`
/// (at dstOrigin) using unsigned-integer reads and writes.
__kernel void copy_image_default(
    __read_only image2d_array_t src,
    __write_only image2d_array_t dst,
    int4 srcOrigin,
    int4 dstOrigin,
    int4 size)
{
    // Position of this work-item inside the copy region.
    const int4 offset = (int4)((int)get_global_id(0),
                               (int)get_global_id(1),
                               (int)get_global_id(2),
                               0);

    const bool insideRegion = (offset.x < size.x) &&
                              (offset.y < size.y) &&
                              (offset.z < size.z);
    if (!insideRegion) {
        return;
    }

    const int4 coordsSrc = srcOrigin + offset;
    const int4 coordsDst = offset + dstOrigin;

    const uint4 texel = read_imageui(src, coordsSrc);
    write_imageui(dst, coordsDst, texel);
}
/// Maps a linear colour value to its encoded ("standard") value.
///
/// NaN yields 0, inputs above 1 yield 1, inputs below 0 yield 0. Values below
/// 0.0031308 are scaled by 12.92; everything else goes through
/// 1.055 * x^(5/12) - 0.055.
float linear_to_standard_rgba(float l_val) {
    if (isnan(l_val)) {
        return 0.0f;
    }
    if (l_val > 1.0f) {
        return 1.0f;
    }
    if (l_val < 0.0f) {
        return 0.0f;
    }
    if (l_val < 0.0031308f) {
        return 12.92f * l_val;
    }
    return (1.055f * pow(l_val, 5.0f / 12.0f)) - 0.055f;
}
/// Copies one texel per work-item from `src` to `dst` as floats, passing the
/// x, y and z components through linear_to_standard_rgba(); w is copied
/// unchanged. `copyType` is not used by the kernel body.
__kernel void copy_image_linear_to_standard(
    __read_only image2d_array_t src,
    __write_only image2d_array_t dst,
    int4 srcOrigin,
    int4 dstOrigin,
    int4 size,
    int copyType)
{
    // Position of this work-item inside the copy region.
    const int4 offset = (int4)((int)get_global_id(0),
                               (int)get_global_id(1),
                               (int)get_global_id(2),
                               0);

    const bool insideRegion = (offset.x < size.x) &&
                              (offset.y < size.y) &&
                              (offset.z < size.z);
    if (!insideRegion) {
        return;
    }

    const int4 coordsSrc = srcOrigin + offset;
    const int4 coordsDst = offset + dstOrigin;

    float4 texel = read_imagef(src, coordsSrc);
    texel.x = linear_to_standard_rgba(texel.x);
    texel.y = linear_to_standard_rgba(texel.y);
    texel.z = linear_to_standard_rgba(texel.z);
    write_imagef(dst, coordsDst, texel);
}
/// Copies one texel per work-item from `src` to `dst` as floats. The kernel
/// body applies no conversion of its own and does not use `copyType`.
/// NOTE(review): presumably the standard-to-linear decode is expected to
/// happen in the image read path — confirm.
__kernel void copy_image_standard_to_linear(
    __read_only image2d_array_t src,
    __write_only image2d_array_t dst,
    int4 srcOrigin,
    int4 dstOrigin,
    int4 size,
    int copyType)
{
    // Position of this work-item inside the copy region.
    const int4 offset = (int4)((int)get_global_id(0),
                               (int)get_global_id(1),
                               (int)get_global_id(2),
                               0);

    const bool insideRegion = (offset.x < size.x) &&
                              (offset.y < size.y) &&
                              (offset.z < size.z);
    if (!insideRegion) {
        return;
    }

    const int4 coordsSrc = srcOrigin + offset;
    const int4 coordsDst = offset + dstOrigin;

    const float4 texel = read_imagef(src, coordsSrc);
    write_imagef(dst, coordsDst, texel);
}
/// Copies one texel per work-item between two 1D buffer images. Only the x
/// components of the origins and of `size` are used.
__kernel void copy_image_1db(
    __read_only image1d_buffer_t src,
    __write_only image1d_buffer_t dst,
    int4 srcOrigin,
    int4 dstOrigin,
    int4 size)
{
    // Position of this work-item inside the copy range.
    const int offset = get_global_id(0);
    if (!(offset < size.x)) {
        return;
    }

    const int coordSrc = srcOrigin.x + offset;
    const int coordDst = offset + dstOrigin.x;

    const uint4 texel = read_imageui(src, coordSrc);
    write_imageui(dst, coordDst, texel);
}
/// Copies one texel per work-item from a 1D buffer image into a 2D-array
/// image. Only the x extent of `size` is range-checked; the source is
/// addressed by x alone while the destination uses the full work-item
/// position plus dstOrigin.
__kernel void copy_image_1db_to_reg(
    __read_only image1d_buffer_t src,
    __write_only image2d_array_t dst,
    int4 srcOrigin,
    int4 dstOrigin,
    int4 size)
{
    // Position of this work-item inside the copy region.
    const int4 offset = (int4)((int)get_global_id(0),
                               (int)get_global_id(1),
                               (int)get_global_id(2),
                               0);
    if (!(offset.x < size.x)) {
        return;
    }

    const int coordSrc = srcOrigin.x + offset.x;
    const int4 coordsDst = offset + dstOrigin;

    const uint4 texel = read_imageui(src, coordSrc);
    write_imageui(dst, coordsDst, texel);
}
/// Copies one texel per work-item from a 2D-array image into a 1D buffer
/// image. Only the x extent of `size` is range-checked; the destination is
/// addressed by x alone while the source uses the full work-item position
/// plus srcOrigin.
__kernel void copy_image_reg_to_1db(
    __read_only image2d_array_t src,
    __write_only image1d_buffer_t dst,
    int4 srcOrigin,
    int4 dstOrigin,
    int4 size)
{
    // Position of this work-item inside the copy region.
    const int4 offset = (int4)((int)get_global_id(0),
                               (int)get_global_id(1),
                               (int)get_global_id(2),
                               0);
    if (!(offset.x < size.x)) {
        return;
    }

    const int coordDst = dstOrigin.x + offset.x;
    const int4 coordsSrc = offset + srcOrigin;

    const uint4 texel = read_imageui(src, coordsSrc);
    write_imageui(dst, coordDst, texel);
}
/// Writes a constant pattern to one texel per work-item of `image`.
///
/// `type` selects which pattern and write function is used: 0 = float
/// pattern, 1 = signed-integer pattern, 2 = unsigned-integer pattern. Any
/// other value writes nothing.
__kernel void clear_image(
    __write_only image2d_array_t image,
    float4 patternFLOAT4,
    int4 patternINT4,
    uint4 patternUINT4,
    int4 origin,
    int4 size,
    uint type)
{
    // Position of this work-item inside the fill region.
    const int4 offset = (int4)((int)get_global_id(0),
                               (int)get_global_id(1),
                               (int)get_global_id(2),
                               0);

    const bool insideRegion = (offset.x < size.x) &&
                              (offset.y < size.y) &&
                              (offset.z < size.z);
    if (!insideRegion) {
        return;
    }

    const int4 coords = offset + origin;

    if (type == 0) {
        write_imagef(image, coords, patternFLOAT4);
    } else if (type == 1) {
        write_imagei(image, coords, patternINT4);
    } else if (type == 2) {
        write_imageui(image, coords, patternUINT4);
    }
}
/// Writes a constant pattern to one texel per work-item of a 1D buffer image.
///
/// `type` selects which pattern and write function is used: 0 = float
/// pattern, 1 = signed-integer pattern, 2 = unsigned-integer pattern. Any
/// other value writes nothing. Only the x components of `origin` and `size`
/// are used.
__kernel void clear_image_1db(
    __write_only image1d_buffer_t image,
    float4 patternFLOAT4,
    int4 patternINT4,
    uint4 patternUINT4,
    int4 origin,
    int4 size,
    uint type)
{
    // Position of this work-item inside the fill range.
    const int offset = get_global_id(0);
    if (!(offset < size.x)) {
        return;
    }

    const int coord = offset + origin.x;

    if (type == 0) {
        write_imagef(image, coord, patternFLOAT4);
    } else if (type == 1) {
        write_imagei(image, coord, patternINT4);
    } else if (type == 2) {
        write_imageui(image, coord, patternUINT4);
    }
}
Diff do arquivo suprimido porque uma ou mais linhas são muito longas
Diff do arquivo suprimido porque uma ou mais linhas são muito longas
Diff do arquivo suprimido porque uma ou mais linhas são muito longas
@@ -0,0 +1,209 @@
#
# Minimum version of cmake required
#
cmake_minimum_required(VERSION 3.5.0)
#
# Required Defines on cmake command line
#
# 1) Set location of OpenCL header files
# OPENCL_DIR="Root for OpenCL install"
# If not set, the default value is "/opt/rocm/opencl"
#
# 2) Set location of CLANG/LLVM binary directory
# LLVM_DIR="Directory containing clang, llvm-link and llvm-dis"
# If not set, it is looked up using the hint "${CMAKE_INSTALL_PREFIX}/llvm/bin"
#
# 3) Set BITCODE library directory
# BITCODE_DIR="Directory contains the bitcode library"
# If not set, the default value is "${OPENCL_DIR}/lib/x86_64/bitcode"
#
# 4) Set TARGET_DEVICES to indicate gpu types for kernel builds (e.g., "gfx803;gfx900; ...")
# If not set, the built-in default list of gfx targets defined further down
# in this file is used
#
# Building - Should be automatic but for manual builds:
#
# 1) *** Create build folder e.g. "blit_src/build" - any name will do
# 2) Go to the build folder
# 3) Run "cmake .."
# 4) Run "make opencl_blit_objects.cpp"
#
## Include the cmake_modules utils.cmake
## (listsubdirs() and get_include_path() used below are not defined in this
## file; presumably they come from this module — confirm)
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../opensrc/hsa-runtime/cmake_modules" )
include ( utils )
# Flag to abort before executing after default initialization of cache variables.
# Every failed check below sets it to 1 so that all problems are reported
# before configuration stops.
set (QUIT 0)
# Collect possible LLVM version directories.
# NOTE(review): LLVM_SEARCH_PATHS is filled here but not referenced again in
# the visible part of this file.
set (LLVM_SEARCH_PATHS "")
set (LLVM_SEARCH_ROOT "${CMAKE_INSTALL_PREFIX}/llvm/lib/clang")
listsubdirs(${LLVM_SEARCH_ROOT} FOLDERS)
foreach(ITEM IN LISTS FOLDERS)
list (APPEND LLVM_SEARCH_PATHS "${LLVM_SEARCH_ROOT}/${ITEM}/include/")
endforeach()
# OpenCL C version passed to clang as -cl-std=CL<version>; defaults to 2.0
# and is mirrored into the cache.
if (NOT DEFINED OPENCL_VER)
set (OPENCL_VER "2.0")
endif()
set( OPENCL_VER ${OPENCL_VER} CACHE STRING "OpenCL version" FORCE )
# Locate the directory holding the device bitcode libraries.
get_include_path(BITCODE_DIR "Bitcode library path" RESULT FOUND NAMES "opencl.amdgcn.bc" HINTS "${CMAKE_INSTALL_PREFIX}/lib/bitcode" "${OPENCL_DIR}/lib/x86_64/bitcode")
if (NOT ${FOUND})
set (QUIT 1)
endif()
set (BITCODE_LIB "${BITCODE_DIR}/opencl.amdgcn.bc")
if (NOT EXISTS ${BITCODE_LIB})
message("ERROR: path to opencl.amdgcn.bc (${BITCODE_LIB}) is not valid. Is BITCODE_DIR correctly defined?")
set (QUIT 1)
endif()
# Locate the LLVM binary directory and check each tool that is needed.
get_include_path(LLVM_DIR "LLVM directory" RESULT FOUND NAMES "clang" HINTS "${CMAKE_INSTALL_PREFIX}/llvm/bin")
if (NOT ${FOUND})
set (QUIT 1)
endif()
set (CLANG "${LLVM_DIR}/clang")
if (NOT EXISTS ${CLANG})
message("ERROR: path to clang (${CLANG}) is not valid. Is LLVM_DIR correctly defined?")
set (QUIT 1)
endif()
set (LLVM_LINK "${LLVM_DIR}/llvm-link")
if (NOT EXISTS ${LLVM_LINK})
message("ERROR: path to llvm-link (${LLVM_LINK}) is not valid. Is LLVM_DIR correctly defined?")
set (QUIT 1)
endif()
set (LLVM_DIS "${LLVM_DIR}/llvm-dis")
if (NOT EXISTS ${LLVM_DIS})
message("ERROR: path to llvm-dis (${LLVM_DIS}) is not valid. Is LLVM_DIR correctly defined?")
set (QUIT 1)
endif()
# Value of Images Src Dir is bound in parent environment
set (KERNELS_DIR "${IMAGE_SOURCE_DIR}/blit_src")
# Define the target devices with xnack enable.
# Devices in this list are compiled with -mxnack, all others with -mno-xnack.
if (NOT DEFINED XNACK_DEVS)
set (XNACK_DEVS "gfx801;gfx902")
endif()
set( XNACK_DEVS ${XNACK_DEVS} CACHE STRING "XNACK targets" FORCE )
# Determine the target devices if not specified
if (NOT DEFINED TARGET_DEVICES)
set (TARGET_DEVICES "gfx700;gfx701;gfx702;gfx801;gfx802;gfx803;gfx900;gfx902;gfx904;gfx906;gfx908;gfx1010;gfx1011;gfx1012")
endif()
set( TARGET_DEVICES ${TARGET_DEVICES} CACHE STRING "Build targets" FORCE )
# End of default configuration and path checking.
# Quit if configuration is incomplete.
# message(FATAL_ERROR) already stops processing, so the return() that follows
# it is not reached.
if (QUIT)
message(FATAL_ERROR "Configuration halted.")
return()
endif()
# Determine the target triple:
# disassemble the bitcode library and pick the "amdgcn-amd-..." string that is
# followed by a closing quote at the end of a line, then strip the quote and
# the line break.
execute_process(COMMAND ${LLVM_DIS} ${BITCODE_LIB} -o - OUTPUT_VARIABLE LLVM_DIS_OUTPUT)
string(REGEX MATCH "(amdgcn-amd-.*)\"[\r\n]" QUOTED_TRIPLE "${LLVM_DIS_OUTPUT}")
string(REGEX REPLACE "[\"\r\n]" "" TARGET_TRIPLE "${QUOTED_TRIPLE}")
# Print the effective configuration.
message("")
message("Build Setting:")
message(" Target Devices: ${TARGET_DEVICES}")
message(" Proj. Src Dir: ${PROJECT_SOURCE_DIR}")
message(" Proj. Bld Dir: ${PROJECT_BINARY_DIR}")
message(" Image Source Dir: ${IMAGE_SOURCE_DIR}")
message(" LLVM Dir: ${LLVM_DIR}")
message(" Clang path: ${CLANG}")
message(" OpenCL Dir: ${OPENCL_DIR}")
message(" OpenCL version: ${OPENCL_VER}")
message(" Bitcode Dir: ${BITCODE_DIR}")
message(" Target Triple: ${TARGET_TRIPLE}")
##==========================================
## Generate Kernel Bitcode
##==========================================
# gen_kernel_bc(<target-dev> <xnack-opt> <target-name> <input-file> <output-file>)
#
# Registers a custom target named <target-name> that runs clang on the OpenCL
# source <input-file> for GPU <target-dev> and writes the result to
# <output-file>. <xnack-opt> is appended to "-m" ("xnack" or "no-xnack").
# Reads TARGET_TRIPLE, BITCODE_DIR, OPENCL_VER and CLANG from the caller.
function(gen_kernel_bc TARGET_DEV XNACK_OPT FPREFIX INPUT_FILE OUTPUT_FILE)
  # "gfx900" -> "900"; selects the matching oclc_isa_version_* library.
  string(REPLACE "gfx" "" GFXIP "${TARGET_DEV}")

  # Device libraries handed to clang, in link order.
  set(bitcode_link_flags "")
  foreach(bc_name
          opencl
          ockl
          ocml
          oclc_daz_opt_on
          oclc_isa_version_${GFXIP}
          oclc_unsafe_math_off
          oclc_finite_only_off)
    string(APPEND bitcode_link_flags
           " -Xclang -mlink-bitcode-file -Xclang ${BITCODE_DIR}/${bc_name}.amdgcn.bc")
  endforeach()

  # Split the flat command line into a proper argument list.
  separate_arguments(clang_args UNIX_COMMAND
    "-O2 -x cl -target ${TARGET_TRIPLE} -Xclang -finclude-default-header -mcpu=${TARGET_DEV} -m${XNACK_OPT} -nogpulib${bitcode_link_flags} -cl-std=CL${OPENCL_VER} -o ${OUTPUT_FILE} ${INPUT_FILE}")

  add_custom_target("${FPREFIX}" ${CLANG} ${clang_args}
    COMMENT "BUILDING bitcode for ${FPREFIX}..."
    VERBATIM)

  message(" Kernel Source: " ${INPUT_FILE})
  message(" Kernel Bitcode: " ${OUTPUT_FILE})
endfunction()
##==========================================
## Build the kernel for a device
##==========================================
# build_kernel(<blit-name> <target-dev>)
#
# Registers the bitcode target "<blit-name>_<target-dev>" for one device and
# appends its name to the cached HSACO_TARG_LIST. Devices listed in XNACK_DEVS
# are built with "xnack", all others with "no-xnack".
function(build_kernel BLIT_NAME TARG_DEV)
  # list(FIND) yields -1 when the device is not an XNACK device.
  list(FIND XNACK_DEVS ${TARG_DEV} XNACK_IDX)
  if(${XNACK_IDX} LESS 0)
    set(XNACK_OPT "no-xnack")
  else()
    set(XNACK_OPT "xnack")
  endif()

  # The same name serves as target name and as output file name.
  set(FILE_PREFIX "${BLIT_NAME}_${TARG_DEV}")
  set(HSACO_TARG_LIST ${HSACO_TARG_LIST} "${FILE_PREFIX}" CACHE INTERNAL HSACO_TARG_LIST)

  ## generate kernel bitcodes
  set(kernel_source "${KERNELS_DIR}/imageblit_kernels.cl")
  set(kernel_output "${FILE_PREFIX}")
  gen_kernel_bc(${TARG_DEV} ${XNACK_OPT} ${FILE_PREFIX} ${kernel_source} ${kernel_output})
endfunction()
##==========================================
## Build the kernel for a list of devices
##==========================================
# build_kernel_for_devices(<blit-name>)
#
# Registers one bitcode target per entry of TARGET_DEVICES by calling
# build_kernel(), which records each target name in the cached
# HSACO_TARG_LIST.
function(build_kernel_for_devices BLIT_NAME)
  # build_kernel() accumulates HSACO_TARG_LIST in the CACHE, which survives
  # between configure runs. Drop the cached value (and any normal variable
  # shadowing it) first, otherwise every re-configure appends duplicates and
  # keeps entries for devices that are no longer in TARGET_DEVICES.
  unset(HSACO_TARG_LIST CACHE)
  unset(HSACO_TARG_LIST)
  set(HSACO_TARG_LIST PARENT_SCOPE)

  foreach(dev ${TARGET_DEVICES})
    message("\n Working on: ${dev} ...")
    build_kernel(${BLIT_NAME} ${dev})
  endforeach()
endfunction()
##==========================================
## Create BLIT Code Object blobs file
##==========================================
# generate_blit_file(<file-name>)
#
# Registers the custom target <file-name> that produces
# ${IMAGE_SOURCE_DIR}/<file-name> by running create_hsaco_ascii_file.sh, after
# all bitcode targets named in HSACO_TARG_LIST have been built.
function(generate_blit_file BFILE)
  set(blit_output "${IMAGE_SOURCE_DIR}/${BFILE}")

  # Remove a previously generated file at configure time so that the rule
  # below runs again on the next build.
  file(REMOVE "${blit_output}")

  # VERBATIM keeps argument escaping identical on every platform; quoting keeps
  # paths that contain spaces in one piece.
  add_custom_command(OUTPUT "${blit_output}"
    COMMAND "${KERNELS_DIR}/create_hsaco_ascii_file.sh" "${blit_output}"
    VERBATIM)

  message("\n Will create ASCII bitcodes in ${BFILE} for ${TARGET_DEVICES} ... \n")

  add_custom_target(${BFILE} DEPENDS "${blit_output}")

  # Order the bitcode targets before this one. add_dependencies() is the
  # documented way to depend on other targets; it rejects an empty list.
  if(HSACO_TARG_LIST)
    add_dependencies(${BFILE} ${HSACO_TARG_LIST})
  endif()
endfunction()
# Register one bitcode target per entry of TARGET_DEVICES, then the target
# that packs their outputs into opencl_blit_objects.cpp.
build_kernel_for_devices("ocl_blit_object")
generate_blit_file("opencl_blit_objects.cpp")
@@ -0,0 +1,61 @@
## OVERVIEW
This directory contains the CMakeLists.txt for automatically generating
the ASCII code object file, "opencl_blit_objects.cpp", which contains the
blobs of the code object of the Image BLIT kernels for the devices supported
on ROCm. The blobs are loaded by the image library and must be regenerated
whenever a new device is introduced.
## ADD NEW DEVICE
To add a new supported device, the following steps are required:
1. Declare an extern variable for the new device gfxNNN by adding the line
"extern uint8_t ocl_blit_object_gfxNNN[];" to "blit_kernel.cpp"
2. Update the BlitKernel::GetPatchedBlitObject() function to support the
device by adding a case that assigns "ocl_blit_object_gfxNNN" to "*blit_code_object"
3. Add the gfxNNN to the TARGET_DEVICES list in CMakeLists.txt
4. If the new device requires XNACK, add it to the XNACK_DEVS list in CMakeLists.txt
5. Rebuild the image library
## REQUIREMENT
In order to create the code object file, the bitcodes of the kernels are
generated by the compiler and the following bitcode libraries are required,
opencl.amdgcn.bc
ocml.amdgcn.bc
irif.amdgcn.bc
oclc_correctly_rounded_sqrt_off.amdgcn.bc
oclc_daz_opt_on.amdgcn.bc
oclc_finite_only_off.amdgcn.bc
oclc_isa_version_<GFXIP>.amdgcn.bc
oclc_unsafe_math_off.amdgcn.bc
where <GFXIP> is the gfxip number of the GPU. The directory containing the
bitcode libraries is specified in a CMake variable.
Several variables are required for CMake to build the code
object file. All of them have default values and are defined as follows:
OPENCL_DIR - the location of installed OpenCL
(Default: /opt/rocm/opencl)
BITCODE_DIR - the directory containing the bitcode libraries
(Default: ${OPENCL_DIR}/lib/x86_64/bitcode)
LLVM_DIR - the directory containing the clang, llvm-link and llvm-dis
executables
(Default: ${PROJECT_BUILD_DIR}/../lightning/bin)
TARGET_DEVICES - list of gpu types for kernel builds (e.g. "gfx900;gfx902")
(Default: "gfx700;gfx701;gfx702;gfx801;gfx802;gfx803;gfx900;gfx902;gfx904;gfx906;gfx908;gfx1010;gfx1011;gfx1012")
## STEPS TO BUILD
$ mkdir build
$ cd build
$ cmake -DOPENCL_DIR=<path> -DBITCODE_DIR=<path> -DLLVM_DIR=<path> -DTARGET_DEVICES="<gfx list>" ..
$ make opencl_blit_objects.cpp
@@ -0,0 +1,32 @@
#!/bin/bash -e
# Generates the C++ source file that embeds every blit code object as a byte
# array.
#
# Usage: <this script> <output-file>
#
# Every file matching ocl_blit_object* in the current working directory is
# dumped with `xxd -i` into <output-file>. The output is only replaced when
# its content changes.

opencl_blit_file="$1"

if [ -z "$opencl_blit_file" ]
then
  echo "usage: $0 <output-file>" >&2
  exit 1
fi

if ! command -v xxd >/dev/null
then
  echo "xxd not found!"
  exit 1
fi

# Expand the pattern into an array. With nullglob an unmatched pattern gives an
# empty array instead of the literal string "ocl_blit_object*".
shopt -s nullglob
blit_objects=(ocl_blit_object*)
shopt -u nullglob

if [ "${#blit_objects[@]}" -eq 0 ]
then
  echo "no ocl_blit_object* files found in $(pwd)" >&2
  exit 1
fi

# Create the file in a temporary location and then move it in atomically
tmp_file="$opencl_blit_file.tmp"
rm -rf "$tmp_file"

{
cat <<EOF
//==============================================================================
// This file is automatically generated during build process, don't modify it
//==============================================================================
EOF

for file in "${blit_objects[@]}"
do
  # Quoted so that unusual file names are passed to xxd as one argument.
  xxd -i "$file"
  echo -e '\n'
done
} > "$tmp_file"

# Move the file atomically into place, so make doesn't get half a file
# but only if it has changed. cmp -s is happy for one file not to exist.
# When nothing changed, the temporary file is removed instead of left behind.
if cmp -s "$tmp_file" "$opencl_blit_file"
then
  rm -f "$tmp_file"
else
  mv -f "$tmp_file" "$opencl_blit_file"
fi
@@ -0,0 +1,615 @@
/// Kernel code for HSA image import/export/copy/clear in OpenCL C form.
// Reads one unsigned-integer texel from the image handle selected by
// `format`: 0 = 1D, 1 = 2D, 2 = 3D, 3 = 1D array, 4 = 2D array. The number of
// coordinate components used depends on the geometry. Any other `format`
// (including 5, 1D buffer, which is not handled here) returns zero.
uint4 read_image(__read_only image1d_t src1d,
                 __read_only image2d_t src2d,
                 __read_only image3d_t src3d,
                 __read_only image1d_array_t src1da,
                 __read_only image2d_array_t src2da,
                 uint format,
                 int4 coords) {
    if (format == 0) {  // 1D
        return read_imageui(src1d, coords.x);
    }
    if (format == 1) {  // 2D
        return read_imageui(src2d, coords.xy);
    }
    if (format == 2) {  // 3D
        return read_imageui(src3d, coords);
    }
    if (format == 3) {  // 1DA
        return read_imageui(src1da, coords.xy);
    }
    if (format == 4) {  // 2DA
        return read_imageui(src2da, coords);
    }
    // Critical failure.
    return (uint4)(0);
}
// Writes one unsigned-integer texel to the image handle selected by
// `format`: 0 = 1D, 1 = 2D, 2 = 3D, 3 = 1D array, 4 = 2D array. Any other
// `format` (including 5, 1D buffer, which is not handled here) writes
// nothing.
void write_image(__write_only image1d_t dst1d,
                 __write_only image2d_t dst2d,
                 __write_only image3d_t dst3d,
                 __write_only image1d_array_t dst1da,
                 __write_only image2d_array_t dst2da,
                 uint format,
                 int4 coords,
                 uint4 texel) {
    if (format == 0) {         // 1D
        write_imageui(dst1d, coords.x, texel);
    } else if (format == 1) {  // 2D
        write_imageui(dst2d, coords.xy, texel);
    } else if (format == 2) {  // 3D
        write_imageui(dst3d, coords, texel);
    } else if (format == 3) {  // 1DA
        write_imageui(dst1da, coords.xy, texel);
    } else if (format == 4) {  // 2DA
        write_imageui(dst2da, coords, texel);
    }
    // Anything else is a critical failure: nothing is written.
}
// Reads one float texel from the image handle selected by `format`:
// 0 = 1D, 1 = 2D, 2 = 3D, 3 = 1D array, 4 = 2D array. Any other `format`
// returns zero.
float4 read_image_float(__read_only image1d_t src1d,
                        __read_only image2d_t src2d,
                        __read_only image3d_t src3d,
                        __read_only image1d_array_t src1da,
                        __read_only image2d_array_t src2da,
                        uint format,
                        int4 coords) {
    if (format == 0) {  // 1D
        return read_imagef(src1d, coords.x);
    }
    if (format == 1) {  // 2D
        return read_imagef(src2d, coords.xy);
    }
    if (format == 2) {  // 3D
        return read_imagef(src3d, coords);
    }
    if (format == 3) {  // 1DA
        return read_imagef(src1da, coords.xy);
    }
    if (format == 4) {  // 2DA
        return read_imagef(src2da, coords);
    }
    // Critical failure.
    return (float4)(0.0f);
}
// Writes one float texel to the image handle selected by `format`:
// 0 = 1D, 1 = 2D, 2 = 3D, 3 = 1D array, 4 = 2D array. Any other `format`
// writes nothing.
void write_image_float(__write_only image1d_t dst1d,
                       __write_only image2d_t dst2d,
                       __write_only image3d_t dst3d,
                       __write_only image1d_array_t dst1da,
                       __write_only image2d_array_t dst2da,
                       uint format,
                       int4 coords,
                       float4 texel) {
    if (format == 0) {         // 1D
        write_imagef(dst1d, coords.x, texel);
    } else if (format == 1) {  // 2D
        write_imagef(dst2d, coords.xy, texel);
    } else if (format == 2) {  // 3D
        write_imagef(dst3d, coords, texel);
    } else if (format == 3) {  // 1DA
        write_imagef(dst1da, coords.xy, texel);
    } else if (format == 4) {  // 2DA
        write_imagef(dst2da, coords, texel);
    }
    // Anything else is a critical failure: nothing is written.
}
// Writes one signed-integer texel to the image handle selected by `format`:
// 0 = 1D, 1 = 2D, 2 = 3D, 3 = 1D array, 4 = 2D array. Any other `format`
// writes nothing.
void write_image_int(__write_only image1d_t dst1d,
                     __write_only image2d_t dst2d,
                     __write_only image3d_t dst3d,
                     __write_only image1d_array_t dst1da,
                     __write_only image2d_array_t dst2da,
                     uint format,
                     int4 coords,
                     int4 texel) {
    if (format == 0) {         // 1D
        write_imagei(dst1d, coords.x, texel);
    } else if (format == 1) {  // 2D
        write_imagei(dst2d, coords.xy, texel);
    } else if (format == 2) {  // 3D
        write_imagei(dst3d, coords, texel);
    } else if (format == 3) {  // 1DA
        write_imagei(dst1da, coords.xy, texel);
    } else if (format == 4) {  // 2DA
        write_imagei(dst2da, coords, texel);
    }
    // Anything else is a critical failure: nothing is written.
}
//image handle is repeated since OCL doesn't allow pointers to or casting of images.
//dst is start of output pixel in destination buffer
//format.x is element count
//format.y is element size
//format.z is max(dword per pixel, 1)
//format.w is texture type.
//srcOrigin is start pixel address.
//No export for 64, 96, 128 bit formats
// Copies one texel per work-item from the image selected by format.w into
// the linear buffer `dst`.
//
// format.x is the component count, format.y the component size in bytes and
// format.z the scale applied to the texel index; `pitch` / `slice_pitch`
// linearise the work-item position. The kernel performs no range check of
// its own.
__kernel void copy_image_to_buffer(
    __read_only image1d_t src1d,
    __read_only image2d_t src2d,
    __read_only image3d_t src3d,
    __read_only image1d_array_t src1da,
    __read_only image2d_array_t src2da,
    __global void* const dst,
    int4 srcOrigin,
    uint4 format,
    ulong pitch,
    ulong slice_pitch)
{
    // Typed views of the destination for 1-, 2- and 4-byte stores.
    __global uchar* const dstUChar = (__global uchar*)dst;
    __global ushort* const dstUShort = (__global ushort*)dst;
    __global uint* const dstUInt = (__global uint*)dst;

    // Position of this work-item inside the copy region.
    int4 coordsSrc = (int4)((int)get_global_id(0),
                            (int)get_global_id(1),
                            (int)get_global_id(2),
                            0);

    // Destination index uses the region-relative position, before the source
    // origin is applied.
    const ulong idxDst = (coordsSrc.z * slice_pitch + coordsSrc.y * pitch +
                          coordsSrc.x) * format.z;

    coordsSrc.xyz += srcOrigin.xyz;
    const uint4 texel =
        read_image(src1d, src2d, src3d, src1da, src2da, format.w, coordsSrc);

    const uint components = format.x;
    const uint elemSize = format.y;

    if (components == 1) {
        if (elemSize == 1) {
            dstUChar[idxDst] = texel.x;
        } else if (elemSize == 2) {
            dstUShort[idxDst] = texel.x;
        } else if (elemSize == 4) {
            dstUInt[idxDst] = texel.x;
        }
    } else if (components == 2) {
        if (elemSize == 1) {
            // Two 8-bit components packed into one 16-bit store.
            dstUShort[idxDst] = texel.x |
                                (texel.y << 8);
        } else if (elemSize == 2) {
            // Two 16-bit components packed into one 32-bit store.
            dstUInt[idxDst] = texel.x | (texel.y << 16);
        } else if (elemSize == 4) {
            dstUInt[idxDst] = texel.x;
            dstUInt[idxDst + 1] = texel.y;
        }
    } else if (components == 4) {
        if (elemSize == 1) {
            // Four 8-bit components packed into one 32-bit store.
            dstUInt[idxDst] = texel.x |
                              (texel.y << 8) |
                              (texel.z << 16) |
                              (texel.w << 24);
        } else if (elemSize == 2) {
            dstUInt[idxDst] = texel.x | (texel.y << 16);
            dstUInt[idxDst + 1] = texel.z | (texel.w << 16);
        } else if (elemSize == 4) {
            dstUInt[idxDst] = texel.x;
            dstUInt[idxDst + 1] = texel.y;
            dstUInt[idxDst + 2] = texel.z;
            dstUInt[idxDst + 3] = texel.w;
        }
    }
}
// Copies one texel per work-item from the linear buffer `src` into the image
// selected by format.w.
//
// format.x is the component count, format.y the component size in bytes and
// format.z the scale applied to the texel index; `pitch` / `slice_pitch`
// linearise the work-item position. Components that the selected format does
// not provide are left unset in the written texel. The kernel performs no
// range check of its own.
__kernel void copy_buffer_to_image(__global uint* src,
                                   __write_only image1d_t dst1d,
                                   __write_only image2d_t dst2d,
                                   __write_only image3d_t dst3d,
                                   __write_only image1d_array_t dst1da,
                                   __write_only image2d_array_t dst2da,
                                   int4 dstOrigin,
                                   uint4 format,
                                   ulong pitch,
                                   ulong slice_pitch) {
    // Typed views of the source buffer for narrower loads.
    __global ushort* srcUShort = (__global ushort*)src;
    __global uchar* srcUChar = (__global uchar*)src;

    // Position of this work-item inside the copy region.
    int4 coordsDst = (int4)((int)get_global_id(0),
                            (int)get_global_id(1),
                            (int)get_global_id(2),
                            0);

    // Source index uses the region-relative position, before the destination
    // origin is applied.
    const ulong idxSrc =
        (coordsDst.z * slice_pitch + coordsDst.y * pitch + coordsDst.x) * format.z;

    coordsDst.xyz += dstOrigin.xyz;

    uint4 texel;
    const uint components = format.x;
    const uint elemSize = format.y;

    if (components == 1) {
        if (elemSize == 1) {
            texel.x = (uint)srcUChar[idxSrc];
        } else if (elemSize == 2) {
            texel.x = (uint)srcUShort[idxSrc];
        } else if (elemSize == 4) {
            texel.x = src[idxSrc];
        }
    } else if (components == 2) {
        if (elemSize == 1) {
            // Two 8-bit components share one 16-bit word.
            const ushort packed16 = srcUShort[idxSrc];
            texel.x = (uint)(packed16 & 0xff);
            texel.y = (uint)(packed16 >> 8);
        } else if (elemSize == 2) {
            // Two 16-bit components share one 32-bit word.
            const uint packed32 = src[idxSrc];
            texel.x = (packed32 & 0xffff);
            texel.y = (packed32 >> 16);
        } else if (elemSize == 4) {
            texel.x = src[idxSrc];
            texel.y = src[idxSrc + 1];
        }
    } else if (components == 4) {
        if (elemSize == 1) {
            // Four 8-bit components share one 32-bit word.
            const uint packed32 = src[idxSrc];
            texel.x = packed32 & 0xff;
            texel.y = (packed32 >> 8) & 0xff;
            texel.z = (packed32 >> 16) & 0xff;
            texel.w = (packed32 >> 24) & 0xff;
        } else if (elemSize == 2) {
            // Four 16-bit components spread over two 32-bit words.
            const uint low = src[idxSrc];
            const uint high = src[idxSrc + 1];
            texel.x = low & 0xffff;
            texel.y = (low >> 16);
            texel.z = high & 0xffff;
            texel.w = (high >> 16);
        } else if (elemSize == 4) {
            texel.x = src[idxSrc];
            texel.y = src[idxSrc + 1];
            texel.z = src[idxSrc + 2];
            texel.w = src[idxSrc + 3];
        }
    }

    // Write the final pixel
    write_image(dst1d, dst2d, dst3d, dst1da, dst2da, format.w, coordsDst, texel);
}
// Copies one texel per work-item from the source image selected by
// `srcFormat` (at srcOrigin) to the destination image selected by
// `dstFormat` (at dstOrigin), using unsigned-integer reads and writes.
__kernel void copy_image_default(__read_only image1d_t src1d,
                                 __read_only image2d_t src2d,
                                 __read_only image3d_t src3d,
                                 __read_only image1d_array_t src1da,
                                 __read_only image2d_array_t src2da,
                                 __write_only image1d_t dst1d,
                                 __write_only image2d_t dst2d,
                                 __write_only image3d_t dst3d,
                                 __write_only image1d_array_t dst1da,
                                 __write_only image2d_array_t dst2da,
                                 int4 srcOrigin,
                                 int4 dstOrigin,
                                 int srcFormat,
                                 int dstFormat) {
    // Position of this work-item inside the copy region.
    const int4 offset = (int4)((int)get_global_id(0),
                               (int)get_global_id(1),
                               (int)get_global_id(2),
                               0);

    const int4 coordsSrc = srcOrigin + offset;
    const int4 coordsDst = offset + dstOrigin;

    const uint4 texel =
        read_image(src1d, src2d, src3d, src1da, src2da, srcFormat, coordsSrc);
    write_image(dst1d, dst2d, dst3d, dst1da, dst2da, dstFormat, coordsDst, texel);
}
// Maps a linear colour value to its encoded ("standard") value.
//
// NaN yields 0, inputs above 1 yield 1, inputs below 0 yield 0. Values below
// 0.0031308 are scaled by 12.92; everything else goes through
// 1.055 * x^(5/12) - 0.055.
float linear_to_standard_rgba(float l_val) {
    if (isnan(l_val)) {
        return 0.0f;
    }
    if (l_val > 1.0f) {
        return 1.0f;
    }
    if (l_val < 0.0f) {
        return 0.0f;
    }
    if (l_val < 0.0031308f) {
        return 12.92f * l_val;
    }
    return (1.055f * pow(l_val, 5.0f / 12.0f)) - 0.055f;
}
// Copies one texel per work-item as floats from the source image selected by
// `srcFormat` to the destination image selected by `dstFormat`, passing the
// x, y and z components through linear_to_standard_rgba(); w is copied
// unchanged.
__kernel void copy_image_linear_to_standard(
    __read_only image1d_t src1d,
    __read_only image2d_t src2d,
    __read_only image3d_t src3d,
    __read_only image1d_array_t src1da,
    __read_only image2d_array_t src2da,
    int srcFormat,
    __write_only image1d_t dst1d,
    __write_only image2d_t dst2d,
    __write_only image3d_t dst3d,
    __write_only image1d_array_t dst1da,
    __write_only image2d_array_t dst2da,
    int dstFormat,
    int4 srcOrigin,
    int4 dstOrigin) {
    // Position of this work-item inside the copy region.
    const int4 offset = (int4)((int)get_global_id(0),
                               (int)get_global_id(1),
                               (int)get_global_id(2),
                               0);

    const int4 coordsSrc = srcOrigin + offset;
    const int4 coordsDst = offset + dstOrigin;

    float4 texel =
        read_image_float(src1d, src2d, src3d, src1da, src2da, srcFormat, coordsSrc);
    texel.x = linear_to_standard_rgba(texel.x);
    texel.y = linear_to_standard_rgba(texel.y);
    texel.z = linear_to_standard_rgba(texel.z);
    write_image_float(dst1d, dst2d, dst3d, dst1da, dst2da, dstFormat, coordsDst, texel);
}
// Image-to-image copy through the float read/write path with no arithmetic on
// the texel in between.
// NOTE(review): no explicit sRGB decode happens here; presumably the decode is
// performed by the image read for sRGB source formats - confirm against
// read_image_float().
__kernel void copy_image_standard_to_linear(
    __read_only image1d_t src1d,
    __read_only image2d_t src2d,
    __read_only image3d_t src3d,
    __read_only image1d_array_t src1da,
    __read_only image2d_array_t src2da,
    int srcFormat,
    __write_only image1d_t dst1d,
    __write_only image2d_t dst2d,
    __write_only image3d_t dst3d,
    __write_only image1d_array_t dst1da,
    __write_only image2d_array_t dst2da,
    int dstFormat,
    int4 srcOrigin,
    int4 dstOrigin) {
  // Offset of this work-item inside the copied region.
  const int4 offset = (int4)((int)get_global_id(0),
                             (int)get_global_id(1),
                             (int)get_global_id(2),
                             0);
  const int4 srcPos = srcOrigin + offset;
  const int4 dstPos = dstOrigin + offset;

  const float4 pixel = read_image_float(src1d, src2d, src3d, src1da, src2da, srcFormat, srcPos);
  write_image_float(dst1d, dst2d, dst3d, dst1da, dst2da, dstFormat, dstPos, pixel);
}
// Copies texels from a 1D buffer image (src1d) into dst1d using the unsigned
// integer read/write built-ins. Only the x component of the origins and
// dimension 0 of the global id are used; the remaining image and format
// arguments are not referenced by this kernel.
__kernel void copy_image_1db(
    __read_only image1d_buffer_t src1d,
    __read_only image2d_t src2d,
    __read_only image3d_t src3d,
    __read_only image1d_array_t src1da,
    __read_only image2d_array_t src2da,
    int srcFormat,
    __write_only image1d_t dst1d,
    __write_only image2d_t dst2d,
    __write_only image3d_t dst3d,
    __write_only image1d_array_t dst1da,
    __write_only image2d_array_t dst2da,
    int dstFormat,
    int4 srcOrigin,
    int4 dstOrigin)
{
  const int offset = get_global_id(0);
  const int srcX = srcOrigin.x + offset;
  const int dstX = dstOrigin.x + offset;

  const uint4 pixel = read_imageui(src1d, srcX);
  write_imageui(dst1d, dstX, pixel);
}
// Copies texels from a 1D buffer image (src1d) into the regular 1D image dst1d.
// Only the x coordinate reaches read_imageui()/write_imageui(), so just
// dimension 0 of the global id and the x component of each origin are needed.
__kernel void copy_image_1db_to_reg(
    __read_only image1d_buffer_t src1d,
    __read_only image2d_t src2d,
    __read_only image3d_t src3d,
    __read_only image1d_array_t src1da,
    __read_only image2d_array_t src2da,
    int srcFormat,
    __write_only image1d_t dst1d,
    __write_only image2d_t dst2d,
    __write_only image3d_t dst3d,
    __write_only image1d_array_t dst1da,
    __write_only image2d_array_t dst2da,
    int dstFormat,
    int4 srcOrigin,
    int4 dstOrigin)
{
  const int offset = get_global_id(0);
  const int srcX = srcOrigin.x + offset;
  const int dstX = offset + dstOrigin.x;

  const uint4 pixel = read_imageui(src1d, srcX);
  write_imageui(dst1d, dstX, pixel);
}
// Copies texels from the regular 1D image src1d into a 1D buffer image (dst1d).
// Only the x coordinate reaches read_imageui()/write_imageui(), so just
// dimension 0 of the global id and the x component of each origin are needed.
__kernel void copy_image_reg_to_1db(
    __read_only image1d_t src1d,
    __read_only image2d_t src2d,
    __read_only image3d_t src3d,
    __read_only image1d_array_t src1da,
    __read_only image2d_array_t src2da,
    int srcFormat,
    __write_only image1d_buffer_t dst1d,
    __write_only image2d_t dst2d,
    __write_only image3d_t dst3d,
    __write_only image1d_array_t dst1da,
    __write_only image2d_array_t dst2da,
    int dstFormat,
    int4 srcOrigin,
    int4 dstOrigin)
{
  const int offset = get_global_id(0);
  const int dstX = dstOrigin.x + offset;
  const int srcX = offset + srcOrigin.x;

  const uint4 pixel = read_imageui(src1d, srcX);
  write_imageui(dst1d, dstX, pixel);
}
// Fills one texel per work-item with fill_data.
// `type` chooses how the 128 bits of fill_data are interpreted:
//   0 -> float4, 1 -> int4, 2 -> uint4; any other value writes nothing.
__kernel void clear_image(__write_only image1d_t dst1d,
                          __write_only image2d_t dst2d,
                          __write_only image3d_t dst3d,
                          __write_only image1d_array_t dst1da,
                          __write_only image2d_array_t dst2da,
                          int dstFormat,
                          uint type,
                          uint4 fill_data,
                          int4 origin) {
  // Absolute texel position = region origin + this work-item's offset.
  const int4 pos = origin + (int4)((int)get_global_id(0),
                                   (int)get_global_id(1),
                                   (int)get_global_id(2),
                                   0);

  if (type == 0) {
    // Reinterpret the bits as floats (no numeric conversion).
    write_image_float(dst1d, dst2d, dst3d, dst1da, dst2da, dstFormat, pos, as_float4(fill_data));
  } else if (type == 1) {
    // Reinterpret the bits as signed integers.
    write_image_int(dst1d, dst2d, dst3d, dst1da, dst2da, dstFormat, pos, as_int4(fill_data));
  } else if (type == 2) {
    write_image(dst1d, dst2d, dst3d, dst1da, dst2da, dstFormat, pos, fill_data);
  }
}
// Fills one texel of a 1D buffer image per work-item with fill_data.
// `type` chooses how the 128 bits of fill_data are interpreted:
//   0 -> float4, 1 -> int4, 2 -> uint4; any other value writes nothing.
// Only dst1d, origin.x and dimension 0 of the global id are used.
__kernel void clear_image_1db(__write_only image1d_buffer_t dst1d,
                              __write_only image2d_t dst2d,
                              __write_only image3d_t dst3d,
                              __write_only image1d_array_t dst1da,
                              __write_only image2d_array_t dst2da,
                              int dstFormat,
                              uint4 fill_data,
                              int4 origin,
                              uint type) {
  // Compute just the x coordinate. The previous code added `origin` to an int4
  // whose y/z/w lanes had never been written, i.e. it read uninitialized data.
  const int x = (int)get_global_id(0) + origin.x;

  switch (type) {
    case 0:
      // Reinterpret the bits as floats (no numeric conversion).
      write_imagef(dst1d, x, as_float4(fill_data));
      break;
    case 1:
      // Reinterpret the bits as signed integers.
      write_imagei(dst1d, x, as_int4(fill_data));
      break;
    case 2:
      write_imageui(dst1d, x, fill_data);
      break;
  }
}
+181
Ver Arquivo
@@ -0,0 +1,181 @@
// These lines should be changed later after we get formal Copyright.
//------------------------------------------------------------------------------
//
// File: device_info.cpp
// Project: HSA
//
// Description: Implementation file for Api to query HSA System - Number of
// compute nodes, devices, queue properties, etc.
//
// HsaGetQueueProperties()
// The public Api provided for users to query the properties
// of a Queue object.
//
// Copyright (c) 2013-2013 Advanced Micro Devices, Inc. (unpublished)
//
// All rights reserved. This notice is intended as a precaution against
// inadvertent publication and does not imply publication or any waiver
// of confidentiality. The year included in the foregoing notice is the
// year of creation of the work.
#include <assert.h>
#include <string>
#include "inc/hsa.h"
#include "device_info.h"
#include "addrlib/src/amdgpu_asic_addr.h"
// Returns the major version of a decimal-packed device id: everything above
// the two lowest decimal digits (e.g. 906 -> 9, 1010 -> 10).
uint32_t MajorVerFromDevID(uint32_t dev_id) {
  const uint32_t major_ver = dev_id / 100;
  return major_ver;
}
// Returns the minor version of a decimal-packed device id: the tens digit
// (e.g. 906 -> 0, 1010 -> 1).
uint32_t MinorVerFromDevID(uint32_t dev_id) {
  const uint32_t without_step = dev_id / 10;
  return without_step % 10;
}
// Returns the stepping of a decimal-packed device id: the ones digit
// (e.g. 906 -> 6, 803 -> 3).
uint32_t StepFromDevID(uint32_t dev_id) {
  const uint32_t step = dev_id % 10;
  return step;
}
// Derives the numeric chip id of an agent from its name.
//
// The agent name is queried with HSA_AGENT_INFO_NAME and is expected to look
// like "gfx<digits>" (e.g. "gfx906" yields 906).
//
// @param agent    Agent whose name is queried.
// @param chip_id  Output; receives the number following the "gfx" prefix.
//
// @return HSA_STATUS_SUCCESS on success,
//         HSA_STATUS_ERROR_INVALID_ARGUMENT if chip_id is null,
//         the status of hsa_agent_get_info() if the query fails,
//         HSA_STATUS_ERROR if the name has no "gfx" prefix or no parsable
//         number after it.
//
// The asserts keep the original debug-build behaviour; the explicit checks
// make release builds fail gracefully instead of dereferencing null or letting
// a std::stoi exception escape through this C-linkage function.
hsa_status_t GetGPUAsicID(hsa_agent_t agent, uint32_t *chip_id) {
  assert(chip_id != nullptr);
  if (chip_id == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  char asic_name[64] = {0};
  hsa_status_t status = hsa_agent_get_info(
      agent, static_cast<hsa_agent_info_t>(HSA_AGENT_INFO_NAME),
      &asic_name);
  assert(status == HSA_STATUS_SUCCESS);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }
  // Defensive: guarantee termination before building a std::string from it.
  asic_name[sizeof(asic_name) - 1] = '\0';

  std::string a_str(asic_name);
  const bool has_gfx_prefix = (a_str.compare(0, 3, "gfx", 3) == 0);
  assert(has_gfx_prefix);
  if (!has_gfx_prefix) {
    return HSA_STATUS_ERROR;
  }
  a_str.erase(0, 3);

  try {
    // std::stoi throws when there are no digits or the value is out of range.
    *chip_id = std::stoi(a_str);
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
// Maps a decimal-packed gfx device id (major * 100 + minor * 10 + step) to the
// AddrLib family constant. Ids that are not listed yield FAMILY_UNKNOWN.
//
//   gfx600/601            -> FAMILY_SI  (Southern Islands)
//   gfx700/701/702        -> FAMILY_CI  (Sea Islands)
//   gfx703                -> FAMILY_KV  (Kaveri / Kabini)
//   gfx800/802/803/804    -> FAMILY_VI  (Volcanic Islands)
//   gfx801                -> FAMILY_CZ  (Carrizo)
//   gfx900/901/904/906/908-> FAMILY_AI  (Arctic Islands: Vega, Arcturus)
//   gfx902/903            -> FAMILY_RV  (Raven)
//   gfx100x/101x, x in 0-3-> FAMILY_NV  (Navi)
uint32_t DevIDToAddrLibFamily(uint32_t dev_id) {
  const uint32_t major_ver = MajorVerFromDevID(dev_id);
  const uint32_t minor_ver = MinorVerFromDevID(dev_id);
  const uint32_t step = StepFromDevID(dev_id);

  if (major_ver == 6 && minor_ver == 0) {
    return (step <= 1) ? FAMILY_SI : FAMILY_UNKNOWN;
  }

  if (major_ver == 7 && minor_ver == 0) {
    if (step <= 2) return FAMILY_CI;
    return (step == 3) ? FAMILY_KV : FAMILY_UNKNOWN;
  }

  if (major_ver == 8 && minor_ver == 0) {
    if (step == 1) return FAMILY_CZ;
    return (step <= 4) ? FAMILY_VI : FAMILY_UNKNOWN;
  }

  if (major_ver == 9 && minor_ver == 0) {
    switch (step) {
      case 0:
      case 1:
      case 4:  // Vega12
      case 6:  // Vega20
      case 8:  // Arcturus
        return FAMILY_AI;
      case 2:
      case 3:
        return FAMILY_RV;
      default:
        return FAMILY_UNKNOWN;
    }
  }

  // Minor 0 (Ariel) and minor 1 (Navi) accept the same steppings.
  if (major_ver == 10 && minor_ver <= 1) {
    return (step <= 3) ? FAMILY_NV : FAMILY_UNKNOWN;
  }

  return FAMILY_UNKNOWN;
}
+39
Ver Arquivo
@@ -0,0 +1,39 @@
// File: device_info.h
// Project: HSA
//
// Description: Interface file for Api to query HSA System - Number of
// compute nodes, devices, etc.
//
// HsaGetAsicFamilyType()
// The private Api is provided to query the Id of Asic
// family of the device.
//
// Copyright (c) 2013-2013 Advanced Micro Devices, Inc. (unpublished)
//
// All rights reserved. This notice is intended as a precaution against
// inadvertent publication and does not imply publication or any waiver
// of confidentiality. The year included in the foregoing notice is the
// year of creation of the work.
//
#ifndef HSA_RUNTIME_CORE_INC_DEVICE_INFO_H_
#define HSA_RUNTIME_CORE_INC_DEVICE_INFO_H_
#include "stdint.h"
#include "inc/hsa.h"
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
// Major version of a decimal-packed device id (dev_id / 100).
uint32_t MajorVerFromDevID(uint32_t dev_id);
// Minor version of a decimal-packed device id (tens digit).
uint32_t MinorVerFromDevID(uint32_t dev_id);
// Stepping of a decimal-packed device id (ones digit).
uint32_t StepFromDevID(uint32_t dev_id);
// Maps a decimal-packed device id to an AddrLib FAMILY_* constant
// (FAMILY_UNKNOWN when the id is not recognized).
uint32_t DevIDToAddrLibFamily(uint32_t dev_id);
// Queries the agent's name ("gfx<digits>") and stores the numeric part in *chip_id.
hsa_status_t GetGPUAsicID(hsa_agent_t agent, uint32_t *chip_id);
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // HSA_RUNTIME_CORE_INC_DEVICE_INFO_H_
@@ -0,0 +1,310 @@
#include "inc/hsa.h"
#include "inc/hsa_ext_amd.h"
#include "inc/hsa_ext_image.h"
#include "image_runtime.h"
#undef HSA_API
#define HSA_API HSA_API_EXPORT
//---------------------------------------------------------------------------//
// Utility routines
//---------------------------------------------------------------------------//
// Replaces zero ("unspecified") pitches with tightly-packed defaults.
//   row pitch   : width * element size reported by the agent's image manager.
//   slice pitch : only filled in when the descriptor has a non-zero depth or
//                 array size; row_pitch * height for 3D / 2DA / 2DADEPTH,
//                 row_pitch for 1DA. Any other geometry only logs to stderr.
// Both pitch arguments are in/out references.
static void enforceDefaultPitch(hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor, size_t& image_data_row_pitch, size_t& image_data_slice_pitch) {
  if (image_data_row_pitch == 0) {
    auto manager = ext_image::ImageRuntime::instance()->image_manager(agent);
    assert((manager != nullptr) && "Image manager should already exit.");
    const auto& property =
        manager->GetImageProperty(agent, image_descriptor->format, image_descriptor->geometry);
    image_data_row_pitch = image_descriptor->width * property.element_size;
  }

  // Nothing more to do when the caller supplied a slice pitch or the image
  // has neither depth nor array layers.
  const bool is_layered = (image_descriptor->depth != 0) || (image_descriptor->array_size != 0);
  if ((image_data_slice_pitch != 0) || !is_layered) {
    return;
  }

  switch (image_descriptor->geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
      // Each array layer is a single row.
      image_data_slice_pitch = image_data_row_pitch;
      break;
    case HSA_EXT_IMAGE_GEOMETRY_3D:
    case HSA_EXT_IMAGE_GEOMETRY_2DA:
    case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
      image_data_slice_pitch = image_data_row_pitch * image_descriptor->height;
      break;
    default:
      fprintf(stderr, "Depth set on single layer image geometry.\n");
      break;
  }
}
//---------------------------------------------------------------------------//
// Image APIs
//---------------------------------------------------------------------------//
extern "C" {
// Validates the arguments and forwards the query to
// ImageRuntime::GetImageInfoMaxDimension().
// Returns INVALID_AGENT for a zero agent handle and INVALID_ARGUMENT for a
// null output pointer.
hsa_status_t HSA_API hsa_amd_image_get_info_max_dim_impl(hsa_agent_t agent,
                                                         hsa_agent_info_t attribute,
                                                         void* value) {
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;
  if (value == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return ext_image::ImageRuntime::instance()->GetImageInfoMaxDimension(
      agent, attribute, value);
}
// Validates the arguments and forwards to ImageRuntime::GetImageCapability().
// Returns INVALID_AGENT for a zero agent handle and INVALID_ARGUMENT for null
// pointers or a geometry outside [GEOMETRY_1D, GEOMETRY_2DADEPTH].
hsa_status_t HSA_API
hsa_ext_image_get_capability_impl(hsa_agent_t agent,
                                  hsa_ext_image_geometry_t image_geometry,
                                  const hsa_ext_image_format_t* image_format,
                                  uint32_t* capability_mask) {
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool geometry_valid = (image_geometry >= HSA_EXT_IMAGE_GEOMETRY_1D) &&
                              (image_geometry <= HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
  if (image_format == nullptr || capability_mask == nullptr || !geometry_valid) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return ext_image::ImageRuntime::instance()->GetImageCapability(
      agent, *image_format, image_geometry, *capability_mask);
}
// Validates the arguments and asks ImageRuntime::GetImageSizeAndAlignment()
// for the opaque-layout requirements (both pitches passed as 0).
// Returns INVALID_AGENT for a zero agent handle and INVALID_ARGUMENT for null
// pointers or an access permission outside [RO, RW].
hsa_status_t HSA_API hsa_ext_image_data_get_info_impl(
    hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor,
    hsa_access_permission_t access_permission,
    hsa_ext_image_data_info_t* image_data_info) {
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool permission_valid = (access_permission >= HSA_ACCESS_PERMISSION_RO) &&
                                (access_permission <= HSA_ACCESS_PERMISSION_RW);
  if (image_descriptor == nullptr || image_data_info == nullptr || !permission_valid) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return ext_image::ImageRuntime::instance()->GetImageSizeAndAlignment(
      agent, *image_descriptor, HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0, *image_data_info);
}
// Validates the arguments and creates an opaque-layout image handle through
// ImageRuntime::CreateImageHandle() (both pitches passed as 0).
// Returns INVALID_AGENT for a zero agent handle and INVALID_ARGUMENT when any
// pointer argument is null.
hsa_status_t HSA_API
hsa_ext_image_create_impl(hsa_agent_t agent,
                          const hsa_ext_image_descriptor_t* image_descriptor,
                          const void* image_data,
                          hsa_access_permission_t access_permission,
                          hsa_ext_image_t* image) {
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool pointers_valid =
      (image_descriptor != nullptr) && (image_data != nullptr) && (image != nullptr);
  if (!pointers_valid) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return ext_image::ImageRuntime::instance()->CreateImageHandle(
      agent, *image_descriptor, image_data, access_permission,
      HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0, *image);
}
// Destroys an image handle via ImageRuntime::DestroyImageHandle().
// Returns INVALID_AGENT for a zero agent handle; the image handle itself is
// passed through without a check here.
hsa_status_t HSA_API
hsa_ext_image_destroy_impl(hsa_agent_t agent, hsa_ext_image_t image) {
  const bool agent_valid = (agent.handle != 0);
  if (!agent_valid) return HSA_STATUS_ERROR_INVALID_AGENT;

  return ext_image::ImageRuntime::instance()->DestroyImageHandle(image);
}
// Validates the arguments and forwards to ImageRuntime::CopyImage().
// Returns INVALID_AGENT for a zero agent handle and INVALID_ARGUMENT for a
// zero image handle or a null offset/range pointer.
hsa_status_t HSA_API
hsa_ext_image_copy_impl(hsa_agent_t agent, hsa_ext_image_t src_image,
                        const hsa_dim3_t* src_offset, hsa_ext_image_t dst_image,
                        const hsa_dim3_t* dst_offset, const hsa_dim3_t* range) {
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool images_valid = (src_image.handle != 0) && (dst_image.handle != 0);
  const bool pointers_valid =
      (src_offset != nullptr) && (dst_offset != nullptr) && (range != nullptr);
  if (!images_valid || !pointers_valid) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return ext_image::ImageRuntime::instance()->CopyImage(
      src_image, dst_image, *src_offset, *dst_offset, *range);
}
// Validates the arguments and forwards to ImageRuntime::CopyBufferToImage().
// Returns INVALID_AGENT for a zero agent handle and INVALID_ARGUMENT for a
// null source buffer, zero image handle or null region.
hsa_status_t HSA_API
hsa_ext_image_import_impl(hsa_agent_t agent, const void* src_memory,
                          size_t src_row_pitch, size_t src_slice_pitch,
                          hsa_ext_image_t dst_image,
                          const hsa_ext_image_region_t* image_region) {
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool arguments_valid =
      (src_memory != nullptr) && (dst_image.handle != 0) && (image_region != nullptr);
  if (!arguments_valid) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return ext_image::ImageRuntime::instance()->CopyBufferToImage(
      src_memory, src_row_pitch, src_slice_pitch, dst_image, *image_region);
}
// Validates the arguments and forwards to ImageRuntime::CopyImageToBuffer().
// Returns INVALID_AGENT for a zero agent handle and INVALID_ARGUMENT for a
// null destination buffer, zero image handle or null region.
hsa_status_t HSA_API
hsa_ext_image_export_impl(hsa_agent_t agent, hsa_ext_image_t src_image,
                          void* dst_memory, size_t dst_row_pitch,
                          size_t dst_slice_pitch,
                          const hsa_ext_image_region_t* image_region) {
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool arguments_valid =
      (dst_memory != nullptr) && (src_image.handle != 0) && (image_region != nullptr);
  if (!arguments_valid) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return ext_image::ImageRuntime::instance()->CopyImageToBuffer(
      src_image, dst_memory, dst_row_pitch, dst_slice_pitch, *image_region);
}
// Validates the arguments and forwards to ImageRuntime::FillImage().
// Returns INVALID_AGENT for a zero agent handle and INVALID_ARGUMENT for a
// zero image handle, null region or null fill data.
hsa_status_t HSA_API
hsa_ext_image_clear_impl(hsa_agent_t agent, hsa_ext_image_t image,
                         const void* data,
                         const hsa_ext_image_region_t* image_region) {
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool arguments_valid =
      (image.handle != 0) && (image_region != nullptr) && (data != nullptr);
  if (!arguments_valid) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return ext_image::ImageRuntime::instance()->FillImage(image, data,
                                                        *image_region);
}
// Validates the arguments and forwards to ImageRuntime::CreateSamplerHandle().
// Returns INVALID_AGENT for a zero agent handle and INVALID_ARGUMENT when the
// descriptor or the output pointer is null.
hsa_status_t HSA_API hsa_ext_sampler_create_impl(
    hsa_agent_t agent, const hsa_ext_sampler_descriptor_t* sampler_descriptor,
    hsa_ext_sampler_t* sampler) {
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool pointers_valid = (sampler_descriptor != nullptr) && (sampler != nullptr);
  if (!pointers_valid) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  return ext_image::ImageRuntime::instance()->CreateSamplerHandle(
      agent, *sampler_descriptor, *sampler);
}
// Destroys a sampler handle via ImageRuntime::DestroySamplerHandle().
// Returns INVALID_AGENT for a zero agent handle; the sampler handle itself is
// passed through without a check here.
hsa_status_t HSA_API
hsa_ext_sampler_destroy_impl(hsa_agent_t agent, hsa_ext_sampler_t sampler) {
  const bool agent_valid = (agent.handle != 0);
  if (!agent_valid) return HSA_STATUS_ERROR_INVALID_AGENT;

  return ext_image::ImageRuntime::instance()->DestroySamplerHandle(sampler);
}
// Layout-aware variant of the capability query. Only the LINEAR layout is
// accepted; after validation the call is forwarded to
// ImageRuntime::GetImageCapability(), which does not receive the layout.
// Returns INVALID_AGENT for a zero agent handle and INVALID_ARGUMENT for null
// pointers, an out-of-range geometry or any non-LINEAR layout.
hsa_status_t HSA_API
hsa_ext_image_get_capability_with_layout_impl(hsa_agent_t agent,
                                  hsa_ext_image_geometry_t image_geometry,
                                  const hsa_ext_image_format_t* image_format,
                                  hsa_ext_image_data_layout_t image_data_layout,
                                  uint32_t* capability_mask) {
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool geometry_valid = (image_geometry >= HSA_EXT_IMAGE_GEOMETRY_1D) &&
                              (image_geometry <= HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
  const bool layout_valid = (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR);
  if (image_format == nullptr || capability_mask == nullptr ||
      !geometry_valid || !layout_valid) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return ext_image::ImageRuntime::instance()->GetImageCapability(
      agent, *image_format, image_geometry, *capability_mask);
}
// Layout-aware variant of the size/alignment query. Only the LINEAR layout is
// accepted. Zero pitches are replaced by enforceDefaultPitch() before the call
// is forwarded to ImageRuntime::GetImageSizeAndAlignment().
// Returns INVALID_AGENT for a zero agent handle and INVALID_ARGUMENT for null
// pointers, an access permission outside [RO, RW] or any non-LINEAR layout.
hsa_status_t HSA_API hsa_ext_image_data_get_info_with_layout_impl(
    hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor,
    hsa_access_permission_t access_permission,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t* image_data_info) {
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool permission_valid = (access_permission >= HSA_ACCESS_PERMISSION_RO) &&
                                (access_permission <= HSA_ACCESS_PERMISSION_RW);
  const bool layout_valid = (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR);
  if (image_descriptor == nullptr || image_data_info == nullptr ||
      !permission_valid || !layout_valid) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // The pitch parameters are local copies, so updating them here is invisible
  // to the caller.
  enforceDefaultPitch(agent, image_descriptor, image_data_row_pitch, image_data_slice_pitch);

  return ext_image::ImageRuntime::instance()->GetImageSizeAndAlignment(
      agent, *image_descriptor, image_data_layout, image_data_row_pitch,
      image_data_slice_pitch, *image_data_info);
}
// Layout-aware variant of image creation. Only the LINEAR layout is accepted.
// Zero pitches are replaced by enforceDefaultPitch() before the call is
// forwarded to ImageRuntime::CreateImageHandle().
// Returns INVALID_AGENT for a zero agent handle and INVALID_ARGUMENT for null
// pointers or any non-LINEAR layout.
hsa_status_t HSA_API
hsa_ext_image_create_with_layout_impl(hsa_agent_t agent,
                          const hsa_ext_image_descriptor_t* image_descriptor,
                          const void* image_data,
                          hsa_access_permission_t access_permission,
                          hsa_ext_image_data_layout_t image_data_layout,
                          size_t image_data_row_pitch,
                          size_t image_data_slice_pitch,
                          hsa_ext_image_t* image) {
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool pointers_valid =
      (image_descriptor != nullptr) && (image_data != nullptr) && (image != nullptr);
  const bool layout_valid = (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR);
  if (!pointers_valid || !layout_valid) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // The pitch parameters are local copies, so updating them here is invisible
  // to the caller.
  enforceDefaultPitch(agent, image_descriptor, image_data_row_pitch, image_data_slice_pitch);

  return ext_image::ImageRuntime::instance()->CreateImageHandle(
      agent, *image_descriptor, image_data, access_permission, image_data_layout,
      image_data_row_pitch, image_data_slice_pitch, *image);
}
// Creates an image handle from an explicit, vendor-specific layout descriptor
// by forwarding to ImageRuntime::CreateImageHandleWithLayout().
//
// Returns HSA_STATUS_ERROR_INVALID_AGENT for a zero agent handle and
// HSA_STATUS_ERROR_INVALID_ARGUMENT when image_descriptor, image_layout,
// image_data or image is null. image_layout was previously forwarded
// unchecked even though every other pointer argument was validated.
hsa_status_t HSA_API hsa_amd_image_create_impl(
    hsa_agent_t agent,
    const hsa_ext_image_descriptor_t *image_descriptor,
    const hsa_amd_image_descriptor_t *image_layout,
    const void *image_data,
    hsa_access_permission_t access_permission,
    hsa_ext_image_t *image)
{
  if (agent.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  if (image_descriptor == NULL || image_layout == NULL ||
      image_data == NULL || image == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return ext_image::ImageRuntime::instance()->CreateImageHandleWithLayout(
      agent, *image_descriptor, image_layout, image_data, access_permission, *image);
}
}
@@ -0,0 +1,22 @@
/*
 * Linker version script for the image library.
 * Symbols listed under "global:" are exported; the "local: *" catch-all
 * hides every other symbol.
 */
{
global:
hsa_ext_image_get_capability_impl;
hsa_ext_image_data_get_info_impl;
hsa_ext_image_create_impl;
hsa_ext_image_import_impl;
hsa_ext_image_export_impl;
hsa_ext_image_copy_impl;
hsa_ext_image_clear_impl;
hsa_ext_image_destroy_impl;
hsa_ext_sampler_create_impl;
hsa_ext_sampler_destroy_impl;
hsa_ext_image_get_capability_with_layout_impl;
hsa_ext_image_data_get_info_with_layout_impl;
hsa_ext_image_create_with_layout_impl;
hsa_amd_image_get_info_max_dim_impl;
hsa_amd_image_create_impl;
Load;
Unload;
local:
*;
};
@@ -0,0 +1,37 @@
#ifndef AMD_HSA_EXT_IMAGE_IMAGE_LUT_H
#define AMD_HSA_EXT_IMAGE_IMAGE_LUT_H
#include <stdint.h>
#include "inc/hsa_ext_image.h"
#include "resource.h"
#include "util.h"
namespace amd {
// Abstract interface for per-device image lookup tables. Every query is a
// pure virtual const method; concrete classes supply the tables.
// Copying and assignment are disabled via DISALLOW_COPY_AND_ASSIGN.
class ImageLut {
public:
ImageLut() {}
virtual ~ImageLut() {}
// Translates an HSA image geometry into an implementation-defined 32-bit code.
virtual uint32_t MapGeometry(hsa_ext_image_geometry_t geometry) const = 0;
// Returns the ImageProperty for a channel format used with the given geometry.
virtual ImageProperty MapFormat(const hsa_ext_image_format_t& format,
hsa_ext_image_geometry_t geometry) const = 0;
// Returns the Swizzle associated with a channel order.
virtual Swizzle MapSwizzle(hsa_ext_image_channel_order32_t order) const = 0;
// Maximum width / height / depth / array size for the given geometry.
virtual uint32_t GetMaxWidth(hsa_ext_image_geometry_t geometry) const = 0;
virtual uint32_t GetMaxHeight(hsa_ext_image_geometry_t geometry) const = 0;
virtual uint32_t GetMaxDepth(hsa_ext_image_geometry_t geometry) const = 0;
virtual uint32_t GetMaxArraySize(hsa_ext_image_geometry_t geometry) const = 0;
private:
DISALLOW_COPY_AND_ASSIGN(ImageLut);
};
} // namespace
#endif // AMD_HSA_EXT_IMAGE_IMAGE_LUT_H
@@ -0,0 +1,396 @@
#include "image_lut_kv.h"
#include "resource_kv.h"
namespace amd {
// Device resource type for each HSA image geometry, indexed by the
// hsa_ext_image_geometry_t value (read by MapGeometry()).
// The DEPTH geometries reuse the 2D / 2D-array resource types.
// NOTE(review): the 1DB entry is 0 - presumably buffer images do not use an
// SQ_RSRC_IMG_* type; confirm against the code that consumes this value.
const uint32_t ImageLutKv::kGeometryLut_[GEOMETRY_COUNT] = {
SQ_RSRC_IMG_1D, // HSA_EXT_IMAGE_GEOMETRY_1D
SQ_RSRC_IMG_2D, // HSA_EXT_IMAGE_GEOMETRY_2D
SQ_RSRC_IMG_3D, // HSA_EXT_IMAGE_GEOMETRY_3D
SQ_RSRC_IMG_1D_ARRAY, // HSA_EXT_IMAGE_GEOMETRY_1DA
SQ_RSRC_IMG_2D_ARRAY, // HSA_EXT_IMAGE_GEOMETRY_2DA
0, // HSA_EXT_IMAGE_GEOMETRY_1DB
SQ_RSRC_IMG_2D, // HSA_EXT_IMAGE_GEOMETRY_2DDEPTH
SQ_RSRC_IMG_2D_ARRAY // HSA_EXT_IMAGE_GEOMETRY_2DADEPTH
};
const ImageProperty ImageLutKv::kPropLut_[ORDER_COUNT][TYPE_COUNT] = {
{// HSA_EXT_IMAGE_CHANNEL_ORDER_A
{RW, 1, FMT_8, TYPE_SNORM},
{RW, 2, FMT_16, TYPE_SNORM},
{RW, 1, FMT_8, TYPE_UNORM},
{RW, 2, FMT_16, TYPE_UNORM},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{RW, 1, FMT_8, TYPE_SINT},
{RW, 2, FMT_16, TYPE_SINT},
{RW, 4, FMT_32, TYPE_SINT},
{RW, 1, FMT_8, TYPE_UINT},
{RW, 2, FMT_16, TYPE_UINT},
{RW, 4, FMT_32, TYPE_UINT},
{RW, 2, FMT_16, TYPE_FLOAT},
{RW, 4, FMT_32, TYPE_FLOAT}},
{// HSA_EXT_IMAGE_CHANNEL_ORDER_R
{RW, 1, FMT_8, TYPE_SNORM},
{RW, 2, FMT_16, TYPE_SNORM},
{RW, 1, FMT_8, TYPE_UNORM},
{RW, 2, FMT_16, TYPE_UNORM},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{RW, 1, FMT_8, TYPE_SINT},
{RW, 2, FMT_16, TYPE_SINT},
{RW, 4, FMT_32, TYPE_SINT},
{RW, 1, FMT_8, TYPE_UINT},
{RW, 2, FMT_16, TYPE_UINT},
{RW, 4, FMT_32, TYPE_UINT},
{RW, 2, FMT_16, TYPE_FLOAT},
{RW, 4, FMT_32, TYPE_FLOAT}},
{0}, // HSA_EXT_IMAGE_CHANNEL_ORDER_RX
{ // HSA_EXT_IMAGE_CHANNEL_ORDER_RG
{RW, 2, FMT_8_8, TYPE_SNORM},
{RW, 4, FMT_16_16, TYPE_SNORM},
{RW, 2, FMT_8_8, TYPE_UNORM},
{RW, 4, FMT_16_16, TYPE_UNORM},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{RW, 2, FMT_8_8, TYPE_SINT},
{RW, 4, FMT_16_16, TYPE_SINT},
{RW, 8, FMT_32_32, TYPE_SINT},
{RW, 2, FMT_8_8, TYPE_UINT},
{RW, 4, FMT_16_16, TYPE_UINT},
{RW, 8, FMT_32_32, TYPE_UINT},
{RW, 4, FMT_16_16, TYPE_FLOAT},
{RW, 8, FMT_32_32, TYPE_FLOAT}},
{0}, // HSA_EXT_IMAGE_CHANNEL_ORDER_RGX
{ // HSA_EXT_IMAGE_CHANNEL_ORDER_RA
{RW, 2, FMT_8_8, TYPE_SNORM},
{RW, 4, FMT_16_16, TYPE_SNORM},
{RW, 2, FMT_8_8, TYPE_UNORM},
{RW, 4, FMT_16_16, TYPE_UNORM},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{RW, 2, FMT_8_8, TYPE_SINT},
{RW, 4, FMT_16_16, TYPE_SINT},
{RW, 8, FMT_32_32, TYPE_SINT},
{RW, 2, FMT_8_8, TYPE_UINT},
{RW, 4, FMT_16_16, TYPE_UINT},
{RW, 8, FMT_32_32, TYPE_UINT},
{RW, 4, FMT_16_16, TYPE_FLOAT},
{RW, 8, FMT_32_32, TYPE_FLOAT}},
{// HSA_EXT_IMAGE_CHANNEL_ORDER_RGB
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{RW, 2, FMT_1_5_5_5, TYPE_UNORM},
{RW, 2, FMT_5_6_5, TYPE_UNORM},
{RW, 4, FMT_2_10_10_10, TYPE_UNORM},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0}},
{0}, // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX
{ // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA
{RW, 4, FMT_8_8_8_8, TYPE_SNORM},
{RW, 8, FMT_16_16_16_16, TYPE_SNORM},
{RW, 4, FMT_8_8_8_8, TYPE_UNORM},
{RW, 8, FMT_16_16_16_16, TYPE_UNORM},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{RW, 4, FMT_8_8_8_8, TYPE_SINT},
{RW, 8, FMT_16_16_16_16, TYPE_SINT},
{RW, 16, FMT_32_32_32_32, TYPE_SINT},
{RW, 4, FMT_8_8_8_8, TYPE_UINT},
{RW, 8, FMT_16_16_16_16, TYPE_UINT},
{RW, 16, FMT_32_32_32_32, TYPE_UINT},
{RW, 8, FMT_16_16_16_16, TYPE_FLOAT},
{RW, 16, FMT_32_32_32_32, TYPE_FLOAT}},
{// HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA
{RW, 4, FMT_8_8_8_8, TYPE_SNORM},
{0, 0, 0, 0},
{RW, 4, FMT_8_8_8_8, TYPE_UNORM},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{RW, 4, FMT_8_8_8_8, TYPE_SINT},
{0, 0, 0, 0},
{0, 0, 0, 0},
{RW, 4, FMT_8_8_8_8, TYPE_UINT},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0}},
{// HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB
{RW, 4, FMT_8_8_8_8, TYPE_SNORM},
{0, 0, 0, 0},
{RW, 4, FMT_8_8_8_8, TYPE_UNORM},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{RW, 4, FMT_8_8_8_8, TYPE_SINT},
{0, 0, 0, 0},
{0, 0, 0, 0},
{RW, 4, FMT_8_8_8_8, TYPE_UINT},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0}},
{0}, // HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR
{0}, // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB
{0}, // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX
{ // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA
{0, 0, 0, 0},
{0, 0, 0, 0},
{RO, 4, FMT_8_8_8_8, TYPE_SRGB},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0}},
{0}, // HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA
{ // HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY
{RW, 1, FMT_8, TYPE_SNORM},
{RW, 2, FMT_16, TYPE_SNORM},
{RW, 1, FMT_8, TYPE_UNORM},
{RW, 2, FMT_16, TYPE_UNORM},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{RW, 2, FMT_16, TYPE_FLOAT},
{RW, 4, FMT_32, TYPE_FLOAT}},
{// HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE
{RW, 1, FMT_8, TYPE_SNORM},
{RW, 2, FMT_16, TYPE_SNORM},
{RW, 1, FMT_8, TYPE_UNORM},
{RW, 2, FMT_16, TYPE_UNORM},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{RW, 2, FMT_16, TYPE_FLOAT},
{RW, 4, FMT_32, TYPE_FLOAT}},
{// HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{ROWO, 2, FMT_16, TYPE_UNORM},
// TODO: 24-bit depth is not supported yet; the first field of the entry below is left at 0.
{0, 3, FMT_32, TYPE_UNORM},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{ROWO, 4, FMT_32, TYPE_FLOAT}},
{0} // HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL
};
// Channel swizzle for each HSA channel order, indexed by the channel order
// value (read by MapSwizzle()). Each row holds four selectors; SEL_0 / SEL_1
// appear to select the constants 0 / 1 and SEL_X..SEL_W a stored component.
// NOTE(review): the selector order looks like (R, G, B, A) judging by the
// RGBA / BGRA / ARGB rows - confirm against the Swizzle definition.
// NOTE(review): by the same reading the ABGR row would be expected to be
// {SEL_W, SEL_Z, SEL_Y, SEL_X}; kPropLut_ marks ABGR as unsupported, so the
// row may simply be unused - verify before enabling that order.
const Swizzle ImageLutKv::kSwizzleLut_[ORDER_COUNT] = {
{SEL_0, SEL_0, SEL_0, SEL_X}, // HSA_EXT_IMAGE_CHANNEL_ORDER_A
{SEL_X, SEL_0, SEL_0, SEL_1}, // HSA_EXT_IMAGE_CHANNEL_ORDER_R
{SEL_X, SEL_0, SEL_0, SEL_1}, // HSA_EXT_IMAGE_CHANNEL_ORDER_RX
{SEL_X, SEL_Y, SEL_0, SEL_1}, // HSA_EXT_IMAGE_CHANNEL_ORDER_RG
{SEL_X, SEL_Y, SEL_0, SEL_1}, // HSA_EXT_IMAGE_CHANNEL_ORDER_RGX
{SEL_X, SEL_0, SEL_0, SEL_Y}, // HSA_EXT_IMAGE_CHANNEL_ORDER_RA
{SEL_Z, SEL_Y, SEL_X, SEL_1}, // HSA_EXT_IMAGE_CHANNEL_ORDER_RGB
{SEL_Z, SEL_Y, SEL_X, SEL_1}, // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX
{SEL_X, SEL_Y, SEL_Z, SEL_W}, // HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA
{SEL_Z, SEL_Y, SEL_X, SEL_W}, // HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA
{SEL_Y, SEL_Z, SEL_W, SEL_X}, // HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB
{SEL_Y, SEL_X, SEL_W, SEL_Z}, // HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR
{SEL_X, SEL_Y, SEL_Z, SEL_1}, // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB
{SEL_X, SEL_Y, SEL_Z, SEL_1}, // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX
{SEL_X, SEL_Y, SEL_Z, SEL_W}, // HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA
{SEL_Z, SEL_Y, SEL_X, SEL_W}, // HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA
{SEL_X, SEL_X, SEL_X, SEL_X}, // HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY
{SEL_X, SEL_X, SEL_X, SEL_1}, // HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE
{SEL_X, SEL_0, SEL_0, SEL_0}, // HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH
{SEL_Y, SEL_0, SEL_0, SEL_0} // HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL
};
// Maximum image dimensions per geometry, indexed by the
// hsa_ext_image_geometry_t value. Column layout, as read by the GetMax*()
// accessors: [0] width, [1] height, [2] depth, [3] array size.
const uint32_t ImageLutKv::kMaxDimensionLut_[GEOMETRY_COUNT][4] = {
{16384, 1, 1, 1}, // HSA_EXT_IMAGE_GEOMETRY_1D
{16384, 16384, 1, 1}, // HSA_EXT_IMAGE_GEOMETRY_2D
{2048, 2048, 2048, 1}, // HSA_EXT_IMAGE_GEOMETRY_3D
{16384, 1, 1, 2048}, // HSA_EXT_IMAGE_GEOMETRY_1DA
{16384, 16384, 1, 2048}, // HSA_EXT_IMAGE_GEOMETRY_2DA
{65536, 1, 1, 1}, // HSA_EXT_IMAGE_GEOMETRY_1DB
{16384, 16384, 1, 1}, // HSA_EXT_IMAGE_GEOMETRY_2DDEPTH
{16384, 16384, 1, 2048} // HSA_EXT_IMAGE_GEOMETRY_2DADEPTH
};
// Returns the device resource type stored in kGeometryLut_ for `geometry`.
// An unknown geometry asserts in debug builds and yields uint32_t(-1).
uint32_t ImageLutKv::MapGeometry(hsa_ext_image_geometry_t geometry) const {
  switch (geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1D:
    case HSA_EXT_IMAGE_GEOMETRY_2D:
    case HSA_EXT_IMAGE_GEOMETRY_3D:
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
    case HSA_EXT_IMAGE_GEOMETRY_2DA:
    case HSA_EXT_IMAGE_GEOMETRY_1DB:
    case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
      // Known geometry: fall out of the switch to the table lookup.
      break;
    default:
      assert(false && "Should not reach here");
      return static_cast<uint32_t>(-1);
  }

  return kGeometryLut_[geometry];
}
// Looks up the ImageProperty for `format` when used with `geometry`.
//   1D / 2D / 3D / 1DA / 2DA : straight kPropLut_ lookup.
//   1DB                      : sRGB channel orders and 555/565 packed types
//                              are rejected (no buffer access in hardware).
//   2DDEPTH / 2DADEPTH       : only DEPTH and DEPTH_STENCIL orders are looked up.
// Every rejected combination - and an unknown geometry, which also asserts in
// debug builds - returns a zero-initialized ImageProperty.
ImageProperty ImageLutKv::MapFormat(const hsa_ext_image_format_t& format,
                                    hsa_ext_image_geometry_t geometry) const {
  bool use_table = false;

  switch (geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1D:
    case HSA_EXT_IMAGE_GEOMETRY_2D:
    case HSA_EXT_IMAGE_GEOMETRY_3D:
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
    case HSA_EXT_IMAGE_GEOMETRY_2DA:
      use_table = true;
      break;

    case HSA_EXT_IMAGE_GEOMETRY_1DB: {
      // Hardware does not support buffer access to srgb image.
      const bool srgb_order =
          (format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB) ||
          (format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX) ||
          (format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA) ||
          (format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA);
      // Hardware does not support buffer access to 555/565 packed image.
      const bool packed_short =
          (format.channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555) ||
          (format.channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565);
      use_table = !srgb_order && !packed_short;
      break;
    }

    case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
      use_table =
          (format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH) ||
          (format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL);
      break;

    default:
      assert(false && "Should not reach here");
      break;
  }

  if (use_table) {
    return kPropLut_[format.channel_order][format.channel_type];
  }

  ImageProperty unsupported = {0};
  return unsupported;
}
// Returns the kSwizzleLut_ entry for `order`.
// An unknown order asserts in debug builds and yields a Swizzle whose four
// selectors are all 0xff.
Swizzle ImageLutKv::MapSwizzle(hsa_ext_image_channel_order32_t order) const {
  switch (order) {
    case HSA_EXT_IMAGE_CHANNEL_ORDER_A:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_R:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RX:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RG:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RGX:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RA:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RGB:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL:
      // Known order: fall out of the switch to the table lookup.
      break;
    default: {
      assert(false && "Should not reach here");
      const Swizzle invalid_swizzle = {0xff, 0xff, 0xff, 0xff};
      return invalid_swizzle;
    }
  }

  return kSwizzleLut_[order];
}
// Maximum image width for `geometry` (column 0 of kMaxDimensionLut_).
// `geometry` is used as an unchecked table index.
uint32_t ImageLutKv::GetMaxWidth(hsa_ext_image_geometry_t geometry) const {
  const uint32_t* limits = kMaxDimensionLut_[geometry];
  return limits[0];
}
// Maximum image height for `geometry` (column 1 of kMaxDimensionLut_).
// `geometry` is used as an unchecked table index.
uint32_t ImageLutKv::GetMaxHeight(hsa_ext_image_geometry_t geometry) const {
  const uint32_t* limits = kMaxDimensionLut_[geometry];
  return limits[1];
}
// Maximum image depth for `geometry` (column 2 of kMaxDimensionLut_).
// `geometry` is used as an unchecked table index.
uint32_t ImageLutKv::GetMaxDepth(hsa_ext_image_geometry_t geometry) const {
  const uint32_t* limits = kMaxDimensionLut_[geometry];
  return limits[2];
}
// Maximum array size for `geometry` (column 3 of kMaxDimensionLut_).
// `geometry` is used as an unchecked table index.
uint32_t ImageLutKv::GetMaxArraySize(hsa_ext_image_geometry_t geometry) const {
  const uint32_t* limits = kMaxDimensionLut_[geometry];
  return limits[3];
}
// Returns the size in bytes of one pixel with the given device data format,
// or 0 for a format that is not listed.
// Currently only supports formats that ROCr can create.
uint32_t ImageLutKv::GetPixelSize(uint8_t data_format, uint8_t data_type) const {
  switch (data_format) {
    case FMT_8:
      return 1;

    case FMT_8_8:
    case FMT_16:
    case FMT_5_6_5:
    case FMT_1_5_5_5:
      return 2;

    case FMT_8_8_8_8:
    case FMT_16_16:
    case FMT_2_10_10_10:
      return 4;

    case FMT_32:
      // SPK: Where is unorm returning 3? Was this a Hawaii specific thing?
      if (data_type == TYPE_UNORM) {
        return 3;
      }
      return 4;

    case FMT_16_16_16_16:
    case FMT_32_32:
      return 8;

    case FMT_32_32_32_32:
      return 16;

    default:
      return 0;
  }
}
} // namespace amd
@@ -0,0 +1,50 @@
#ifndef AMD_HSA_EXT_IMAGE_IMAGE_LUT_KV_H
#define AMD_HSA_EXT_IMAGE_IMAGE_LUT_KV_H
#include "image_lut.h"
namespace amd {
// ImageLut implementation backed by static lookup tables. It maps HSA image
// geometry, format and channel order values to device values and reports
// the per-geometry dimension limits.
class ImageLutKv : public ImageLut {
public:
ImageLutKv() {}
virtual ~ImageLutKv() {}
// Maps an HSA image geometry to the device geometry enum value.
virtual uint32_t MapGeometry(hsa_ext_image_geometry_t geometry) const;
// Looks up the device image property for a format / geometry pair.
virtual ImageProperty MapFormat(const hsa_ext_image_format_t& format,
hsa_ext_image_geometry_t geometry) const;
// Looks up the channel swizzle for a channel order.
virtual Swizzle MapSwizzle(hsa_ext_image_channel_order32_t order) const;
// Maximum width / height / depth / array size for the given geometry, read
// from kMaxDimensionLut_.
virtual uint32_t GetMaxWidth(hsa_ext_image_geometry_t geometry) const;
virtual uint32_t GetMaxHeight(hsa_ext_image_geometry_t geometry) const;
virtual uint32_t GetMaxDepth(hsa_ext_image_geometry_t geometry) const;
virtual uint32_t GetMaxArraySize(hsa_ext_image_geometry_t geometry) const;
// Size in bytes of one pixel of the given hardware data format; returns 0
// for formats that are not handled. Not virtual, unlike the methods above.
uint32_t GetPixelSize(uint8_t data_format, uint8_t data_type) const;
private:
// Lookup table of image geometry to device geometry enum.
static const uint32_t kGeometryLut_[GEOMETRY_COUNT];
// Lookup table of channel format property. Based on HSA Programmer's
// Reference Manual 1.0P Table 9-4 Channel Order, Channel type and Image
// Geometry Combinations.
static const ImageProperty kPropLut_[ORDER_COUNT][TYPE_COUNT];
// Lookup table of channel order swizzle.
static const Swizzle kSwizzleLut_[ORDER_COUNT];
// Lookup table of image geometry to max dimension.
// Each record contains four values: width, height, depth, array_size.
static const uint32_t kMaxDimensionLut_[GEOMETRY_COUNT][4];
DISALLOW_COPY_AND_ASSIGN(ImageLutKv);
};
} // namespace
#endif // AMD_HSA_EXT_IMAGE_IMAGE_LUT_KV_H
@@ -0,0 +1,688 @@
#include "inc/hsa_ext_amd.h"
#include "inc/hsa_ext_image.h"
#include "image_manager.h"
#include "image_runtime.h"
#include <assert.h>
#include <algorithm>
#include <climits>
#include <cmath>
#if (defined(WIN32) || defined(_WIN32))
#define NOMINMAX
__inline long int lrintf(float f) { return _mm_cvtss_si32(_mm_load_ss(&f)); }
#endif
namespace amd {
// Allocates an Image object from the image runtime's kernarg memory pool,
// constructs it in place and grants `agent` access to it. Returns NULL when
// the allocation or the access grant fails.
Image* Image::Create(hsa_agent_t agent) {
  hsa_amd_memory_pool_t kernarg_pool =
      ext_image::ImageRuntime::instance()->kernarg_pool();

  Image* object = NULL;
  hsa_status_t status = hsa_amd_memory_pool_allocate(
      kernarg_pool, sizeof(Image), 0, reinterpret_cast<void**>(&object));
  assert(status == HSA_STATUS_SUCCESS);
  if (status != HSA_STATUS_SUCCESS) {
    return NULL;
  }

  // The memory is raw pool storage, so construct with placement new.
  new (object) Image();

  status = hsa_amd_agents_allow_access(1, &agent, NULL, object);
  if (status == HSA_STATUS_SUCCESS) {
    return object;
  }

  // Access could not be granted: tear down and release the object.
  Image::Destroy(object);
  return NULL;
}
void Image::Destroy(const Image* image) {
assert(image != NULL);
image->~Image();
hsa_status_t status = hsa_amd_memory_pool_free(const_cast<Image*>(image));
assert(status == HSA_STATUS_SUCCESS);
}
// Allocates a Sampler object from the image runtime's kernarg memory pool,
// constructs it in place and grants `agent` access to it. Returns NULL when
// the allocation or the access grant fails.
Sampler* Sampler::Create(hsa_agent_t agent) {
  hsa_amd_memory_pool_t kernarg_pool =
      ext_image::ImageRuntime::instance()->kernarg_pool();

  Sampler* object = NULL;
  hsa_status_t status = hsa_amd_memory_pool_allocate(
      kernarg_pool, sizeof(Sampler), 0, reinterpret_cast<void**>(&object));
  if (status != HSA_STATUS_SUCCESS) {
    return NULL;
  }

  // The memory is raw pool storage, so construct with placement new.
  new (object) Sampler();

  status = hsa_amd_agents_allow_access(1, &agent, NULL, object);
  if (status == HSA_STATUS_SUCCESS) {
    return object;
  }

  // Access could not be granted: tear down and release the object.
  Sampler::Destroy(object);
  return NULL;
}
void Sampler::Destroy(const Sampler* sampler) {
assert(sampler != NULL);
sampler->~Sampler();
hsa_status_t status = hsa_amd_memory_pool_free(const_cast<Sampler*>(sampler));
assert(status == HSA_STATUS_SUCCESS);
}
// Default constructor; the body is empty.
ImageManager::ImageManager() {}
// Destructor; the body is empty.
ImageManager::~ImageManager() {}
/// @brief Copy the content of a linear buffer into a region of an image.
///
/// The buffer is wrapped in a temporary, read-only Image that borrows the
/// destination's descriptor, so the transfer can be delegated to CopyImage().
///
/// @param src_memory      Start of the source buffer.
/// @param src_row_pitch   Row pitch of the source buffer in bytes.
/// @param src_slice_pitch Slice pitch of the source buffer in bytes.
/// @param dst_image       Image that receives the data.
/// @param image_region    Destination offset and extent of the copy.
///
/// @return HSA_STATUS_ERROR_OUT_OF_RESOURCES when the temporary Image cannot
/// be created, otherwise the status returned by CopyImage().
hsa_status_t ImageManager::CopyBufferToImage(
    const void* src_memory, size_t src_row_pitch, size_t src_slice_pitch,
    const Image& dst_image, const hsa_ext_image_region_t& image_region) {
  Image* src_image = Image::Create(dst_image.component);
  // Image::Create() returns NULL on failure; do not dereference it.
  if (src_image == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  src_image->component = dst_image.component;
  src_image->desc = dst_image.desc;
  src_image->data = const_cast<void*>(src_memory);
  src_image->permission = HSA_ACCESS_PERMISSION_RO;
  src_image->row_pitch = src_row_pitch;
  src_image->slice_pitch = src_slice_pitch;

  // The buffer is always read from its start; only the destination is offset.
  const hsa_dim3_t dst_origin = image_region.offset;
  const hsa_dim3_t src_origin = {0};
  const hsa_dim3_t copy_size = image_region.range;

  hsa_status_t status = ImageManager::CopyImage(
      dst_image, *src_image, dst_origin, src_origin, copy_size);

  Image::Destroy(src_image);
  return status;
}
/// @brief Copy a region of an image into a linear buffer.
///
/// The buffer is wrapped in a temporary, write-only Image that borrows the
/// source's descriptor, so the transfer can be delegated to CopyImage().
///
/// @param src_image       Image to read from.
/// @param dst_memory      Start of the destination buffer.
/// @param dst_row_pitch   Row pitch of the destination buffer in bytes.
/// @param dst_slice_pitch Slice pitch of the destination buffer in bytes.
/// @param image_region    Source offset and extent of the copy.
///
/// @return HSA_STATUS_ERROR_OUT_OF_RESOURCES when the temporary Image cannot
/// be created, otherwise the status returned by CopyImage().
hsa_status_t ImageManager::CopyImageToBuffer(
    const Image& src_image, void* dst_memory, size_t dst_row_pitch,
    size_t dst_slice_pitch, const hsa_ext_image_region_t& image_region) {
  // Treat buffer as image since we don't tile our image anyway.
  amd::Image* dst_image = Image::Create(src_image.component);
  // Image::Create() returns NULL on failure; do not dereference it.
  if (dst_image == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  dst_image->component = src_image.component;
  dst_image->desc = src_image.desc;  // the width, height, depth is ignored.
  dst_image->data = dst_memory;
  dst_image->permission = HSA_ACCESS_PERMISSION_WO;
  dst_image->row_pitch = dst_row_pitch;
  dst_image->slice_pitch = dst_slice_pitch;

  // The buffer is always written from its start; only the source is offset.
  const hsa_dim3_t dst_origin = {0};
  const hsa_dim3_t src_origin = image_region.offset;
  const hsa_dim3_t copy_size = image_region.range;

  hsa_status_t status = ImageManager::CopyImage(
      *dst_image, src_image, dst_origin, src_origin, copy_size);

  Image::Destroy(dst_image);
  return status;
}
/// @brief Host-side copy of a region between two images' backing storage.
///
/// Both images must have the same channel order and type. The single
/// exception is UNORM_INT8 SRGBA <-> RGBA, which is converted per pixel.
/// Any other mismatch returns HSA_STATUS_ERROR_INVALID_ARGUMENT.
///
/// @param dst_image  Destination image; its data pointer is written.
/// @param src_image  Source image; its data pointer is read.
/// @param dst_origin Destination offset (x in elements, y in rows, z in slices).
/// @param src_origin Source offset, same units as dst_origin.
/// @param size       Extent of the copy (x elements, y rows, z slices).
///
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT for an
/// unsupported format combination.
hsa_status_t ImageManager::CopyImage(const Image& dst_image,
const Image& src_image,
const hsa_dim3_t& dst_origin,
const hsa_dim3_t& src_origin,
const hsa_dim3_t size) {
// Look up the device property of both images. Unsupported formats and zero
// element sizes are only caught by assert (debug builds).
ImageProperty dst_image_prop = GetImageProperty(
dst_image.component, dst_image.desc.format, dst_image.desc.geometry);
assert(dst_image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
const size_t dst_element_size = dst_image_prop.element_size;
assert(dst_element_size != 0);
ImageProperty src_image_prop = GetImageProperty(
src_image.component, src_image.desc.format, src_image.desc.geometry);
assert(src_image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
const size_t src_element_size = src_image_prop.element_size;
assert(src_element_size != 0);
const hsa_ext_image_format_t src_format = src_image.desc.format;
const hsa_ext_image_channel_order32_t src_order = src_format.channel_order;
const hsa_ext_image_channel_type32_t src_type = src_format.channel_type;
const hsa_ext_image_format_t dst_format = dst_image.desc.format;
const hsa_ext_image_channel_order32_t dst_order = dst_format.channel_order;
const hsa_ext_image_channel_type32_t dst_type = dst_format.channel_type;
// At most one of these flags is set; both false means a plain byte copy.
bool linear_to_standard_rgb = false;
bool standard_to_linear_rgb = false;
if ((src_order != dst_order) || (src_type != dst_type)) {
// Source and destination format must be the same, except for
// SRGBA <--> RGBA images.
if ((src_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8) &&
(dst_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8)) {
if ((src_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA) &&
(dst_order == HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA)) {
standard_to_linear_rgb = true;
} else if ((src_order == HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA) &&
(dst_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA)) {
linear_to_standard_rgb = true;
} else {
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
} else {
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
}
// Source and destination format should be the same so the element size
// should be same too.
const size_t element_size = src_element_size;
// row_pitch and slice_pitch in bytes.
// Each pitch is the larger of the image's stored pitch and the tightly
// packed pitch of the copied region. For 1DA geometry a slice is one row.
const size_t dst_row_pitch =
std::max(dst_image.row_pitch, size.x * element_size);
const size_t dst_slice_pitch = std::max(
dst_image.slice_pitch,
dst_row_pitch *
(dst_image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ? 1 : size.y));
const size_t src_row_pitch =
std::max(src_image.row_pitch, size.x * element_size);
const size_t src_slice_pitch = std::max(
src_image.slice_pitch,
src_row_pitch *
(src_image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ? 1 : size.y));
size_t src_offset = src_origin.x;
size_t dst_offset = dst_origin.x;
size_t copy_size = size.x;
// Calculate the source offset in bytes.
src_offset *= element_size;
src_offset += src_row_pitch * src_origin.y;
src_offset += src_slice_pitch * src_origin.z;
// Calculate the destination offset in bytes.
dst_offset *= element_size;
dst_offset += dst_row_pitch * dst_origin.y;
dst_offset += dst_slice_pitch * dst_origin.z;
// Number of bytes copied per row.
copy_size *= element_size;
// Get destination and source memory.
unsigned char* dst = static_cast<unsigned char*>(dst_image.data);
const unsigned char* src = static_cast<const unsigned char*>(src_image.data);
if (!linear_to_standard_rgb && !standard_to_linear_rgb) {
// Copy the memory by row.
for (size_t slice = 0; slice < size.z; ++slice) {
size_t src_offset_temp = src_offset + slice * src_slice_pitch;
size_t dst_offset_temp = dst_offset + slice * dst_slice_pitch;
for (size_t rows = 0; rows < size.y; ++rows) {
std::memcpy((dst + dst_offset_temp), (src + src_offset_temp),
copy_size);
src_offset_temp += src_row_pitch;
dst_offset_temp += dst_row_pitch;
}
}
} else {
// Copy per pixel between RGBA-SRGBA images.
// R, G and B are converted through normalized float; A is copied as is.
for (size_t slice = 0; slice < size.z; ++slice) {
size_t src_offset_temp = src_offset + slice * src_slice_pitch;
size_t dst_offset_temp = dst_offset + slice * dst_slice_pitch;
for (size_t rows = 0; rows < size.y; ++rows) {
const uint8_t* src_pixel = src + src_offset_temp;
uint8_t* dst_pixel = dst + dst_offset_temp;
if (linear_to_standard_rgb) {
for (size_t cols = 0; cols < size.x; ++cols) {
dst_pixel[0] =
Denormalize(LinearToStandardRGB(Normalize(src_pixel[0]))); // R
dst_pixel[1] =
Denormalize(LinearToStandardRGB(Normalize(src_pixel[1]))); // G
dst_pixel[2] =
Denormalize(LinearToStandardRGB(Normalize(src_pixel[2]))); // B
dst_pixel[3] = src_pixel[3]; // A
src_pixel += element_size;
dst_pixel += element_size;
}
} else {
assert(standard_to_linear_rgb);
for (size_t cols = 0; cols < size.x; ++cols) {
dst_pixel[0] =
Denormalize(StandardToLinearRGB(Normalize(src_pixel[0]))); // R
dst_pixel[1] =
Denormalize(StandardToLinearRGB(Normalize(src_pixel[1]))); // G
dst_pixel[2] =
Denormalize(StandardToLinearRGB(Normalize(src_pixel[2]))); // B
dst_pixel[3] = src_pixel[3]; // A
src_pixel += element_size;
dst_pixel += element_size;
}
}
src_offset_temp += src_row_pitch;
dst_offset_temp += dst_row_pitch;
}
}
}
return HSA_STATUS_SUCCESS;
}
/// @brief Convert a 32-bit float to the bit pattern of a 16-bit half float.
///
/// The mantissa is truncated (low 13 bits dropped), not rounded. Values too
/// large for a half are clamped to the largest finite half (0x7bff). Values
/// too small for a half subnormal become signed zero. Inf and NaN map to the
/// half Inf / quiet NaN / signalling NaN patterns.
///
/// @param in Value to convert.
/// @return The half-float bit pattern.
uint16_t ImageManager::FloatToHalf(float in) {
  // Read the float's bit pattern with memcpy. Reading the inactive member of
  // a union is undefined behaviour in C++.
  uint32_t bits = 0;
  static_assert(sizeof(bits) == sizeof(in), "float must be 32 bits wide");
  std::memcpy(&bits, &in, sizeof(bits));

  const uint16_t sign_bit_16 = (bits >> 16) & 0x8000;
  const uint32_t exp_32 = (bits >> 23) & 0xff;
  const uint32_t mantissa_32 = bits & 0x7fffff;

  if (exp_32 == 0 && mantissa_32 == 0) {
    // Zero.
    return sign_bit_16;
  }

  if (exp_32 == 0xff) {
    if (mantissa_32 == 0) {
      // Inf.
      return (sign_bit_16 | 0x7c00);
    } else if ((mantissa_32 & 0x400000)) {
      // Quiet NaN.
      return (sign_bit_16 | 0x7e00);
    } else {
      // Signal NaN.
      return (sign_bit_16 | 0x7c01);
    }
  }

  // Biased float exponents that bound the half-float ranges.
  const uint32_t kMaxExpNormal = 0x477fe000 >> 23;     // 65504.
  const uint32_t kMinExpNormal = 0x38800000 >> 23;     // 2^-14;
  const uint32_t kMinExpSubnormal = 0x33800000 >> 23;  // 2^-24.

  if (exp_32 > kMaxExpNormal) {
    // Half overflow.
    // TODO: clamp it to max half float or +Inf.
    return (sign_bit_16 | 0x7bff);
  } else if (exp_32 < kMinExpSubnormal) {
    // Half underflow.
    return (sign_bit_16);
  } else if (exp_32 < kMinExpNormal) {
    // Half subnormal: restore the implicit leading one, then shift it into
    // the subnormal position.
    return (sign_bit_16 |
            ((0x0400 | (mantissa_32 >> 13)) >> (127 - exp_32 - 14)));
  } else {
    // Half normal: rebias the exponent from 127 to 15 and keep the top ten
    // mantissa bits.
    return (sign_bit_16 |
            (((exp_32 - 127 + 15) << 10) | (mantissa_32 >> 13)));
  }
}
// Maps an 8-bit unsigned value onto [0.0, 1.0]. The two end points are
// returned exactly; every other value is u_val / UINT8_MAX, clamped.
float ImageManager::Normalize(uint8_t u_val) {
  if (u_val == 0) return 0.0f;
  if (u_val == UINT8_MAX) return 1.0f;

  const float ratio =
      static_cast<float>(u_val) / static_cast<float>(UINT8_MAX);
  const float at_least_zero = std::max(ratio, 0.0f);
  return std::min(at_least_zero, 1.0f);
}
// Scales a float by UINT8_MAX, rounds it with lrintf and clamps the result
// to [0, UINT8_MAX].
uint8_t ImageManager::Denormalize(float f_val) {
  const unsigned long kScale = UINT8_MAX;
  const long rounded = lrintf(kScale * f_val);
  // Clamp below first so the unsigned conversion never sees a negative.
  const long non_negative = std::max(rounded, 0l);
  return std::min(static_cast<unsigned long>(non_negative), kScale);
}
// Converts an sRGB-encoded value to linear RGB, following HSA Programmers
// Reference Manual version 1.0 Provisional, chapter 7.1.4.1.2 Standard RGB
// (s-Form). The input is not clamped.
float ImageManager::StandardToLinearRGB(float s_val) {
  const double encoded = (double)s_val;
  double linear;
  if (encoded <= 0.04045f) {
    // Linear segment near zero.
    linear = encoded / 12.92f;
  } else {
    // Power-law segment.
    linear = pow(((encoded + 0.055f) / 1.055f), 2.4f);
  }
  return linear;
}
// Converts a linear RGB value to sRGB encoding, following HSA Programmers
// Reference Manual version 1.0 Provisional, chapter 7.1.4.1.2 Standard RGB
// (s-Form). NaN is treated as 0 and the input is clamped to [0, 1].
float ImageManager::LinearToStandardRGB(float l_val) {
  double linear = (double)l_val;
#if (defined(WIN32) || defined(_WIN32))
  if (_isnan(linear)) linear = 0.0;
#else
  if (std::isnan(linear)) linear = 0.0;
#endif

  // Out-of-range inputs saturate.
  if (linear > 1.0) return 1.0f;
  if (linear < 0.0) return 0.0f;

  // Linear segment near zero.
  if (linear < 0.0031308) {
    const double encoded = 12.92 * linear;
    return encoded;
  }

  // Power-law segment.
  const double encoded = (1.055 * pow(linear, 5.0 / 12.0)) - 0.055;
  return encoded;
}
/// @brief Pack a four-component fill pattern into one pixel of the given
/// image format.
///
/// The channel order selects which input components (R=0, G=1, B=2, A=3)
/// are emitted and in which order. The channel type selects how each
/// component is read from pattern_in (float, int32_t or uint32_t) and how
/// it is stored in pattern_out. For sRGB channel orders the first three
/// input floats are passed through LinearToStandardRGB() first.
///
/// @param format      Channel order and channel type of the target image.
/// @param pattern_in  Four 32-bit components, indexed 0..3 as R, G, B, A.
/// @param pattern_out Receives the packed pixel.
void ImageManager::FormatPattern(const hsa_ext_image_format_t& format,
const void* pattern_in, void* pattern_out) {
// Indices of the components inside pattern_in.
const int kR = 0;
const int kG = 1;
const int kB = 2;
const int kA = 3;
// index[c] is the input component stored in output channel c.
int index[4] = {0};
int num_channel = 0;
switch (format.channel_order) {
case HSA_EXT_IMAGE_CHANNEL_ORDER_A:
index[0] = kA;
num_channel = 1;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_R:
case HSA_EXT_IMAGE_CHANNEL_ORDER_RX:
index[0] = kR;
num_channel = 1;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_RG:
case HSA_EXT_IMAGE_CHANNEL_ORDER_RGX:
index[0] = kR;
index[1] = kG;
num_channel = 2;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_RA:
index[0] = kR;
index[1] = kA;
num_channel = 2;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_RGB:
case HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX:
case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB:
case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX:
index[0] = kR;
index[1] = kG;
index[2] = kB;
num_channel = 3;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA:
case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA:
index[0] = kR;
index[1] = kG;
index[2] = kB;
index[3] = kA;
num_channel = 4;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA:
case HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA:
index[0] = kB;
index[1] = kG;
index[2] = kR;
index[3] = kA;
num_channel = 4;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB:
index[0] = kA;
index[1] = kR;
index[2] = kG;
index[3] = kB;
num_channel = 4;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR:
index[0] = kA;
index[1] = kB;
index[2] = kG;
index[3] = kR;
num_channel = 4;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY:
case HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE:
case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH:
case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL:
index[0] = kR;
num_channel = 1;
break;
default:
// With asserts disabled num_channel stays 0 and nothing is written.
assert(false && "Should not reach here.");
break;
}
// Typed views of the input pattern; which one is read depends on the
// channel type handled below.
const float* pattern_in_f = NULL;
const int32_t* pattern_in_i32 = NULL;
const uint32_t* pattern_in_ui32 = NULL;
// Scratch copy of the pattern used when an sRGB conversion is applied.
float new_pattern_in_f[4] = { 0 };
if ((format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB) ||
(format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX) ||
(format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA) ||
(format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA)) {
// sRGB orders: encode R, G and B; alpha is passed through unchanged.
// NOTE(review): pattern_in_i32 / pattern_in_ui32 stay NULL in this branch,
// so an integer channel type combined with an sRGB order would dereference
// NULL below. Presumably callers never pass that combination - confirm.
pattern_in_f = reinterpret_cast<const float*>(pattern_in);
new_pattern_in_f[0] = LinearToStandardRGB(pattern_in_f[0]);
new_pattern_in_f[1] = LinearToStandardRGB(pattern_in_f[1]);
new_pattern_in_f[2] = LinearToStandardRGB(pattern_in_f[2]);
new_pattern_in_f[3] = pattern_in_f[3];
pattern_in_f = reinterpret_cast<const float*>(new_pattern_in_f);
} else {
pattern_in_f = reinterpret_cast<const float*>(pattern_in);
pattern_in_i32 = reinterpret_cast<const int32_t*>(pattern_in);
pattern_in_ui32 = reinterpret_cast<const uint32_t*>(pattern_in);
}
// Emit one output channel per iteration. The packed SHORT_* types write
// all of their fields in the first iteration and return immediately.
for (int c = 0; c < num_channel; ++c) {
switch (format.channel_type) {
// Signed normalized: scale by the type's max, round, clamp to its range.
case HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8: {
int8_t* pattern_out_i8 = reinterpret_cast<int8_t*>(pattern_out);
const long kScale = INT8_MAX;
const long conv = lrintf(kScale * pattern_in_f[index[c]]);
pattern_out_i8[c] = std::min(std::max(conv, -kScale - 1l), kScale);
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16: {
int16_t* pattern_out_i16 = reinterpret_cast<int16_t*>(pattern_out);
const long kScale = INT16_MAX;
const long conv = lrintf(kScale * pattern_in_f[index[c]]);
pattern_out_i16[c] = std::min(std::max(conv, -kScale - 1l), kScale);
} break;
// Unsigned normalized: scale by the type's max, round, clamp to [0, max].
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8: {
uint8_t* pattern_out_ui8 = reinterpret_cast<uint8_t*>(pattern_out);
const unsigned long kScale = UINT8_MAX;
const long conv = lrintf(kScale * pattern_in_f[index[c]]);
pattern_out_ui8[c] =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16: {
uint16_t* pattern_out_ui16 = reinterpret_cast<uint16_t*>(pattern_out);
const unsigned long kScale = UINT16_MAX;
const long conv = lrintf(kScale * pattern_in_f[index[c]]);
pattern_out_ui16[c] =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24: {
// The value is stored through a 24-bit bit-field.
typedef struct Order24 { uint32_t r : 24; } Order24;
Order24* pattern_out_u24 = reinterpret_cast<Order24*>(pattern_out);
const unsigned long kScale = 0xffffff;
const long conv = lrintf(kScale * pattern_in_f[index[c]]);
pattern_out_u24[c].r =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555: {
// Packed 5:5:5 pixel; all three fields are written, then return.
typedef struct Order555 {
uint32_t b : 5;
uint32_t g : 5;
uint32_t r : 5;
} Order555;
Order555* pattern_out_u555 = reinterpret_cast<Order555*>(pattern_out);
const unsigned long kScale = 0x1f;
long conv = lrintf(kScale * pattern_in_f[index[0]]);
pattern_out_u555->r =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
conv = lrintf(kScale * pattern_in_f[index[1]]);
pattern_out_u555->g =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
conv = lrintf(kScale * pattern_in_f[index[2]]);
pattern_out_u555->b =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
return;
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565: {
// Packed 5:6:5 pixel; green has six bits, hence the changing scale.
typedef struct Order565 {
uint32_t b : 5;
uint32_t g : 6;
uint32_t r : 5;
} Order565;
Order565* pattern_out_u565 = reinterpret_cast<Order565*>(pattern_out);
unsigned long scale = 0x1f;
long conv = lrintf(scale * pattern_in_f[index[0]]);
pattern_out_u565->r =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), scale);
scale = 0x3f;
conv = lrintf(scale * pattern_in_f[index[1]]);
pattern_out_u565->g =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), scale);
scale = 0x1f;
conv = lrintf(scale * pattern_in_f[index[2]]);
pattern_out_u565->b =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), scale);
return;
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010: {
// Packed 10:10:10 pixel; all three fields are written, then return.
typedef struct Order101010 {
uint32_t b : 10;
uint32_t g : 10;
uint32_t r : 10;
} Order101010;
Order101010* pattern_out_u101010 =
reinterpret_cast<Order101010*>(pattern_out);
const unsigned long kScale = 0x3ff;
long conv = lrintf(kScale * pattern_in_f[index[0]]);
pattern_out_u101010->r =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
conv = lrintf(kScale * pattern_in_f[index[1]]);
pattern_out_u101010->g =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
conv = lrintf(kScale * pattern_in_f[index[2]]);
pattern_out_u101010->b =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
return;
} break;
// Integer types: the 32-bit input is narrowed by plain assignment, with
// no clamping.
case HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8: {
int8_t* pattern_out_i8 = reinterpret_cast<int8_t*>(pattern_out);
pattern_out_i8[c] = pattern_in_i32[index[c]];
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16: {
int16_t* pattern_out_i16 = reinterpret_cast<int16_t*>(pattern_out);
pattern_out_i16[c] = pattern_in_i32[index[c]];
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32: {
int32_t* pattern_out_i32 = reinterpret_cast<int32_t*>(pattern_out);
pattern_out_i32[c] = pattern_in_i32[index[c]];
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: {
uint8_t* pattern_out_ui8 = reinterpret_cast<uint8_t*>(pattern_out);
pattern_out_ui8[c] = pattern_in_ui32[index[c]];
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: {
uint16_t* pattern_out_ui16 = reinterpret_cast<uint16_t*>(pattern_out);
pattern_out_ui16[c] = pattern_in_ui32[index[c]];
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: {
uint32_t* pattern_out_ui32 = reinterpret_cast<uint32_t*>(pattern_out);
pattern_out_ui32[c] = pattern_in_ui32[index[c]];
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT: {
// The float input is converted to a half bit pattern by FloatToHalf().
uint16_t* pattern_out_ui16 = reinterpret_cast<uint16_t*>(pattern_out);
pattern_out_ui16[c] = FloatToHalf(pattern_in_f[index[c]]);
break;
}
case HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT: {
float* pattern_out_f = reinterpret_cast<float*>(pattern_out);
pattern_out_f[c] = pattern_in_f[index[c]];
} break;
default:
assert(false && "Should not reach here.");
break;
}
}
}
/// @brief Fill a region of an image's backing storage with a pattern using
/// a host-side copy.
///
/// The pattern is packed once into the image's pixel format with
/// FormatPattern(), then written to every pixel of the region.
/// Always returns HSA_STATUS_SUCCESS.
hsa_status_t ImageManager::FillImage(const Image& image, const void* pattern,
                                     const hsa_ext_image_region_t& region) {
  const hsa_dim3_t& start = region.offset;
  const hsa_dim3_t& extent = region.range;

  ImageProperty image_prop =
      GetImageProperty(image.component, image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);

  const size_t element_size = image_prop.element_size;
  assert(element_size != 0);

  const size_t row_pitch = image.row_pitch;
  const size_t slice_pitch = image.slice_pitch;

  // Destination bytes.
  unsigned char* const base = static_cast<unsigned char*>(image.data);

  // One pixel packed in the image's format.
  char fill_value[4 * sizeof(int)] = {0};
  FormatPattern(image.desc.format, pattern, fill_value);

  // Byte offset of the first pixel of the region.
  size_t region_offset = start.x * element_size;
  region_offset += row_pitch * start.y;
  region_offset += slice_pitch * start.z;

  for (size_t z = 0; z < extent.z; ++z) {
    size_t row_offset = region_offset + z * slice_pitch;
    for (size_t y = 0; y < extent.y; ++y, row_offset += row_pitch) {
      // Write the packed pixel at every column of this row.
      size_t pixel_offset = row_offset;
      for (size_t x = 0; x < extent.x; ++x, pixel_offset += element_size) {
        memcpy((base + pixel_offset), fill_value, element_size);
      }
    }
  }

  return HSA_STATUS_SUCCESS;
}
} // namespace
@@ -0,0 +1,98 @@
#ifndef AMD_HSA_EXT_IMAGE_IMAGE_MANAGER_H
#define AMD_HSA_EXT_IMAGE_IMAGE_MANAGER_H
#include <cstring>
#include "inc/hsa.h"
#include "inc/hsa_ext_image.h"
#include "resource.h"
#include "util.h"
namespace amd {
/// @brief Abstract class for creating AMD agent specific image / sampler
/// resources and data transfer.
///
/// The device specific queries and descriptor (SRD) builders are pure
/// virtual. The copy and fill operations are virtual but declared without
/// "= 0", so this class supplies them.
class ImageManager {
public:
explicit ImageManager();
virtual ~ImageManager();
/// @brief Initialize the manager for the given agent.
virtual hsa_status_t Initialize(hsa_agent_t agent_handle) = 0;
/// @brief Counterpart of Initialize().
virtual void Cleanup() = 0;
/// @brief Retrieve device specific image property of a certain format
/// and geometry.
virtual ImageProperty GetImageProperty(
hsa_agent_t component, const hsa_ext_image_format_t& format,
hsa_ext_image_geometry_t geometry) const = 0;
/// @brief Retrieve device specific supported max width, height, depth,
/// and array size of an image geometry.
virtual void GetImageInfoMaxDimension(hsa_agent_t component,
hsa_ext_image_geometry_t geometry,
uint32_t& width, uint32_t& height,
uint32_t& depth,
uint32_t& array_size) const = 0;
/// @brief Calculate the size and alignment of the backing storage of an
/// image.
virtual hsa_status_t CalculateImageSizeAndAlignment(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info) const = 0;
/// @brief Fill image structure with device specific image object.
virtual hsa_status_t PopulateImageSrd(Image& image) const = 0;
/// @brief Fill image structure with device specific image object using the given format.
virtual hsa_status_t PopulateImageSrd(Image& image, const metadata_amd_t* desc) const = 0;
/// @brief Modify device specific image object according to the specified
/// new format.
virtual hsa_status_t ModifyImageSrd(
Image& image, hsa_ext_image_format_t& new_format) const = 0;
/// @brief Fill sampler structure with device specific sampler object.
virtual hsa_status_t PopulateSamplerSrd(Sampler& sampler) const = 0;
/// @brief Copy the content of a linear memory to an image object.
virtual hsa_status_t CopyBufferToImage(
const void* src_memory, size_t src_row_pitch, size_t src_slice_pitch,
const Image& dst_image, const hsa_ext_image_region_t& image_region);
/// @brief Copy the content of an image object to a linear memory.
virtual hsa_status_t CopyImageToBuffer(
const Image& src_image, void* dst_memory, size_t dst_row_pitch,
size_t dst_slice_pitch, const hsa_ext_image_region_t& image_region);
/// @brief Transfer images backing storage.
virtual hsa_status_t CopyImage(const Image& dst_image, const Image& src_image,
const hsa_dim3_t& dst_origin,
const hsa_dim3_t& src_origin,
const hsa_dim3_t size);
/// @brief Fill image backing storage using host copy.
virtual hsa_status_t FillImage(const Image& image, const void* pattern,
const hsa_ext_image_region_t& region);
protected:
/// @brief Convert a 32-bit float to a 16-bit half-float bit pattern.
static uint16_t FloatToHalf(float in);
/// @brief Map an 8-bit unsigned value to a float.
static inline float Normalize(uint8_t u_val);
/// @brief Map a float back to an 8-bit unsigned value.
static inline uint8_t Denormalize(float f_val);
/// @brief Convert an sRGB encoded value to linear RGB.
static float StandardToLinearRGB(float s_val);
/// @brief Convert a linear RGB value to sRGB encoding.
static float LinearToStandardRGB(float l_val);
/// @brief Pack a fill pattern into one pixel of the given image format.
static void FormatPattern(const hsa_ext_image_format_t& format,
const void* pattern_in, void* pattern_out);
private:
DISALLOW_COPY_AND_ASSIGN(ImageManager);
};
} // namespace
#endif // AMD_HSA_EXT_IMAGE_IMAGE_MANAGER_H
+554
Ver Arquivo
@@ -0,0 +1,554 @@
#define NOMINMAX
#include "image_manager_ai.h"
#include <assert.h>
#include <algorithm>
#include <climits>
#include "hsakmt.h"
#include "inc/hsa_ext_amd.h"
#include "addrlib/src/core/addrlib.h"
#include "image_runtime.h"
#include "resource.h"
#include "resource_ai.h"
#include "util.h"
#include "device_info.h"
namespace amd {
// Default constructor; delegates to the ImageManagerKv base constructor.
ImageManagerAi::ImageManagerAi() : ImageManagerKv() {}
// Destructor; the body is empty.
ImageManagerAi::~ImageManagerAi() {}
/// @brief Calculate the size and alignment of the backing storage of an
/// image by querying addrlib.
///
/// @param component              Agent the image is created for.
/// @param desc                   Image descriptor.
/// @param image_data_layout      Requested data layout (opaque or linear).
/// @param image_data_row_pitch   Requested row pitch in bytes, 0 for none.
/// @param image_data_slice_pitch Requested slice pitch in bytes, 0 for none.
/// @param image_info             Receives the size and alignment.
///
/// @return HSA_STATUS_SUCCESS on success. The error from hsa_agent_get_info
/// if the agent profile cannot be queried. HSA_STATUS_ERROR if addrlib
/// rejects the surface. HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED if a
/// requested linear pitch differs from the computed one.
hsa_status_t ImageManagerAi::CalculateImageSizeAndAlignment(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t& image_info) const {
  ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};

  hsa_profile_t profile;
  hsa_status_t status =
      hsa_agent_get_info(component, HSA_AGENT_INFO_PROFILE, &profile);
  // Without this check `profile` would be read uninitialized on failure.
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // Only opaque layouts may be tiled, and only for base-profile agents and
  // geometries other than 1DB. Everything else is linear.
  Image::TileMode tileMode = Image::TileMode::LINEAR;
  if (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE) {
    tileMode = (profile == HSA_PROFILE_BASE &&
                desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)
                   ? Image::TileMode::TILED
                   : Image::TileMode::LINEAR;
  }

  if (GetAddrlibSurfaceInfoAi(component, desc, tileMode,
                              image_data_row_pitch, image_data_slice_pitch,
                              out) == (uint32_t)(-1)) {
    return HSA_STATUS_ERROR;
  }

  // Pitches computed by addrlib, in bytes.
  size_t rowPitch = (out.bpp >> 3) * out.pitch;
  size_t slicePitch = rowPitch * out.height;

  // A caller-supplied linear pitch must match the computed pitch exactly.
  if (desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB &&
      image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR &&
      ((image_data_row_pitch && (rowPitch != image_data_row_pitch)) ||
       (image_data_slice_pitch && (slicePitch != image_data_slice_pitch)))) {
    return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED);
  }

  image_info.size = out.surfSize;
  assert(image_info.size != 0);
  image_info.alignment = out.baseAlign;
  assert(image_info.alignment != 0);

  return HSA_STATUS_SUCCESS;
}
// 2^48. NOTE(review): not referenced in the code visible here; presumably an
// upper bound for system addresses - confirm whether it is still used.
static const uint64_t kLimitSystem = 1ULL << 48;
// Reports whether `address` is in device local memory. This implementation
// ignores `address` and always returns true.
bool ImageManagerAi::IsLocalMemory(const void* address) const {
return true;
}
// Fills image.srd from an imported metadata descriptor.
//
// The eight descriptor words are copied verbatim first. The address, format
// and swizzle fields are then overwritten from this image's data pointer and
// HSA format. Returns HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the
// format is unsupported or, for non-1DB geometries, when its element size
// differs from the pixel size implied by the descriptor. Returns
// HSA_STATUS_SUCCESS otherwise.
//
// NOTE(review): `descriptor` is dereferenced without a NULL check.
hsa_status_t ImageManagerAi::PopulateImageSrd(Image& image, const metadata_amd_t* descriptor) const {
// The generic descriptor is reinterpreted as the Ai-specific layout.
metadata_amd_ai_t* desc = (metadata_amd_ai_t*)descriptor;
// NOTE(review): atc_access is assigned below but never read in this function.
bool atc_access = true;
const void* image_data_addr = image.data;
ImageProperty image_prop = image_lut_.MapFormat(image.desc.format, image.desc.geometry);
if((image_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED) ||
(image_prop.element_size == 0))
return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
const Swizzle swizzle = image_lut_.MapSwizzle(image.desc.format.channel_order);
if (IsLocalMemory(image.data)) {
atc_access = false;
// Rebase the address by subtracting local_memory_base_address_.
image_data_addr = reinterpret_cast<const void*>(
reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);
}
// Start from the imported descriptor words.
image.srd[0]=desc->word0.u32All;
image.srd[1]=desc->word1.u32All;
image.srd[2]=desc->word2.u32All;
image.srd[3]=desc->word3.u32All;
image.srd[4]=desc->word4.u32All;
image.srd[5]=desc->word5.u32All;
image.srd[6]=desc->word6.u32All;
image.srd[7]=desc->word7.u32All;
if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
// 1DB geometry uses the buffer resource word layout.
sq_buf_rsrc_word0_u word0;
sq_buf_rsrc_word1_u word1;
// NOTE(review): word2 is declared but never used; srd[2] keeps the
// imported value.
sq_buf_rsrc_word2_u word2;
sq_buf_rsrc_word3_u word3;
word0.val = 0;
word0.f.base_address = ext_image::PtrLow32(image_data_addr);
word1.val = image.srd[1];
word1.f.base_address_hi = ext_image::PtrHigh32(image_data_addr);
word1.f.stride = image_prop.element_size;
word3.val = image.srd[3];
word3.f.dst_sel_x = swizzle.x;
word3.f.dst_sel_y = swizzle.y;
word3.f.dst_sel_z = swizzle.z;
word3.f.dst_sel_w = swizzle.w;
word3.f.num_format = image_prop.data_type;
word3.f.data_format = image_prop.data_format;
word3.f.index_stride = image_prop.element_size;
image.srd[0] = word0.val;
image.srd[1] = word1.val;
image.srd[3] = word3.val;
} else {
// Reject the import if the descriptor's pixel size disagrees with the
// element size of the requested HSA format.
uint32_t hwPixelSize = image_lut_.GetPixelSize(desc->word1.bitfields.DATA_FORMAT, desc->word1.bitfields.NUM_FORMAT);
if(image_prop.element_size!=hwPixelSize)
return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
// Patch base address, format and swizzle directly in the srd words.
((SQ_IMG_RSRC_WORD0*)(&image.srd[0]))->bits.BASE_ADDRESS = ext_image::PtrLow40Shift8(image_data_addr);
((SQ_IMG_RSRC_WORD1*)(&image.srd[1]))->bits.BASE_ADDRESS_HI = ext_image::PtrHigh64Shift40(image_data_addr);
((SQ_IMG_RSRC_WORD1*)(&image.srd[1]))->bits.DATA_FORMAT = image_prop.data_format;
((SQ_IMG_RSRC_WORD1*)(&image.srd[1]))->bits.NUM_FORMAT = image_prop.data_type;
((SQ_IMG_RSRC_WORD3*)(&image.srd[3]))->bits.DST_SEL_X = swizzle.x;
((SQ_IMG_RSRC_WORD3*)(&image.srd[3]))->bits.DST_SEL_Y = swizzle.y;
((SQ_IMG_RSRC_WORD3*)(&image.srd[3]))->bits.DST_SEL_Z = swizzle.z;
((SQ_IMG_RSRC_WORD3*)(&image.srd[3]))->bits.DST_SEL_W = swizzle.w;
// The resource TYPE field is overridden for 1D and 1DA geometries only.
if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D) {
((SQ_IMG_RSRC_WORD3*)(&image.srd[3]))->bits.TYPE = image_lut_.MapGeometry(image.desc.geometry);
}
// Imported metadata holds the offset to metadata, add the image base address.
// The offset is rebuilt from its two fields (high part << 40, low part << 8),
// rebased, then split back into the same two fields.
uintptr_t meta = uintptr_t(((SQ_IMG_RSRC_WORD5*)(&image.srd[5]))->bits.META_DATA_ADDRESS_HI) << 40;
meta |= uintptr_t(((SQ_IMG_RSRC_WORD7*)(&image.srd[7]))->bits.META_DATA_ADDRESS) << 8;
meta += reinterpret_cast<uintptr_t>(image_data_addr);
((SQ_IMG_RSRC_WORD7*)(&image.srd[7]))->bits.META_DATA_ADDRESS = ext_image::PtrLow40Shift8((void*)meta);
((SQ_IMG_RSRC_WORD5*)(&image.srd[5]))->bits.META_DATA_ADDRESS_HI = ext_image::PtrHigh64Shift40((void*)meta);
}
//Looks like this is only used for CPU copies.
image.row_pitch = 0;//desc->word4.bits.pitch+1*desc->word3.bits.element_size;
image.slice_pitch = 0;//desc->;
//Used by HSAIL shader ABI
image.srd[8] = image.desc.format.channel_type;
image.srd[9] = image.desc.format.channel_order;
image.srd[10] = static_cast<uint32_t>(image.desc.width);
return HSA_STATUS_SUCCESS;
}
// Picks the border-color swizzle that matches a channel swizzle.
//
// The decision is driven by which destination channel selects SEL_X. Alpha
// is tested first, then red, green and blue. TEX_BC_Swizzle_XYZW is the
// fallback when none of them does.
static TEX_BC_SWIZZLE GetBcSwizzle(const Swizzle& swizzle) {
  const SEL red = (SEL)swizzle.x;
  const SEL green = (SEL)swizzle.y;
  const SEL blue = (SEL)swizzle.z;
  const SEL alpha = (SEL)swizzle.w;

  if (alpha == SEL_X) {
    // Have to use either TEX_BC_Swizzle_WZYX or TEX_BC_Swizzle_WXYZ
    //
    // For the pre-defined border color values (white, opaque black,
    // transparent black), the only thing that matters is that the alpha
    // channel winds up in the correct place (because the RGB channels are all
    // the same) so either of these TEX_BC_Swizzle enumerations will work. Not
    // sure what happens with border color palettes.
    if (blue == SEL_Y) {
      return TEX_BC_Swizzle_WZYX;  // ABGR
    }
    if ((red == SEL_X) && (green == SEL_X) && (blue == SEL_X)) {
      return TEX_BC_Swizzle_XYZW;  // RGBA
    }
    return TEX_BC_Swizzle_WXYZ;  // ARGB
  }

  if (red == SEL_X) {
    // Have to use either TEX_BC_Swizzle_XYZW or TEX_BC_Swizzle_XWYZ
    const bool is_rgba =
        (green == SEL_Y) ||
        ((green == SEL_X) && (blue == SEL_X) && (alpha == SEL_W));
    return is_rgba ? TEX_BC_Swizzle_XYZW   // RGBA
                   : TEX_BC_Swizzle_XWYZ;  // RAGB
  }

  if (green == SEL_X) {
    // GRAB, have to use TEX_BC_Swizzle_YXWZ
    return TEX_BC_Swizzle_YXWZ;
  }

  if (blue == SEL_X) {
    // BGRA, have to use TEX_BC_Swizzle_ZYXW
    return TEX_BC_Swizzle_ZYXW;
  }

  return TEX_BC_Swizzle_XYZW;
}
/// @brief Fill image.srd with the device resource descriptor for the image.
///
/// 1DB images are described by a four-word buffer resource; every other
/// geometry is described by an eight-word image resource whose pitch and
/// swizzle mode are obtained from GetAddrlibSurfaceInfoAi. image.row_pitch
/// and image.slice_pitch are updated as a side effect, and srd[8..10] carry
/// the channel type, channel order and width.
///
/// @param image Image whose desc, data, component and tile_mode are read and
/// whose srd, row_pitch and slice_pitch are written.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR when the surface layout
/// cannot be computed.
hsa_status_t ImageManagerAi::PopulateImageSrd(Image& image) const {
  ImageProperty image_prop =
      image_lut_.MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);

  // Addresses in local memory are programmed relative to the local memory
  // base; all other addresses are used unchanged.
  const void* image_data_addr = image.data;
  if (IsLocalMemory(image.data)) {
    image_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);
  }

  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    sq_buf_rsrc_word0_u word0;
    sq_buf_rsrc_word1_u word1;
    sq_buf_rsrc_word2_u word2;
    sq_buf_rsrc_word3_u word3;

    word0.val = 0;
    word0.f.base_address = ext_image::PtrLow32(image_data_addr);

    word1.val = 0;
    word1.f.base_address_hi = ext_image::PtrHigh32(image_data_addr);
    word1.f.stride = image_prop.element_size;
    word1.f.swizzle_enable = false;
    word1.f.cache_swizzle = false;

    // Zero the whole word first, like the other words, so no indeterminate
    // bits can reach the descriptor.
    word2.val = 0;
    word2.f.num_records = image.desc.width * image_prop.element_size;

    const Swizzle swizzle =
        image_lut_.MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.dst_sel_x = swizzle.x;
    word3.f.dst_sel_y = swizzle.y;
    word3.f.dst_sel_z = swizzle.z;
    word3.f.dst_sel_w = swizzle.w;
    word3.f.num_format = image_prop.data_type;
    word3.f.data_format = image_prop.data_format;
    word3.f.index_stride = image_prop.element_size;
    word3.f.type = image_lut_.MapGeometry(image.desc.geometry);

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;

    image.row_pitch = image.desc.width * image_prop.element_size;
    image.slice_pitch = image.row_pitch;
  } else {
    sq_img_rsrc_word0_u word0;
    sq_img_rsrc_word1_u word1;
    sq_img_rsrc_word2_u word2;
    sq_img_rsrc_word3_u word3;
    sq_img_rsrc_word4_u word4;
    sq_img_rsrc_word5_u word5;
    sq_img_rsrc_word6_u word6;
    sq_img_rsrc_word7_u word7;

    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
    uint32_t swizzleMode = GetAddrlibSurfaceInfoAi(
        image.component, image.desc, image.tile_mode, image.row_pitch,
        image.slice_pitch, out);
    // (uint32_t)(-1) is the failure value of GetAddrlibSurfaceInfoAi.
    if (swizzleMode == (uint32_t)(-1)) {
      return HSA_STATUS_ERROR;
    }

    assert((out.bpp / 8) == image_prop.element_size);
    const size_t row_pitch_size = out.pitch * image_prop.element_size;

    // Zero the whole word first, like the other words, so no indeterminate
    // bits can reach the descriptor.
    word0.val = 0;
    word0.f.base_address = ext_image::PtrLow40Shift8(image_data_addr);

    word1.val = 0;
    word1.f.base_address_hi = ext_image::PtrHigh64Shift40(image_data_addr);
    word1.f.min_lod = 0;
    word1.f.data_format = image_prop.data_format;
    word1.f.num_format = image_prop.data_type;

    // Width and height are stored minus one.
    word2.val = 0;
    word2.f.width = image.desc.width - 1;
    word2.f.height = image.desc.height - 1;
    word2.f.perf_mod = 0;

    const Swizzle swizzle =
        image_lut_.MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.dst_sel_x = swizzle.x;
    word3.f.dst_sel_y = swizzle.y;
    word3.f.dst_sel_z = swizzle.z;
    word3.f.dst_sel_w = swizzle.w;
    word3.f.sw_mode = swizzleMode;
    word3.f.type = image_lut_.MapGeometry(image.desc.geometry);

    const bool image_array =
        (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool image_3d = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    // Depth holds (array_size - 1) for arrays, (depth - 1) for 3D images and
    // zero for everything else.
    word4.val = 0;
    word4.f.depth =
        (image_array)
            ? std::max(image.desc.array_size, static_cast<size_t>(1)) - 1
            : (image_3d) ? image.desc.depth - 1 : 0;
    word4.f.pitch = out.pitch - 1;
    word4.f.bc_swizzle = GetBcSwizzle(swizzle);

    word5.val = 0;
    word6.val = 0;
    word7.val = 0;

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;
    image.srd[4] = word4.val;
    image.srd[5] = word5.val;
    image.srd[6] = word6.val;
    image.srd[7] = word7.val;

    image.row_pitch = row_pitch_size;
    image.slice_pitch = out.sliceSize;
  }

  // Used by HSAIL shader ABI
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}
/// @brief Re-program an existing descriptor for a new image format.
///
/// Stores new_format in image.desc.format, then rewrites only the format and
/// channel-select fields of the descriptor already held in image.srd (word 3
/// for 1DB images, words 1 and 3 otherwise) and refreshes srd[8..10].
///
/// @param image Image whose desc.format and srd are updated in place.
/// @param new_format Format to apply.
/// @return Always HSA_STATUS_SUCCESS.
hsa_status_t ImageManagerAi::ModifyImageSrd(
    Image& image, hsa_ext_image_format_t& new_format) const {
  image.desc.format = new_format;

  const ImageProperty props =
      image_lut_.MapFormat(image.desc.format, image.desc.geometry);
  assert(props.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(props.element_size != 0);

  const Swizzle channel_map =
      image_lut_.MapSwizzle(image.desc.format.channel_order);

  if (image.desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB) {
    // Image resource: format lives in word 1, channel selects in word 3.
    SQ_IMG_RSRC_WORD1* img_word1 =
        reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image.srd[1]);
    img_word1->bits.DATA_FORMAT = props.data_format;
    img_word1->bits.NUM_FORMAT = props.data_type;

    SQ_IMG_RSRC_WORD3* img_word3 =
        reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3]);
    img_word3->bits.DST_SEL_X = channel_map.x;
    img_word3->bits.DST_SEL_Y = channel_map.y;
    img_word3->bits.DST_SEL_Z = channel_map.z;
    img_word3->bits.DST_SEL_W = channel_map.w;
  } else {
    // Buffer resource: format and channel selects all live in word 3.
    SQ_BUF_RSRC_WORD3* buf_word3 =
        reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&image.srd[3]);
    buf_word3->bits.DST_SEL_X = channel_map.x;
    buf_word3->bits.DST_SEL_Y = channel_map.y;
    buf_word3->bits.DST_SEL_Z = channel_map.z;
    buf_word3->bits.DST_SEL_W = channel_map.w;
    buf_word3->bits.NUM_FORMAT = props.data_type;
    buf_word3->bits.DATA_FORMAT = props.data_format;
  }

  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}
/// @brief Fill sampler.srd with the four-word device sampler descriptor.
///
/// The addressing mode is applied identically to X, Y and Z, the filter mode
/// is applied to both magnification and minification, and Z/mip filtering is
/// disabled. Nothing is written to sampler.srd when an argument is rejected.
///
/// @param sampler Sampler whose desc is read and whose srd is written.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT for an
/// unrecognised addressing or filter mode.
hsa_status_t ImageManagerAi::PopulateSamplerSrd(amd::Sampler& sampler) const {
  const hsa_ext_sampler_descriptor_t& desc = sampler.desc;

  // Translate the addressing mode; UNDEFINED is treated like REPEAT.
  int clamp_mode = 0;
  switch (desc.address_mode) {
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE:
      clamp_mode = static_cast<int>(SQ_TEX_CLAMP_LAST_TEXEL);
      break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER:
      clamp_mode = static_cast<int>(SQ_TEX_CLAMP_BORDER);
      break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT:
      clamp_mode = static_cast<int>(SQ_TEX_MIRROR);
      break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED:
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT:
      clamp_mode = static_cast<int>(SQ_TEX_WRAP);
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Translate the filter mode.
  int xy_filter = 0;
  switch (desc.filter_mode) {
    case HSA_EXT_SAMPLER_FILTER_MODE_NEAREST:
      xy_filter = static_cast<int>(SQ_TEX_XY_FILTER_POINT);
      break;
    case HSA_EXT_SAMPLER_FILTER_MODE_LINEAR:
      xy_filter = static_cast<int>(SQ_TEX_XY_FILTER_BILINEAR);
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  SQ_IMG_SAMP_WORD0 samp0;
  samp0.u32All = 0;
  samp0.bits.CLAMP_X = clamp_mode;
  samp0.bits.CLAMP_Y = samp0.bits.CLAMP_X;
  samp0.bits.CLAMP_Z = samp0.bits.CLAMP_X;
  samp0.bits.FORCE_UNNORMALIZED =
      (desc.coordinate_mode == HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED);

  SQ_IMG_SAMP_WORD1 samp1;
  samp1.u32All = 0;
  samp1.bits.MAX_LOD = 4095;

  SQ_IMG_SAMP_WORD2 samp2;
  samp2.u32All = 0;
  samp2.bits.XY_MAG_FILTER = xy_filter;
  samp2.bits.XY_MIN_FILTER = samp2.bits.XY_MAG_FILTER;
  samp2.bits.Z_FILTER = SQ_TEX_Z_FILTER_NONE;
  samp2.bits.MIP_FILTER = SQ_TEX_MIP_FILTER_NONE;

  SQ_IMG_SAMP_WORD3 samp3;
  samp3.u32All = 0;
  // TODO: check this bit with HSAIL spec.
  samp3.bits.BORDER_COLOR_TYPE = SQ_TEX_BORDER_COLOR_TRANS_BLACK;

  sampler.srd[0] = samp0.u32All;
  sampler.srd[1] = samp1.u32All;
  sampler.srd[2] = samp2.u32All;
  sampler.srd[3] = samp3.u32All;

  return HSA_STATUS_SUCCESS;
}
/// @brief Ask the address library for the surface layout of an image.
///
/// First queries the preferred swizzle mode for the requested tiling
/// (Addr2GetPreferredSurfaceSetting), then computes the surface information
/// for that mode (Addr2ComputeSurfaceInfo).
///
/// @param component Agent used to look up the image property.
/// @param desc Image descriptor (format, geometry, dimensions).
/// @param tileMode LINEAR forbids the macro-tiled block types; any other value
/// forbids the linear block type.
/// @param image_data_row_pitch Row pitch in bytes; converted to elements and
/// passed as the requested pitch.
/// @param image_data_slice_pitch Not used by this function.
/// @param out Receives the computed surface information.
/// @return The selected swizzle mode, or (uint32_t)(-1) when the format has a
/// zero element size, an address library call fails, or the computed surface
/// size is zero.
uint32_t ImageManagerAi::GetAddrlibSurfaceInfoAi(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    Image::TileMode tileMode,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const {
  const ImageProperty image_prop =
      GetImageProperty(component, desc.format, desc.geometry);

  // A zero element size would make the pitch computation below divide by
  // zero. Callers only guard against this with assert(), which is compiled
  // out of release builds, so report it as a failure here.
  if (image_prop.element_size == 0) {
    return (uint32_t)(-1);
  }

  const AddrFormat addrlib_format = GetAddrlibFormat(image_prop);

  const uint32_t width = static_cast<uint32_t>(desc.width);
  const uint32_t height = static_cast<uint32_t>(desc.height);
  // At least one slice, otherwise the larger of array_size and depth.
  static const size_t kMinNumSlice = 1;
  const uint32_t num_slice = static_cast<uint32_t>(
      std::max(kMinNumSlice, std::max(desc.array_size, desc.depth)));

  ADDR2_COMPUTE_SURFACE_INFO_INPUT in = {0};
  in.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
  in.format = addrlib_format;
  in.bpp = static_cast<unsigned int>(image_prop.element_size) * 8;
  in.width = width;
  in.height = height;
  in.numSlices = num_slice;
  in.pitchInElement = image_data_row_pitch / image_prop.element_size;

  switch (desc.geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1D:
    case HSA_EXT_IMAGE_GEOMETRY_1DB:
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
      in.resourceType = ADDR_RSRC_TEX_1D;
      break;
    case HSA_EXT_IMAGE_GEOMETRY_2D:
    case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_2DA:
    case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
      in.resourceType = ADDR_RSRC_TEX_2D;
      break;
    case HSA_EXT_IMAGE_GEOMETRY_3D:
      in.resourceType = ADDR_RSRC_TEX_3D;
      break;
  }

  // Provisional value; replaced below by the preferred swizzle mode before
  // `in` is handed to the address library.
  in.swizzleMode = (tileMode == Image::TileMode::LINEAR) ? ADDR_SW_LINEAR : ADDR_SW_4KB;
  in.flags.texture = 1;

  ADDR2_GET_PREFERRED_SURF_SETTING_INPUT prefSettingsInput = { 0 };
  ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT prefSettingsOutput = { 0 };

  prefSettingsInput.size = sizeof(prefSettingsInput);
  prefSettingsInput.flags = in.flags;
  prefSettingsInput.bpp = in.bpp;
  prefSettingsInput.format = in.format;
  prefSettingsInput.width = in.width;
  prefSettingsInput.height = in.height;
  prefSettingsInput.numFrags = in.numFrags;
  prefSettingsInput.numSamples = in.numSamples;
  prefSettingsInput.numMipLevels = in.numMipLevels;
  prefSettingsInput.numSlices = in.numSlices;
  prefSettingsInput.resourceLoction = ADDR_RSRC_LOC_UNDEF;
  prefSettingsInput.resourceType = in.resourceType;

  if (tileMode == Image::TileMode::LINEAR) {
    // this should force linear.
    prefSettingsInput.forbiddenBlock.macroThin4KB = 1;
    prefSettingsInput.forbiddenBlock.macroThick4KB = 1;
    prefSettingsInput.forbiddenBlock.macroThin64KB = 1;
    prefSettingsInput.forbiddenBlock.macroThick64KB = 1;
  } else {
    // this should not allow linear.
    prefSettingsInput.forbiddenBlock.linear = 1;
  }

  prefSettingsInput.forbiddenBlock.micro = 1;  // but don't ever allow the 256b swizzle modes
  prefSettingsInput.forbiddenBlock.var = 1;    // and don't allow variable-size block modes

  if (ADDR_OK != Addr2GetPreferredSurfaceSetting(addr_lib_, &prefSettingsInput, &prefSettingsOutput)) {
    return (uint32_t)(-1);
  }

  in.swizzleMode = prefSettingsOutput.swizzleMode;

  out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
  if (ADDR_OK != Addr2ComputeSurfaceInfo(addr_lib_, &in, &out)) {
    return (uint32_t)(-1);
  }
  if (out.surfSize == 0) {
    return (uint32_t)(-1);
  }

  return in.swizzleMode;
}
} // namespace
@@ -0,0 +1,50 @@
#ifndef HSA_RUNTIME_EXT_IMAGE_IMAGE_MANAGER_AI_H
#define HSA_RUNTIME_EXT_IMAGE_IMAGE_MANAGER_AI_H
#include "addrlib/inc/addrinterface.h"
#include "image_manager_kv.h"
namespace amd {
/// @brief Image manager specialisation layered on top of ImageManagerKv.
///
/// Re-declares the descriptor-building entry points of the base class and
/// adds an ADDR2-based surface query (GetAddrlibSurfaceInfoAi).
/// NOTE(review): "Ai" presumably stands for Arctic Islands — confirm.
class ImageManagerAi : public ImageManagerKv {
public:
explicit ImageManagerAi();
virtual ~ImageManagerAi();
/// @brief Calculate the size and alignment of the backing storage of an
/// image.
/// @param component Agent the image is created for.
/// @param desc Image descriptor.
/// @param image_data_layout Requested data layout.
/// @param image_data_row_pitch Requested row pitch in bytes.
/// @param image_data_slice_pitch Requested slice pitch in bytes.
/// @param image_info Receives the computed size and alignment.
virtual hsa_status_t CalculateImageSizeAndAlignment(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
size_t image_data_row_pitch, size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info) const;
/// @brief Fill image structure with device specific image object.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR when the surface layout
/// cannot be computed.
virtual hsa_status_t PopulateImageSrd(Image& image) const;
/// @brief Fill image structure with device specific image object, starting
/// from a caller-supplied metadata descriptor.
virtual hsa_status_t PopulateImageSrd(Image& image, const metadata_amd_t* desc) const;
/// @brief Modify device specific image object according to the specified
/// new format. Also stores the new format in the image descriptor.
virtual hsa_status_t ModifyImageSrd(Image& image,
hsa_ext_image_format_t& new_format) const;
/// @brief Fill sampler structure with device specific sampler object.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for an unrecognised addressing
/// or filter mode.
virtual hsa_status_t PopulateSamplerSrd(Sampler& sampler) const;
protected:
/// @brief Query the address library for the surface layout of an image.
/// @param out Receives the computed surface information.
/// @return The selected swizzle mode, or (uint32_t)(-1) on failure.
uint32_t GetAddrlibSurfaceInfoAi(hsa_agent_t component,
const hsa_ext_image_descriptor_t& desc,
Image::TileMode tileMode,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const;
/// @brief Tell whether an address lies in device-local memory.
/// NOTE(review): exact criterion is in the implementation file — confirm
/// how it differs from ImageManagerKv::IsLocalMemory.
bool IsLocalMemory(const void* address) const;
private:
DISALLOW_COPY_AND_ASSIGN(ImageManagerAi);
};
}
#endif // HSA_RUNTIME_EXT_IMAGE_IMAGE_MANAGER_AI_H
+966
Ver Arquivo
@@ -0,0 +1,966 @@
#define NOMINMAX
#include "image_manager_kv.h"
#include <assert.h>
#include <algorithm>
#include <climits>
#include "hsakmt.h"
#include "inc/hsa.h"
#include "inc/hsa_ext_amd.h"
#include "addrlib/inc/addrinterface.h"
#include "addrlib/src/core/addrlib.h"
#include "image_runtime.h"
#include "resource.h"
#include "resource_kv.h"
#include "util.h"
#include "device_info.h"
namespace amd {
/// @brief Construct the manager; device state is set up later by Initialize.
ImageManagerKv::ImageManagerKv()
    : ImageManager() {
}

/// @brief Destroy the manager; resources are released by Cleanup, not here.
ImageManagerKv::~ImageManagerKv() {
}
/// @brief Bind the manager to an agent and set up its device state.
///
/// Queries the chip id and tile configuration, creates the address library
/// instance, derives the memory type from the agent's coherency mode, records
/// the base address of the local memory region (if any) and resets the blit
/// queue so it can be created on demand.
///
/// @param agent_handle Agent this manager serves.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR when the address library
/// cannot be created.
///
/// NOTE(review): every other failure is only checked with assert(), so in a
/// build with NDEBUG those failures are ignored and execution continues.
hsa_status_t ImageManagerKv::Initialize(hsa_agent_t agent_handle) {
agent_ = agent_handle;
hsa_status_t status = GetGPUAsicID(agent_, &chip_id_);
// NOTE(review): chip_id_ is consumed here before `status` is checked.
uint32_t major_ver = MajorVerFromDevID(chip_id_);
assert(status == HSA_STATUS_SUCCESS);
family_type_ = DevIDToAddrLibFamily(chip_id_);
// Caller-provided storage for up to 40 tile and 40 macro-tile entries.
HsaGpuTileConfig tileConfig = {0};
unsigned int tc[40];
unsigned int mtc[40];
tileConfig.TileConfig = &tc[0];
tileConfig.NumTileConfigs = 40;
tileConfig.MacroTileConfig = &mtc[0];
tileConfig.NumMacroTileConfigs = 40;
// The tile configuration is looked up by driver node id, not agent handle.
uint32_t node_id = 0;
status = hsa_agent_get_info(agent_, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID), &node_id);
assert(status == HSA_STATUS_SUCCESS);
HSAKMT_STATUS stat = hsaKmtGetTileConfig(node_id, &tileConfig);
assert(stat == HSAKMT_STATUS_SUCCESS);
// Initialize address library.
// TODO(bwicakso) hard coded based on UGL parameters.
// Need to get this information from KMD.
addr_lib_ = NULL;
ADDR_CREATE_INPUT addr_create_input = {0};
ADDR_CREATE_OUTPUT addr_create_output = {0};
// Major version 9 and above use the Arctic Islands engine id, everything
// older uses the Southern Islands one.
if (major_ver >= 9) {
addr_create_input.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
} else {
addr_create_input.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
}
addr_create_input.chipFamily = family_type_;
addr_create_input.chipRevision = 0; // TODO(bwicakso): find how to get this.
ADDR_CREATE_FLAGS create_flags = {0};
create_flags.value = 0;
create_flags.useTileIndex = 1;
addr_create_input.createFlags = create_flags;
addr_create_input.callbacks.allocSysMem = AllocSysMem;
addr_create_input.callbacks.freeSysMem = FreeSysMem;
addr_create_input.callbacks.debugPrint = 0;
// Forward the tile configuration reported by the driver to the address
// library.
ADDR_REGISTER_VALUE reg_val = {0};
reg_val.gbAddrConfig = tileConfig.GbAddrConfig;
reg_val.noOfBanks = tileConfig.NumBanks;
reg_val.noOfRanks = tileConfig.NumRanks;
reg_val.pTileConfig = tileConfig.TileConfig;
reg_val.noOfEntries = tileConfig.NumTileConfigs;
reg_val.noOfMacroEntries = tileConfig.NumMacroTileConfigs;
reg_val.pMacroTileConfig = tileConfig.MacroTileConfig;
addr_create_input.regValue = reg_val;
addr_create_input.minPitchAlignPixels = 0;
ADDR_E_RETURNCODE addr_ret =
AddrCreate(&addr_create_input, &addr_create_output);
// This is the only failure that is reported to the caller.
if (addr_ret == ADDR_OK) {
addr_lib_ = addr_create_output.hLib;
} else {
return HSA_STATUS_ERROR;
}
// The ImageManagerKv::Initialize is called on the first call to
// hsa_ext_image_*, so checking the coherency mode here is fine as long as
// the change to the coherency mode happens before a call to
// hsa_ext_image_create.
hsa_amd_coherency_type_t coherency_type;
status = hsa_amd_coherency_get_type(agent_, &coherency_type);
assert(status == HSA_STATUS_SUCCESS);
// mtype_ is 3 for coherent agents and 1 otherwise.
mtype_ = (coherency_type == HSA_AMD_COHERENCY_TYPE_COHERENT) ? 3 : 1;
// TODO: handle the case where the call to hsa_set_memory_type happens after
// hsa_ext_image_create.
hsa_region_t local_region = {0};
status =
hsa_agent_iterate_regions(agent_, GetLocalMemoryRegion, &local_region);
assert(status == HSA_STATUS_SUCCESS);
// The base address stays 0 when no local region was reported.
local_memory_base_address_ = 0;
if (local_region.handle != 0) {
status = hsa_region_get_info(
local_region, static_cast<hsa_region_info_t>(HSA_AMD_REGION_INFO_BASE),
&local_memory_base_address_);
assert(status == HSA_STATUS_SUCCESS);
}
// Zeroed the queue object so it can be created on demand.
blit_queue_.queue_ = NULL;
blit_queue_.cached_index_ = 0;
return HSA_STATUS_SUCCESS;
}
/// @brief Release the blit queue and the address library instance.
///
/// Each handle is reset to NULL after it is destroyed, so calling Cleanup
/// more than once does not destroy the same handle twice.
void ImageManagerKv::Cleanup() {
  if (blit_queue_.queue_ != NULL) {
    hsa_queue_destroy(blit_queue_.queue_);
    blit_queue_.queue_ = NULL;
  }

  if (addr_lib_ != NULL) {
    AddrDestroy(addr_lib_);
    addr_lib_ = NULL;
  }
}
/// @brief Look up the device property of a format/geometry pair.
///
/// @param component Not used by this implementation.
/// @param format Image format to look up.
/// @param geometry Image geometry to look up.
/// @return The entry reported by the image lookup table.
ImageProperty ImageManagerKv::GetImageProperty(
    hsa_agent_t component, const hsa_ext_image_format_t& format,
    hsa_ext_image_geometry_t geometry) const {
  const ImageProperty property = image_lut_.MapFormat(format, geometry);
  return property;
}
/// @brief Report the maximum dimensions supported for a geometry.
///
/// @param component Not used by this implementation.
/// @param geometry Geometry whose limits are requested.
/// @param width Receives the maximum width.
/// @param height Receives the maximum height.
/// @param depth Receives the maximum depth.
/// @param array_size Receives the maximum array size.
void ImageManagerKv::GetImageInfoMaxDimension(hsa_agent_t component,
                                              hsa_ext_image_geometry_t geometry,
                                              uint32_t& width, uint32_t& height,
                                              uint32_t& depth,
                                              uint32_t& array_size) const {
  // All four limits come straight from the image lookup table.
  width = image_lut_.GetMaxWidth(geometry);
  height = image_lut_.GetMaxHeight(geometry);
  depth = image_lut_.GetMaxDepth(geometry);
  array_size = image_lut_.GetMaxArraySize(geometry);
}
/// @brief Calculate the size and alignment of the backing storage of an
/// image.
///
/// Opaque layouts are tiled only on base-profile agents and never for 1DB
/// images; every other case is laid out linearly. For explicit linear layouts
/// (except 1DB) a non-zero caller-supplied pitch must match the pitch computed
/// by the address library.
///
/// @param component Agent the image is created for.
/// @param desc Image descriptor.
/// @param image_data_layout Requested data layout.
/// @param image_data_row_pitch Requested row pitch in bytes, 0 for "any".
/// @param image_data_slice_pitch Requested slice pitch in bytes, 0 for "any".
/// @param image_info Receives the surface size and base alignment.
/// @return HSA_STATUS_SUCCESS; the failing status of the profile query;
/// HSA_STATUS_ERROR when the surface layout cannot be computed; or
/// HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED on a pitch mismatch.
hsa_status_t ImageManagerKv::CalculateImageSizeAndAlignment(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t& image_info) const {
  ADDR_COMPUTE_SURFACE_INFO_OUTPUT out = {0};

  hsa_profile_t profile;
  hsa_status_t status = hsa_agent_get_info(component, HSA_AGENT_INFO_PROFILE, &profile);
  // `profile` is only valid when the query succeeded; reading it after a
  // failed query would use an uninitialised value.
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  Image::TileMode tileMode = Image::TileMode::LINEAR;
  if (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE) {
    tileMode = (profile == HSA_PROFILE_BASE &&
                desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB) ?
                   Image::TileMode::TILED : Image::TileMode::LINEAR;
  }

  if (!GetAddrlibSurfaceInfo(component, desc, tileMode,
                             image_data_row_pitch, image_data_slice_pitch, out)) {
    return HSA_STATUS_ERROR;
  }

  // out.bpp is in bits, so (bpp >> 3) is the element size in bytes.
  size_t rowPitch = (out.bpp >> 3) * out.pitch;
  size_t slicePitch = rowPitch * out.height;
  if (desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB &&
      image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR &&
      ((image_data_row_pitch && (rowPitch != image_data_row_pitch)) ||
       (image_data_slice_pitch && (slicePitch != image_data_slice_pitch)))) {
    return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED);
  }

  image_info.size = out.surfSize;
  assert(image_info.size != 0);
  image_info.alignment = out.baseAlign;
  assert(image_info.alignment != 0);

  return HSA_STATUS_SUCCESS;
}
// Upper bound (exclusive) of the address range treated as system memory.
static const uint64_t kLimitSystem = 1ULL << 48;

/// @brief Tell whether an address is treated as device-local memory.
///
/// Devices whose major version is 8 or above treat every address as local.
/// Older devices use the address value alone: on large-model builds anything
/// at or above 2^48 is local, on other builds nothing is.
///
/// @param address Address to classify.
/// @return true when the address is treated as local memory.
bool ImageManagerKv::IsLocalMemory(const void* address) const {
  if (MajorVerFromDevID(chip_id_) >= 8) {
    return true;
  }

#ifdef HSA_LARGE_MODEL
  // Fast path without querying local memory region info.
  // User mode system memory addressable by CPU is 0 to 2^48.
  const uintptr_t address_bits = reinterpret_cast<uintptr_t>(address);
  return !(address_bits < kLimitSystem);
#else
  // No local memory on 32 bit.
  return false;
#endif
}
/// @brief Fill image.srd from a caller-supplied metadata descriptor.
///
/// The eight descriptor words are copied from the metadata and then patched
/// with this image's base address, format, memory type, ATC flag and channel
/// selects. image.row_pitch and image.slice_pitch are derived from the pitch
/// and height fields of the metadata.
///
/// @param image Image whose desc and data are read and whose srd, row_pitch
/// and slice_pitch are written.
/// @param descriptor Metadata, interpreted as metadata_amd_ci_vi_t.
/// @return HSA_STATUS_SUCCESS, or
/// HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the format is not
/// supported or its element size disagrees with the pixel size implied by
/// the metadata.
hsa_status_t ImageManagerKv::PopulateImageSrd(Image& image, const metadata_amd_t* descriptor) const {
  const metadata_amd_ci_vi_t* meta = (const metadata_amd_ci_vi_t*)descriptor;

  const ImageProperty props =
      image_lut_.MapFormat(image.desc.format, image.desc.geometry);
  const bool unsupported =
      (props.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED) ||
      (props.element_size == 0);
  if (unsupported) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  // The format requested for the image must have the same pixel size as the
  // format recorded in the metadata.
  const uint32_t hw_pixel_size = image_lut_.GetPixelSize(
      meta->word1.bitfields.data_format, meta->word1.bitfields.num_format);
  if (props.element_size != hw_pixel_size) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  const Swizzle channel_map =
      image_lut_.MapSwizzle(image.desc.format.channel_order);

  // Local memory is addressed relative to the local memory base, without ATC
  // and with memory type 1.
  bool use_atc = true;
  uint32_t memory_type = mtype_;
  const void* base_addr = image.data;
  if (IsLocalMemory(image.data)) {
    use_atc = false;
    memory_type = 1;
    base_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);
  }

  // Start from the metadata words verbatim.
  image.srd[0] = meta->word0.u32_all;
  image.srd[1] = meta->word1.u32_all;
  image.srd[2] = meta->word2.u32_all;
  image.srd[3] = meta->word3.u32_all;
  image.srd[4] = meta->word4.u32_all;
  image.srd[5] = meta->word5.u32_all;
  image.srd[6] = meta->word6.u32_all;
  image.srd[7] = meta->word7.u32_all;

  // Then patch the fields that depend on this particular image.
  SQ_IMG_RSRC_WORD0* srd0 = (SQ_IMG_RSRC_WORD0*)(&image.srd[0]);
  SQ_IMG_RSRC_WORD1* srd1 = (SQ_IMG_RSRC_WORD1*)(&image.srd[1]);
  SQ_IMG_RSRC_WORD3* srd3 = (SQ_IMG_RSRC_WORD3*)(&image.srd[3]);
  SQ_IMG_RSRC_WORD7* srd7 = (SQ_IMG_RSRC_WORD7*)(&image.srd[7]);

  srd0->bits.base_address = ext_image::PtrLow40Shift8(base_addr);

  srd1->bits.base_address_hi = ext_image::PtrHigh64Shift40(base_addr);
  srd1->bits.data_format = props.data_format;
  srd1->bits.num_format = props.data_type;
  srd1->bits.mtype = memory_type;

  srd3->bits.atc = use_atc;
  srd3->bits.dst_sel_x = channel_map.x;
  srd3->bits.dst_sel_y = channel_map.y;
  srd3->bits.dst_sel_z = channel_map.z;
  srd3->bits.dst_sel_w = channel_map.w;

  // The metadata value is offset by the image base address.
  srd7->bits.meta_data_address += ext_image::PtrLow40Shift8(base_addr);

  //Looks like this is only used for CPU copies.
  image.row_pitch = (meta->word4.bits.pitch + 1) * props.element_size;
  image.slice_pitch = image.row_pitch * (meta->word2.bits.height + 1);

  //Used by HSAIL shader ABI
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}
/// @brief Fill image.srd with the device resource descriptor for the image.
///
/// 1DB images are described by a four-word buffer resource; every other
/// geometry is described by an eight-word image resource whose pitch and tile
/// index are obtained from GetAddrlibSurfaceInfo. image.row_pitch and
/// image.slice_pitch are updated as a side effect, and srd[8..10] carry the
/// channel type, channel order and width.
///
/// @param image Image whose desc, data, component and tile_mode are read and
/// whose srd, row_pitch and slice_pitch are written.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR when the surface layout
/// cannot be computed.
hsa_status_t ImageManagerKv::PopulateImageSrd(Image& image) const {
  ImageProperty image_prop =
      image_lut_.MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);

  // Local memory is addressed relative to the local memory base, without ATC
  // and with memory type 1.
  bool atc_access = true;
  uint32_t mtype = mtype_;
  const void* image_data_addr = image.data;
  if (IsLocalMemory(image.data)) {
    atc_access = false;
    mtype = 1;
    image_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);
  }

  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    SQ_BUF_RSRC_WORD0 word0;
    SQ_BUF_RSRC_WORD1 word1;
    SQ_BUF_RSRC_WORD2 word2;
    SQ_BUF_RSRC_WORD3 word3;

    word0.u32_all = 0;
    word0.bits.base_address = ext_image::PtrLow32(image_data_addr);

    word1.u32_all = 0;
    word1.bits.base_address_hi = ext_image::PtrHigh32(image_data_addr);
    word1.bits.stride = image_prop.element_size;
    word1.bits.swizzle_enable = false;
    word1.bits.cache_swizzle = false;

    // Zero the whole word first, like the other words, so no indeterminate
    // bits can reach the descriptor.
    word2.u32_all = 0;
    // Devices below major version 8 count records in elements; newer ones
    // count them in bytes.
    uint32_t major_ver = MajorVerFromDevID(chip_id_);
    word2.bits.num_records = (major_ver < 8) ?
        image.desc.width : image.desc.width * image_prop.element_size;

    const Swizzle swizzle =
        image_lut_.MapSwizzle(image.desc.format.channel_order);
    word3.u32_all = 0;
    word3.bits.dst_sel_x = swizzle.x;
    word3.bits.dst_sel_y = swizzle.y;
    word3.bits.dst_sel_z = swizzle.z;
    word3.bits.dst_sel_w = swizzle.w;
    word3.bits.num_format = image_prop.data_type;
    word3.bits.data_format = image_prop.data_format;
    word3.bits.atc = atc_access;
    word3.bits.element_size = image_prop.element_size;
    word3.bits.type = image_lut_.MapGeometry(image.desc.geometry);
    word3.bits.mtype = mtype;

    image.srd[0] = word0.u32_all;
    image.srd[1] = word1.u32_all;
    image.srd[2] = word2.u32_all;
    image.srd[3] = word3.u32_all;

    image.row_pitch = image.desc.width * image_prop.element_size;
    image.slice_pitch = image.row_pitch;
  } else {
    SQ_IMG_RSRC_WORD0 word0;
    SQ_IMG_RSRC_WORD1 word1;
    SQ_IMG_RSRC_WORD2 word2;
    SQ_IMG_RSRC_WORD3 word3;
    SQ_IMG_RSRC_WORD4 word4;
    SQ_IMG_RSRC_WORD5 word5;
    SQ_IMG_RSRC_WORD6 word6;
    SQ_IMG_RSRC_WORD7 word7;

    ADDR_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
    if (!GetAddrlibSurfaceInfo(image.component, image.desc, image.tile_mode,
                               image.row_pitch, image.slice_pitch, out)) {
      return HSA_STATUS_ERROR;
    }

    assert((out.bpp / 8) == image_prop.element_size);
    const size_t row_pitch_size = out.pitch * image_prop.element_size;

    // Zero the whole word first, like the other words, so no indeterminate
    // bits can reach the descriptor.
    word0.u32_all = 0;
    word0.bits.base_address = ext_image::PtrLow40Shift8(image_data_addr);

    word1.u32_all = 0;
    word1.bits.base_address_hi = ext_image::PtrHigh64Shift40(image_data_addr);
    word1.bits.min_lod = 0;
    word1.bits.data_format = image_prop.data_format;
    word1.bits.num_format = image_prop.data_type;
    word1.bits.mtype = mtype;

    // Width and height are stored minus one.
    word2.u32_all = 0;
    word2.bits.width = image.desc.width - 1;
    word2.bits.height = image.desc.height - 1;
    word2.bits.perf_mod = 0;
    word2.bits.interlaced = 0;

    const Swizzle swizzle =
        image_lut_.MapSwizzle(image.desc.format.channel_order);
    word3.u32_all = 0;
    word3.bits.dst_sel_x = swizzle.x;
    word3.bits.dst_sel_y = swizzle.y;
    word3.bits.dst_sel_z = swizzle.z;
    word3.bits.dst_sel_w = swizzle.w;
    word3.bits.tiling_index = out.tileIndex;
    // Set only when both the row pitch in bytes and the height are powers of
    // two.
    word3.bits.pow2_pad = (ext_image::IsPowerOfTwo(row_pitch_size) &&
                           ext_image::IsPowerOfTwo(image.desc.height))
                              ? 1
                              : 0;
    word3.bits.type = image_lut_.MapGeometry(image.desc.geometry);
    word3.bits.atc = atc_access;

    const bool image_array =
        (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool image_3d = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    // Depth holds (array_size - 1) for arrays, (depth - 1) for 3D images and
    // zero for everything else.
    word4.u32_all = 0;
    word4.bits.depth =
        (image_array)
            ? std::max(image.desc.array_size, static_cast<size_t>(1)) - 1
            : (image_3d) ? image.desc.depth - 1 : 0;
    word4.bits.pitch = out.pitch - 1;

    word5.u32_all = 0;
    word5.bits.last_array =
        (image_array)
            ? (std::max(image.desc.array_size, static_cast<size_t>(1)) - 1)
            : 0;

    word6.u32_all = 0;
    word7.u32_all = 0;

    image.srd[0] = word0.u32_all;
    image.srd[1] = word1.u32_all;
    image.srd[2] = word2.u32_all;
    image.srd[3] = word3.u32_all;
    image.srd[4] = word4.u32_all;
    image.srd[5] = word5.u32_all;
    image.srd[6] = word6.u32_all;
    image.srd[7] = word7.u32_all;

    image.row_pitch = row_pitch_size;
    image.slice_pitch = out.sliceSize;
  }

  // Used by HSAIL shader ABI
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}
/// @brief Re-program an existing descriptor for a new image format.
///
/// Stores new_format in image.desc.format, then rewrites only the format and
/// channel-select fields of the descriptor already held in image.srd (word 3
/// for 1DB images, words 1 and 3 otherwise) and refreshes srd[8..10].
///
/// @param image Image whose desc.format and srd are updated in place.
/// @param new_format Format to apply.
/// @return Always HSA_STATUS_SUCCESS.
hsa_status_t ImageManagerKv::ModifyImageSrd(
    Image& image, hsa_ext_image_format_t& new_format) const {
  image.desc.format = new_format;

  const ImageProperty props =
      image_lut_.MapFormat(image.desc.format, image.desc.geometry);
  assert(props.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(props.element_size != 0);

  const Swizzle channel_map =
      image_lut_.MapSwizzle(image.desc.format.channel_order);

  if (image.desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB) {
    // Image resource: format lives in word 1, channel selects in word 3.
    SQ_IMG_RSRC_WORD1* img_word1 =
        reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image.srd[1]);
    img_word1->bits.data_format = props.data_format;
    img_word1->bits.num_format = props.data_type;

    SQ_IMG_RSRC_WORD3* img_word3 =
        reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3]);
    img_word3->bits.dst_sel_x = channel_map.x;
    img_word3->bits.dst_sel_y = channel_map.y;
    img_word3->bits.dst_sel_z = channel_map.z;
    img_word3->bits.dst_sel_w = channel_map.w;
  } else {
    // Buffer resource: format and channel selects all live in word 3.
    SQ_BUF_RSRC_WORD3* buf_word3 =
        reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&image.srd[3]);
    buf_word3->bits.dst_sel_x = channel_map.x;
    buf_word3->bits.dst_sel_y = channel_map.y;
    buf_word3->bits.dst_sel_z = channel_map.z;
    buf_word3->bits.dst_sel_w = channel_map.w;
    buf_word3->bits.num_format = props.data_type;
    buf_word3->bits.data_format = props.data_format;
  }

  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}
/// @brief Fill sampler.srd with the four-word device sampler descriptor.
///
/// The addressing mode is applied identically to X, Y and Z, the filter mode
/// is applied to both magnification and minification, and Z/mip filtering is
/// disabled. Nothing is written to sampler.srd when an argument is rejected.
///
/// @param sampler Sampler whose desc is read and whose srd is written.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT for an
/// unrecognised addressing or filter mode.
hsa_status_t ImageManagerKv::PopulateSamplerSrd(amd::Sampler& sampler) const {
  const hsa_ext_sampler_descriptor_t& desc = sampler.desc;

  // Translate the addressing mode; UNDEFINED is treated like REPEAT.
  int clamp_mode = 0;
  switch (desc.address_mode) {
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE:
      clamp_mode = static_cast<int>(SQ_TEX_CLAMP_LAST_TEXEL);
      break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER:
      clamp_mode = static_cast<int>(SQ_TEX_CLAMP_BORDER);
      break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT:
      clamp_mode = static_cast<int>(SQ_TEX_MIRROR);
      break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED:
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT:
      clamp_mode = static_cast<int>(SQ_TEX_WRAP);
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Translate the filter mode.
  int xy_filter = 0;
  switch (desc.filter_mode) {
    case HSA_EXT_SAMPLER_FILTER_MODE_NEAREST:
      xy_filter = static_cast<int>(SQ_TEX_XY_FILTER_POINT);
      break;
    case HSA_EXT_SAMPLER_FILTER_MODE_LINEAR:
      xy_filter = static_cast<int>(SQ_TEX_XY_FILTER_BILINEAR);
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  SQ_IMG_SAMP_WORD0 samp0;
  samp0.u32_all = 0;
  samp0.bits.clamp_x = clamp_mode;
  samp0.bits.clamp_y = samp0.bits.clamp_x;
  samp0.bits.clamp_z = samp0.bits.clamp_x;
  samp0.bits.force_unormalized =
      (desc.coordinate_mode == HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED);

  SQ_IMG_SAMP_WORD1 samp1;
  samp1.u32_all = 0;
  samp1.bits.max_lod = 4095;

  SQ_IMG_SAMP_WORD2 samp2;
  samp2.u32_all = 0;
  samp2.bits.xy_mag_filter = xy_filter;
  samp2.bits.xy_min_filter = samp2.bits.xy_mag_filter;
  samp2.bits.z_filter = SQ_TEX_Z_FILTER_NONE;
  samp2.bits.mip_filter = SQ_TEX_MIP_FILTER_NONE;

  SQ_IMG_SAMP_WORD3 samp3;
  samp3.u32_all = 0;
  // TODO: check this bit with HSAIL spec.
  samp3.bits.border_color_type = SQ_TEX_BORDER_COLOR_TRANS_BLACK;

  sampler.srd[0] = samp0.u32_all;
  sampler.srd[1] = samp1.u32_all;
  sampler.srd[2] = samp2.u32_all;
  sampler.srd[3] = samp3.u32_all;

  return HSA_STATUS_SUCCESS;
}
/// @brief Copy a region from linear memory into an image using the blit
/// kernel.
///
/// @param src_memory Source buffer.
/// @param src_row_pitch Row pitch of the source buffer.
/// @param src_slice_pitch Slice pitch of the source buffer.
/// @param dst_image Destination image.
/// @param image_region Region of the destination image to write.
/// @return HSA_STATUS_ERROR_OUT_OF_RESOURCES when the blit queue is not
/// available, otherwise the status reported by the blit kernel.
hsa_status_t ImageManagerKv::CopyBufferToImage(
    const void* src_memory, size_t src_row_pitch, size_t src_slice_pitch,
    const Image& dst_image, const hsa_ext_image_region_t& image_region) {
  const bool queue_ready = (BlitQueueInit().queue_ != NULL);
  if (!queue_ready) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  const hsa_status_t copy_status =
      ext_image::ImageRuntime::instance()->blit_kernel().CopyBufferToImage(
          blit_queue_, blit_code_catalog_, src_memory, src_row_pitch,
          src_slice_pitch, dst_image, image_region);
  return copy_status;
}
/// @brief Copy a region from an image into linear memory using the blit
/// kernel.
///
/// @param src_image Source image.
/// @param dst_memory Destination buffer.
/// @param dst_row_pitch Row pitch of the destination buffer.
/// @param dst_slice_pitch Slice pitch of the destination buffer.
/// @param image_region Region of the source image to read.
/// @return HSA_STATUS_ERROR_OUT_OF_RESOURCES when the blit queue is not
/// available, otherwise the status reported by the blit kernel.
hsa_status_t ImageManagerKv::CopyImageToBuffer(
    const Image& src_image, void* dst_memory, size_t dst_row_pitch,
    size_t dst_slice_pitch, const hsa_ext_image_region_t& image_region) {
  const bool queue_ready = (BlitQueueInit().queue_ != NULL);
  if (!queue_ready) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  const hsa_status_t copy_status =
      ext_image::ImageRuntime::instance()->blit_kernel().CopyImageToBuffer(
          blit_queue_, blit_code_catalog_, src_image, dst_memory,
          dst_row_pitch, dst_slice_pitch, image_region);
  return copy_status;
}
/// @brief Copy a region between two images using the blit kernel.
///
/// Source and destination must share channel order and channel type, with one
/// exception: UNORM_INT8 images may be copied between SRGBA and RGBA, in which
/// case the blit kernel converts between standard and linear RGB. For that
/// conversion the destination descriptor's number format is temporarily
/// switched to TYPE_UNORM and restored after the copy.
///
/// @param dst_image Destination image (its srd[1] is patched during an
/// SRGBA/RGBA conversion, despite the const reference).
/// @param src_image Source image.
/// @param dst_origin Origin of the region in the destination.
/// @param src_origin Origin of the region in the source.
/// @param size Extent of the region.
/// @return HSA_STATUS_ERROR_OUT_OF_RESOURCES when the blit queue is not
/// available, HSA_STATUS_ERROR_INVALID_ARGUMENT for an unsupported format
/// pair, otherwise the status reported by the blit kernel.
hsa_status_t ImageManagerKv::CopyImage(const Image& dst_image,
                                       const Image& src_image,
                                       const hsa_dim3_t& dst_origin,
                                       const hsa_dim3_t& src_origin,
                                       const hsa_dim3_t size) {
  if (BlitQueueInit().queue_ == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  const hsa_ext_image_channel_order32_t from_order =
      src_image.desc.format.channel_order;
  const hsa_ext_image_channel_type32_t from_type =
      src_image.desc.format.channel_type;
  const hsa_ext_image_channel_order32_t to_order =
      dst_image.desc.format.channel_order;
  const hsa_ext_image_channel_type32_t to_type =
      dst_image.desc.format.channel_type;

  // Identical formats: plain copy.
  const bool same_format = (from_order == to_order) && (from_type == to_type);
  if (same_format) {
    return ext_image::ImageRuntime::instance()->blit_kernel().CopyImage(
        blit_queue_, blit_code_catalog_, dst_image, src_image, dst_origin,
        src_origin, size, BlitKernel::KERNEL_OP_COPY_IMAGE_DEFAULT);
  }

  // Source and destination format must be the same, except for
  // SRGBA <--> RGBA images.
  const bool both_unorm_int8 =
      (from_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8) &&
      (to_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8);
  if (!both_unorm_int8) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  BlitKernel::KernelOp conversion = BlitKernel::KERNEL_OP_COPY_IMAGE_DEFAULT;
  if ((from_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA) &&
      (to_order == HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA)) {
    conversion = BlitKernel::KERNEL_OP_COPY_IMAGE_STANDARD_TO_LINEAR;
  } else if ((from_order == HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA) &&
             (to_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA)) {
    conversion = BlitKernel::KERNEL_OP_COPY_IMAGE_LINEAR_TO_STANDARD;
  }
  if (conversion == BlitKernel::KERNEL_OP_COPY_IMAGE_DEFAULT) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // KV and CZ don't have write support for SRGBA image, so treat the
  // destination image as RGBA image.
  SQ_IMG_RSRC_WORD1* dst_word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(
      &const_cast<Image&>(dst_image).srd[1]);

  // Destination can be linear or standard, preserve the original value.
  const uint32_t saved_num_format = dst_word1->bits.num_format;
  dst_word1->bits.num_format = TYPE_UNORM;

  const hsa_status_t copy_status =
      ext_image::ImageRuntime::instance()->blit_kernel().CopyImage(
          blit_queue_, blit_code_catalog_, dst_image, src_image, dst_origin,
          src_origin, size, conversion);

  // Revert to the original format after the copy operation is finished.
  dst_word1->bits.num_format = saved_num_format;

  return copy_status;
}
/// @brief Fill a region of an image with a pattern using the blit kernel.
///
/// Two temporary descriptor patches may be applied around the fill and are
/// undone afterwards:
///  - UNORM_SHORT_101010 images get their W channel select forced to SEL_0.
///  - SRGBA/SRGB/SRGBX/SBGRA images are written as TYPE_UNORM, with the first
///    three pattern components converted through LinearToStandardRGB.
///
/// @param image Image to fill (its srd is patched temporarily, despite the
/// const reference).
/// @param pattern Fill value; read as four floats for the sRGB channel orders.
/// @param region Region of the image to fill.
/// @return HSA_STATUS_ERROR_OUT_OF_RESOURCES when the blit queue is not
/// available, otherwise the status reported by the blit kernel.
hsa_status_t ImageManagerKv::FillImage(const Image& image, const void* pattern,
                                       const hsa_ext_image_region_t& region) {
  if (BlitQueueInit().queue_ == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  Image& view = const_cast<Image&>(image);

  // Optional patch 1: drop the alpha channel for 10-10-10 formats.
  SQ_BUF_RSRC_WORD3* buf_word3 = NULL;
  SQ_IMG_RSRC_WORD3* img_word3 = NULL;
  uint32_t saved_dst_sel_w = 0;

  const bool is_101010 = (view.desc.format.channel_type ==
                          HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010);
  if (is_101010) {
    // Force GPU to ignore the last two bits (alpha bits).
    if (view.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
      buf_word3 = reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&view.srd[3]);
      saved_dst_sel_w = buf_word3->bits.dst_sel_w;
      buf_word3->bits.dst_sel_w = SEL_0;
    } else {
      img_word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&view.srd[3]);
      saved_dst_sel_w = img_word3->bits.dst_sel_w;
      img_word3->bits.dst_sel_w = SEL_0;
    }
  }

  // Optional patch 2: write sRGB images as plain UNORM.
  SQ_IMG_RSRC_WORD1* img_word1 = NULL;
  uint32_t saved_num_format = 0;
  const void* effective_pattern = pattern;
  float converted[4] = {0};

  const bool is_srgb_order =
      (view.desc.format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA) ||
      (view.desc.format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB) ||
      (view.desc.format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX) ||
      (view.desc.format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA);
  if (is_srgb_order) {
    // KV and CZ don't have write support for SRGBA image, so convert pattern
    // to standard form and treat the image as RGBA image.
    const float* linear = reinterpret_cast<const float*>(pattern);
    for (int channel = 0; channel < 3; ++channel) {
      converted[channel] = LinearToStandardRGB(linear[channel]);
    }
    converted[3] = linear[3];  // Alpha is passed through unchanged.
    effective_pattern = converted;

    img_word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&view.srd[1]);
    saved_num_format = img_word1->bits.num_format;
    img_word1->bits.num_format = TYPE_UNORM;
  }

  const hsa_status_t fill_status =
      ext_image::ImageRuntime::instance()->blit_kernel().FillImage(
          blit_queue_, blit_code_catalog_, view, effective_pattern, region);

  // Revert back original configuration.
  if (buf_word3 != NULL) {
    buf_word3->bits.dst_sel_w = saved_dst_sel_w;
  }
  if (img_word3 != NULL) {
    img_word3->bits.dst_sel_w = saved_dst_sel_w;
  }
  if (img_word1 != NULL) {
    img_word1->bits.num_format = saved_num_format;
  }

  return fill_status;
}
// Region visitor: when `region` is a global-segment region flagged as coarse
// grained, stores it into the hsa_region_t that `data` points to.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when `data` is NULL, the failing
// status when a region query fails, and HSA_STATUS_SUCCESS otherwise (whether
// or not the region matched).
// NOTE(review): presumably used as an hsa_agent_iterate_regions callback —
// confirm against the caller.
hsa_status_t ImageManagerKv::GetLocalMemoryRegion(hsa_region_t region,
                                                  void* data) {
  if (data == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_region_segment_t region_segment;
  hsa_status_t status =
      hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &region_segment);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }
  if (region_segment != HSA_REGION_SEGMENT_GLOBAL) {
    // Not a global region: nothing to record.
    return HSA_STATUS_SUCCESS;
  }

  uint32_t global_flags = 0;
  status =
      hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &global_flags);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  const bool coarse_grained =
      (global_flags & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) != 0;
  if (coarse_grained) {
    *static_cast<hsa_region_t*>(data) = region;
  }
  return HSA_STATUS_SUCCESS;
}
// Translates the hardware data format (and, where addrlib distinguishes it,
// the float data type) of `image_prop` into the matching addrlib format.
// Unhandled formats assert in debug builds and yield ADDR_FMT_INVALID.
AddrFormat ImageManagerKv::GetAddrlibFormat(const ImageProperty& image_prop) {
  // Several formats have a dedicated *_FLOAT addrlib variant.
  const bool is_float = (image_prop.data_type == TYPE_FLOAT);

  switch (image_prop.data_format) {
    case FMT_8:
      return ADDR_FMT_8;
    case FMT_16:
      return is_float ? ADDR_FMT_16_FLOAT : ADDR_FMT_16;
    case FMT_8_8:
      return ADDR_FMT_8_8;
    case FMT_32:
      return is_float ? ADDR_FMT_32_FLOAT : ADDR_FMT_32;
    case FMT_16_16:
      return is_float ? ADDR_FMT_16_16_FLOAT : ADDR_FMT_16_16;
    case FMT_2_10_10_10:
      return ADDR_FMT_2_10_10_10;
    case FMT_8_8_8_8:
      return ADDR_FMT_8_8_8_8;
    case FMT_32_32:
      return is_float ? ADDR_FMT_32_32_FLOAT : ADDR_FMT_32_32;
    case FMT_16_16_16_16:
      return is_float ? ADDR_FMT_16_16_16_16_FLOAT : ADDR_FMT_16_16_16_16;
    case FMT_32_32_32_32:
      return is_float ? ADDR_FMT_32_32_32_32_FLOAT : ADDR_FMT_32_32_32_32;
    case FMT_5_6_5:
      return ADDR_FMT_5_6_5;
    case FMT_1_5_5_5:
      return ADDR_FMT_1_5_5_5;
    case FMT_8_24:
      return ADDR_FMT_8_24;
    default:
      break;
  }

  assert(false && "Should not reach here");
  return ADDR_FMT_INVALID;
}
// System-memory allocation hook with the addrlib ADDR_API calling convention.
// Allocates input->sizeInBytes bytes with malloc and returns the pointer
// (NULL if malloc fails). `input` is dereferenced without a NULL check.
// NOTE(review): presumably registered with addrlib when the library handle is
// created — confirm in Initialize().
VOID* ADDR_API
ImageManagerKv::AllocSysMem(const ADDR_ALLOCSYSMEM_INPUT* input) {
return malloc(input->sizeInBytes);
}
// System-memory release hook matching AllocSysMem: frees input->pVirtAddr
// with free() and always reports ADDR_OK. `input` is dereferenced without a
// NULL check.
ADDR_E_RETURNCODE ADDR_API
ImageManagerKv::FreeSysMem(const ADDR_FREESYSMEM_INPUT* input) {
free(input->pVirtAddr);
return ADDR_OK;
}
// Asks addrlib for the surface layout (pitch, height, size, alignment, tile
// index) of an image described by `desc`.
//
// @param component              Agent used to look up the image property.
// @param desc                   Image descriptor (format, geometry, extents).
// @param tileMode               LINEAR or tiled layout request.
// @param image_data_row_pitch   Caller supplied row pitch in bytes, 0 if none.
// @param image_data_slice_pitch Currently unused.
// @param out                    Receives the computed surface information.
// @return true on success, false when addrlib rejects the request or the
//         format has no element size.
//
// For devices whose major version (derived from chip_id_) is >= 9 the ADDR2
// interface is used and the relevant results are copied into the legacy
// output structure; `out.tileIndex` then carries the chosen swizzle mode.
bool ImageManagerKv::GetAddrlibSurfaceInfo(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    Image::TileMode tileMode,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    ADDR_COMPUTE_SURFACE_INFO_OUTPUT& out) const {
  const ImageProperty image_prop =
      GetImageProperty(component, desc.format, desc.geometry);
  const AddrFormat addrlib_format = GetAddrlibFormat(image_prop);

  // The pitch computations below divide by the element size.
  if (image_prop.element_size == 0) {
    return false;
  }

  const uint32_t width = static_cast<uint32_t>(desc.width);
  const uint32_t height = static_cast<uint32_t>(desc.height);
  static const size_t kMinNumSlice = 1;
  const uint32_t num_slice = static_cast<uint32_t>(
      std::max(kMinNumSlice, std::max(desc.array_size, desc.depth)));

  uint32_t major_ver = MajorVerFromDevID(chip_id_);
  if (major_ver >= 9) {
    ADDR2_COMPUTE_SURFACE_INFO_INPUT in = {0};
    in.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
    in.format = addrlib_format;
    in.bpp = static_cast<unsigned int>(image_prop.element_size) * 8;
    in.width = width;
    in.height = height;
    in.numSlices = num_slice;
    in.pitchInElement = image_data_row_pitch / image_prop.element_size;
    switch (desc.geometry) {
      case HSA_EXT_IMAGE_GEOMETRY_1D:
      case HSA_EXT_IMAGE_GEOMETRY_1DB:
        in.resourceType = ADDR_RSRC_TEX_1D;
        break;
      case HSA_EXT_IMAGE_GEOMETRY_2D:
      case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
      case HSA_EXT_IMAGE_GEOMETRY_1DA:
        in.resourceType = ADDR_RSRC_TEX_2D;
        break;
      case HSA_EXT_IMAGE_GEOMETRY_3D:
      case HSA_EXT_IMAGE_GEOMETRY_2DA:
      case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
        in.resourceType = ADDR_RSRC_TEX_3D;
        break;
    }
    in.swizzleMode = (tileMode == Image::TileMode::LINEAR) ? ADDR_SW_LINEAR
                                                           : ADDR_SW_4KB;
    in.flags.texture = 1;

    ADDR2_GET_PREFERRED_SURF_SETTING_INPUT prefSettingsInput = {0};
    ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT prefSettingsOutput = {0};
    prefSettingsInput.size = sizeof(prefSettingsInput);
    prefSettingsInput.flags = in.flags;
    prefSettingsInput.bpp = in.bpp;
    prefSettingsInput.format = in.format;
    prefSettingsInput.width = in.width;
    prefSettingsInput.height = in.height;
    prefSettingsInput.numFrags = in.numFrags;
    prefSettingsInput.numSamples = in.numSamples;
    prefSettingsInput.numMipLevels = in.numMipLevels;
    prefSettingsInput.numSlices = in.numSlices;
    prefSettingsInput.resourceLoction = ADDR_RSRC_LOC_UNDEF;
    prefSettingsInput.resourceType = in.resourceType;
    if (tileMode == Image::TileMode::LINEAR) {
      // this should force linear.
      prefSettingsInput.forbiddenBlock.macroThin4KB = 1;
      prefSettingsInput.forbiddenBlock.macroThick4KB = 1;
      prefSettingsInput.forbiddenBlock.macroThin64KB = 1;
      prefSettingsInput.forbiddenBlock.macroThick64KB = 1;
    } else {
      // this should not allow linear.
      prefSettingsInput.forbiddenBlock.linear = 1;
    }
    // but don't ever allow the 256b swizzle modes
    prefSettingsInput.forbiddenBlock.micro = 1;
    // and don't allow variable-size block modes
    prefSettingsInput.forbiddenBlock.var = 1;

    if (ADDR_OK != Addr2GetPreferredSurfaceSetting(addr_lib_, &prefSettingsInput,
                                                   &prefSettingsOutput)) {
      return false;
    }
    in.swizzleMode = prefSettingsOutput.swizzleMode;

    // The size field belongs to the ADDR2 output that is actually handed to
    // addrlib; the legacy `out` structure must not be stamped with the size
    // of a different type.
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out2 = {0};
    out2.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
    if (ADDR_OK != Addr2ComputeSurfaceInfo(addr_lib_, &in, &out2)) {
      return false;
    }

    out.pitch = out2.pitch;
    out.height = out2.height;
    out.surfSize = out2.surfSize;
    out.bpp = out2.bpp;
    out.baseAlign = out2.baseAlign;
    out.tileIndex = in.swizzleMode;
    out.sliceSize = out2.sliceSize;
    return true;
  }

  ADDR_COMPUTE_SURFACE_INFO_INPUT in = {0};
  in.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
  in.tileMode = (tileMode == Image::TileMode::LINEAR) ? ADDR_TM_LINEAR_ALIGNED
                                                      : ADDR_TM_2D_TILED_THIN1;
  in.format = addrlib_format;
  in.bpp = static_cast<unsigned int>(image_prop.element_size) * 8;
  in.numSamples = 1;
  in.width = width;
  in.height = height;
  in.numSlices = num_slice;
  in.flags.texture = 1;
  in.flags.noStencil = 1;
  in.flags.opt4Space = 0;
  in.tileType = ADDR_NON_DISPLAYABLE;
  in.tileIndex = -1;

  if (image_data_row_pitch != 0) {
    // A caller supplied row pitch is expressed to addrlib as the surface
    // width in elements.
    in.width = image_data_row_pitch / image_prop.element_size;
  }

  if (ADDR_OK != AddrComputeSurfaceInfo(addr_lib_, &in, &out)) {
    return false;
  }

  assert(out.tileIndex != -1);
  return out.tileIndex != -1;
}
// Returns the number of bytes covered by a `size_pixel` extent of elements
// that are `element_size` bytes wide, for the given image geometry:
//  * 1D / 1DB          -> x * element_size
//  * 2D / 2DDEPTH / 1DA -> x * y * element_size
//  * everything else    -> x * y * z * element_size
//
// All operands are 32-bit, so the product is evaluated in size_t to avoid
// wrapping at 4 GiB before the result is widened.
size_t ImageManagerKv::CalWorkingSizeBytes(hsa_ext_image_geometry_t geometry,
                                           hsa_dim3_t size_pixel,
                                           uint32_t element_size) const {
  const size_t extent_x = size_pixel.x;
  const size_t extent_y = size_pixel.y;
  const size_t extent_z = size_pixel.z;
  const size_t bytes_per_element = element_size;

  switch (geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1D:
    case HSA_EXT_IMAGE_GEOMETRY_1DB:
      return extent_x * bytes_per_element;
    case HSA_EXT_IMAGE_GEOMETRY_2D:
    case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
      return extent_x * extent_y * bytes_per_element;
    default:
      return extent_x * extent_y * extent_z * bytes_per_element;
  }
}
// Lazily creates the blit queue and builds the blit kernel catalog, then
// returns blit_queue_.
//
// Creation happens at most once under lock_. On any failure the function
// returns blit_queue_ with queue_ still NULL, which callers test for.
//
// The queue handle is stored into blit_queue_.queue_ only after the kernel
// catalog has been built, so a caller that sees a non-NULL queue_ on the
// unlocked fast path does not race with a catalog that is still being filled.
// NOTE(review): queue_ is a plain pointer read outside the lock; making the
// publication formally race-free would need an atomic in BlitQueue.
BlitQueue& ImageManagerKv::BlitQueueInit() {
  if (blit_queue_.queue_ == NULL) {
    // Queue is a precious resource, so only create it when it is needed.
    std::lock_guard<std::mutex> lock(lock_);
    if (blit_queue_.queue_ == NULL) {
      uint32_t max_queue_size = 0;
      hsa_status_t status = hsa_agent_get_info(
          agent_, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &max_queue_size);
      if (HSA_STATUS_SUCCESS != status) {
        return blit_queue_;
      }

      // Create the kernel queue into a local first; it is published last.
      hsa_queue_t* new_queue = NULL;
      status = hsa_queue_create(agent_, max_queue_size, HSA_QUEUE_TYPE_MULTI,
                                NULL, NULL, UINT_MAX, UINT_MAX, &new_queue);
      if (HSA_STATUS_SUCCESS != status) {
        return blit_queue_;
      }

      // Get the kernel handles.
      status = ext_image::ImageRuntime::instance()->blit_kernel().BuildBlitCode(
          agent_, blit_code_catalog_);
      if (HSA_STATUS_SUCCESS != status) {
        blit_code_catalog_.clear();
        hsa_queue_destroy(new_queue);
        return blit_queue_;
      }

      blit_queue_.cached_index_ = 0;
      blit_queue_.queue_ = new_queue;
    }
  }

  assert(blit_queue_.queue_ != NULL &&
         blit_code_catalog_.size() == BlitKernel::KERNEL_OP_COUNT);
  return blit_queue_;
}
} // namespace
+125
Ver Arquivo
@@ -0,0 +1,125 @@
#ifndef HSA_RUNTIME_EXT_IMAGE_IMAGE_MANAGER_KV_H
#define HSA_RUNTIME_EXT_IMAGE_IMAGE_MANAGER_KV_H
#include "addrlib/inc/addrinterface.h"
#include "blit_kernel.h"
#include "image_lut_kv.h"
#include "image_manager.h"
namespace amd {
/// @brief ImageManager implementation declared for the "Kv" device family.
/// It owns an addrlib handle, a lazily created blit queue and the blit kernel
/// catalog used by the copy/fill entry points.
/// NOTE(review): other device-specific managers (e.g. ImageManagerNv) derive
/// from this class and reuse its protected helpers and members.
class ImageManagerKv : public ImageManager {
public:
explicit ImageManagerKv();
virtual ~ImageManagerKv();
/// @brief Bind the manager to an agent.
/// NOTE(review): implementation is outside this header — confirm what is
/// initialized.
virtual hsa_status_t Initialize(hsa_agent_t agent_handle);
/// @brief Release resources held by the manager.
/// NOTE(review): implementation is outside this header.
virtual void Cleanup();
/// @brief Retrieve device specific image property of a certain format
/// and geometry.
virtual ImageProperty GetImageProperty(
hsa_agent_t component, const hsa_ext_image_format_t& format,
hsa_ext_image_geometry_t geometry) const;
/// @brief Retrieve device specific supported max width, height, depth,
/// and array size of an image geometry.
virtual void GetImageInfoMaxDimension(hsa_agent_t component,
hsa_ext_image_geometry_t geometry,
uint32_t& width, uint32_t& height,
uint32_t& depth,
uint32_t& array_size) const;
/// @brief Calculate the size and alignment of the backing storage of an
/// image.
virtual hsa_status_t CalculateImageSizeAndAlignment(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
size_t image_data_row_pitch, size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info) const;
/// @brief Fill image structure with device specific image object.
virtual hsa_status_t PopulateImageSrd(Image& image) const;
/// @brief Fill image structure with device specific image object using the given format.
virtual hsa_status_t PopulateImageSrd(Image& image, const metadata_amd_t* desc) const;
/// @brief Modify device specific image object according to the specified
/// new format.
virtual hsa_status_t ModifyImageSrd(Image& image,
hsa_ext_image_format_t& new_format) const;
/// @brief Fill sampler structure with device specific sampler object.
virtual hsa_status_t PopulateSamplerSrd(Sampler& sampler) const;
/// @brief Copy the content of a linear memory to an image object.
virtual hsa_status_t CopyBufferToImage(
const void* src_memory, size_t src_row_pitch, size_t src_slice_pitch,
const Image& dst_image, const hsa_ext_image_region_t& image_region);
/// @brief Copy the content of an image object to a linear memory.
virtual hsa_status_t CopyImageToBuffer(
const Image& src_image, void* dst_memory, size_t dst_row_pitch,
size_t dst_slice_pitch, const hsa_ext_image_region_t& image_region);
/// @brief Transfer images backing storage using agent copy.
virtual hsa_status_t CopyImage(const Image& dst_image, const Image& src_image,
const hsa_dim3_t& dst_origin,
const hsa_dim3_t& src_origin,
const hsa_dim3_t size);
/// @brief Fill image backing storage using agent copy.
virtual hsa_status_t FillImage(const Image& image, const void* pattern,
const hsa_ext_image_region_t& region);
protected:
/// @brief Region visitor that stores a coarse-grained global region into the
/// hsa_region_t pointed to by @p data.
static hsa_status_t GetLocalMemoryRegion(hsa_region_t region, void* data);
/// @brief Map an image property's data format/type to the addrlib format.
static AddrFormat GetAddrlibFormat(const ImageProperty& image_prop);
/// @brief malloc-based system memory allocation hook (ADDR_API signature).
static VOID* ADDR_API AllocSysMem(const ADDR_ALLOCSYSMEM_INPUT* input);
/// @brief free-based system memory release hook (ADDR_API signature).
static ADDR_E_RETURNCODE ADDR_API
FreeSysMem(const ADDR_FREESYSMEM_INPUT* input);
/// @brief Query addrlib for the surface layout of the described image.
/// @return true when @p out was filled successfully.
bool GetAddrlibSurfaceInfo(hsa_agent_t component,
const hsa_ext_image_descriptor_t& desc,
Image::TileMode tileMode,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT& out) const;
/// @brief Byte size of a pixel extent for the given geometry and element
/// size.
size_t CalWorkingSizeBytes(hsa_ext_image_geometry_t geometry,
hsa_dim3_t size_pixel,
uint32_t element_size) const;
/// @brief Whether @p address is treated as device local memory.
virtual bool IsLocalMemory(const void* address) const;
/// @brief Lazily create the blit queue and kernel catalog; the returned
/// queue has a NULL queue_ member when creation failed.
BlitQueue& BlitQueueInit();
// Format / swizzle / geometry lookup tables.
ImageLutKv image_lut_;
// Handle passed to the Addr*/Addr2* addrlib calls.
ADDR_HANDLE addr_lib_;
// Agent the blit queue is created on.
hsa_agent_t agent_;
// NOTE(review): presumably the addrlib chip family — confirm in Initialize().
uint32_t family_type_;
// Device id; its major version selects the ADDR2 code path.
uint32_t chip_id_;
// Queue used to dispatch blit kernels; created on first use.
BlitQueue blit_queue_;
// Blit kernel code objects, filled by BlitKernel::BuildBlitCode.
std::vector<BlitCodeInfo> blit_code_catalog_;
// NOTE(review): presumably the memory type programmed into SRDs — confirm.
uint32_t mtype_;
// Subtracted from image addresses that IsLocalMemory() reports as local.
uintptr_t local_memory_base_address_;
// Serializes lazy creation of the blit queue.
std::mutex lock_;
private:
DISALLOW_COPY_AND_ASSIGN(ImageManagerKv);
};
}
#endif // HSA_RUNTIME_EXT_IMAGE_IMAGE_MANAGER_KV_H
+746
Ver Arquivo
@@ -0,0 +1,746 @@
#define NOMINMAX
#include "image_manager_nv.h"
#include <assert.h>
#include <algorithm>
#include <climits>
#include "inc/hsa_ext_amd.h"
#include "addrlib/src/core/addrlib.h"
#include "image_runtime.h"
#include "resource.h"
#include "resource_nv.h"
#include "util.h"
#include "device_info.h"
namespace amd {
//-----------------------------------------------------------------------------
// Workaround switch to combined format/type codes and missing gfx10
// specific look up table. Only covers types used in image_lut_kv.cpp.
//-----------------------------------------------------------------------------
// One row of the format conversion table: a (data format, number type) pair
// and the combined format code that represents both.
struct formatconverstion_t {
FMT fmt;  // Data format half of the lookup key.
type type;  // Number type half of the lookup key.
FORMAT format;  // Combined format code for (fmt, type).
};
// Format/Type to combined format code table.
// Rows that share the same FMT are kept contiguous so that FormatEntryPoint
// can store, per FMT value, the index of the first row to scan. Any change to
// the row order or count must be mirrored in FormatEntryPoint.
// The table currently holds 57 rows (indices 0..56).
static const formatconverstion_t FormatLUT[] = {
{FMT_1_5_5_5, TYPE_UNORM, CFMT_1_5_5_5_UNORM},
{FMT_10_10_10_2, TYPE_UNORM, CFMT_10_10_10_2_UNORM},
{FMT_10_10_10_2, TYPE_SNORM, CFMT_10_10_10_2_SNORM},
{FMT_10_10_10_2, TYPE_UINT, CFMT_10_10_10_2_UINT},
{FMT_10_10_10_2, TYPE_SINT, CFMT_10_10_10_2_SINT},
{FMT_16, TYPE_UNORM, CFMT_16_UNORM},
{FMT_16, TYPE_SNORM, CFMT_16_SNORM},
{FMT_16, TYPE_UINT, CFMT_16_UINT},
{FMT_16, TYPE_SINT, CFMT_16_SINT},
{FMT_16, TYPE_FLOAT, CFMT_16_FLOAT},
{FMT_16_16, TYPE_UNORM, CFMT_16_16_UNORM},
{FMT_16_16, TYPE_SNORM, CFMT_16_16_SNORM},
{FMT_16_16, TYPE_UINT, CFMT_16_16_UINT},
{FMT_16_16, TYPE_SINT, CFMT_16_16_SINT},
{FMT_16_16, TYPE_FLOAT, CFMT_16_16_FLOAT},
{FMT_16_16_16_16, TYPE_UNORM, CFMT_16_16_16_16_UNORM},
{FMT_16_16_16_16, TYPE_SNORM, CFMT_16_16_16_16_SNORM},
{FMT_16_16_16_16, TYPE_UINT, CFMT_16_16_16_16_UINT},
{FMT_16_16_16_16, TYPE_SINT, CFMT_16_16_16_16_SINT},
{FMT_16_16_16_16, TYPE_FLOAT, CFMT_16_16_16_16_FLOAT},
{FMT_2_10_10_10, TYPE_UNORM, CFMT_2_10_10_10_UNORM},
{FMT_2_10_10_10, TYPE_SNORM, CFMT_2_10_10_10_SNORM},
{FMT_2_10_10_10, TYPE_UINT, CFMT_2_10_10_10_UINT},
{FMT_2_10_10_10, TYPE_SINT, CFMT_2_10_10_10_SINT},
{FMT_24_8, TYPE_UNORM, CFMT_24_8_UNORM},
{FMT_24_8, TYPE_UINT, CFMT_24_8_UINT},
{FMT_32, TYPE_UINT, CFMT_32_UINT},
{FMT_32, TYPE_SINT, CFMT_32_SINT},
{FMT_32, TYPE_FLOAT, CFMT_32_FLOAT},
{FMT_32_32, TYPE_UINT, CFMT_32_32_UINT},
{FMT_32_32, TYPE_SINT, CFMT_32_32_SINT},
{FMT_32_32, TYPE_FLOAT, CFMT_32_32_FLOAT},
{FMT_32_32_32, TYPE_UINT, CFMT_32_32_32_UINT},
{FMT_32_32_32, TYPE_SINT, CFMT_32_32_32_SINT},
{FMT_32_32_32, TYPE_FLOAT, CFMT_32_32_32_FLOAT},
{FMT_32_32_32_32, TYPE_UINT, CFMT_32_32_32_32_UINT},
{FMT_32_32_32_32, TYPE_SINT, CFMT_32_32_32_32_SINT},
{FMT_32_32_32_32, TYPE_FLOAT, CFMT_32_32_32_32_FLOAT},
{FMT_5_5_5_1, TYPE_UNORM, CFMT_5_5_5_1_UNORM},
{FMT_5_6_5, TYPE_UNORM, CFMT_5_6_5_UNORM},
{FMT_8, TYPE_UNORM, CFMT_8_UNORM},
{FMT_8, TYPE_SNORM, CFMT_8_SNORM},
{FMT_8, TYPE_UINT, CFMT_8_UINT},
{FMT_8, TYPE_SINT, CFMT_8_SINT},
{FMT_8, TYPE_SRGB, CFMT_8_SRGB},
{FMT_8_24, TYPE_UNORM, CFMT_8_24_UNORM},
{FMT_8_24, TYPE_UINT, CFMT_8_24_UINT},
{FMT_8_8, TYPE_UNORM, CFMT_8_8_UNORM},
{FMT_8_8, TYPE_SNORM, CFMT_8_8_SNORM},
{FMT_8_8, TYPE_UINT, CFMT_8_8_UINT},
{FMT_8_8, TYPE_SINT, CFMT_8_8_SINT},
{FMT_8_8, TYPE_SRGB, CFMT_8_8_SRGB},
{FMT_8_8_8_8, TYPE_UNORM, CFMT_8_8_8_8_UNORM},
{FMT_8_8_8_8, TYPE_SNORM, CFMT_8_8_8_8_SNORM},
{FMT_8_8_8_8, TYPE_UINT, CFMT_8_8_8_8_UINT},
{FMT_8_8_8_8, TYPE_SINT, CFMT_8_8_8_8_SINT},
{FMT_8_8_8_8, TYPE_SRGB, CFMT_8_8_8_8_SRGB}
};
// Number of rows in FormatLUT; also used as the "no rows" start index in
// FormatEntryPoint.
static const int FormatLUTSize = sizeof(FormatLUT)/sizeof(formatconverstion_t);
// Index in FormatLUT to start search, indexed by FMT enum.
// Entry N is the FormatLUT index of the first row for the FMT whose numeric
// value is N. The value 57 equals FormatLUTSize, so the search loop in
// GetCombinedFormat runs zero iterations and reports CFMT_INVALID.
static const int FormatEntryPoint[] = {
57,  // [0]  no rows
40,  // [1]  FormatLUT[40]: first FMT_8 row
5,  // [2]  FormatLUT[5]: first FMT_16 row
47,  // [3]  FormatLUT[47]: first FMT_8_8 row
26,  // [4]  FormatLUT[26]: first FMT_32 row
10,  // [5]  FormatLUT[10]: first FMT_16_16 row
57,  // [6]  no rows
57,  // [7]  no rows
1,  // [8]  FormatLUT[1]: first FMT_10_10_10_2 row
20,  // [9]  FormatLUT[20]: first FMT_2_10_10_10 row
52,  // [10] FormatLUT[52]: first FMT_8_8_8_8 row
29,  // [11] FormatLUT[29]: first FMT_32_32 row
15,  // [12] FormatLUT[15]: first FMT_16_16_16_16 row
32,  // [13] FormatLUT[32]: first FMT_32_32_32 row
35,  // [14] FormatLUT[35]: first FMT_32_32_32_32 row
57,  // [15] no rows
39,  // [16] FormatLUT[39]: FMT_5_6_5 row
0,  // [17] FormatLUT[0]: FMT_1_5_5_5 row
38,  // [18] FormatLUT[38]: FMT_5_5_5_1 row
57,  // [19] no rows
45,  // [20] FormatLUT[45]: first FMT_8_24 row
24  // [21] FormatLUT[24]: first FMT_24_8 row
};
// Looks up the combined format code for a (data format, number type) pair.
//
// @param fmt  FMT enum value; used as an index into FormatEntryPoint.
// @param type Number type to match within the rows for `fmt`.
// @return The matching combined format, or CFMT_INVALID when `fmt` is out of
//         range or the pair has no row in FormatLUT.
static FORMAT GetCombinedFormat(uint8_t fmt, uint8_t type) {
  static const size_t kNumEntryPoints =
      sizeof(FormatEntryPoint) / sizeof(FormatEntryPoint[0]);
  assert(fmt < kNumEntryPoints && "FMT out of range.");
  // The assert vanishes in release builds; never index past the table.
  if (fmt >= kNumEntryPoints) {
    return CFMT_INVALID;
  }

  const int start = FormatEntryPoint[fmt];
  // Only 6 types are used in image_lut_kv.cpp, so at most 6 rows are scanned.
  const int stop = std::min(start + 6, FormatLUTSize);
  for (int i = start; i < stop; i++) {
    if ((FormatLUT[i].fmt == fmt) && (FormatLUT[i].type == type)) {
      return FormatLUT[i].format;
    }
  }
  return CFMT_INVALID;
}
//-----------------------------------------------------------------------------
// End workaround
//-----------------------------------------------------------------------------
// Construction and destruction add nothing beyond the ImageManagerKv base.
ImageManagerNv::ImageManagerNv() : ImageManagerKv() {}
ImageManagerNv::~ImageManagerNv() {}
// TODO(cfreehil) remove from class, make it a utility function
//
// Computes the size and base alignment of the backing storage for an image.
//
// @param component              Agent the image is created for.
// @param desc                   Image descriptor.
// @param image_data_layout      OPAQUE lets the implementation tile the image
//                               (base profile, non-1DB only); otherwise the
//                               layout is linear.
// @param image_data_row_pitch   Requested row pitch in bytes, 0 for default.
// @param image_data_slice_pitch Requested slice pitch in bytes, 0 for default.
// @param image_info             Receives size and alignment.
// @return HSA_STATUS_SUCCESS; the agent query status if the profile cannot be
//         read; HSA_STATUS_ERROR if addrlib fails;
//         HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED when a linear image's
//         requested pitch differs from the computed one.
hsa_status_t ImageManagerNv::CalculateImageSizeAndAlignment(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t& image_info) const {
  ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};

  hsa_profile_t profile;
  hsa_status_t status =
      hsa_agent_get_info(component, HSA_AGENT_INFO_PROFILE, &profile);
  if (status != HSA_STATUS_SUCCESS) {
    // `profile` is not valid if the query failed; do not read it.
    return status;
  }

  Image::TileMode tileMode = Image::TileMode::LINEAR;
  if (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE) {
    tileMode = (profile == HSA_PROFILE_BASE &&
                desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)
                   ? Image::TileMode::TILED
                   : Image::TileMode::LINEAR;
  }

  if (GetAddrlibSurfaceInfoNv(component, desc, tileMode, image_data_row_pitch,
                              image_data_slice_pitch, out) == (uint32_t)(-1)) {
    return HSA_STATUS_ERROR;
  }

  // Widen before multiplying so large surfaces do not wrap in 32 bits.
  size_t rowPitch = static_cast<size_t>(out.bpp >> 3) * out.pitch;
  size_t slicePitch = rowPitch * out.height;
  if (desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB &&
      image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR &&
      ((image_data_row_pitch && (rowPitch != image_data_row_pitch)) ||
       (image_data_slice_pitch && (slicePitch != image_data_slice_pitch)))) {
    return static_cast<hsa_status_t>(
        HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED);
  }

  image_info.size = out.surfSize;
  assert(image_info.size != 0);
  image_info.alignment = out.baseAlign;
  assert(image_info.alignment != 0);
  return HSA_STATUS_SUCCESS;
}
// Reports every address as local memory; `address` is not inspected.
bool ImageManagerNv::IsLocalMemory(const void* address) const {
return true;
}
// Builds the image SRD from imported vendor metadata.
//
// The eight descriptor words in `descriptor` (reinterpreted as
// metadata_amd_nv_t) are copied into image.srd[0..7] and then patched with
// the image's base address, format, and channel swizzle. For non-buffer
// images the metadata address stored in words 6/7 is treated as an offset and
// rebased onto the image address.
//
// Returns HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the format is not
// supported or its element size disagrees with the hardware pixel size;
// HSA_STATUS_SUCCESS otherwise.
hsa_status_t ImageManagerNv::PopulateImageSrd(Image& image,
                                              const metadata_amd_t* descriptor) const {
  const metadata_amd_nv_t* desc =
      reinterpret_cast<const metadata_amd_nv_t*>(descriptor);

  ImageProperty image_prop =
      image_lut_.MapFormat(image.desc.format, image.desc.geometry);
  const bool unsupported =
      (image_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED) ||
      (image_prop.element_size == 0);
  if (unsupported) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  const Swizzle swizzle =
      image_lut_.MapSwizzle(image.desc.format.channel_order);

  // Local memory addresses are expressed relative to the local memory base.
  const void* image_data_addr = image.data;
  if (IsLocalMemory(image.data)) {
    image_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);
  }

  // Start from the imported descriptor words.
  image.srd[0] = desc->word0.u32All;
  image.srd[1] = desc->word1.u32All;
  image.srd[2] = desc->word2.u32All;
  image.srd[3] = desc->word3.u32All;
  image.srd[4] = desc->word4.u32All;
  image.srd[5] = desc->word5.u32All;
  image.srd[6] = desc->word6.u32All;
  image.srd[7] = desc->word7.u32All;

  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    // Buffer resource: words 0, 1 and 3 are rebuilt, word 2 stays as imported.
    SQ_BUF_RSRC_WORD0 buf_word0;
    buf_word0.val = 0;
    buf_word0.f.BASE_ADDRESS = ext_image::PtrLow32(image_data_addr);

    SQ_BUF_RSRC_WORD1 buf_word1;
    buf_word1.val = image.srd[1];
    buf_word1.f.BASE_ADDRESS_HI = ext_image::PtrHigh32(image_data_addr);
    buf_word1.f.STRIDE = image_prop.element_size;

    SQ_BUF_RSRC_WORD3 buf_word3;
    buf_word3.val = image.srd[3];
    buf_word3.f.DST_SEL_X = swizzle.x;
    buf_word3.f.DST_SEL_Y = swizzle.y;
    buf_word3.f.DST_SEL_Z = swizzle.z;
    buf_word3.f.DST_SEL_W = swizzle.w;
    buf_word3.f.FORMAT =
        GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    buf_word3.f.INDEX_STRIDE = image_prop.element_size;

    image.srd[0] = buf_word0.val;
    image.srd[1] = buf_word1.val;
    image.srd[3] = buf_word3.val;
  } else {
    const uint32_t hwPixelSize =
        image_lut_.GetPixelSize(image_prop.data_format, image_prop.data_type);
    if (image_prop.element_size != hwPixelSize) {
      return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
    }

    // Image resource: patch the copied words in place.
    SQ_IMG_RSRC_WORD0* img_word0 =
        reinterpret_cast<SQ_IMG_RSRC_WORD0*>(&image.srd[0]);
    SQ_IMG_RSRC_WORD1* img_word1 =
        reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image.srd[1]);
    SQ_IMG_RSRC_WORD3* img_word3 =
        reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3]);
    SQ_IMG_RSRC_WORD6* img_word6 =
        reinterpret_cast<SQ_IMG_RSRC_WORD6*>(&image.srd[6]);
    SQ_IMG_RSRC_WORD7* img_word7 =
        reinterpret_cast<SQ_IMG_RSRC_WORD7*>(&image.srd[7]);

    img_word0->bits.BASE_ADDRESS = ext_image::PtrLow40Shift8(image_data_addr);
    img_word1->bits.BASE_ADDRESS_HI =
        ext_image::PtrHigh64Shift40(image_data_addr);
    img_word1->bits.FORMAT =
        GetCombinedFormat(image_prop.data_format, image_prop.data_type);

    img_word3->bits.DST_SEL_X = swizzle.x;
    img_word3->bits.DST_SEL_Y = swizzle.y;
    img_word3->bits.DST_SEL_Z = swizzle.z;
    img_word3->bits.DST_SEL_W = swizzle.w;

    const bool one_dimensional =
        (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA) ||
        (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D);
    if (one_dimensional) {
      img_word3->bits.TYPE = image_lut_.MapGeometry(image.desc.geometry);
    }

    // Imported metadata holds the offset to metadata, add the image base address.
    uintptr_t meta = uintptr_t(img_word7->bits.META_DATA_ADDRESS_HI) << 16;
    meta |= uintptr_t(img_word6->bits.META_DATA_ADDRESS) << 8;
    meta += reinterpret_cast<uintptr_t>(image_data_addr);
    img_word6->bits.META_DATA_ADDRESS =
        ext_image::PtrLow16Shift8(reinterpret_cast<void*>(meta));
    img_word7->bits.META_DATA_ADDRESS_HI =
        ext_image::PtrHigh64Shift16(reinterpret_cast<void*>(meta));
  }

  // Looks like this is only used for CPU copies.
  image.row_pitch = 0;
  image.slice_pitch = 0;

  // Used by HSAIL shader ABI
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);
  return HSA_STATUS_SUCCESS;
}
// Picks the border-color swizzle that corresponds to a channel swizzle.
// The decision is keyed on which destination channel selects SEL_X, checked
// in the order alpha, red, green, blue; TEX_BC_Swizzle_XYZW is the fallback.
static TEX_BC_SWIZZLE GetBcSwizzle(const Swizzle& swizzle) {
  const SEL red = (SEL)swizzle.x;
  const SEL green = (SEL)swizzle.y;
  const SEL blue = (SEL)swizzle.z;
  const SEL alpha = (SEL)swizzle.w;

  if (alpha == SEL_X) {
    // Have to use either TEX_BC_Swizzle_WZYX or TEX_BC_Swizzle_WXYZ
    //
    // For the pre-defined border color values (white, opaque black,
    // transparent black), the only thing that matters is that the alpha
    // channel winds up in the correct place (because the RGB channels are
    // all the same) so either of these TEX_BC_Swizzle enumerations will
    // work. Not sure what happens with border color palettes.
    if (blue == SEL_Y) {
      return TEX_BC_Swizzle_WZYX;  // ABGR
    }
    if ((red == SEL_X) && (green == SEL_X) && (blue == SEL_X)) {
      return TEX_BC_Swizzle_XYZW;  // RGBA
    }
    return TEX_BC_Swizzle_WXYZ;  // ARGB
  }

  if (red == SEL_X) {
    // Have to use either TEX_BC_Swizzle_XYZW or TEX_BC_Swizzle_XWYZ
    if (green == SEL_Y) {
      return TEX_BC_Swizzle_XYZW;  // RGBA
    }
    if ((green == SEL_X) && (blue == SEL_X) && (alpha == SEL_W)) {
      return TEX_BC_Swizzle_XYZW;  // RGBA
    }
    return TEX_BC_Swizzle_XWYZ;  // RAGB
  }

  if (green == SEL_X) {
    // GRAB, have to use TEX_BC_Swizzle_YXWZ
    return TEX_BC_Swizzle_YXWZ;
  }

  if (blue == SEL_X) {
    // BGRA, have to use TEX_BC_Swizzle_ZYXW
    return TEX_BC_Swizzle_ZYXW;
  }

  return TEX_BC_Swizzle_XYZW;
}
// Builds the image SRD from scratch for `image`.
//
// 1DB images get a 4-word buffer resource; all other geometries get an 8-word
// image resource whose layout comes from GetAddrlibSurfaceInfoNv. In both
// cases image.row_pitch / image.slice_pitch are updated and srd[8..10] carry
// channel type, channel order and width.
//
// Returns HSA_STATUS_ERROR when the addrlib query fails, HSA_STATUS_SUCCESS
// otherwise. An unsupported format is only caught by asserts.
hsa_status_t ImageManagerNv::PopulateImageSrd(Image& image) const {
  ImageProperty image_prop =
      image_lut_.MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);

  // Local memory addresses are expressed relative to the local memory base.
  const void* image_data_addr = image.data;
  if (IsLocalMemory(image.data)) {
    image_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);
  }

  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    SQ_BUF_RSRC_WORD0 word0;
    SQ_BUF_RSRC_WORD1 word1;
    SQ_BUF_RSRC_WORD2 word2;
    SQ_BUF_RSRC_WORD3 word3;

    word0.val = 0;
    word0.f.BASE_ADDRESS = ext_image::PtrLow32(image_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = ext_image::PtrHigh32(image_data_addr);
    word1.f.STRIDE = image_prop.element_size;
    word1.f.SWIZZLE_ENABLE = false;
    word1.f.CACHE_SWIZZLE = false;

    // Zero the whole word before the bitfield write: the word is stored as a
    // unit below and must not carry indeterminate bits.
    word2.val = 0;
    word2.f.NUM_RECORDS = image.desc.width * image_prop.element_size;

    const Swizzle swizzle =
        image_lut_.MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.RESOURCE_LEVEL = 1;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.FORMAT =
        GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    word3.f.INDEX_STRIDE = image_prop.element_size;
    word3.f.TYPE = image_lut_.MapGeometry(image.desc.geometry);

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;

    image.row_pitch = image.desc.width * image_prop.element_size;
    image.slice_pitch = image.row_pitch;
  } else {
    SQ_IMG_RSRC_WORD0 word0;
    SQ_IMG_RSRC_WORD1 word1;
    SQ_IMG_RSRC_WORD2 word2;
    SQ_IMG_RSRC_WORD3 word3;
    SQ_IMG_RSRC_WORD4 word4;
    SQ_IMG_RSRC_WORD5 word5;
    // NOTE(review): words 6 and 7 are declared with the WORD5 type; they are
    // only ever written as zero, so the type is kept as-is here.
    SQ_IMG_RSRC_WORD5 word6;
    SQ_IMG_RSRC_WORD5 word7;

    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
    uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(
        image.component, image.desc, image.tile_mode,
        image.row_pitch, image.slice_pitch, out);
    if (swizzleMode == (uint32_t)(-1)) {
      return HSA_STATUS_ERROR;
    }

    assert((out.bpp / 8) == image_prop.element_size);
    const size_t row_pitch_size = out.pitch * image_prop.element_size;

    // Zero the whole word before the bitfield write (see buffer path above).
    word0.val = 0;
    word0.f.BASE_ADDRESS = ext_image::PtrLow40Shift8(image_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = ext_image::PtrHigh64Shift40(image_data_addr);
    word1.f.MIN_LOD = 0;
    word1.f.FORMAT =
        GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    // Only take the lowest 2 bits of (image.desc.width - 1)
    word1.f.WIDTH = ext_image::BitSelect<0, 1>(image.desc.width - 1);

    word2.val = 0;
    // Take the high 12 bits of (image.desc.width - 1)
    word2.f.WIDTH_HI = ext_image::BitSelect<2, 13>(image.desc.width - 1);
    word2.f.HEIGHT = image.desc.height ? image.desc.height - 1 : 0;
    word2.f.RESOURCE_LEVEL = 1;

    const Swizzle swizzle =
        image_lut_.MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.SW_MODE = swizzleMode;
    word3.f.BC_SWIZZLE = GetBcSwizzle(swizzle);
    word3.f.TYPE = image_lut_.MapGeometry(image.desc.geometry);

    const bool image_array =
        (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool image_3d = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    // DEPTH holds (array size - 1) for arrays, (depth - 1) for 3D images and
    // (pitch - 1) for everything else.
    word4.val = 0;
    word4.f.DEPTH =
        (image_array)  // Doesn't hurt but isn't array_size already >0?
            ? std::max(image.desc.array_size, static_cast<size_t>(1)) - 1
            : (image_3d) ? image.desc.depth - 1 : out.pitch - 1;

    word5.val = 0;
    word6.val = 0;
    word7.val = 0;

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;
    image.srd[4] = word4.val;
    image.srd[5] = word5.val;
    image.srd[6] = word6.val;
    image.srd[7] = word7.val;

    image.row_pitch = row_pitch_size;
    image.slice_pitch = out.sliceSize;
  }

  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);
  return HSA_STATUS_SUCCESS;
}
// Re-targets an already populated SRD to `new_format`.
//
// Stores the new format in image.desc, then rewrites the combined format code
// and the destination channel selects in the SRD (buffer word 3 for 1DB,
// image words 1 and 3 otherwise) and refreshes srd[8..10].
// Always returns HSA_STATUS_SUCCESS; an unsupported format is only caught by
// asserts.
hsa_status_t ImageManagerNv::ModifyImageSrd(
    Image& image, hsa_ext_image_format_t& new_format) const {
  image.desc.format = new_format;

  ImageProperty image_prop =
      image_lut_.MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);

  // Both resource flavours need the same swizzle and combined format.
  const Swizzle channel_select =
      image_lut_.MapSwizzle(image.desc.format.channel_order);
  const FORMAT combined_format =
      GetCombinedFormat(image_prop.data_format, image_prop.data_type);

  const bool is_buffer = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB);
  if (is_buffer) {
    SQ_BUF_RSRC_WORD3* buf_word3 =
        reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&image.srd[3]);
    buf_word3->bits.DST_SEL_X = channel_select.x;
    buf_word3->bits.DST_SEL_Y = channel_select.y;
    buf_word3->bits.DST_SEL_Z = channel_select.z;
    buf_word3->bits.DST_SEL_W = channel_select.w;
    buf_word3->bits.FORMAT = combined_format;
  } else {
    SQ_IMG_RSRC_WORD1* img_word1 =
        reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image.srd[1]);
    img_word1->bits.FORMAT = combined_format;

    SQ_IMG_RSRC_WORD3* img_word3 =
        reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3]);
    img_word3->bits.DST_SEL_X = channel_select.x;
    img_word3->bits.DST_SEL_Y = channel_select.y;
    img_word3->bits.DST_SEL_Z = channel_select.z;
    img_word3->bits.DST_SEL_W = channel_select.w;
  }

  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);
  return HSA_STATUS_SUCCESS;
}
// Builds the 4-word sampler SRD from sampler.desc.
//
// The addressing mode selects the clamp setting applied to X, Y and Z; the
// filter mode selects the XY filter used for both magnification and
// minification. Z and mip filtering are disabled and the border color is
// transparent black.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for an unknown addressing or
// filter mode (sampler.srd is left untouched), HSA_STATUS_SUCCESS otherwise.
hsa_status_t ImageManagerNv::PopulateSamplerSrd(amd::Sampler& sampler) const {
  const hsa_ext_sampler_descriptor_t& desc = sampler.desc;

  // Resolve both descriptor enums first so invalid input exits early.
  int clamp_mode = 0;
  switch (desc.address_mode) {
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE:
      clamp_mode = static_cast<int>(SQ_TEX_CLAMP_LAST_TEXEL);
      break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER:
      clamp_mode = static_cast<int>(SQ_TEX_CLAMP_BORDER);
      break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT:
      clamp_mode = static_cast<int>(SQ_TEX_MIRROR);
      break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED:
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT:
      clamp_mode = static_cast<int>(SQ_TEX_WRAP);
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  int xy_filter = 0;
  switch (desc.filter_mode) {
    case HSA_EXT_SAMPLER_FILTER_MODE_NEAREST:
      xy_filter = static_cast<int>(SQ_TEX_XY_FILTER_POINT);
      break;
    case HSA_EXT_SAMPLER_FILTER_MODE_LINEAR:
      xy_filter = static_cast<int>(SQ_TEX_XY_FILTER_BILINEAR);
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  SQ_IMG_SAMP_WORD0 clamp_word;
  clamp_word.u32All = 0;
  clamp_word.bits.CLAMP_X = clamp_mode;
  clamp_word.bits.CLAMP_Y = clamp_word.bits.CLAMP_X;
  clamp_word.bits.CLAMP_Z = clamp_word.bits.CLAMP_X;
  clamp_word.bits.FORCE_UNNORMALIZED =
      (desc.coordinate_mode == HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED);

  SQ_IMG_SAMP_WORD1 lod_word;
  lod_word.u32All = 0;
  lod_word.bits.MAX_LOD = 4095;

  SQ_IMG_SAMP_WORD2 filter_word;
  filter_word.u32All = 0;
  filter_word.bits.XY_MAG_FILTER = xy_filter;
  filter_word.bits.XY_MIN_FILTER = filter_word.bits.XY_MAG_FILTER;
  filter_word.bits.Z_FILTER = SQ_TEX_Z_FILTER_NONE;
  filter_word.bits.MIP_FILTER = SQ_TEX_MIP_FILTER_NONE;

  SQ_IMG_SAMP_WORD3 border_word;
  border_word.u32All = 0;
  // TODO: check this bit with HSAIL spec.
  border_word.bits.BORDER_COLOR_TYPE = SQ_TEX_BORDER_COLOR_TRANS_BLACK;

  sampler.srd[0] = clamp_word.u32All;
  sampler.srd[1] = lod_word.u32All;
  sampler.srd[2] = filter_word.u32All;
  sampler.srd[3] = border_word.u32All;
  return HSA_STATUS_SUCCESS;
}
/// @brief Ask addrlib for the preferred swizzle mode and the resulting surface
/// layout of an image described by @p desc.
///
/// @param component Agent used to look up the image property for the format.
/// @param desc Image descriptor (format, geometry, width/height/depth/array).
/// @param tileMode LINEAR forbids every non-linear block type; any other value
/// forbids linear (and the 64KB macro blocks).
/// @param image_data_row_pitch Not referenced by this function body.
/// @param image_data_slice_pitch Not referenced by this function body.
/// @param out Receives the result of Addr2ComputeSurfaceInfo.
/// @return The swizzle mode selected by Addr2GetPreferredSurfaceSetting, or
/// (uint32_t)(-1) if either addrlib call fails or the computed surface size
/// is zero.
uint32_t ImageManagerNv::GetAddrlibSurfaceInfoNv(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
Image::TileMode tileMode,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const {
const ImageProperty image_prop =
GetImageProperty(component, desc.format, desc.geometry);
const AddrFormat addrlib_format = GetAddrlibFormat(image_prop);
const uint32_t width = static_cast<uint32_t>(desc.width);
const uint32_t height = static_cast<uint32_t>(desc.height);
// Slice count is the larger of array_size and depth, but never below 1.
static const size_t kMinNumSlice = 1;
const uint32_t num_slice = static_cast<uint32_t>(
std::max(kMinNumSlice, std::max(desc.array_size, desc.depth)));
// Zero-initialized: every field not assigned below stays 0.
ADDR2_COMPUTE_SURFACE_INFO_INPUT in = {0};
in.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
in.format = addrlib_format;
// element_size is in bytes; addrlib expects bits per pixel.
in.bpp = static_cast<unsigned int>(image_prop.element_size) * 8;
in.width = width;
in.height = height;
in.numSlices = num_slice;
// Map the HSA geometry to an addrlib resource type. There is no default
// case, so an unlisted geometry leaves resourceType at its zero-initialized
// value.
switch (desc.geometry) {
case HSA_EXT_IMAGE_GEOMETRY_1D:
case HSA_EXT_IMAGE_GEOMETRY_1DB:
case HSA_EXT_IMAGE_GEOMETRY_1DA:
in.resourceType = ADDR_RSRC_TEX_1D;
break;
case HSA_EXT_IMAGE_GEOMETRY_2D:
case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
case HSA_EXT_IMAGE_GEOMETRY_2DA:
case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
in.resourceType = ADDR_RSRC_TEX_2D;
break;
case HSA_EXT_IMAGE_GEOMETRY_3D:
in.resourceType = ADDR_RSRC_TEX_3D;
break;
}
// Provisional swizzle mode; it is overwritten below with the mode addrlib
// prefers.
in.swizzleMode =
(tileMode == Image::TileMode::LINEAR)? ADDR_SW_LINEAR : ADDR_SW_4KB;
in.flags.texture = 1;
// Mirror the surface description into the preferred-setting query.
ADDR2_GET_PREFERRED_SURF_SETTING_INPUT prefSettingsInput = { 0 };
ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT prefSettingsOutput = { 0 };
prefSettingsInput.size = sizeof(prefSettingsInput);
prefSettingsInput.flags = in.flags;
prefSettingsInput.bpp = in.bpp;
prefSettingsInput.format = in.format;
prefSettingsInput.width = in.width;
prefSettingsInput.height = in.height;
prefSettingsInput.numFrags = in.numFrags;
prefSettingsInput.numSamples = in.numSamples;
prefSettingsInput.numMipLevels = in.numMipLevels;
prefSettingsInput.numSlices = in.numSlices;
prefSettingsInput.resourceLoction = ADDR_RSRC_LOC_UNDEF;
prefSettingsInput.resourceType = in.resourceType;
if (tileMode == Image::TileMode::LINEAR) {
// this should force linear.
prefSettingsInput.forbiddenBlock.macroThin4KB = 1;
prefSettingsInput.forbiddenBlock.macroThick4KB = 1;
prefSettingsInput.forbiddenBlock.macroThin64KB = 1;
prefSettingsInput.forbiddenBlock.macroThick64KB = 1;
prefSettingsInput.forbiddenBlock.micro = 1;
prefSettingsInput.forbiddenBlock.var = 1;
} else {
// this should not allow linear.
prefSettingsInput.forbiddenBlock.linear = 1;
// Debug setting, simplifies buffer alignment.
prefSettingsInput.forbiddenBlock.macroThin64KB = 1;
prefSettingsInput.forbiddenBlock.macroThick64KB = 1;
}
// The two restrictions below are currently disabled (kept for reference):
// but don't ever allow the 256b swizzle modes
//prefSettingsInput.forbiddenBlock.micro = 1;
// and don't allow variable-size block modes
//prefSettingsInput.forbiddenBlock.var = 1;
if (ADDR_OK != Addr2GetPreferredSurfaceSetting(addr_lib_,
&prefSettingsInput, &prefSettingsOutput)) {
return (uint32_t)(-1);
}
// Use the swizzle mode addrlib selected for the actual layout computation.
in.swizzleMode = prefSettingsOutput.swizzleMode;
out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
if (ADDR_OK != Addr2ComputeSurfaceInfo(addr_lib_, &in, &out)) {
return (uint32_t)(-1);
}
// A zero-sized surface is reported as a failure.
if (out.surfSize == 0) {
return (uint32_t)(-1);
}
return in.swizzleMode;
}
/// @brief Fill a region of @p image with @p pattern using the blit kernel.
///
/// The image SRD is patched in place for two special cases and restored
/// before returning:
///  - UNORM_SHORT_101010 channel type: DST_SEL_W is forced to SEL_0.
///  - sRGB channel orders: the first three pattern components are converted
///    with LinearToStandardRGB and the SRD format is switched to the UNORM
///    variant of the image's data format.
///
/// @param image Image to fill. Its SRD is modified temporarily even though the
/// reference is const (const_cast below).
/// NOTE(review): the temporary SRD patch looks unsafe if another thread uses
/// the same image concurrently — confirm callers serialize access.
/// @param pattern Fill value. For sRGB orders it is read as four floats.
/// @param region Region of the image to fill.
/// @return HSA_STATUS_ERROR_OUT_OF_RESOURCES if the blit queue is unavailable,
/// otherwise the status returned by the blit kernel.
hsa_status_t ImageManagerNv::FillImage(const Image& image, const void* pattern,
const hsa_ext_image_region_t& region) {
if (BlitQueueInit().queue_ == NULL) {
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
Image* image_view = const_cast<Image*>(&image);
// Non-NULL only when the corresponding SRD word was patched and therefore
// needs to be restored after the blit.
SQ_BUF_RSRC_WORD3* word3_buff = NULL;
SQ_IMG_RSRC_WORD3* word3_image = NULL;
uint32_t dst_sel_w_original = 0;
if (image_view->desc.format.channel_type ==
HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010) {
// Force GPU to ignore the last two bits (alpha bits).
// 1DB images are handled through the buffer form of word 3, every other
// geometry through the image form.
if (image_view->desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
word3_buff = reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&image_view->srd[3]);
dst_sel_w_original = word3_buff->bits.DST_SEL_W;
word3_buff->bits.DST_SEL_W = SEL_0;
} else {
word3_image = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image_view->srd[3]);
dst_sel_w_original = word3_image->bits.DST_SEL_W;
word3_image->bits.DST_SEL_W = SEL_0;
}
}
SQ_IMG_RSRC_WORD1* word1 = NULL;
uint32_t num_format_original = 0;
// Pattern handed to the blit kernel; replaced by fill_value for sRGB orders.
const void* new_pattern = pattern;
float fill_value[4] = {0};
switch (image_view->desc.format.channel_order) {
case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA:
case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB:
case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX:
case HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA: {
// KV and CZ don't have write support for SRGBA image, so convert pattern
// to standard form and treat the image as RGBA image.
// NOTE(review): this comment names KV/CZ but the code lives in the Nv
// manager — presumably inherited from the older implementation; confirm the
// same limitation applies here.
const float* pattern_f = reinterpret_cast<const float*>(pattern);
fill_value[0] = LinearToStandardRGB(pattern_f[0]);
fill_value[1] = LinearToStandardRGB(pattern_f[1]);
fill_value[2] = LinearToStandardRGB(pattern_f[2]);
// The fourth component is passed through unconverted.
fill_value[3] = pattern_f[3];
new_pattern = fill_value;
ImageProperty image_prop = image_lut_.MapFormat(image.desc.format, image.desc.geometry);
word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image_view->srd[1]);
num_format_original = word1->bits.FORMAT;
word1->bits.FORMAT = GetCombinedFormat(image_prop.data_format, TYPE_UNORM);
} break;
default:
break;
}
hsa_status_t status =
ext_image::ImageRuntime::instance()->blit_kernel().FillImage(
blit_queue_, blit_code_catalog_, *image_view, new_pattern, region);
// Revert back original configuration.
if (word3_buff != NULL) {
word3_buff->bits.DST_SEL_W = dst_sel_w_original;
}
if (word3_image != NULL) {
word3_image->bits.DST_SEL_W = dst_sel_w_original;
}
if (word1 != NULL) {
word1->bits.FORMAT = num_format_original;
}
return status;
}
} // namespace amd
+53
Ver Arquivo
@@ -0,0 +1,53 @@
#ifndef EXT_IMAGE_IMAGE_MANAGER_NV_H_
#define EXT_IMAGE_IMAGE_MANAGER_NV_H_
#include "addrlib/inc/addrinterface.h"
#include "image_manager_kv.h"
namespace amd {
/// @brief Image manager specialization built on top of ImageManagerKv.
/// It overrides the SRD population, size/alignment calculation and fill
/// entry points, and adds an addrlib (ADDR2) based surface-info helper.
class ImageManagerNv : public ImageManagerKv {
public:
ImageManagerNv();
virtual ~ImageManagerNv();
/// @brief Calculate the size and alignment of the backing storage of an
/// image.
virtual hsa_status_t CalculateImageSizeAndAlignment(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
size_t image_data_row_pitch, size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info) const;
/// @brief Fill image structure with device specific image object.
virtual hsa_status_t PopulateImageSrd(Image& image) const;
/// @brief Fill image structure with device specific image object using the given format.
virtual hsa_status_t PopulateImageSrd(Image& image, const metadata_amd_t* desc) const;
/// @brief Modify device specific image object according to the specified
/// new format.
virtual hsa_status_t ModifyImageSrd(Image& image,
hsa_ext_image_format_t& new_format) const;
/// @brief Fill sampler structure with device specific sampler object.
virtual hsa_status_t PopulateSamplerSrd(Sampler& sampler) const;
/// @brief Fill image backing storage using agent copy.
virtual hsa_status_t FillImage(const Image& image, const void* pattern,
const hsa_ext_image_region_t& region);
protected:
/// @brief Query addrlib for the preferred swizzle mode and surface layout
/// of an image; the layout is written to @p out and the swizzle mode is
/// returned ((uint32_t)(-1) on failure).
uint32_t GetAddrlibSurfaceInfoNv(hsa_agent_t component,
const hsa_ext_image_descriptor_t& desc,
Image::TileMode tileMode,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const;
/// @brief Presumably reports whether @p address refers to device-local
/// memory — TODO confirm against the definition.
bool IsLocalMemory(const void* address) const;
private:
DISALLOW_COPY_AND_ASSIGN(ImageManagerNv);
};
}  // namespace amd
#endif // EXT_IMAGE_IMAGE_MANAGER_NV_H_
+543
Ver Arquivo
@@ -0,0 +1,543 @@
#define NOMINMAX
#include "image_runtime.h"
#include <assert.h>
#include <climits>
#include <mutex>
#include "resource.h"
#include "image_manager_kv.h"
#include "image_manager_ai.h"
#include "image_manager_nv.h"
#include "device_info.h"
// Per library unload callback function.
extern "C" void (*UnloadCallback)();
namespace ext_image {
// Singleton storage: the published instance pointer (NULL until
// CreateSingleton succeeds) and the mutex that serializes its creation.
std::atomic<ImageRuntime*> ImageRuntime::instance_(NULL);
std::mutex ImageRuntime::instance_mutex_;
/// @brief Memory-pool iteration callback that looks for a kernarg pool.
///
/// A pool matches when it belongs to the global segment and has the
/// KERNARG_INIT global flag set.
///
/// @param pool Pool currently visited by the iteration.
/// @param data Pointer to an hsa_amd_memory_pool_t that receives the match.
/// @return HSA_STATUS_INFO_BREAK when the pool matches (stops the iteration),
/// HSA_STATUS_SUCCESS to continue, HSA_STATUS_ERROR_INVALID_ARGUMENT when
/// @p data is NULL, or the error reported by a failed pool query.
hsa_status_t FindKernelArgPool(hsa_amd_memory_pool_t pool, void* data) {
  if (NULL == data) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_amd_segment_t segment;
  hsa_status_t err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
  assert(err == HSA_STATUS_SUCCESS);
  // asserts vanish in release builds; never read `segment` after a failure.
  if (err != HSA_STATUS_SUCCESS) {
    return err;
  }

  uint32_t flag = 0;
  err = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
  assert(err == HSA_STATUS_SUCCESS);
  if (err != HSA_STATUS_SUCCESS) {
    return err;
  }

  // Test the bit itself rather than comparing the masked value against 1, so
  // the check does not depend on the numeric value of the flag.
  if (HSA_AMD_SEGMENT_GLOBAL == segment &&
      (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & flag) != 0) {
    *(reinterpret_cast<hsa_amd_memory_pool_t*>(data)) = pool;
    // Found the kernarg pool, stop the iteration.
    return HSA_STATUS_INFO_BREAK;
  }

  return HSA_STATUS_SUCCESS;
}
/// @brief Agent iteration callback that prepares the runtime for one agent.
///
/// GPU agents get an image manager chosen by the major version derived from
/// the chip id (>= 10: Nv, >= 9: Ai, otherwise Kv), which is initialized and
/// stored in the runtime keyed by the agent handle. CPU agents contribute the
/// second entry of their cache-size query and the kernarg memory pool.
///
/// @param agent Agent currently visited by the iteration.
/// @param data The ImageRuntime being populated.
/// @return HSA_STATUS_SUCCESS to continue the iteration, or an error status
/// that aborts it.
hsa_status_t ImageRuntime::CreateImageManager(hsa_agent_t agent, void* data) {
  ImageRuntime* runtime = reinterpret_cast<ImageRuntime*>(data);

  hsa_device_type_t hsa_device_type;
  hsa_status_t hsa_error_code =
      hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &hsa_device_type);
  if (hsa_error_code != HSA_STATUS_SUCCESS) {
    return hsa_error_code;
  }

  if (hsa_device_type == HSA_DEVICE_TYPE_GPU) {
    uint32_t chip_id = 0;
    hsa_error_code = GetGPUAsicID(agent, &chip_id);
    // Without a valid chip id the manager type cannot be selected; do not
    // continue with an indeterminate value.
    if (hsa_error_code != HSA_STATUS_SUCCESS) {
      return hsa_error_code;
    }

    const uint32_t major_ver = MajorVerFromDevID(chip_id);

    amd::ImageManager* image_manager;
    if (major_ver >= 10) {
      image_manager = new amd::ImageManagerNv();
    } else if (major_ver >= 9) {
      image_manager = new amd::ImageManagerAi();
    } else {
      image_manager = new amd::ImageManagerKv();
    }

    hsa_error_code = image_manager->Initialize(agent);
    if (hsa_error_code != HSA_STATUS_SUCCESS) {
      delete image_manager;
      return hsa_error_code;
    }

    runtime->image_managers_[agent.handle] = image_manager;
  } else if (hsa_device_type == HSA_DEVICE_TYPE_CPU) {
    uint32_t caches[4] = {0};
    hsa_error_code =
        hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, caches);
    if (hsa_error_code != HSA_STATUS_SUCCESS) {
      return hsa_error_code;
    }
    // Second entry of the cache-size array is recorded as the L2 size.
    runtime->cpu_l2_cache_size_ = caches[1];

    // The pool callback returns INFO_BREAK once it found a kernarg pool; any
    // other outcome means no usable pool exists on this CPU agent.
    hsa_error_code = hsa_amd_agent_iterate_memory_pools(
        agent, FindKernelArgPool, &runtime->kernarg_pool_);
    if (hsa_error_code != HSA_STATUS_INFO_BREAK) {
      return static_cast<hsa_status_t>(HSA_STATUS_ERROR_INVALID_MEMORY_POOL);
    }
  }

  return HSA_STATUS_SUCCESS;
}
/// @brief Return the ImageRuntime singleton, creating it on first use.
///
/// Creation is serialized by instance_mutex_. When creation succeeds the
/// library unload callback is pointed at DestroySingleton.
///
/// @return The singleton, or NULL if it could not be created.
ImageRuntime* ImageRuntime::instance() {
  // Fast path: the singleton has already been published.
  ImageRuntime* runtime = instance_.load(std::memory_order_acquire);
  if (runtime != NULL) {
    return runtime;
  }

  // Slow path: only one thread at a time may attempt the initialization.
  std::lock_guard<std::mutex> guard(instance_mutex_);

  // Another thread may have finished the creation while we were waiting.
  runtime = instance_.load(std::memory_order_relaxed);
  if (runtime == NULL) {
    runtime = CreateSingleton();
    if (runtime != NULL) {
      UnloadCallback = &ext_image::ImageRuntime::DestroySingleton;
    }
  }

  return runtime;
}
/// @brief Build, initialize and publish the singleton.
///
/// Initializes the blit kernel, then creates the per-agent image managers.
/// If either step fails the partially built runtime is cleaned up and
/// deleted.
///
/// @return The published instance, or NULL on failure.
ImageRuntime* ImageRuntime::CreateSingleton() {
  ImageRuntime* runtime = new ImageRuntime();

  // Short-circuit: agents are only iterated when the blit kernel is ready.
  const bool initialized =
      (HSA_STATUS_SUCCESS == runtime->blit_kernel_.Initialize()) &&
      (HSA_STATUS_SUCCESS == hsa_iterate_agents(CreateImageManager, runtime));

  if (!initialized) {
    runtime->Cleanup();
    delete runtime;
    return NULL;
  }

  assert(runtime->image_managers_.size() != 0);
  // Note: cpu_l2_cache_size_ is intentionally not asserted to be non-zero.

  instance_.store(runtime, std::memory_order_release);
  return runtime;
}
void ImageRuntime::DestroySingleton() {
ImageRuntime* instance = instance_.load(std::memory_order_acquire);
if (instance == NULL) {
return;
}
instance->Cleanup();
instance_.store(NULL, std::memory_order_release);
delete instance;
}
/// @brief Answer one of the HSA_EXT_AGENT_INFO_IMAGE_*_MAX_ELEMENTS /
/// ARRAY_MAX_LAYERS agent queries.
///
/// @param component Agent being queried.
/// @param attribute Image-extension agent attribute.
/// @param value Output buffer of uint32_t: one entry for the 1D/1DA/1DB and
/// array-layer queries, two (width, height) for the 2D family, three
/// (width, height, depth) for 3D.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT for an unhandled attribute, the
/// error of the device-type query if it fails, otherwise HSA_STATUS_SUCCESS.
/// Non-GPU agents report zeros.
hsa_status_t ImageRuntime::GetImageInfoMaxDimension(hsa_agent_t component,
                                                    hsa_agent_info_t attribute,
                                                    void* value) {
  // Number of uint32_t entries written to the caller's buffer.
  uint32_t out_count = 1;
  hsa_ext_image_geometry_t geometry;

  const size_t image_attribute = static_cast<size_t>(attribute);
  switch (image_attribute) {
    // Single-value queries.
    case HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_1D;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_1DA;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_1DB;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
      break;

    // Two-value queries.
    case HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
      out_count = 2;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
      out_count = 2;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_2DDEPTH;
      out_count = 2;
      break;
    case HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_2DADEPTH;
      out_count = 2;
      break;

    // Three-value query.
    case HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS:
      geometry = HSA_EXT_IMAGE_GEOMETRY_3D;
      out_count = 3;
      break;

    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  hsa_device_type_t device_type;
  const hsa_status_t status =
      hsa_agent_get_info(component, HSA_AGENT_INFO_DEVICE, &device_type);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // extent[] holds width, height, depth in that order.
  uint32_t extent[3] = {0, 0, 0};
  uint32_t layers = 0;

  // Image is only supported on a GPU device; other agents keep the zeros.
  if (device_type == HSA_DEVICE_TYPE_GPU) {
    image_manager(component)->GetImageInfoMaxDimension(
        component, geometry, extent[0], extent[1], extent[2], layers);
  }

  uint32_t* out = static_cast<uint32_t*>(value);
  if (out_count == 1) {
    const bool wants_layers =
        (image_attribute == HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS);
    out[0] = wants_layers ? layers : extent[0];
  } else {
    for (uint32_t i = 0; i < out_count; ++i) {
      out[i] = extent[i];
    }
  }

  return HSA_STATUS_SUCCESS;
}
/// @brief Report the capability mask of a format/geometry pair on an agent.
///
/// @param component Agent being queried.
/// @param format Image format.
/// @param geometry Image geometry.
/// @param capability Receives the capability mask; 0 for non-GPU agents.
/// @return The error of the device-type query if it fails, otherwise
/// HSA_STATUS_SUCCESS.
hsa_status_t ImageRuntime::GetImageCapability(
    hsa_agent_t component, const hsa_ext_image_format_t& format,
    hsa_ext_image_geometry_t geometry, uint32_t& capability) {
  hsa_device_type_t device_type;
  const hsa_status_t status =
      hsa_agent_get_info(component, HSA_AGENT_INFO_DEVICE, &device_type);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // Image is only supported on a GPU device.
  if (device_type != HSA_DEVICE_TYPE_GPU) {
    capability = 0;
    return HSA_STATUS_SUCCESS;
  }

  capability =
      image_manager(component)->GetImageProperty(component, format, geometry).cap;
  return HSA_STATUS_SUCCESS;
}
/// @brief Validate an image descriptor and compute the size and alignment of
/// its backing storage.
///
/// @param component Agent the image is intended for.
/// @param desc Image descriptor to validate.
/// @param image_data_layout Requested data layout.
/// @param image_data_row_pitch Row pitch forwarded to the image manager.
/// @param image_data_slice_pitch Slice pitch forwarded to the image manager.
/// @param image_info Zeroed on entry; filled in by the image manager.
/// @return HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the format and
/// geometry have no capability on the agent,
/// HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED when any dimension exceeds the
/// agent limit, the capability query error if that query fails, otherwise the
/// image manager's result.
hsa_status_t ImageRuntime::GetImageSizeAndAlignment(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t& image_info) {
  image_info.size = 0;
  image_info.alignment = 0;

  // Step 1: the format/geometry combination must be supported.
  uint32_t capability = 0;
  const hsa_status_t status =
      GetImageCapability(component, desc.format, desc.geometry, capability);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }
  if (capability == 0) {
    return static_cast<hsa_status_t>(
        HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  }

  // Step 2: every dimension must fit the agent limits for this geometry.
  amd::ImageManager* manager = image_manager(component);

  uint32_t limit_width = 0;
  uint32_t limit_height = 0;
  uint32_t limit_depth = 0;
  uint32_t limit_layers = 0;
  manager->GetImageInfoMaxDimension(component, desc.geometry, limit_width,
                                    limit_height, limit_depth, limit_layers);

  const bool fits = desc.width <= limit_width &&
                    desc.height <= limit_height &&
                    desc.depth <= limit_depth &&
                    desc.array_size <= limit_layers;
  if (!fits) {
    return static_cast<hsa_status_t>(
        HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED);
  }

  // Step 3: let the device-specific manager compute the storage needs.
  return manager->CalculateImageSizeAndAlignment(
      component, desc, image_data_layout, image_data_row_pitch,
      image_data_slice_pitch, image_info);
}
/// @brief Create a device image object over caller-provided storage and
/// return its handle.
///
/// @param component Agent the image is created for.
/// @param image_descriptor Format, geometry and dimensions of the image.
/// @param image_data Backing storage; must be non-NULL and satisfy the
/// alignment reported by GetImageSizeAndAlignment.
/// @param access_permission Access permission recorded on the image.
/// @param image_data_layout Requested layout; LINEAR forces a linear image.
/// @param image_data_row_pitch Row pitch recorded on the image.
/// @param image_data_slice_pitch Slice pitch recorded on the image.
/// @param image_handle Receives the handle; set to 0 on every failure path.
/// @return HSA_STATUS_SUCCESS, a validation error from
/// GetImageSizeAndAlignment, HSA_STATUS_ERROR_INVALID_ARGUMENT for misaligned
/// storage, HSA_STATUS_ERROR_OUT_OF_RESOURCES if the image object cannot be
/// allocated, or the error of the profile query / SRD population.
hsa_status_t ImageRuntime::CreateImageHandle(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& image_descriptor,
    const void* image_data, const hsa_access_permission_t access_permission,
    hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch,
    size_t image_data_slice_pitch,
    hsa_ext_image_t& image_handle) {
  image_handle.handle = 0;

  assert(image_data != NULL);

  // Validate image dimension.
  hsa_ext_image_data_info_t image_info = {0};
  hsa_status_t status =
      GetImageSizeAndAlignment(component, image_descriptor,
                               image_data_layout, image_data_row_pitch,
                               image_data_slice_pitch, image_info);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // Validate image address alignment.
  if (!IsMultipleOf(reinterpret_cast<size_t>(image_data),
                    image_info.alignment)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Query the profile before allocating anything so that a failure needs no
  // cleanup and `profile` is never read uninitialized.
  hsa_profile_t profile;
  status = hsa_agent_get_info(component, HSA_AGENT_INFO_PROFILE, &profile);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  amd::Image* image = amd::Image::Create(component);
  if (image == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  image->component = component;
  image->desc = image_descriptor;
  image->permission = access_permission;
  image->data = const_cast<void*>(image_data);
  image->row_pitch = image_data_row_pitch;
  image->slice_pitch = image_data_slice_pitch;

  if (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR) {
    image->tile_mode = amd::Image::TileMode::LINEAR;
  } else {
    // Opaque layout: tiled only on base-profile agents and never for 1DB.
    amd::Image::TileMode tileMode =
        (profile == HSA_PROFILE_BASE &&
         image_descriptor.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)
            ? amd::Image::TileMode::TILED
            : amd::Image::TileMode::LINEAR;
    image->tile_mode = tileMode;
  }

  // Do not hand out a handle whose SRD could not be populated.
  status = image_manager(component)->PopulateImageSrd(*image);
  if (status != HSA_STATUS_SUCCESS) {
    amd::Image::Destroy(image);
    return status;
  }

  image_handle.handle = image->Convert();

  return HSA_STATUS_SUCCESS;
}
/// @brief Create a device image object from a vendor-specific layout
/// descriptor and return its handle.
///
/// @param component Agent the image is created for.
/// @param image_descriptor Format, geometry and dimensions of the image.
/// @param image_layout Vendor layout descriptor; must have version 1 and a
/// deviceID equal to (0x1002 << 16 | chip id of @p component).
/// @param image_data Backing storage; must be a multiple of 256.
/// @param access_permission Access permission recorded on the image.
/// @param image_handle Receives the handle; set to 0 on every failure path.
/// @return HSA_STATUS_SUCCESS, HSA_STATUS_ERROR_INVALID_ARGUMENT for a NULL
/// layout, HSA_STATUS_ERROR_INVALID_ALLOCATION for misaligned storage,
/// HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED for a version/device
/// mismatch, HSA_STATUS_ERROR_OUT_OF_RESOURCES if the image object cannot be
/// allocated, or the error of the chip-id query / SRD population.
hsa_status_t ImageRuntime::CreateImageHandleWithLayout(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& image_descriptor,
    const hsa_amd_image_descriptor_t* image_layout,
    const void* image_data, const hsa_access_permission_t access_permission,
    hsa_ext_image_t& image_handle)
{
  // Same contract as CreateImageHandle: no stale handle on failure.
  image_handle.handle = 0;

  if (image_layout == NULL)
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  if (!IsMultipleOf(image_data, 256))
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;

  if (image_layout->version != 1)
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;

  // The layout must have been produced for this exact device. A failed query
  // must not fall through with an indeterminate id.
  uint32_t id = 0;
  hsa_status_t err = hsa_agent_get_info(
      component, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CHIP_ID, &id);
  if (err != HSA_STATUS_SUCCESS)
    return err;

  if (image_layout->deviceID != (0x1002 << 16 | id))
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;

  const amd::metadata_amd_t* desc =
      reinterpret_cast<const amd::metadata_amd_t*>(image_layout);

  amd::Image* image = amd::Image::Create(component);
  if (image == NULL)
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  image->component = component;
  image->desc = image_descriptor;
  image->permission = access_permission;
  image->data = const_cast<void*>(image_data);
  image->tile_mode = amd::Image::TILED;

  err = image_manager(component)->PopulateImageSrd(*image, desc);
  if (err != HSA_STATUS_SUCCESS) {
    amd::Image::Destroy(image);
    return err;
  }

  image_handle.handle = image->Convert();

  return HSA_STATUS_SUCCESS;
}
/// @brief Destroy the image object behind @p image_handle.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT when the handle converts to a
/// NULL image, otherwise HSA_STATUS_SUCCESS.
hsa_status_t ImageRuntime::DestroyImageHandle(
    const hsa_ext_image_t& image_handle) {
  const amd::Image* victim = amd::Image::Convert(image_handle.handle);
  if (victim != NULL) {
    amd::Image::Destroy(const_cast<amd::Image*>(victim));
    return HSA_STATUS_SUCCESS;
  }
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
/// @brief Copy linear memory into a region of an image by delegating to the
/// image manager of the image's agent.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT when the handle converts to a
/// NULL image, otherwise the image manager's result.
hsa_status_t ImageRuntime::CopyBufferToImage(
    const void* src_memory, size_t src_row_pitch, size_t src_slice_pitch,
    const hsa_ext_image_t& dst_image_handle,
    const hsa_ext_image_region_t& image_region) {
  const amd::Image* target = amd::Image::Convert(dst_image_handle.handle);
  if (target != NULL) {
    return image_manager(target->component)
        ->CopyBufferToImage(src_memory, src_row_pitch, src_slice_pitch,
                            *target, image_region);
  }
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
/// @brief Copy a region of an image into linear memory by delegating to the
/// image manager of the image's agent.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT when the handle converts to a
/// NULL image, otherwise the image manager's result.
hsa_status_t ImageRuntime::CopyImageToBuffer(
    const hsa_ext_image_t& src_image_handle, void* dst_memory,
    size_t dst_row_pitch, size_t dst_slice_pitch,
    const hsa_ext_image_region_t& image_region) {
  const amd::Image* source = amd::Image::Convert(src_image_handle.handle);
  if (source != NULL) {
    return image_manager(source->component)
        ->CopyImageToBuffer(*source, dst_memory, dst_row_pitch,
                            dst_slice_pitch, image_region);
  }
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
/// @brief Copy a region from one image to another image on the same agent.
///
/// Note the argument order handed to the image manager: destination image
/// and origin come before source image and origin.
///
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT when either handle converts to
/// a NULL image or the two images belong to different agents, otherwise the
/// image manager's result.
hsa_status_t ImageRuntime::CopyImage(const hsa_ext_image_t& src_image_handle,
                                     const hsa_ext_image_t& dst_image_handle,
                                     const hsa_dim3_t& src_origin,
                                     const hsa_dim3_t& dst_origin,
                                     const hsa_dim3_t size) {
  const amd::Image* from = amd::Image::Convert(src_image_handle.handle);
  const amd::Image* to = amd::Image::Convert(dst_image_handle.handle);

  const bool usable = (from != NULL) && (to != NULL) &&
                      (from->component.handle == to->component.handle);
  if (!usable) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return image_manager(from->component)
      ->CopyImage(*to, *from, dst_origin, src_origin, size);
}
/// @brief Fill a region of an image with a pattern by delegating to the
/// image manager of the image's agent.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT when the handle converts to a
/// NULL image, otherwise the image manager's result.
hsa_status_t ImageRuntime::FillImage(
    const hsa_ext_image_t& image_handle, const void* pattern,
    const hsa_ext_image_region_t& image_region) {
  const amd::Image* target = amd::Image::Convert(image_handle.handle);
  if (target != NULL) {
    return image_manager(target->component)
        ->FillImage(*target, pattern, image_region);
  }
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
/// @brief Create a device sampler object and return its handle.
///
/// @param component Agent the sampler is created for; must be a GPU.
/// @param sampler_descriptor Addressing, filter and coordinate modes.
/// @param sampler_handle Receives the handle; set to 0 on every failure path.
/// @return HSA_STATUS_SUCCESS, the error of the device-type query,
/// HSA_STATUS_ERROR_INVALID_AGENT for a non-GPU agent,
/// HSA_STATUS_ERROR_OUT_OF_RESOURCES if the sampler object cannot be
/// allocated, or the error reported while populating the sampler SRD (for
/// example an unsupported addressing or filter mode).
hsa_status_t ImageRuntime::CreateSamplerHandle(
    hsa_agent_t component,
    const hsa_ext_sampler_descriptor_t& sampler_descriptor,
    hsa_ext_sampler_t& sampler_handle) {
  sampler_handle.handle = 0;

  hsa_device_type_t device_type;
  hsa_status_t status =
      hsa_agent_get_info(component, HSA_AGENT_INFO_DEVICE, &device_type);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // Sampler is only supported on a GPU device.
  if (device_type != HSA_DEVICE_TYPE_GPU) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  amd::Sampler* sampler = amd::Sampler::Create(component);
  if (sampler == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  sampler->component = component;
  sampler->desc = sampler_descriptor;

  // Populating the SRD validates the descriptor; do not hand out a handle to
  // a sampler whose SRD was never written.
  status = image_manager(component)->PopulateSamplerSrd(*sampler);
  if (status != HSA_STATUS_SUCCESS) {
    amd::Sampler::Destroy(sampler);
    return status;
  }

  sampler_handle.handle = sampler->Convert();

  return HSA_STATUS_SUCCESS;
}
/// @brief Destroy the sampler object behind @p sampler_handle.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT when the handle converts to a
/// NULL sampler, otherwise HSA_STATUS_SUCCESS.
hsa_status_t ImageRuntime::DestroySamplerHandle(
    hsa_ext_sampler_t& sampler_handle) {
  const amd::Sampler* victim = amd::Sampler::Convert(sampler_handle.handle);
  if (victim != NULL) {
    amd::Sampler::Destroy(victim);
    return HSA_STATUS_SUCCESS;
  }
  return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
// Constructs an empty runtime: no CPU L2 cache size recorded yet and a zeroed
// kernarg pool handle. Both are filled in by CreateImageManager when a CPU
// agent is visited.
ImageRuntime::ImageRuntime()
: cpu_l2_cache_size_(0), kernarg_pool_({0}) {}
// The destructor itself releases nothing; CreateSingleton and
// DestroySingleton call Cleanup() before deleting the object.
ImageRuntime::~ImageRuntime() {}
/// @brief Release everything owned by the runtime: clean up and delete every
/// image manager, then clean up the blit kernel.
///
/// The manager map is emptied afterwards so no dangling pointers remain and a
/// repeated call does not delete the managers a second time.
void ImageRuntime::Cleanup() {
  for (auto& entry : image_managers_) {
    entry.second->Cleanup();
    delete entry.second;
  }
  image_managers_.clear();

  blit_kernel_.Cleanup();
}
} // namespace
@@ -0,0 +1,147 @@
// AMD HSA image extension interface file.
#ifndef HSA_RUNTIME_EXT_IMAGE_IMAGE_RUNTIME_H
#define HSA_RUNTIME_EXT_IMAGE_IMAGE_RUNTIME_H
#include <atomic>
#include <map>
#include <mutex>
#include "inc/hsa.h"
#undef HSA_API
#define HSA_API
#include "inc/hsa_ext_image.h"
#include "inc/hsa_ext_amd.h"
#include "blit_kernel.h"
#include "image_manager.h"
#include "util.h"
namespace ext_image {
/// @brief Process-wide entry point of the image extension. It owns one
/// image manager per GPU agent plus the blit kernel, and routes every image
/// and sampler request to the manager of the agent involved.
class ImageRuntime {
public:
/// @brief Getter for the ImageRuntime singleton object.
static ImageRuntime* instance();
/// @brief Destroy singleton object.
static void DestroySingleton();
/// @brief Retrieve maximum size of width, height, depth, array size in pixels
/// for a particular geometry on a component.
hsa_status_t GetImageInfoMaxDimension(hsa_agent_t component,
hsa_agent_info_t attribute,
void* value);
/// @brief Query image support with particular format and geometry.
hsa_status_t GetImageCapability(hsa_agent_t component,
const hsa_ext_image_format_t& format,
hsa_ext_image_geometry_t geometry,
uint32_t& capability);
/// @brief Query the size and address alignment of the backing storage of
/// the image.
hsa_status_t GetImageSizeAndAlignment(hsa_agent_t component,
const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info);
/// @brief Create device image object and return its handle.
hsa_status_t CreateImageHandle(
hsa_agent_t component, const hsa_ext_image_descriptor_t& image_descriptor,
const void* image_data, const hsa_access_permission_t access_permission,
hsa_ext_image_data_layout_t image_data_layout,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
hsa_ext_image_t& image);
/// @brief Create device image object from a vendor-specific layout
/// descriptor and return its handle.
hsa_status_t CreateImageHandleWithLayout(
hsa_agent_t component, const hsa_ext_image_descriptor_t& image_descriptor,
const hsa_amd_image_descriptor_t* image_layout,
const void* image_data, const hsa_access_permission_t access_permission,
hsa_ext_image_t& image);
/// @brief Destroy the device image object referenced by the handle.
hsa_status_t DestroyImageHandle(const hsa_ext_image_t& image);
/// @brief Copy the content of a linear memory to an image object.
hsa_status_t CopyBufferToImage(const void* src_memory, size_t src_row_pitch,
size_t src_slice_pitch,
const hsa_ext_image_t& dst_image,
const hsa_ext_image_region_t& image_region);
/// @brief Copy the content of an image object to a linear memory.
hsa_status_t CopyImageToBuffer(const hsa_ext_image_t& src_image,
void* dst_memory, size_t dst_row_pitch,
size_t dst_slice_pitch,
const hsa_ext_image_region_t& image_region);
/// @brief Copy the content of an image object to another image object.
hsa_status_t CopyImage(const hsa_ext_image_t& src_image,
const hsa_ext_image_t& dst_image,
const hsa_dim3_t& src_origin,
const hsa_dim3_t& dst_origin, const hsa_dim3_t size);
/// @brief Fill the content of an image object with a pattern.
hsa_status_t FillImage(const hsa_ext_image_t& image, const void* pattern,
const hsa_ext_image_region_t& image_region);
/// @brief Create device sampler object and return its handle.
hsa_status_t CreateSamplerHandle(
hsa_agent_t component,
const hsa_ext_sampler_descriptor_t& sampler_descriptor,
hsa_ext_sampler_t& sampler);
/// @brief Destroy the device sampler object referenced by the handle.
hsa_status_t DestroySamplerHandle(hsa_ext_sampler_t& sampler);
/// @brief Look up the image manager registered for @p agent.
/// @return The manager, or NULL when none is registered for the agent handle.
amd::ImageManager* image_manager(hsa_agent_t agent) {
std::map<uint64_t, amd::ImageManager*>::iterator it =
image_managers_.find(agent.handle);
return (it != image_managers_.end()) ? it->second : NULL;
}
/// @brief Accessor for the blit kernel owned by the runtime.
amd::BlitKernel& blit_kernel() { return blit_kernel_; }
/// @brief Cache size recorded from the CPU agent (0 if none was recorded).
size_t cpu_l2_cache_size() const { return cpu_l2_cache_size_; }
/// @brief Kernarg memory pool recorded from the CPU agent.
hsa_amd_memory_pool_t kernarg_pool() const {
return kernarg_pool_;
}
private:
/// @brief Initialize singleton object, must be called once.
static ImageRuntime* CreateSingleton();
/// @brief Agent iteration callback; @p data is the ImageRuntime to populate.
static hsa_status_t CreateImageManager(hsa_agent_t agent, void* data);
ImageRuntime();
~ImageRuntime();
/// @brief Clean up and delete the image managers and clean up the blit
/// kernel.
void Cleanup();
/// Pointer to singleton object.
static std::atomic<ImageRuntime*> instance_;
/// Serializes creation of the singleton.
static std::mutex instance_mutex_;
/// @brief Contains mapping of agent and its corresponding ::ImageManager
/// object.
std::map<uint64_t, amd::ImageManager*> image_managers_;
/// @brief Manages kernel for accessing images.
amd::BlitKernel blit_kernel_;
/// Cache size taken from the CPU agent's cache-size query.
size_t cpu_l2_cache_size_;
/// Memory pool found on the CPU agent with the kernarg flag set.
hsa_amd_memory_pool_t kernarg_pool_;
DISALLOW_COPY_AND_ASSIGN(ImageRuntime);
};
} // namespace
#endif // HSA_RUNTIME_EXT_IMAGE_IMAGE_RUNTIME_H
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
@@ -0,0 +1,155 @@
#ifndef HSA_RUNTIME_EXT_IMAGE_RESOURCE_H
#define HSA_RUNTIME_EXT_IMAGE_RESOURCE_H
#include <stdint.h>
#include <cstring>
#include "inc/hsa.h"
#include "inc/hsa_ext_image.h"
#include "util.h"
// Size (in 32-bit words) and alignment of the vendor image object (the srd
// array inside amd::Image).
#define HSA_IMAGE_OBJECT_SIZE_DWORD 12
#define HSA_IMAGE_OBJECT_ALIGNMENT 16
// Size (in 32-bit words) and alignment of the vendor sampler object (the srd
// array inside amd::Sampler).
#define HSA_SAMPLER_OBJECT_SIZE_DWORD 8
#define HSA_SAMPLER_OBJECT_ALIGNMENT 16
// Presumably the number of image geometries, channel orders and channel
// types covered by the format lookup tables — TODO confirm against the users
// of these constants.
#define GEOMETRY_COUNT 8
#define ORDER_COUNT 20
#define TYPE_COUNT 16
// Shorthand capability masks: read-only; read-only or write-only; and all of
// read-only, write-only and read-write.
#define RO HSA_EXT_IMAGE_CAPABILITY_READ_ONLY
#define ROWO \
(HSA_EXT_IMAGE_CAPABILITY_READ_ONLY | HSA_EXT_IMAGE_CAPABILITY_WRITE_ONLY)
#define RW \
(HSA_EXT_IMAGE_CAPABILITY_READ_ONLY | HSA_EXT_IMAGE_CAPABILITY_WRITE_ONLY | \
HSA_EXT_IMAGE_CAPABILITY_READ_WRITE)
namespace amd {
/// @brief Vendor image layout metadata: a version word, a vendor/device
/// identification word, eight descriptor words and an optional trailing list
/// of mip level offsets.
/// NOTE(review): the runtime reinterprets an hsa_amd_image_descriptor_t as
/// this struct, so the field layout presumably has to match that API type —
/// confirm before reordering or resizing fields.
typedef struct metadata_amd_s {
uint32_t version; // Must be 1
uint32_t vendorID; // AMD | CZ
uint32_t words[8];
// Zero-length trailing array: entries, if any, follow the struct in memory.
uint32_t mip_offsets[0]; //Mip level offset bits [39:8] for each level (if any)
} metadata_amd_t;
/// @brief Structure to represent image access component.
/// Holds one 8-bit selector per destination channel (x, y, z, w).
typedef struct Swizzle {
uint8_t x;
uint8_t y;
uint8_t z;
uint8_t w;
} Swizzle;
/// @brief Structure to contain the property of an image with a particular
/// format and geometry.
/// A `cap` of 0 is treated by the runtime as "format not supported".
typedef struct ImageProperty {
uint8_t cap; // hsa_ext_image_format_capability_t mask.
uint8_t element_size; // size per pixel in bytes.
uint8_t data_format; // device specific channel ordering.
uint8_t data_type; // device specific channel type.
} ImageProperty;
/// @brief Structure to represent an HSA image object.
/// The HSA handle of an image is the address of its `srd` member; the static
/// Convert() recovers the Image from that address.
typedef struct Image {
private:
// Construction and destruction are private: instances are obtained through
// Create() and released through Destroy().
Image() {
component.handle = 0;
permission = HSA_ACCESS_PERMISSION_RO;
data = NULL;
std::memset(srd, 0, sizeof(srd));
std::memset(&desc, 0, sizeof(desc));
row_pitch = slice_pitch = 0;
tile_mode = LINEAR;
}
~Image() {}
public:
// Memory layout of the image data.
typedef enum TileMode {
LINEAR,
TILED
} TileMode;
/// @brief Create an Image.
static Image* Create(hsa_agent_t agent);
/// @brief Destroy an Image.
static void Destroy(const Image* image);
/// @brief Convert from vendor representation to HSA handle.
uint64_t Convert() const { return reinterpret_cast<uint64_t>(srd); }
/// @brief Convert from HSA handle to vendor representation.
/// Subtracts the offset of `srd` to get back to the start of the Image.
static Image* Convert(uint64_t handle) {
return reinterpret_cast<Image*>(handle - offsetof(Image, srd));
}
// Vendor specific image object.
__ALIGNED__(
HSA_IMAGE_OBJECT_ALIGNMENT) uint32_t srd[HSA_IMAGE_OBJECT_SIZE_DWORD];
// HSA component of the image object.
hsa_agent_t component;
// HSA image descriptor of the image object.
hsa_ext_image_descriptor_t desc;
// HSA image access permission of the image object.
hsa_access_permission_t permission;
// Backing storage of the image object.
void* data;
// Device specific row pitch of the image object in size.
size_t row_pitch;
// Device specific slice pitch of the image object in size.
size_t slice_pitch;
// Device specific tile mode
TileMode tile_mode;
} Image;
/// @brief Structure to represent an HSA sampler object.
/// The HSA handle of a sampler is the address of its `srd` member; the static
/// Convert() recovers the Sampler from that address.
typedef struct Sampler {
 private:
  // Construction and destruction are private: instances are obtained through
  // Create() and released through Destroy().
  Sampler() {
    component.handle = 0;
    std::memset(srd, 0, sizeof(srd));
    std::memset(&desc, 0, sizeof(desc));
  }

  ~Sampler() {}

 public:
  /// @brief Create a Sampler.
  static Sampler* Create(hsa_agent_t agent);

  /// @brief Destroy a Sampler.
  static void Destroy(const Sampler* sampler);

  /// @brief Convert from vendor representation to HSA handle.
  /// Const-qualified to match Image::Convert(); it only reads the address of
  /// `srd`, so it is usable on const samplers as well.
  uint64_t Convert() const { return reinterpret_cast<uint64_t>(srd); }

  /// @brief Convert from HSA handle to vendor representation.
  /// Subtracts the offset of `srd` to get back to the start of the Sampler.
  static Sampler* Convert(uint64_t handle) {
    return reinterpret_cast<Sampler*>(handle - offsetof(Sampler, srd));
  }

  // Vendor specific sampler object.
  __ALIGNED__(HSA_SAMPLER_OBJECT_ALIGNMENT)
  uint32_t srd[HSA_SAMPLER_OBJECT_SIZE_DWORD];

  // HSA component of the sampler object.
  hsa_agent_t component;

  // HSA sampler descriptor of the sampler object.
  hsa_ext_sampler_descriptor_t desc;
} Sampler;
} // namespace
#endif // HSA_RUNTIME_EXT_IMAGE_RESOURCE_H
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
@@ -0,0 +1,463 @@
#ifndef HSA_RUNTIME_EXT_IMAGE_RESOURCE_KV_H
#define HSA_RUNTIME_EXT_IMAGE_RESOURCE_KV_H
// The bit-field layouts below are declared once per byte order, so exactly
// one of LITTLEENDIAN_CPU / BIGENDIAN_CPU has to be defined by the build.
#if defined(LITTLEENDIAN_CPU)
#elif defined(BIGENDIAN_CPU)
#else
#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
#endif
namespace amd {
// One 32-bit word that can be viewed as named bit fields (`bitfields` /
// `bits`) or as a raw unsigned, signed or float value.
// Presumably word 0 of the buffer resource descriptor — TODO confirm against
// the hardware register documentation.
union SQ_BUF_RSRC_WORD0 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int base_address : 32;
#elif defined(BIGENDIAN_CPU)
unsigned int base_address : 32;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// One 32-bit word that can be viewed as named bit fields (`bitfields` /
// `bits`) or as a raw unsigned, signed or float value. The big-endian branch
// lists the same fields in reverse order.
// Presumably word 1 of the buffer resource descriptor — TODO confirm against
// the hardware register documentation.
union SQ_BUF_RSRC_WORD1 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int base_address_hi : 16;
unsigned int stride : 14;
unsigned int cache_swizzle : 1;
unsigned int swizzle_enable : 1;
#elif defined(BIGENDIAN_CPU)
unsigned int swizzle_enable : 1;
unsigned int cache_swizzle : 1;
unsigned int stride : 14;
unsigned int base_address_hi : 16;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// Overlay of one register word: `bitfields`/`bits` expose the single 32-bit
// `num_records` field; u32_all / i32_all / f32_all alias the same storage.
// Both endianness branches are identical because the field fills the whole word.
// NOTE(review): presumably word 2 of the buffer resource descriptor - confirm.
union SQ_BUF_RSRC_WORD2 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int num_records : 32;
#elif defined(BIGENDIAN_CPU)
unsigned int num_records : 32;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// Overlay of one register word: named bit-fields (`bitfields`/`bits`) aliased
// with whole-word unsigned, signed and float views.
// The thirteen fields add up to 32 bits; the BIGENDIAN_CPU branch declares the
// same fields in reverse order.
// NOTE(review): presumably word 3 of the buffer resource descriptor (destination
// selects, formats, type) - confirm against the hardware documentation.
union SQ_BUF_RSRC_WORD3 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int dst_sel_x : 3;
unsigned int dst_sel_y : 3;
unsigned int dst_sel_z : 3;
unsigned int dst_sel_w : 3;
unsigned int num_format : 3;
unsigned int data_format : 4;
unsigned int element_size : 2;
unsigned int index_stride : 2;
unsigned int add_tid_enable : 1;
unsigned int atc : 1;
unsigned int hash_enable : 1;
unsigned int heap : 1;
unsigned int mtype : 3;
unsigned int type : 2;
#elif defined(BIGENDIAN_CPU)
unsigned int type : 2;
unsigned int mtype : 3;
unsigned int heap : 1;
unsigned int hash_enable : 1;
unsigned int atc : 1;
unsigned int add_tid_enable : 1;
unsigned int index_stride : 2;
unsigned int element_size : 2;
unsigned int data_format : 4;
unsigned int num_format : 3;
unsigned int dst_sel_w : 3;
unsigned int dst_sel_z : 3;
unsigned int dst_sel_y : 3;
unsigned int dst_sel_x : 3;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// Overlay of one register word: `bitfields`/`bits` expose the single 32-bit
// `base_address` field; u32_all / i32_all / f32_all alias the same storage.
// Both endianness branches are identical because the field fills the whole word.
// NOTE(review): presumably word 0 of the image resource descriptor - confirm.
union SQ_IMG_RSRC_WORD0 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int base_address : 32;
#elif defined(BIGENDIAN_CPU)
unsigned int base_address : 32;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// Overlay of one register word: named bit-fields (`bitfields`/`bits`) aliased
// with whole-word unsigned, signed and float views.
// Field widths are 8 + 12 + 6 + 4 + 2 = 32 bits; the BIGENDIAN_CPU branch
// declares the same fields in reverse order.
// NOTE(review): presumably word 1 of the image resource descriptor; data_format
// and num_format look like they take FMT / type enum values - confirm.
union SQ_IMG_RSRC_WORD1 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int base_address_hi : 8;
unsigned int min_lod : 12;
unsigned int data_format : 6;
unsigned int num_format : 4;
unsigned int mtype : 2;
#elif defined(BIGENDIAN_CPU)
unsigned int mtype : 2;
unsigned int num_format : 4;
unsigned int data_format : 6;
unsigned int min_lod : 12;
unsigned int base_address_hi : 8;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// Overlay of one register word: named bit-fields (`bitfields`/`bits`) aliased
// with whole-word unsigned, signed and float views.
// Field widths are 14 + 14 + 3 + 1 = 32 bits; the BIGENDIAN_CPU branch declares
// the same fields in reverse order.
// NOTE(review): presumably word 2 of the image resource descriptor; whether
// width/height are stored as size or size-1 is not visible here - confirm.
union SQ_IMG_RSRC_WORD2 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int width : 14;
unsigned int height : 14;
unsigned int perf_mod : 3;
unsigned int interlaced : 1;
#elif defined(BIGENDIAN_CPU)
unsigned int interlaced : 1;
unsigned int perf_mod : 3;
unsigned int height : 14;
unsigned int width : 14;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// Overlay of one register word: named bit-fields (`bitfields`/`bits`) aliased
// with whole-word unsigned, signed and float views.
// The eleven fields add up to 32 bits; the BIGENDIAN_CPU branch declares the
// same fields in reverse order.
// NOTE(review): presumably word 3 of the image resource descriptor; dst_sel_*
// and type look like they take SEL / SQ_RSRC_IMG_TYPE values - confirm.
union SQ_IMG_RSRC_WORD3 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int dst_sel_x : 3;
unsigned int dst_sel_y : 3;
unsigned int dst_sel_z : 3;
unsigned int dst_sel_w : 3;
unsigned int base_level : 4;
unsigned int last_level : 4;
unsigned int tiling_index : 5;
unsigned int pow2_pad : 1;
unsigned int mtype : 1;
unsigned int atc : 1;
unsigned int type : 4;
#elif defined(BIGENDIAN_CPU)
unsigned int type : 4;
unsigned int atc : 1;
unsigned int mtype : 1;
unsigned int pow2_pad : 1;
unsigned int tiling_index : 5;
unsigned int last_level : 4;
unsigned int base_level : 4;
unsigned int dst_sel_w : 3;
unsigned int dst_sel_z : 3;
unsigned int dst_sel_y : 3;
unsigned int dst_sel_x : 3;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// Overlay of one register word: named bit-fields (`bitfields`/`bits`) aliased
// with whole-word unsigned, signed and float views.
// depth (13) + pitch (14) + 5 unnamed padding bits = 32 bits; the BIGENDIAN_CPU
// branch declares the same fields in reverse order.
// NOTE(review): presumably word 4 of the image resource descriptor - confirm.
union SQ_IMG_RSRC_WORD4 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int depth : 13;
unsigned int pitch : 14;
unsigned int : 5;
#elif defined(BIGENDIAN_CPU)
unsigned int : 5;
unsigned int pitch : 14;
unsigned int depth : 13;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// Overlay of one register word: named bit-fields (`bitfields`/`bits`) aliased
// with whole-word unsigned, signed and float views.
// base_array (13) + last_array (13) + 6 unnamed padding bits = 32 bits; the
// BIGENDIAN_CPU branch declares the same fields in reverse order.
// NOTE(review): presumably word 5 of the image resource descriptor - confirm.
union SQ_IMG_RSRC_WORD5 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int base_array : 13;
unsigned int last_array : 13;
unsigned int : 6;
#elif defined(BIGENDIAN_CPU)
unsigned int : 6;
unsigned int last_array : 13;
unsigned int base_array : 13;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// Overlay of one register word: named bit-fields (`bitfields`/`bits`) aliased
// with whole-word unsigned, signed and float views.
// Field widths are 12 + 8 + 1 + 1 + 1 + 1 + 4 + 4 = 32 bits; the BIGENDIAN_CPU
// branch declares the same fields in reverse order.
// NOTE(review): presumably word 6 of the image resource descriptor - confirm.
union SQ_IMG_RSRC_WORD6 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int min_lod_warn : 12;
unsigned int counter_bank_id : 8;
unsigned int lod_hdw_cnt_en : 1;
unsigned int compression_en : 1;
unsigned int alpha_is_on_msb : 1;
unsigned int color_transform : 1;
unsigned int lost_alpha_bits : 4;
unsigned int lost_color_bits : 4;
#elif defined(BIGENDIAN_CPU)
unsigned int lost_color_bits : 4;
unsigned int lost_alpha_bits : 4;
unsigned int color_transform : 1;
unsigned int alpha_is_on_msb : 1;
unsigned int compression_en : 1;
unsigned int lod_hdw_cnt_en : 1;
unsigned int counter_bank_id : 8;
unsigned int min_lod_warn : 12;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// Overlay of one register word: `bitfields`/`bits` expose the single 32-bit
// `meta_data_address` field; u32_all / i32_all / f32_all alias the same storage.
// Both endianness branches are identical because the field fills the whole word.
// NOTE(review): presumably word 7 of the image resource descriptor - confirm.
union SQ_IMG_RSRC_WORD7 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int meta_data_address : 32;
#elif defined(BIGENDIAN_CPU)
unsigned int meta_data_address : 32;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// Overlay of one register word: named bit-fields (`bitfields`/`bits`) aliased
// with whole-word unsigned, signed and float views.
// The fourteen fields add up to 32 bits; the BIGENDIAN_CPU branch declares the
// same fields in reverse order.
// NOTE(review): presumably word 0 of the sampler descriptor; clamp_x/y/z look
// like they take SQ_TEX_CLAMP values - confirm.
union SQ_IMG_SAMP_WORD0 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int clamp_x : 3;
unsigned int clamp_y : 3;
unsigned int clamp_z : 3;
unsigned int max_aniso_ratio : 3;
unsigned int depth_compare_func : 3;
unsigned int force_unormalized : 1;
unsigned int aniso_threshold : 3;
unsigned int mc_coord_trunc : 1;
unsigned int force_degamma : 1;
unsigned int aniso_bias : 6;
unsigned int trunc_coord : 1;
unsigned int disable_cube_wrap : 1;
unsigned int filter_mode : 2;
unsigned int compat_mode : 1;
#elif defined(BIGENDIAN_CPU)
unsigned int compat_mode : 1;
unsigned int filter_mode : 2;
unsigned int disable_cube_wrap : 1;
unsigned int trunc_coord : 1;
unsigned int aniso_bias : 6;
unsigned int force_degamma : 1;
unsigned int mc_coord_trunc : 1;
unsigned int aniso_threshold : 3;
unsigned int force_unormalized : 1;
unsigned int depth_compare_func : 3;
unsigned int max_aniso_ratio : 3;
unsigned int clamp_z : 3;
unsigned int clamp_y : 3;
unsigned int clamp_x : 3;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// Overlay of one register word: named bit-fields (`bitfields`/`bits`) aliased
// with whole-word unsigned, signed and float views.
// Field widths are 12 + 12 + 4 + 4 = 32 bits; the BIGENDIAN_CPU branch declares
// the same fields in reverse order.
// NOTE(review): presumably word 1 of the sampler descriptor - confirm.
union SQ_IMG_SAMP_WORD1 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int min_lod : 12;
unsigned int max_lod : 12;
unsigned int perf_mip : 4;
unsigned int perf_z : 4;
#elif defined(BIGENDIAN_CPU)
unsigned int perf_z : 4;
unsigned int perf_mip : 4;
unsigned int max_lod : 12;
unsigned int min_lod : 12;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// Overlay of one register word: named bit-fields (`bitfields`/`bits`) aliased
// with whole-word unsigned, signed and float views.
// The ten fields add up to 32 bits; the BIGENDIAN_CPU branch declares the same
// fields in reverse order.
// NOTE(review): presumably word 2 of the sampler descriptor; the *_filter
// fields look like they take SQ_TEX_XY_FILTER / SQ_TEX_Z_FILTER /
// SQ_TEX_MIP_FILTER values - confirm.
union SQ_IMG_SAMP_WORD2 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int lod_bias : 14;
unsigned int lod_bias_sec : 6;
unsigned int xy_mag_filter : 2;
unsigned int xy_min_filter : 2;
unsigned int z_filter : 2;
unsigned int mip_filter : 2;
unsigned int mip_point_preclamp : 1;
unsigned int disable_lsb_ceil : 1;
unsigned int filter_prec_fix : 1;
unsigned int aniso_override_vi : 1;
#elif defined(BIGENDIAN_CPU)
unsigned int aniso_override_vi : 1;
unsigned int filter_prec_fix : 1;
unsigned int disable_lsb_ceil : 1;
unsigned int mip_point_preclamp : 1;
unsigned int mip_filter : 2;
unsigned int z_filter : 2;
unsigned int xy_min_filter : 2;
unsigned int xy_mag_filter : 2;
unsigned int lod_bias_sec : 6;
unsigned int lod_bias : 14;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// Overlay of one register word: named bit-fields (`bitfields`/`bits`) aliased
// with whole-word unsigned, signed and float views.
// border_color_ptr (12) + 18 unnamed padding bits + border_color_type (2) = 32
// bits; the BIGENDIAN_CPU branch declares the same fields in reverse order.
// NOTE(review): presumably word 3 of the sampler descriptor; border_color_type
// looks like it takes SQ_TEX_BORDER_COLOR values - confirm.
union SQ_IMG_SAMP_WORD3 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int border_color_ptr : 12;
unsigned int : 18;
unsigned int border_color_type : 2;
#elif defined(BIGENDIAN_CPU)
unsigned int border_color_type : 2;
unsigned int : 18;
unsigned int border_color_ptr : 12;
#endif
} bitfields, bits;
unsigned int u32_all;
signed int i32_all;
float f32_all;
};
// Format encodings; each enumerator name spells the per-channel bit widths.
// NOTE(review): presumably the values written to the `data_format` bit-fields
// of the resource words - confirm against the hardware documentation.
typedef enum FMT {
  FMT_INVALID = 0x00,
  FMT_8 = 0x01,
  FMT_16 = 0x02,
  FMT_8_8 = 0x03,
  FMT_32 = 0x04,
  FMT_16_16 = 0x05,
  // 0x06 and 0x07 are not assigned in this table.
  FMT_10_10_10_2 = 0x08,
  FMT_2_10_10_10 = 0x09,
  FMT_8_8_8_8 = 0x0a,
  FMT_32_32 = 0x0b,
  FMT_16_16_16_16 = 0x0c,
  FMT_32_32_32 = 0x0d,
  FMT_32_32_32_32 = 0x0e,
  // 0x0f is not assigned in this table.
  FMT_5_6_5 = 0x10,
  FMT_1_5_5_5 = 0x11,
  FMT_5_5_5_1 = 0x12,
  // 0x13 is not assigned in this table.
  FMT_8_24 = 0x14,
  FMT_24_8 = 0x15,
  FMT_X24_8_32 = 0x16,
  // 0x17 is not assigned in this table.
  FMT_RESERVED_24__SI__CI = 0x18
} FMT;
// Numeric interpretation encodings (normalized, integer, float, sRGB).
// NOTE(review): presumably the values written to the `num_format` bit-fields -
// confirm against the hardware documentation.
typedef enum type {
  TYPE_UNORM = 0x0,
  TYPE_SNORM = 0x1,
  TYPE_UINT = 0x4,
  TYPE_SINT = 0x5,
  TYPE_FLOAT = 0x7,
  TYPE_SRGB = 0x9
} type;
// Component select encodings: constant 0, constant 1, or one of X/Y/Z/W.
// NOTE(review): presumably the values written to the `dst_sel_*` bit-fields -
// confirm against the hardware documentation.
typedef enum SEL {
  SEL_0 = 0x0,
  SEL_1 = 0x1,
  SEL_X = 0x4,
  SEL_Y = 0x5,
  SEL_Z = 0x6,
  SEL_W = 0x7
} SEL;
// Image dimensionality encodings (1D/2D/3D and the 1D/2D array variants).
// NOTE(review): presumably the values written to the 4-bit `type` field of
// SQ_IMG_RSRC_WORD3 - confirm against the hardware documentation.
typedef enum SQ_RSRC_IMG_TYPE {
  SQ_RSRC_IMG_1D = 0x8,
  SQ_RSRC_IMG_2D = 0x9,
  SQ_RSRC_IMG_3D = 0xa,
  SQ_RSRC_IMG_1D_ARRAY = 0xc,
  SQ_RSRC_IMG_2D_ARRAY = 0xd
} SQ_RSRC_IMG_TYPE;
// XY filter encodings (point / bilinear, plain or anisotropic).
// NOTE(review): presumably the values written to the `xy_mag_filter` and
// `xy_min_filter` sampler bit-fields - confirm.
typedef enum SQ_TEX_XY_FILTER {
  SQ_TEX_XY_FILTER_POINT = 0,
  SQ_TEX_XY_FILTER_BILINEAR = 1,
  SQ_TEX_XY_FILTER_ANISO_POINT = 2,
  SQ_TEX_XY_FILTER_ANISO_BILINEAR = 3
} SQ_TEX_XY_FILTER;
// Z filter encodings (none / point / linear).
// NOTE(review): presumably the values written to the `z_filter` sampler
// bit-field - confirm.
typedef enum SQ_TEX_Z_FILTER {
  SQ_TEX_Z_FILTER_NONE = 0,
  SQ_TEX_Z_FILTER_POINT = 1,
  SQ_TEX_Z_FILTER_LINEAR = 2
} SQ_TEX_Z_FILTER;
// Mip filter encodings (none / point / linear, plus a `__VI`-suffixed variant).
// NOTE(review): presumably the values written to the `mip_filter` sampler
// bit-field; the `__VI` suffix looks generation-specific - confirm.
typedef enum SQ_TEX_MIP_FILTER {
  SQ_TEX_MIP_FILTER_NONE = 0,
  SQ_TEX_MIP_FILTER_POINT = 1,
  SQ_TEX_MIP_FILTER_LINEAR = 2,
  SQ_TEX_MIP_FILTER_POINT_ANISO_ADJ__VI = 3
} SQ_TEX_MIP_FILTER;
// Addressing-mode encodings (wrap, mirror and the clamp variants).
// NOTE(review): presumably the values written to the 3-bit `clamp_x/y/z`
// sampler bit-fields - confirm.
typedef enum SQ_TEX_CLAMP {
  SQ_TEX_WRAP = 0,
  SQ_TEX_MIRROR = 1,
  SQ_TEX_CLAMP_LAST_TEXEL = 2,
  SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 3,
  SQ_TEX_CLAMP_HALF_BORDER = 4,
  SQ_TEX_MIRROR_ONCE_HALF_BORDER = 5,
  SQ_TEX_CLAMP_BORDER = 6,
  SQ_TEX_MIRROR_ONCE_BORDER = 7
} SQ_TEX_CLAMP;
// Border colour encodings (three fixed colours or a register-supplied one).
// NOTE(review): presumably the values written to the 2-bit `border_color_type`
// sampler bit-field - confirm.
typedef enum SQ_TEX_BORDER_COLOR {
  SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0,
  SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 1,
  SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 2,
  SQ_TEX_BORDER_COLOR_REGISTER = 3
} SQ_TEX_BORDER_COLOR;
// Image metadata record: a version, a vendor ID, the eight image resource
// words (word0..word7) and a trailing variable-length list of mip offsets.
// Member order defines the record layout, so it must not be rearranged.
// NOTE(review): `mip_offsets[0]` is a zero-length array, which is a compiler
// extension rather than standard C++; sizeof() of this struct therefore
// excludes the mip offsets, and whoever allocates it has to add their size.
typedef struct metadata_amd_ci_vi_s {
uint32_t version; // Must be 1
uint32_t vendorID; // AMD | CZ
SQ_IMG_RSRC_WORD0 word0;
SQ_IMG_RSRC_WORD1 word1;
SQ_IMG_RSRC_WORD2 word2;
SQ_IMG_RSRC_WORD3 word3;
SQ_IMG_RSRC_WORD4 word4;
SQ_IMG_RSRC_WORD5 word5;
SQ_IMG_RSRC_WORD6 word6;
SQ_IMG_RSRC_WORD7 word7;
uint32_t mip_offsets[0]; //Mip level offset bits [39:8] for each level (if any)
} metadata_amd_ci_vi_t;
} // namespace
#endif // HSA_RUNTIME_EXT_IMAGE_RESOURCE_KV_H
+834
Ver Arquivo
@@ -0,0 +1,834 @@
#ifndef EXT_IMAGE_RESOURCE_NV_H_
#define EXT_IMAGE_RESOURCE_NV_H_
#if defined(LITTLEENDIAN_CPU)
#elif defined(BIGENDIAN_CPU)
#else
#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
#endif
namespace amd {
/**********************************************************/
/**********************************************************/
// SQ_BUF_RSRC_WORD0: size macros, bit-field struct and union overlay for one
// register word. The *_SZ macros give widths in bits; the struct holds the
// single 32-bit BASE_ADDRESS field (identical in both endianness branches).
// The union aliases that struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// NOTE(review): presumably word 0 of the buffer resource descriptor - confirm.
#define SQ_BUF_RSC_WRD0_REG_SZ 32
#define SQ_BUF_RSC_WRD0_BASE_ADDRESS_SZ 32
struct sq_buf_rsrc_word0_t {
#if defined(LITTLEENDIAN_CPU)
unsigned int BASE_ADDRESS : SQ_BUF_RSC_WRD0_BASE_ADDRESS_SZ;
#elif defined(BIGENDIAN_CPU)
unsigned int BASE_ADDRESS : SQ_BUF_RSC_WRD0_BASE_ADDRESS_SZ;
#endif
};
union SQ_BUF_RSRC_WORD0 {
sq_buf_rsrc_word0_t bitfields, bits, f;
uint32_t val : SQ_BUF_RSC_WRD0_REG_SZ;
uint32_t u32All;
int32_t i32All;
float f32All;
};
/***********/
// SQ_BUF_RSRC_WORD1: size macros, bit-field struct and union overlay for one
// register word. Field widths are 16 + 14 + 1 + 1 = 32 bits; the BIGENDIAN_CPU
// branch declares the same fields in reverse order.
// The union aliases the struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// NOTE(review): presumably word 1 of the buffer resource descriptor - confirm.
#define SQ_BUF_RSC_WRD1_REG_SZ 32
#define SQ_BUF_RSC_WRD1_BASE_ADDRESS_HI_SZ 16
#define SQ_BUF_RSC_WRD1_STRIDE_SZ 14
#define SQ_BUF_RSC_WRD1_CACHE_SWIZZLE_SZ 1
#define SQ_BUF_RSC_WRD1_SWIZZLE_ENABLE_SZ 1
struct sq_buf_rsrc_word1_t {
#if defined(LITTLEENDIAN_CPU)
unsigned int BASE_ADDRESS_HI : SQ_BUF_RSC_WRD1_BASE_ADDRESS_HI_SZ;
unsigned int STRIDE : SQ_BUF_RSC_WRD1_STRIDE_SZ;
unsigned int CACHE_SWIZZLE : SQ_BUF_RSC_WRD1_CACHE_SWIZZLE_SZ;
unsigned int SWIZZLE_ENABLE : SQ_BUF_RSC_WRD1_SWIZZLE_ENABLE_SZ;
#elif defined(BIGENDIAN_CPU)
unsigned int SWIZZLE_ENABLE : SQ_BUF_RSC_WRD1_SWIZZLE_ENABLE_SZ;
unsigned int CACHE_SWIZZLE : SQ_BUF_RSC_WRD1_CACHE_SWIZZLE_SZ;
unsigned int STRIDE : SQ_BUF_RSC_WRD1_STRIDE_SZ;
unsigned int BASE_ADDRESS_HI : SQ_BUF_RSC_WRD1_BASE_ADDRESS_HI_SZ;
#endif
};
union SQ_BUF_RSRC_WORD1 {
sq_buf_rsrc_word1_t bitfields, bits, f;
uint32_t val : SQ_BUF_RSC_WRD1_REG_SZ;
uint32_t u32All;
int32_t i32All;
float f32All;
};
/***********/
// SQ_BUF_RSRC_WORD2: size macros, bit-field struct and union overlay for one
// register word. The struct holds the single 32-bit NUM_RECORDS field
// (identical in both endianness branches).
// The union aliases the struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// NOTE(review): presumably word 2 of the buffer resource descriptor - confirm.
#define SQ_BUF_RSC_WRD2_REG_SZ 32
#define SQ_BUF_RSC_WRD2_NUM_RECORDS_SZ 32
struct sq_buf_rsrc_word2_t {
#if defined(LITTLEENDIAN_CPU)
unsigned int NUM_RECORDS : SQ_BUF_RSC_WRD2_NUM_RECORDS_SZ;
#elif defined(BIGENDIAN_CPU)
unsigned int NUM_RECORDS : SQ_BUF_RSC_WRD2_NUM_RECORDS_SZ;
#endif
};
union SQ_BUF_RSRC_WORD2 {
sq_buf_rsrc_word2_t bitfields, bits, f;
uint32_t val : SQ_BUF_RSC_WRD2_REG_SZ;
uint32_t u32All;
int32_t i32All;
float f32All;
};
/***********/
// SQ_BUF_RSRC_WORD3: size macros, bit-field struct and union overlay for one
// register word. Named fields plus the unnamed 2-bit and 1-bit gaps add up to
// 32 bits; the BIGENDIAN_CPU branch declares the same fields in reverse order.
// The union aliases the struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// NOTE(review): three width macros break the naming pattern used elsewhere
// (SQ_BUF_RSC_WRD3_RESOURCE_LEVEL and SQ_BUF_RSC_WRD3_LLC_NOALLOC lack the _SZ
// suffix; SQ_BUF_RSC_WORD3_OOB_SELECT_SZ says WORD3 instead of WRD3). They are
// left untouched here because other files may reference them.
// NOTE(review): presumably word 3 of the buffer resource descriptor - confirm.
#define SQ_BUF_RSC_WRD3_REG_SZ 32
#define SQ_BUF_RSC_WRD3_DST_SEL_X_SZ 3
#define SQ_BUF_RSC_WRD3_DST_SEL_Y_SZ 3
#define SQ_BUF_RSC_WRD3_DST_SEL_Z_SZ 3
#define SQ_BUF_RSC_WRD3_DST_SEL_W_SZ 3
#define SQ_BUF_RSC_WRD3_FORMAT_SZ 7
#define SQ_BUF_RSC_WRD3_INDEX_STRIDE_SZ 2
#define SQ_BUF_RSC_WRD3_ADD_TID_ENABLE_SZ 1
#define SQ_BUF_RSC_WRD3_RESOURCE_LEVEL 1
#define SQ_BUF_RSC_WRD3_LLC_NOALLOC 2
#define SQ_BUF_RSC_WORD3_OOB_SELECT_SZ 2
#define SQ_BUF_RSC_WRD3_TYPE_SZ 2
struct sq_buf_rsrc_word3_t {
#if defined(LITTLEENDIAN_CPU)
unsigned int DST_SEL_X : SQ_BUF_RSC_WRD3_DST_SEL_X_SZ;
unsigned int DST_SEL_Y : SQ_BUF_RSC_WRD3_DST_SEL_Y_SZ;
unsigned int DST_SEL_Z : SQ_BUF_RSC_WRD3_DST_SEL_Z_SZ;
unsigned int DST_SEL_W : SQ_BUF_RSC_WRD3_DST_SEL_W_SZ;
unsigned int FORMAT : SQ_BUF_RSC_WRD3_FORMAT_SZ;
unsigned int : 2;
unsigned int INDEX_STRIDE : SQ_BUF_RSC_WRD3_INDEX_STRIDE_SZ;
unsigned int ADD_TID_ENABLE : SQ_BUF_RSC_WRD3_ADD_TID_ENABLE_SZ;
unsigned int RESOURCE_LEVEL : SQ_BUF_RSC_WRD3_RESOURCE_LEVEL;
unsigned int : 1;
unsigned int LLC_NOALLOC : SQ_BUF_RSC_WRD3_LLC_NOALLOC;
unsigned int OOB_SELECT : SQ_BUF_RSC_WORD3_OOB_SELECT_SZ;
unsigned int TYPE : SQ_BUF_RSC_WRD3_TYPE_SZ;
#elif defined(BIGENDIAN_CPU)
unsigned int TYPE : SQ_BUF_RSC_WRD3_TYPE_SZ;
unsigned int OOB_SELECT : SQ_BUF_RSC_WORD3_OOB_SELECT_SZ;
unsigned int LLC_NOALLOC : SQ_BUF_RSC_WRD3_LLC_NOALLOC;
unsigned int : 1;
unsigned int RESOURCE_LEVEL : SQ_BUF_RSC_WRD3_RESOURCE_LEVEL;
unsigned int ADD_TID_ENABLE : SQ_BUF_RSC_WRD3_ADD_TID_ENABLE_SZ;
unsigned int INDEX_STRIDE : SQ_BUF_RSC_WRD3_INDEX_STRIDE_SZ;
unsigned int : 2;
unsigned int FORMAT : SQ_BUF_RSC_WRD3_FORMAT_SZ;
unsigned int DST_SEL_W : SQ_BUF_RSC_WRD3_DST_SEL_W_SZ;
unsigned int DST_SEL_Z : SQ_BUF_RSC_WRD3_DST_SEL_Z_SZ;
unsigned int DST_SEL_Y : SQ_BUF_RSC_WRD3_DST_SEL_Y_SZ;
unsigned int DST_SEL_X : SQ_BUF_RSC_WRD3_DST_SEL_X_SZ;
#endif
};
union SQ_BUF_RSRC_WORD3 {
sq_buf_rsrc_word3_t bitfields, bits, f;
uint32_t val : SQ_BUF_RSC_WRD3_REG_SZ;
uint32_t u32All;
int32_t i32All;
float f32All;
};
/***********/
/**********************************************************/
/**********************************************************/
// SQ_IMG_RSRC_WORD0: size macros, bit-field struct and union overlay for one
// register word. The struct holds the single 32-bit BASE_ADDRESS field
// (identical in both endianness branches).
// The union aliases the struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// NOTE(review): presumably word 0 of the image resource descriptor - confirm.
#define SQ_IMG_RSC_WRD0_REG_SZ 32
#define SQ_IMG_RSC_WRD0_BASE_ADDRESS_SZ 32
struct sq_img_rsrc_word0_t {
#if defined(LITTLEENDIAN_CPU)
unsigned int BASE_ADDRESS : SQ_IMG_RSC_WRD0_BASE_ADDRESS_SZ;
#elif defined(BIGENDIAN_CPU)
unsigned int BASE_ADDRESS : SQ_IMG_RSC_WRD0_BASE_ADDRESS_SZ;
#endif
};
union SQ_IMG_RSRC_WORD0 {
sq_img_rsrc_word0_t bitfields, bits, f;
uint32_t val : SQ_IMG_RSC_WRD0_REG_SZ;
uint32_t u32All;
int32_t i32All;
float f32All;
};
/***********/
// SQ_IMG_RSRC_WORD1: size macros, bit-field struct and union overlay for one
// register word. Field widths are 8 + 12 + 9 + 1 (unnamed) + 2 = 32 bits; the
// BIGENDIAN_CPU branch declares the same fields in reverse order.
// The union aliases the struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// NOTE(review): WIDTH here is only 2 bits wide (macro ..._WIDTH_LO); it looks
// like the low part of a width whose remaining bits live in
// SQ_IMG_RSRC_WORD2::WIDTH_HI - confirm against the hardware documentation.
#define SQ_IMG_RSC_WRD1_REG_SZ 32
#define SQ_IMG_RSC_WRD1_BASE_ADDRESS_HI_SZ 8
#define SQ_IMG_RSC_WRD1_MIN_LOD_SZ 12
#define SQ_IMG_RSC_WRD1_FORMAT_SZ 9
#define SQ_IMG_RSC_WRD1_WIDTH_LO 2
struct sq_img_rsrc_word1_t{
#if defined(LITTLEENDIAN_CPU)
unsigned int BASE_ADDRESS_HI : SQ_IMG_RSC_WRD1_BASE_ADDRESS_HI_SZ;
unsigned int MIN_LOD : SQ_IMG_RSC_WRD1_MIN_LOD_SZ;
unsigned int FORMAT : SQ_IMG_RSC_WRD1_FORMAT_SZ;
unsigned int : 1;
unsigned int WIDTH : SQ_IMG_RSC_WRD1_WIDTH_LO;
#elif defined(BIGENDIAN_CPU)
unsigned int WIDTH : SQ_IMG_RSC_WRD1_WIDTH_LO;
unsigned int : 1;
unsigned int FORMAT : SQ_IMG_RSC_WRD1_FORMAT_SZ;
unsigned int MIN_LOD : SQ_IMG_RSC_WRD1_MIN_LOD_SZ;
unsigned int BASE_ADDRESS_HI : SQ_IMG_RSC_WRD1_BASE_ADDRESS_HI_SZ;
#endif
};
union SQ_IMG_RSRC_WORD1 {
sq_img_rsrc_word1_t bitfields, bits, f;
uint32_t val : SQ_IMG_RSC_WRD1_REG_SZ;
uint32_t u32All;
int32_t i32All;
float f32All;
};
/***********/
// SQ_IMG_RSRC_WORD2: size macros, bit-field struct and union overlay for one
// register word. Field widths are 12 + 2 + 14 + 2 + 1 + 1 = 32 bits (the 2-,
// 2- and 1-bit gaps are unnamed padding); the BIGENDIAN_CPU branch declares the
// same fields in reverse order.
// The union aliases the struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// NOTE(review): presumably word 2 of the image resource descriptor; WIDTH_HI
// looks like the upper part of the width started in SQ_IMG_RSRC_WORD1 - confirm.
#define SQ_IMG_RSC_WRD2_REG_SZ 32
#define SQ_IMG_RSC_WRD2_WIDTH_HI_SZ 12
#define SQ_IMG_RSC_WRD2_HEIGHT_SZ 14
#define SQ_IMG_RSC_WRD2_RESOURCE_LEVEL_SZ 1
struct sq_img_rsrc_word2_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int WIDTH_HI : SQ_IMG_RSC_WRD2_WIDTH_HI_SZ;
  unsigned int : 2;
  unsigned int HEIGHT : SQ_IMG_RSC_WRD2_HEIGHT_SZ;
  unsigned int : 2;
  unsigned int : 1;
  unsigned int RESOURCE_LEVEL : SQ_IMG_RSC_WRD2_RESOURCE_LEVEL_SZ;
#elif defined(BIGENDIAN_CPU)
  // Mirror of the little-endian branch. The padding is unnamed here as well:
  // two members called RESERVED would be a redeclaration error, and WIDTH_HI
  // must use the WIDTH_HI_SZ macro (no SQ_IMG_RSC_WRD2_WIDTH_SZ is defined).
  unsigned int RESOURCE_LEVEL : SQ_IMG_RSC_WRD2_RESOURCE_LEVEL_SZ;
  unsigned int : 1;
  unsigned int : 2;
  unsigned int HEIGHT : SQ_IMG_RSC_WRD2_HEIGHT_SZ;
  unsigned int : 2;
  unsigned int WIDTH_HI : SQ_IMG_RSC_WRD2_WIDTH_HI_SZ;
#endif
};
union SQ_IMG_RSRC_WORD2 {
  sq_img_rsrc_word2_t bitfields, bits, f;
  uint32_t val : SQ_IMG_RSC_WRD2_REG_SZ;
  uint32_t u32All;
  int32_t i32All;
  float f32All;
};
/***********/
// SQ_IMG_RSRC_WORD3: size macros, bit-field struct and union overlay for one
// register word. Field widths are 3 + 3 + 3 + 3 + 4 + 4 + 5 + 3 + 4 = 32 bits;
// the BIGENDIAN_CPU branch declares the same fields in reverse order.
// The union aliases the struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// NOTE(review): presumably word 3 of the image resource descriptor - confirm
// field meanings against the hardware documentation.
#define SQ_IMG_RSC_WRD3_REG_SZ 32
#define SQ_IMG_RSC_WRD3_DST_SEL_X_SZ 3
#define SQ_IMG_RSC_WRD3_DST_SEL_Y_SZ 3
#define SQ_IMG_RSC_WRD3_DST_SEL_Z_SZ 3
#define SQ_IMG_RSC_WRD3_DST_SEL_W_SZ 3
#define SQ_IMG_RSC_WRD3_BASE_LEVEL_SZ 4
#define SQ_IMG_RSC_WRD3_LAST_LEVEL_SZ 4
#define SQ_IMG_RSC_WRD3_SW_MODE_SZ 5
#define SQ_IMG_RSC_WRD3_BC_SWIZZLE_SZ 3
#define SQ_IMG_RSC_WRD3_TYPE_SZ 4
struct sq_img_rsrc_word3_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int DST_SEL_X : SQ_IMG_RSC_WRD3_DST_SEL_X_SZ;
  unsigned int DST_SEL_Y : SQ_IMG_RSC_WRD3_DST_SEL_Y_SZ;
  unsigned int DST_SEL_Z : SQ_IMG_RSC_WRD3_DST_SEL_Z_SZ;
  unsigned int DST_SEL_W : SQ_IMG_RSC_WRD3_DST_SEL_W_SZ;
  unsigned int BASE_LEVEL : SQ_IMG_RSC_WRD3_BASE_LEVEL_SZ;
  unsigned int LAST_LEVEL : SQ_IMG_RSC_WRD3_LAST_LEVEL_SZ;
  unsigned int SW_MODE : SQ_IMG_RSC_WRD3_SW_MODE_SZ;
  unsigned int BC_SWIZZLE : SQ_IMG_RSC_WRD3_BC_SWIZZLE_SZ;
  unsigned int TYPE : SQ_IMG_RSC_WRD3_TYPE_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int TYPE : SQ_IMG_RSC_WRD3_TYPE_SZ;
  unsigned int BC_SWIZZLE : SQ_IMG_RSC_WRD3_BC_SWIZZLE_SZ;
  // Named SW_MODE (not W_MODE) so code using the field builds on either
  // endianness.
  unsigned int SW_MODE : SQ_IMG_RSC_WRD3_SW_MODE_SZ;
  unsigned int LAST_LEVEL : SQ_IMG_RSC_WRD3_LAST_LEVEL_SZ;
  unsigned int BASE_LEVEL : SQ_IMG_RSC_WRD3_BASE_LEVEL_SZ;
  unsigned int DST_SEL_W : SQ_IMG_RSC_WRD3_DST_SEL_W_SZ;
  unsigned int DST_SEL_Z : SQ_IMG_RSC_WRD3_DST_SEL_Z_SZ;
  unsigned int DST_SEL_Y : SQ_IMG_RSC_WRD3_DST_SEL_Y_SZ;
  unsigned int DST_SEL_X : SQ_IMG_RSC_WRD3_DST_SEL_X_SZ;
#endif
};
union SQ_IMG_RSRC_WORD3 {
  sq_img_rsrc_word3_t bitfields, bits, f;
  uint32_t val : SQ_IMG_RSC_WRD3_REG_SZ;
  uint32_t u32All;
  int32_t i32All;
  float f32All;
};
/***********/
// SQ_IMG_RSRC_WORD4: size macros, bit-field struct and union overlay for one
// register word. Field widths are 13 + 1 + 2 + 13 + 3 = 32 bits (only DEPTH and
// BASE_ARRAY are named); the BIGENDIAN_CPU branch declares the same fields in
// reverse order.
// The union aliases the struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// Per the inline notes, on gfx10.3 (NV21) DEPTH carries Pitch[0:12] and the
// following unnamed bit carries Pitch[13].
#define SQ_IMG_RSC_WRD4_REG_SZ 32
#define SQ_IMG_RSC_WRD4_DEPTH_SZ 13
#define SQ_IMG_RSC_WRD4_BASE_ARR_SZ 13
struct sq_img_rsrc_word4_t {
#if defined(LITTLEENDIAN_CPU)
// For arrays this is last slice in view, for 3D this is depth-1, For remaining this is pitch-1
unsigned int DEPTH : SQ_IMG_RSC_WRD4_DEPTH_SZ;
unsigned int : 1; //Pitch[13] in gfx10.3 (NV21)
unsigned int : 2;
unsigned int BASE_ARRAY : SQ_IMG_RSC_WRD4_BASE_ARR_SZ;
unsigned int : 3;
#elif defined(BIGENDIAN_CPU)
unsigned int : 3;
unsigned int BASE_ARRAY : SQ_IMG_RSC_WRD4_BASE_ARR_SZ;
unsigned int : 2;
unsigned int : 1; //Pitch[13] in gfx10.3 (NV21)
unsigned int DEPTH : SQ_IMG_RSC_WRD4_DEPTH_SZ; //Pitch[0:12] in gfx10.3 (NV21)
#endif
};
union SQ_IMG_RSRC_WORD4 {
sq_img_rsrc_word4_t bitfields, bits, f;
uint32_t val : SQ_IMG_RSC_WRD4_REG_SZ;
uint32_t u32All;
int32_t i32All;
float f32All;
};
/***********/
// SQ_IMG_RSRC_WORD5: size macros, bit-field struct and union overlay for one
// register word. Field widths are 4 + 4 + 12 + 3 + 1 + 1 + 1 + 1 + 4 (unnamed)
// + 1 = 32 bits; the BIGENDIAN_CPU branch declares the same fields in reverse
// order.
// The union aliases the struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// The commented-out DSCAL/HSCAL/WSCAL lines are three 4-bit fields covering the
// same 12 bits as the active MID_LOD_WRN field.
// NOTE(review): presumably word 5 of the image resource descriptor - confirm.
#define SQ_IMG_RSC_WRD5_REG_SZ 32
#define SQ_IMG_RSC_WRD5_ARRAY_PITCH_SZ 4
#define SQ_IMG_RSC_WRD5_MAX_MIP_SZ 4
//#define SQ_IMG_RSC_WRD5_DSCAL_OR_MID_LOD_WRN_SZ 4
//#define SQ_IMG_RSC_WRD5_HSCAL_OR_MID_LOD_WRN_SZ 4
//#define SQ_IMG_RSC_WRD5_WSCAL_OR_MID_LOD_WRN_SZ 4
#define SQ_IMG_RSC_WRD5_MID_LOD_WRN_SZ 12
#define SQ_IMG_RSC_WRD5_PERF_MOD_SZ 3
#define SQ_IMG_RSC_WRD5_CORNER_SAMPLES_SZ 1
#define SQ_IMG_RSC_WRD5_LINKED_RESOURCE_SZ 1
#define SQ_IMG_RSC_WRD5_LOD_HDW_CNT_EN_SZ 1
#define SQ_IMG_RSC_WRD5_PRT_DEFAULT_SZ 1
#define SQ_IMG_RSC_WRD5_BIG_PAGE_SZ 1
struct sq_img_rsrc_word5_t {
#if defined(LITTLEENDIAN_CPU)
unsigned int ARRAY_PITCH : SQ_IMG_RSC_WRD5_ARRAY_PITCH_SZ;
unsigned int MAX_MIP : SQ_IMG_RSC_WRD5_MAX_MIP_SZ;
unsigned int MID_LOD_WRN : SQ_IMG_RSC_WRD5_MID_LOD_WRN_SZ;
// unsigned int DSCAL_OR_MID_LOD_WRN : SQ_IMG_RSC_WRD5_DSCAL_OR_MID_LOD_WRN_SZ;
// unsigned int HSCAL_OR_MID_LOD_WRN : SQ_IMG_RSC_WRD5_HSCAL_OR_MID_LOD_WRN_SZ;
// unsigned int WSCAL_OR_MID_LOD_WRN : SQ_IMG_RSC_WRD5_WSCAL_OR_MID_LOD_WRN_SZ;
unsigned int PERF_MOD : SQ_IMG_RSC_WRD5_PERF_MOD_SZ;
unsigned int CORNER_SAMPLES : SQ_IMG_RSC_WRD5_CORNER_SAMPLES_SZ;
unsigned int LINKED_RESOURCE : SQ_IMG_RSC_WRD5_LINKED_RESOURCE_SZ;
unsigned int LOD_HDW_CNT_EN : SQ_IMG_RSC_WRD5_LOD_HDW_CNT_EN_SZ;
unsigned int PRT_DEFAULT : SQ_IMG_RSC_WRD5_PRT_DEFAULT_SZ;
unsigned int : 4;
unsigned int BIG_PAGE : SQ_IMG_RSC_WRD5_BIG_PAGE_SZ;
#elif defined(BIGENDIAN_CPU)
unsigned int BIG_PAGE : SQ_IMG_RSC_WRD5_BIG_PAGE_SZ;
unsigned int : 4;
unsigned int PRT_DEFAULT : SQ_IMG_RSC_WRD5_PRT_DEFAULT_SZ;
unsigned int LOD_HDW_CNT_EN : SQ_IMG_RSC_WRD5_LOD_HDW_CNT_EN_SZ;
unsigned int LINKED_RESOURCE : SQ_IMG_RSC_WRD5_LINKED_RESOURCE_SZ;
unsigned int CORNER_SAMPLES : SQ_IMG_RSC_WRD5_CORNER_SAMPLES_SZ;
unsigned int PERF_MOD : SQ_IMG_RSC_WRD5_PERF_MOD_SZ;
unsigned int MID_LOD_WRN : SQ_IMG_RSC_WRD5_MID_LOD_WRN_SZ;
// unsigned int WSCAL_OR_MID_LOD_WRN : SQ_IMG_RSC_WRD5_WSCAL_OR_MID_LOD_WRN_SZ;
// unsigned int HSCAL_OR_MID_LOD_WRN : SQ_IMG_RSC_WRD5_HSCAL_OR_MID_LOD_WRN_SZ;
// unsigned int DSCAL_OR_MID_LOD_WRN : SQ_IMG_RSC_WRD5_DSCAL_OR_MID_LOD_WRN_SZ;
unsigned int MAX_MIP : SQ_IMG_RSC_WRD5_MAX_MIP_SZ;
unsigned int ARRAY_PITCH : SQ_IMG_RSC_WRD5_ARRAY_PITCH_SZ;
#endif
};
union SQ_IMG_RSRC_WORD5 {
sq_img_rsrc_word5_t bitfields, bits, f;
uint32_t val : SQ_IMG_RSC_WRD5_REG_SZ;
uint32_t u32All;
int32_t i32All;
float f32All;
};
/***********/
// SQ_IMG_RSRC_WORD6: size macros, bit-field struct and union overlay for one
// register word. Field widths are 8 + 2 + 1 + 4 (unnamed) + 2 + 2 + 1 + 1 + 1 +
// 1 + 1 + 8 = 32 bits; the BIGENDIAN_CPU branch declares the same fields in
// reverse order.
// The union aliases the struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// NOTE(review): META_DATA_ADDRESS is 8 bits here and word 7 holds
// META_DATA_ADDRESS_HI, so this is presumably the low part of that address -
// confirm against the hardware documentation.
#define SQ_IMG_RSC_WRD6_REG_SZ 32
#define SQ_IMG_RSC_WRD6_COUNTER_BANK_ID_SZ 8
#define SQ_IMG_RSC_WRD6_LLC_NOALLOC_RES_SZ 2
#define SQ_IMG_RSC_WRD6_ITERATE_256_SZ 1
#define SQ_IMG_RSC_WRD6_MAX_UNCOMP_BLK_SZ_SZ 2
#define SQ_IMG_RSC_WRD6_MAX_COMP_BLK_SZ_SZ 2
#define SQ_IMG_RSC_WRD6_META_PIPE_ALIGNED_SZ 1
#define SQ_IMG_RSC_WRD6_WRITE_COMPRESS_EN_SZ 1
#define SQ_IMG_RSC_WRD6_COMPRESSION_ENABLE_SZ 1
#define SQ_IMG_RSC_WRD6_ALPHA_IS_ON_MSB_SZ 1
#define SQ_IMG_RSC_WRD6_COLOR_TRANSFORM_SZ 1
#define SQ_IMG_RSC_WRD6_META_DATA_ADDR_SZ 8
struct sq_img_rsrc_word6_t {
#if defined(LITTLEENDIAN_CPU)
unsigned int COUNTER_BANK_ID : SQ_IMG_RSC_WRD6_COUNTER_BANK_ID_SZ;
unsigned int LLC_NOALLOC_RES : SQ_IMG_RSC_WRD6_LLC_NOALLOC_RES_SZ; //gfx10.3 (NV21)
unsigned int ITERATE_256 : SQ_IMG_RSC_WRD6_ITERATE_256_SZ;
unsigned int : 4;
unsigned int MAX_UNCOMP_BLK_SZ : SQ_IMG_RSC_WRD6_MAX_UNCOMP_BLK_SZ_SZ;
unsigned int MAX_COMP_BLK_SZ : SQ_IMG_RSC_WRD6_MAX_COMP_BLK_SZ_SZ;
unsigned int META_PIPE_ALIGNED : SQ_IMG_RSC_WRD6_META_PIPE_ALIGNED_SZ;
unsigned int WRITE_COMPRESS_ENABLE : SQ_IMG_RSC_WRD6_WRITE_COMPRESS_EN_SZ;
unsigned int COMPRESSION_ENABLE : SQ_IMG_RSC_WRD6_COMPRESSION_ENABLE_SZ;
unsigned int ALPHA_IS_ON_MSB : SQ_IMG_RSC_WRD6_ALPHA_IS_ON_MSB_SZ;
unsigned int COLOR_TRANSFORM : SQ_IMG_RSC_WRD6_COLOR_TRANSFORM_SZ;
unsigned int META_DATA_ADDRESS : SQ_IMG_RSC_WRD6_META_DATA_ADDR_SZ;
#elif defined(BIGENDIAN_CPU)
unsigned int META_DATA_ADDRESS : SQ_IMG_RSC_WRD6_META_DATA_ADDR_SZ;
unsigned int COLOR_TRANSFORM : SQ_IMG_RSC_WRD6_COLOR_TRANSFORM_SZ;
unsigned int ALPHA_IS_ON_MSB : SQ_IMG_RSC_WRD6_ALPHA_IS_ON_MSB_SZ;
unsigned int COMPRESSION_ENABLE : SQ_IMG_RSC_WRD6_COMPRESSION_ENABLE_SZ;
unsigned int WRITE_COMPRESS_ENABLE : SQ_IMG_RSC_WRD6_WRITE_COMPRESS_EN_SZ;
unsigned int META_PIPE_ALIGNED : SQ_IMG_RSC_WRD6_META_PIPE_ALIGNED_SZ;
unsigned int MAX_COMP_BLK_SZ : SQ_IMG_RSC_WRD6_MAX_COMP_BLK_SZ_SZ;
unsigned int MAX_UNCOMP_BLK_SZ : SQ_IMG_RSC_WRD6_MAX_UNCOMP_BLK_SZ_SZ;
unsigned int : 4;
unsigned int ITERATE_256 : SQ_IMG_RSC_WRD6_ITERATE_256_SZ;
unsigned int LLC_NOALLOC_RES : SQ_IMG_RSC_WRD6_LLC_NOALLOC_RES_SZ;
unsigned int COUNTER_BANK_ID : SQ_IMG_RSC_WRD6_COUNTER_BANK_ID_SZ;
#endif
};
union SQ_IMG_RSRC_WORD6 {
sq_img_rsrc_word6_t bitfields, bits, f;
uint32_t val : SQ_IMG_RSC_WRD6_REG_SZ;
uint32_t u32All;
int32_t i32All;
float f32All;
};
/***********/
// SQ_IMG_RSRC_WORD7: size macros, bit-field struct and union overlay for one
// register word. The struct holds the single 32-bit META_DATA_ADDRESS_HI field
// (identical in both endianness branches).
// The union aliases the struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// NOTE(review): presumably word 7 of the image resource descriptor - confirm.
#define SQ_IMG_RSC_WRD7_REG_SZ 32
#define SQ_IMG_RSC_WRD7_META_DATA_ADDRESS_HI_SZ 32
struct sq_img_rsrc_word7_t {
#if defined(LITTLEENDIAN_CPU)
unsigned int META_DATA_ADDRESS_HI : SQ_IMG_RSC_WRD7_META_DATA_ADDRESS_HI_SZ;
#elif defined(BIGENDIAN_CPU)
unsigned int META_DATA_ADDRESS_HI : SQ_IMG_RSC_WRD7_META_DATA_ADDRESS_HI_SZ;
#endif
};
union SQ_IMG_RSRC_WORD7 {
sq_img_rsrc_word7_t bitfields, bits, f;
uint32_t val : SQ_IMG_RSC_WRD7_REG_SZ;
uint32_t u32All;
int32_t i32All;
float f32All;
};
/***********/
/**********************************************************/
/**********************************************************/
// SQ_IMG_SAMP_WORD0: size macros, bit-field struct and union overlay for one
// register word. The fourteen fields add up to 32 bits; the BIGENDIAN_CPU
// branch declares the same fields in reverse order.
// The union aliases the struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// NOTE(review): presumably word 0 of the sampler descriptor - confirm field
// meanings against the hardware documentation.
#define SQ_IMG_SAMP_WORD0_REG_SZ 32
#define SQ_IMG_SAMP_WORD0_CLAMP_X_SZ 3
#define SQ_IMG_SAMP_WORD0_CLAMP_Y_SZ 3
#define SQ_IMG_SAMP_WORD0_CLAMP_Z_SZ 3
#define SQ_IMG_SAMP_WORD0_MAX_ANISO_RATIO_SZ 3
#define SQ_IMG_SAMP_WORD0_DEPTH_COMPARE_FUNC_SZ 3
#define SQ_IMG_SAMP_WORD0_FORCE_UNNORMALIZED_SZ 1
#define SQ_IMG_SAMP_WORD0_ANISO_THRESHOLD_SZ 3
#define SQ_IMG_SAMP_WORD0_MC_COORD_TRUNC_SZ 1
#define SQ_IMG_SAMP_WORD0_FORCE_DEGAMMA_SZ 1
#define SQ_IMG_SAMP_WORD0_ANISO_BIAS_SZ 6
#define SQ_IMG_SAMP_WORD0_TRUNC_COORD_SZ 1
#define SQ_IMG_SAMP_WORD0_DISABLE_CUBE_WRAP_SZ 1
#define SQ_IMG_SAMP_WORD0_FILTER_MODE_SZ 2
#define SQ_IMG_SAMP_WORD0_SKIP_DEGAMMA_SZ 1
struct sq_img_samp_word0_t {
#if defined(LITTLEENDIAN_CPU)
unsigned int CLAMP_X : SQ_IMG_SAMP_WORD0_CLAMP_X_SZ;
unsigned int CLAMP_Y : SQ_IMG_SAMP_WORD0_CLAMP_Y_SZ;
unsigned int CLAMP_Z : SQ_IMG_SAMP_WORD0_CLAMP_Z_SZ;
unsigned int MAX_ANISO_RATIO : SQ_IMG_SAMP_WORD0_MAX_ANISO_RATIO_SZ;
unsigned int DEPTH_COMPARE_FUNC : SQ_IMG_SAMP_WORD0_DEPTH_COMPARE_FUNC_SZ;
unsigned int FORCE_UNNORMALIZED : SQ_IMG_SAMP_WORD0_FORCE_UNNORMALIZED_SZ;
unsigned int ANISO_THRESHOLD : SQ_IMG_SAMP_WORD0_ANISO_THRESHOLD_SZ;
unsigned int MC_COORD_TRUNC : SQ_IMG_SAMP_WORD0_MC_COORD_TRUNC_SZ;
unsigned int FORCE_DEGAMMA : SQ_IMG_SAMP_WORD0_FORCE_DEGAMMA_SZ;
unsigned int ANISO_BIAS : SQ_IMG_SAMP_WORD0_ANISO_BIAS_SZ;
unsigned int TRUNC_COORD : SQ_IMG_SAMP_WORD0_TRUNC_COORD_SZ;
unsigned int DISABLE_CUBE_WRAP : SQ_IMG_SAMP_WORD0_DISABLE_CUBE_WRAP_SZ;
unsigned int FILTER_MODE : SQ_IMG_SAMP_WORD0_FILTER_MODE_SZ;
unsigned int SKIP_DEGAMMA : SQ_IMG_SAMP_WORD0_SKIP_DEGAMMA_SZ;
#elif defined(BIGENDIAN_CPU)
unsigned int SKIP_DEGAMMA : SQ_IMG_SAMP_WORD0_SKIP_DEGAMMA_SZ;
unsigned int FILTER_MODE : SQ_IMG_SAMP_WORD0_FILTER_MODE_SZ;
unsigned int DISABLE_CUBE_WRAP : SQ_IMG_SAMP_WORD0_DISABLE_CUBE_WRAP_SZ;
unsigned int TRUNC_COORD : SQ_IMG_SAMP_WORD0_TRUNC_COORD_SZ;
unsigned int ANISO_BIAS : SQ_IMG_SAMP_WORD0_ANISO_BIAS_SZ;
unsigned int FORCE_DEGAMMA : SQ_IMG_SAMP_WORD0_FORCE_DEGAMMA_SZ;
unsigned int MC_COORD_TRUNC : SQ_IMG_SAMP_WORD0_MC_COORD_TRUNC_SZ;
unsigned int ANISO_THRESHOLD : SQ_IMG_SAMP_WORD0_ANISO_THRESHOLD_SZ;
unsigned int FORCE_UNNORMALIZED : SQ_IMG_SAMP_WORD0_FORCE_UNNORMALIZED_SZ;
unsigned int DEPTH_COMPARE_FUNC : SQ_IMG_SAMP_WORD0_DEPTH_COMPARE_FUNC_SZ;
unsigned int MAX_ANISO_RATIO : SQ_IMG_SAMP_WORD0_MAX_ANISO_RATIO_SZ;
unsigned int CLAMP_Z : SQ_IMG_SAMP_WORD0_CLAMP_Z_SZ;
unsigned int CLAMP_Y : SQ_IMG_SAMP_WORD0_CLAMP_Y_SZ;
unsigned int CLAMP_X : SQ_IMG_SAMP_WORD0_CLAMP_X_SZ;
#endif
};
union SQ_IMG_SAMP_WORD0 {
sq_img_samp_word0_t bitfields, bits, f;
uint32_t val : SQ_IMG_SAMP_WORD0_REG_SZ;
uint32_t u32All;
int32_t i32All;
float f32All;
};
/***********/
// SQ_IMG_SAMP_WORD1: size macros, bit-field struct and union overlay for one
// register word. Field widths are 12 + 12 + 4 + 4 = 32 bits; the BIGENDIAN_CPU
// branch declares the same fields in reverse order.
// The union aliases the struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// NOTE(review): presumably word 1 of the sampler descriptor - confirm.
#define SQ_IMG_SAMP_WORD1_REG_SZ 32
#define SQ_IMG_SAMP_WORD1_MIN_LOD_SZ 12
#define SQ_IMG_SAMP_WORD1_MAX_LOD_SZ 12
#define SQ_IMG_SAMP_WORD1_PERF_MIP_SZ 4
#define SQ_IMG_SAMP_WORD1_PERF_Z_SZ 4
struct sq_img_samp_word1_t {
#if defined(LITTLEENDIAN_CPU)
unsigned int MIN_LOD : SQ_IMG_SAMP_WORD1_MIN_LOD_SZ;
unsigned int MAX_LOD : SQ_IMG_SAMP_WORD1_MAX_LOD_SZ;
unsigned int PERF_MIP : SQ_IMG_SAMP_WORD1_PERF_MIP_SZ;
unsigned int PERF_Z : SQ_IMG_SAMP_WORD1_PERF_Z_SZ;
#elif defined(BIGENDIAN_CPU)
unsigned int PERF_Z : SQ_IMG_SAMP_WORD1_PERF_Z_SZ;
unsigned int PERF_MIP : SQ_IMG_SAMP_WORD1_PERF_MIP_SZ;
unsigned int MAX_LOD : SQ_IMG_SAMP_WORD1_MAX_LOD_SZ;
unsigned int MIN_LOD : SQ_IMG_SAMP_WORD1_MIN_LOD_SZ;
#endif
};
union SQ_IMG_SAMP_WORD1 {
sq_img_samp_word1_t bitfields, bits, f;
uint32_t val : SQ_IMG_SAMP_WORD1_REG_SZ;
uint32_t u32All;
int32_t i32All;
float f32All;
};
/***********/
// SQ_IMG_SAMP_WORD2: size macros, bit-field struct and union overlay for one
// register word. Field widths are 12 + 2 + 6 + 2 + 2 + 2 + 2 + 1 + 1 + 1 + 1 =
// 32 bits; the BIGENDIAN_CPU branch declares the same fields in reverse order.
// The union aliases the struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// NOTE(review): presumably word 2 of the sampler descriptor - confirm field
// meanings against the hardware documentation.
#define SQ_IMG_SAMP_WORD2_REG_SZ 32
#define SQ_IMG_SAMP_WORD2_BC_LRS_LB_SZ 12
#define SQ_IMG_SAMP_WORD2_BC_OR_BCT_SZ 2
#define SQ_IMG_SAMP_WORD2_LOD_BIAS_SEC_SZ 6
#define SQ_IMG_SAMP_WORD2_XY_MAG_FILTER_SZ 2
#define SQ_IMG_SAMP_WORD2_XY_MIN_FILTER_SZ 2
#define SQ_IMG_SAMP_WORD2_Z_FILTER_SZ 2
#define SQ_IMG_SAMP_WORD2_MIP_FILTER_SZ 2
#define SQ_IMG_SAMP_WORD2_MIP_POINT_PRECLAMP_SZ 1
#define SQ_IMG_SAMP_WORD2_ANISO_OVERRIDE_SZ 1
#define SQ_IMG_SAMP_WORD2_BLEND_ZERO_PRT_SZ 1
#define SQ_IMG_SAMP_WORD2_DERIV_ADJUST_ENABLE_SZ 1
struct sq_img_samp_word2_t {
#if defined(LITTLEENDIAN_CPU)
  unsigned int BC_LRS_LB : SQ_IMG_SAMP_WORD2_BC_LRS_LB_SZ;
  unsigned int BC_OR_BCT : SQ_IMG_SAMP_WORD2_BC_OR_BCT_SZ;
  unsigned int LOD_BIAS_SEC : SQ_IMG_SAMP_WORD2_LOD_BIAS_SEC_SZ;
  unsigned int XY_MAG_FILTER : SQ_IMG_SAMP_WORD2_XY_MAG_FILTER_SZ;
  unsigned int XY_MIN_FILTER : SQ_IMG_SAMP_WORD2_XY_MIN_FILTER_SZ;
  unsigned int Z_FILTER : SQ_IMG_SAMP_WORD2_Z_FILTER_SZ;
  unsigned int MIP_FILTER : SQ_IMG_SAMP_WORD2_MIP_FILTER_SZ;
  unsigned int MIP_POINT_PRECLAMP : SQ_IMG_SAMP_WORD2_MIP_POINT_PRECLAMP_SZ;
  unsigned int ANISO_OVERRIDE : SQ_IMG_SAMP_WORD2_ANISO_OVERRIDE_SZ;
  unsigned int BLEND_ZERO_PRT : SQ_IMG_SAMP_WORD2_BLEND_ZERO_PRT_SZ;
  unsigned int DERIV_ADJUST_EN : SQ_IMG_SAMP_WORD2_DERIV_ADJUST_ENABLE_SZ;
#elif defined(BIGENDIAN_CPU)
  unsigned int DERIV_ADJUST_EN : SQ_IMG_SAMP_WORD2_DERIV_ADJUST_ENABLE_SZ;
  unsigned int BLEND_ZERO_PRT : SQ_IMG_SAMP_WORD2_BLEND_ZERO_PRT_SZ;
  unsigned int ANISO_OVERRIDE : SQ_IMG_SAMP_WORD2_ANISO_OVERRIDE_SZ;
  unsigned int MIP_POINT_PRECLAMP : SQ_IMG_SAMP_WORD2_MIP_POINT_PRECLAMP_SZ;
  unsigned int MIP_FILTER : SQ_IMG_SAMP_WORD2_MIP_FILTER_SZ;
  unsigned int Z_FILTER : SQ_IMG_SAMP_WORD2_Z_FILTER_SZ;
  unsigned int XY_MIN_FILTER : SQ_IMG_SAMP_WORD2_XY_MIN_FILTER_SZ;
  unsigned int XY_MAG_FILTER : SQ_IMG_SAMP_WORD2_XY_MAG_FILTER_SZ;
  unsigned int LOD_BIAS_SEC : SQ_IMG_SAMP_WORD2_LOD_BIAS_SEC_SZ;
  unsigned int BC_OR_BCT : SQ_IMG_SAMP_WORD2_BC_OR_BCT_SZ;
  // Named BC_LRS_LB (not LOD_BIAS) to match the little-endian branch and the
  // size macro, so code using the field builds on either endianness.
  unsigned int BC_LRS_LB : SQ_IMG_SAMP_WORD2_BC_LRS_LB_SZ;
#endif
};
union SQ_IMG_SAMP_WORD2 {
  sq_img_samp_word2_t bitfields, bits, f;
  uint32_t val : SQ_IMG_SAMP_WORD2_REG_SZ;
  uint32_t u32All;
  int32_t i32All;
  float f32All;
};
/***********/
// SQ_IMG_SAMP_WORD3: size macros, bit-field struct and union overlay for one
// register word. Field widths are 12 + 16 + 2 + 2 = 32 bits; the BIGENDIAN_CPU
// branch declares the same fields in reverse order.
// The union aliases the struct (as bitfields/bits/f) with a full-width `val`
// bit-field and unsigned, signed and float views of the same storage.
// NOTE(review): presumably word 3 of the sampler descriptor; the `_OR_` /
// abbreviated names suggest fields with more than one meaning - confirm
// against the hardware documentation.
#define SQ_IMG_SAMP_WORD3_REG_SZ 32
#define SQ_IMG_SAMP_WORD3_BCP_LRS_DAV_SZ 12
#define SQ_IMG_SAMP_WORD3_GRAD_ADJ_OR_DAV_SZ 16
#define SQ_IMG_SAMP_WORD3_RES_OR_DAV_SZ 2
#define SQ_IMG_SAMP_WORD3_BORD_COLOR_TYPE_SZ 2
struct sq_img_samp_word3_t {
#if defined(LITTLEENDIAN_CPU)
unsigned int BCP_LRS_DAV : SQ_IMG_SAMP_WORD3_BCP_LRS_DAV_SZ;
unsigned int GRAD_ADJ_OR_DAV : SQ_IMG_SAMP_WORD3_GRAD_ADJ_OR_DAV_SZ;
unsigned int RES_OR_DAV : SQ_IMG_SAMP_WORD3_RES_OR_DAV_SZ;
unsigned int BORDER_COLOR_TYPE : SQ_IMG_SAMP_WORD3_BORD_COLOR_TYPE_SZ;
#elif defined(BIGENDIAN_CPU)
unsigned int BORDER_COLOR_TYPE : SQ_IMG_SAMP_WORD3_BORD_COLOR_TYPE_SZ;
unsigned int RES_OR_DAV : SQ_IMG_SAMP_WORD3_RES_OR_DAV_SZ;
unsigned int GRAD_ADJ_OR_DAV : SQ_IMG_SAMP_WORD3_GRAD_ADJ_OR_DAV_SZ;
unsigned int BCP_LRS_DAV : SQ_IMG_SAMP_WORD3_BCP_LRS_DAV_SZ;
#endif
};
union SQ_IMG_SAMP_WORD3 {
sq_img_samp_word3_t bitfields, bits, f;
uint32_t val : SQ_IMG_SAMP_WORD3_REG_SZ;
uint32_t u32All;
int32_t i32All;
float f32All;
};
/***********/
/**************************************************************/
/**************************************************************/
/**************************************************************/
// Data-format codes. The enumerators are numbered consecutively from 0x00
// (FMT_INVALID) to 0x44 (FMT_YCBCR); 0x0f and 0x17 are reserved slots.
// Names list the bit widths of the components (e.g. FMT_8_8_8_8), or name a
// compressed/special layout (BC*, ETC2*, ASTC*, FMASK*, MM_*).
// NOTE(review): presumably these are the values written to the descriptor's
// data-format field — confirm against the hardware register specification.
typedef enum FMT {
FMT_INVALID = 0x00000000,
FMT_8 = 0x00000001,
FMT_16 = 0x00000002,
FMT_8_8 = 0x00000003,
FMT_32 = 0x00000004,
FMT_16_16 = 0x00000005,
FMT_10_11_11 = 0x00000006,
FMT_11_11_10 = 0x00000007,
FMT_10_10_10_2 = 0x00000008,
FMT_2_10_10_10 = 0x00000009,
FMT_8_8_8_8 = 0x0000000a,
FMT_32_32 = 0x0000000b,
FMT_16_16_16_16 = 0x0000000c,
FMT_32_32_32 = 0x0000000d,
FMT_32_32_32_32 = 0x0000000e,
// Reserved slot.
FMT_RESERVED_78 = 0x0000000f,
FMT_5_6_5 = 0x00000010,
FMT_1_5_5_5 = 0x00000011,
FMT_5_5_5_1 = 0x00000012,
FMT_4_4_4_4 = 0x00000013,
FMT_8_24 = 0x00000014,
FMT_24_8 = 0x00000015,
FMT_X24_8_32 = 0x00000016,
// Reserved slot.
FMT_RESERVED_155 = 0x00000017,
FMT_1 = 0x00000018,
FMT_1_REVERSED = 0x00000019,
FMT_GB_GR = 0x0000001a,
FMT_BG_RG = 0x0000001b,
FMT_4_4 = 0x0000001c,
// Block-compressed (BC1..BC7) entries.
FMT_BC1 = 0x0000001d,
FMT_BC2 = 0x0000001e,
FMT_BC3 = 0x0000001f,
FMT_BC4 = 0x00000020,
FMT_BC5 = 0x00000021,
FMT_BC6 = 0x00000022,
FMT_BC7 = 0x00000023,
FMT_6E4 = 0x00000024,
FMT_5_9_9_9 = 0x00000025,
// FMASK entries.
FMT_FMASK8_S2 = 0x00000026,
FMT_FMASK8_S4 = 0x00000027,
FMT_FMASK8_S8 = 0x00000028,
FMT_FMASK16_S16 = 0x00000029,
FMT_FMASK16_S8 = 0x0000002a,
FMT_FMASK32_S16 = 0x0000002b,
FMT_FMASK32_S8 = 0x0000002c,
FMT_FMASK64_S16 = 0x0000002d,
// ETC2 entries.
FMT_ETC2_RGB = 0x0000002e,
FMT_ETC2_RGBA = 0x0000002f,
FMT_ETC2_R = 0x00000030,
FMT_ETC2_RG = 0x00000031,
FMT_ETC2_RGBA1 = 0x00000032,
// ASTC entries (2D and 3D variants).
FMT_ASTC_2D_LDR = 0x00000033,
FMT_ASTC_2D_HDR = 0x00000034,
FMT_ASTC_2D_LDR_SRGB = 0x00000035,
FMT_ASTC_3D_LDR = 0x00000036,
FMT_ASTC_3D_HDR = 0x00000037,
FMT_ASTC_3D_LDR_SRGB = 0x00000038,
// MM_* entries.
FMT_MM_8 = 0x00000039,
FMT_MM_8_8 = 0x0000003a,
FMT_MM_8_8_8_8 = 0x0000003b,
FMT_MM_VYUY8 = 0x0000003c,
FMT_MM_10_11_11 = 0x0000003d,
FMT_MM_2_10_10_10 = 0x0000003e,
FMT_MM_16_16_16_16 = 0x0000003f,
FMT_10_IN_16 = 0x00000040,
FMT_10_IN_16_16 = 0x00000041,
FMT_10_IN_16_16_16_16 = 0x00000042,
FMT_7E3 = 0x00000043,
FMT_YCBCR = 0x00000044,
} FMT;
// Numeric-type codes, numbered consecutively from 0x00 (TYPE_UNORM) to 0x25
// (TYPE_6X6X6); 0x06 and 0x08 are reserved slots. Values 0x00-0x10 name
// numeric interpretations (UNORM, SNORM, UINT, FLOAT, SRGB, ...); values from
// 0x11 upward are named after 2D (AxB) and 3D (AxBxC) dimensions.
// NOTE(review): the AxB / AxBxC entries are presumably block footprints that
// pair with the ASTC formats in FMT — confirm against the hardware
// specification.
typedef enum type {
TYPE_UNORM = 0x00000000,
TYPE_SNORM = 0x00000001,
TYPE_USCALED = 0x00000002,
TYPE_SSCALED = 0x00000003,
TYPE_UINT = 0x00000004,
TYPE_SINT = 0x00000005,
// Reserved slot.
TYPE_RESERVED_6 = 0x00000006,
TYPE_FLOAT = 0x00000007,
// Reserved slot.
TYPE_RESERVED_8 = 0x00000008,
TYPE_SRGB = 0x00000009,
TYPE_UNORM_UINT = 0x0000000a,
TYPE_REVERSED_UNORM = 0x0000000b,
TYPE_FLOAT_CLAMP = 0x0000000c,
TYPE_F1 = 0x0000000d,
TYPE_F2 = 0x0000000e,
TYPE_F4 = 0x0000000f,
TYPE_F8 = 0x00000010,
// Two-dimensional AxB entries.
TYPE_4X4 = 0x00000011,
TYPE_5X4 = 0x00000012,
TYPE_5X5 = 0x00000013,
TYPE_6X5 = 0x00000014,
TYPE_6X6 = 0x00000015,
TYPE_8X5 = 0x00000016,
TYPE_8X6 = 0x00000017,
TYPE_8X8 = 0x00000018,
TYPE_10X5 = 0x00000019,
TYPE_10X6 = 0x0000001a,
TYPE_10X8 = 0x0000001b,
TYPE_10X10 = 0x0000001c,
TYPE_12X10 = 0x0000001d,
TYPE_12X12 = 0x0000001e,
// Three-dimensional AxBxC entries.
TYPE_3X3X3 = 0x0000001f,
TYPE_4X4X3 = 0x00000020,
TYPE_4X4X4 = 0x00000021,
TYPE_5X4X4 = 0x00000022,
TYPE_5X5X4 = 0x00000023,
TYPE_6X5X5 = 0x00000024,
TYPE_6X6X6 = 0x00000025
} type;
// Combined format codes (CFMT_*): each enumerator name joins a component
// layout (as in FMT) with a numeric interpretation (as in `type`), e.g.
// CFMT_8_8_8_8_UNORM. The numbering is sparse — several values between the
// listed entries are intentionally not defined here (for example 3-4, 9-10,
// 30-43 and 78-127).
// NOTE(review): presumably this is the unified format field used by the
// descriptor layout this header targets — confirm against the hardware
// specification.
enum FORMAT {
CFMT_INVALID = 0,
// 8-bit single channel.
CFMT_8_UNORM = 1,
CFMT_8_SNORM = 2,
CFMT_8_UINT = 5,
CFMT_8_SINT = 6,
// 16-bit single channel.
CFMT_16_UNORM = 7,
CFMT_16_SNORM = 8,
CFMT_16_UINT = 11,
CFMT_16_SINT = 12,
CFMT_16_FLOAT = 13,
// 8-bit two channel.
CFMT_8_8_UNORM = 14,
CFMT_8_8_SNORM = 15,
CFMT_8_8_UINT = 18,
CFMT_8_8_SINT = 19,
// 32-bit single channel.
CFMT_32_UINT = 20,
CFMT_32_SINT = 21,
CFMT_32_FLOAT = 22,
// 16-bit two channel.
CFMT_16_16_UNORM = 23,
CFMT_16_16_SNORM = 24,
CFMT_16_16_UINT = 27,
CFMT_16_16_SINT = 28,
CFMT_16_16_FLOAT = 29,
// Packed 10/10/10/2 and 2/10/10/10.
CFMT_10_10_10_2_UNORM = 44,
CFMT_10_10_10_2_SNORM = 45,
CFMT_10_10_10_2_UINT = 48,
CFMT_10_10_10_2_SINT = 49,
CFMT_2_10_10_10_UNORM = 50,
CFMT_2_10_10_10_SNORM = 51,
CFMT_2_10_10_10_UINT = 54,
CFMT_2_10_10_10_SINT = 55,
// 8-bit four channel.
CFMT_8_8_8_8_UNORM = 56,
CFMT_8_8_8_8_SNORM = 57,
CFMT_8_8_8_8_UINT = 60,
CFMT_8_8_8_8_SINT = 61,
// 32-bit two channel.
CFMT_32_32_UINT = 62,
CFMT_32_32_SINT = 63,
CFMT_32_32_FLOAT = 64,
// 16-bit four channel.
CFMT_16_16_16_16_UNORM = 65,
CFMT_16_16_16_16_SNORM = 66,
CFMT_16_16_16_16_UINT = 69,
CFMT_16_16_16_16_SINT = 70,
CFMT_16_16_16_16_FLOAT = 71,
// 32-bit three and four channel.
CFMT_32_32_32_UINT = 72,
CFMT_32_32_32_SINT = 73,
CFMT_32_32_32_FLOAT = 74,
CFMT_32_32_32_32_UINT = 75,
CFMT_32_32_32_32_SINT = 76,
CFMT_32_32_32_32_FLOAT = 77,
// sRGB variants start at 128.
CFMT_8_SRGB = 128,
CFMT_8_8_SRGB = 129,
CFMT_8_8_8_8_SRGB = 130,
// Packed 16-bit layouts.
CFMT_5_6_5_UNORM = 133,
CFMT_1_5_5_5_UNORM = 134,
CFMT_5_5_5_1_UNORM = 135,
// 8/24 and 24/8 layouts.
CFMT_8_24_UNORM = 141,
CFMT_8_24_UINT = 142,
CFMT_24_8_UNORM = 143,
CFMT_24_8_UINT = 144
};
// Component selector codes. The constants 0 and 1 use values 0-1 and the
// X/Y/Z/W selectors use values 4-7; values 2 and 3 are not defined here.
enum SEL {
  SEL_0 = 0x0,
  SEL_1 = 0x1,
  SEL_X = 0x4,
  SEL_Y = 0x5,
  SEL_Z = 0x6,
  SEL_W = 0x7
};
// Image resource dimensionality codes. The enumerators occupy the contiguous
// range 0x8-0xf; values below 0x8 are not defined by this enum.
enum SQ_RSRC_IMG_TYPE {
  SQ_RSRC_IMG_1D            = 0x8,
  SQ_RSRC_IMG_2D            = 0x9,
  SQ_RSRC_IMG_3D            = 0xa,
  SQ_RSRC_IMG_CUBE_ARRAY    = 0xb,
  SQ_RSRC_IMG_1D_ARRAY      = 0xc,
  SQ_RSRC_IMG_2D_ARRAY      = 0xd,
  SQ_RSRC_IMG_2D_MSAA       = 0xe,
  SQ_RSRC_IMG_2D_MSAA_ARRAY = 0xf
};
// Sampler X/Y filter codes, numbered 0-3: point, bilinear, and the
// anisotropic variant of each.
enum SQ_TEX_XY_FILTER {
  SQ_TEX_XY_FILTER_POINT          = 0x0,
  SQ_TEX_XY_FILTER_BILINEAR       = 0x1,
  SQ_TEX_XY_FILTER_ANISO_POINT    = 0x2,
  SQ_TEX_XY_FILTER_ANISO_BILINEAR = 0x3
};
// Sampler Z filter codes, numbered 0-2: none, point, linear.
enum SQ_TEX_Z_FILTER {
  SQ_TEX_Z_FILTER_NONE   = 0x0,
  SQ_TEX_Z_FILTER_POINT  = 0x1,
  SQ_TEX_Z_FILTER_LINEAR = 0x2
};
// Sampler mip filter codes, numbered 0-3: none, point, linear, and a
// point-with-aniso-adjust entry carrying a __VI suffix.
enum SQ_TEX_MIP_FILTER {
  SQ_TEX_MIP_FILTER_NONE                = 0x0,
  SQ_TEX_MIP_FILTER_POINT               = 0x1,
  SQ_TEX_MIP_FILTER_LINEAR              = 0x2,
  SQ_TEX_MIP_FILTER_POINT_ANISO_ADJ__VI = 0x3
};
// Sampler addressing (wrap/mirror/clamp) codes, numbered 0-7. From value 2
// onward the entries alternate between a clamp mode and the matching
// mirror-once mode: last texel, half border, border.
enum SQ_TEX_CLAMP {
  SQ_TEX_WRAP                    = 0x0,
  SQ_TEX_MIRROR                  = 0x1,
  SQ_TEX_CLAMP_LAST_TEXEL        = 0x2,
  SQ_TEX_MIRROR_ONCE_LAST_TEXEL  = 0x3,
  SQ_TEX_CLAMP_HALF_BORDER       = 0x4,
  SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x5,
  SQ_TEX_CLAMP_BORDER            = 0x6,
  SQ_TEX_MIRROR_ONCE_BORDER      = 0x7
};
// Sampler border colour selector codes, numbered 0-3: three fixed colours
// (transparent black, opaque black, opaque white) and a "register" entry.
enum SQ_TEX_BORDER_COLOR {
  SQ_TEX_BORDER_COLOR_TRANS_BLACK  = 0x0,
  SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x1,
  SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x2,
  SQ_TEX_BORDER_COLOR_REGISTER     = 0x3
};
// Border-colour swizzle codes, numbered 0-5. Each enumerator name spells the
// component order it stands for (XYZW is value 0).
enum TEX_BC_SWIZZLE {
  TEX_BC_Swizzle_XYZW = 0x0,
  TEX_BC_Swizzle_XWYZ = 0x1,
  TEX_BC_Swizzle_WZYX = 0x2,
  TEX_BC_Swizzle_WXYZ = 0x3,
  TEX_BC_Swizzle_ZYXW = 0x4,
  TEX_BC_Swizzle_YXWZ = 0x5
};
// Image metadata record: a two-word header (version, vendor id), followed by
// the eight image resource descriptor words (word0..word7), followed by a
// trailing array of mip offsets.
// NOTE(review): member order presumably matches an externally defined binary
// layout — do not reorder without confirming against the consumers.
typedef struct metadata_amd_nv_s {
uint32_t version; // Must be 1
uint32_t vendorID; // AMD
// Image resource descriptor words 0-7, in order.
SQ_IMG_RSRC_WORD0 word0;
SQ_IMG_RSRC_WORD1 word1;
SQ_IMG_RSRC_WORD2 word2;
SQ_IMG_RSRC_WORD3 word3;
SQ_IMG_RSRC_WORD4 word4;
SQ_IMG_RSRC_WORD5 word5;
SQ_IMG_RSRC_WORD6 word6;
SQ_IMG_RSRC_WORD7 word7;
// Zero-length trailing array (a compiler extension): it contributes no
// storage to sizeof(metadata_amd_nv_t). Presumably the per-mip-level offsets
// are stored immediately after the struct — confirm against the producer.
uint32_t mip_offsets[0];
} metadata_amd_nv_t;
} // namespace amd
#endif // EXT_IMAGE_RESOURCE_NV_H_
@@ -0,0 +1,213 @@
#ifndef HSA_RUNTIME_EXT_IMAGE_UTIL_H
#define HSA_RUNTIME_EXT_IMAGE_UTIL_H
#include <assert.h>
#include <stdint.h>
#include "inc/hsa.h"
// Disables copy/move construction and copy/move assignment for TypeName.
// Place it in the class body (conventionally in the private: section).
// The four special members are explicitly deleted, so any attempt to copy or
// move the type -- including from the class's own members and friends -- is
// rejected at compile time rather than surfacing later as an unresolved
// symbol at link time.
// The expansion already ends with a semicolon.
#define DISALLOW_COPY_AND_ASSIGN(TypeName)   \
  TypeName(const TypeName&) = delete;        \
  TypeName(TypeName&&) = delete;             \
  void operator=(const TypeName&) = delete;  \
  void operator=(TypeName&&) = delete;
// ALIGNED_(x): alignment specifier with x-byte alignment, spelled in the
// dialect of the active compiler. MSVC is checked first; GCC-compatible
// compilers are checked second. The macro stays undefined on any other
// compiler.
#if defined(_MSC_VER)
#define ALIGNED_(x) __declspec(align(x))
#elif defined(__GNUC__)
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif  // _MSC_VER / __GNUC__
// MULTILINE(...): turns its whole argument list into one string literal via
// the preprocessor stringizing operator.
#define MULTILINE(...) # __VA_ARGS__
// Compiler/processor adaptation layer. Exactly one branch is taken:
//   * GCC-compatible compilers on x86/x86-64,
//   * MSVC on x86/x64,
//   * anything else stops the build with #error.
// Each accepted branch also OPENS namespace ext_image; the single matching
// closing brace is at the end of this header.
#if defined(__GNUC__)
#include "mm_malloc.h"
#if defined(__i386__) || defined(__x86_64__)
#include <x86intrin.h>
#else
#error \
"Processor not identified. " \
"Need to provide a lightweight approximate clock interface (aka __rdtsc())."
#endif
namespace ext_image {
// The macros below give GCC builds the MSVC spellings used elsewhere.
// Macros are not scoped by the namespace; they apply to everything that
// follows in the translation unit.
#define __forceinline __inline__ __attribute__((always_inline))
// MSVC-named breakpoint helper implemented with the GCC trap builtin.
static __forceinline void __debugbreak() { __builtin_trap(); }
#define __declspec(x) __attribute__((x))
// __stdcall expands to nothing here (the attribute form is left commented out).
#undef __stdcall
#define __stdcall // __attribute__((__stdcall__))
#define __ALIGNED__(x) __attribute__((aligned(x)))
// MSVC-named aligned allocation helpers forwarding to _mm_malloc/_mm_free.
static __forceinline void* _aligned_malloc(size_t size, size_t alignment) {
return _mm_malloc(size, alignment);
}
static __forceinline void _aligned_free(void* ptr) { return _mm_free(ptr); }
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
// MSVC already provides the names above; only __ALIGNED__ is defined here.
#include "intrin.h"
#define __ALIGNED__(x) __declspec(align(x))
namespace ext_image {
#else
#error "Compiler and/or processor not identified."
#endif
/// @brief: Tests whether a value has at most one bit set, i.e. whether it is
/// a power of two. Note that zero also passes this test and yields true.
/// @param: val(Input), the value to test.
/// @return: bool, true when val is zero or a power of two.
template <typename T>
static __forceinline bool IsPowerOfTwo(T val) {
  // Clearing the lowest set bit leaves nothing behind only when there was at
  // most one bit set to begin with.
  const bool has_extra_bits = (val & (val - 1)) != 0;
  return !has_extra_bits;
}
/// @brief: Rounds a value down to the nearest multiple of alignment.
/// A value that is already a multiple of alignment is returned unchanged.
/// @param: value(Input), value to be rounded down.
/// @param: alignment(Input), alignment value; must be a power of two
/// (checked with assert).
/// @return: T, the largest multiple of alignment that is <= value.
template <typename T>
static __forceinline T AlignDown(T value, size_t alignment) {
  assert(IsPowerOfTwo(alignment));
  // Build the mask in T rather than in size_t. When T is wider than size_t
  // (e.g. a 64-bit value in a 32-bit build) a size_t mask would be
  // zero-extended and would wipe the upper bits of value.
  return (T)(value & ~((T)alignment - 1));
}
/// @brief: Pointer flavour of AlignDown: rounds an address down to the
/// nearest multiple of alignment by going through its integer representation.
/// @param: value(Input), pointer to type T.
/// @param: alignment(Input), alignment value.
/// @return: T*, the rounded-down pointer.
template <typename T>
static __forceinline T* AlignDown(T* value, size_t alignment) {
  const intptr_t address = (intptr_t)value;
  return (T*)AlignDown(address, alignment);
}
/// @brief: Rounds a value up to the nearest multiple of alignment.
/// A value that is already a multiple of alignment is returned unchanged.
/// @param: value(Input), value to be rounded up.
/// @param: alignment(Input), alignment value.
/// @return: T, the rounded-up value.
template <typename T>
static __forceinline T AlignUp(T value, size_t alignment) {
  // Push the value just short of the next boundary, then round down.
  const T bumped = (T)(value + alignment - 1);
  return AlignDown(bumped, alignment);
}
/// @brief: Pointer flavour of AlignUp: rounds an address up to the nearest
/// multiple of alignment. The bump is applied in bytes, independent of
/// sizeof(T).
/// @param: value(Input), pointer to type T.
/// @param: alignment(Input), alignment value.
/// @return: T*, the rounded-up pointer.
template <typename T>
static __forceinline T* AlignUp(T* value, size_t alignment) {
  // Advance byte-wise to just short of the next boundary, then round down.
  uint8_t* const bumped = (uint8_t*)value + alignment - 1;
  return (T*)AlignDown((intptr_t)bumped, alignment);
}
/// @brief: Reports whether a value already sits on an alignment boundary,
/// i.e. whether rounding it up with AlignUp leaves it unchanged.
/// @param: value(Input), value to be checked.
/// @param: alignment(Input), alignment value.
/// @return: bool, true when value is aligned.
template <typename T>
static __forceinline bool IsMultipleOf(T value, size_t alignment) {
  const T rounded = AlignUp(value, alignment);
  return rounded == value;
}
/// @brief: Pointer flavour of IsMultipleOf: reports whether an address
/// already sits on an alignment boundary, i.e. whether rounding it up with
/// AlignUp leaves it unchanged.
/// @param: value(Input), pointer to type T.
/// @param: alignment(Input), alignment value.
/// @return: bool, true when the pointer is aligned.
template <typename T>
static __forceinline bool IsMultipleOf(T* value, size_t alignment) {
  T* const rounded = AlignUp(value, alignment);
  return rounded == value;
}
/// @brief: Returns the smallest power of two that is >= value (32-bit).
/// Zero maps to 1. For inputs above 2^31 the result does not fit in 32 bits
/// and wraps around to 0.
/// @param: value(Input), the value to round up.
/// @return: uint32_t, the rounded-up power of two.
static __forceinline uint32_t NextPow2(uint32_t value) {
  if (value == 0) return 1;
  uint32_t v = value - 1;
  // Smear the highest set bit into every lower position (shifts 1,2,4,8,16).
  for (uint32_t shift = 1; shift < 32; shift <<= 1) {
    v |= v >> shift;
  }
  return v + 1;
}
/// @brief: Returns the smallest power of two that is >= value (64-bit).
/// Zero maps to 1. For inputs above 2^63 the result does not fit in 64 bits
/// and wraps around to 0.
/// @param: value(Input), the value to round up.
/// @return: uint64_t, the rounded-up power of two.
static __forceinline uint64_t NextPow2(uint64_t value) {
  if (value == 0) return 1;
  uint64_t v = value - 1;
  // Smear the highest set bit into every lower position
  // (shifts 1,2,4,8,16,32).
  for (uint32_t shift = 1; shift < 64; shift <<= 1) {
    v |= v >> shift;
  }
  return v + 1;
}
/// @brief: Extracts the bit range [lowBit, highBit] (both inclusive) of p and
/// returns it shifted down so that lowBit lands in bit 0. The result is
/// truncated to 32 bits.
/// @param: p(Input), value convertible to uintptr_t and no wider than it.
/// @return: uint32_t, the selected bits.
template<uint32_t lowBit, uint32_t highBit, typename T>
static __forceinline uint32_t BitSelect(T p) {
  static_assert(sizeof(T) <= sizeof(uintptr_t), "Type out of range.");
  static_assert(highBit < sizeof(uintptr_t)*8, "Bit index out of range.");
  const uintptr_t raw = p;
  // Mask keeping bits 0..highBit. Shifting an all-ones word right never
  // shifts by the full word width, so the top-bit case needs no special path.
  const uint32_t top_bit = sizeof(uintptr_t) * 8 - 1;
  const uintptr_t keep = ~static_cast<uintptr_t>(0) >> (top_bit - highBit);
  return (uint32_t)((raw & keep) >> lowBit);
}
/// @brief: Returns address bits 8..15 of p (the low 16 bits shifted right
/// by 8).
inline uint32_t PtrLow16Shift8(const void* p) {
  const uintptr_t addr = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>((addr >> 8) & 0xFFu);
}
/// @brief: Returns the address of p shifted right by 16 and truncated to
/// 32 bits, i.e. address bits 16..47.
inline uint32_t PtrHigh64Shift16(const void* p) {
  const uint64_t addr = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(addr >> 16);
}
/// @brief: Returns address bits 8..39 of p (the low 40 bits shifted right
/// by 8).
inline uint32_t PtrLow40Shift8(const void* p) {
  const uint64_t addr = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>((addr >> 8) & 0xFFFFFFFFULL);
}
/// @brief: Returns the address of p shifted right by 40, i.e. address bits
/// 40..63.
inline uint32_t PtrHigh64Shift40(const void* p) {
  // Widen first so the shift is well defined even for a 32-bit uintptr_t.
  const uint64_t addr = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(addr >> 40);
}
/// @brief: Returns the low 32 bits of the address of p.
inline uint32_t PtrLow32(const void* p) {
  const uintptr_t addr = reinterpret_cast<uintptr_t>(p);
  return static_cast<uint32_t>(addr);
}
/// @brief: Returns the upper 32 bits of the address of p when
/// HSA_LARGE_MODEL is defined; returns 0 otherwise.
inline uint32_t PtrHigh32(const void* p) {
#ifdef HSA_LARGE_MODEL
  return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(p) >> 32);
#else
  return 0;
#endif
}
/**
 * STL-compatible function object that deletes the pointer it is handed.
 * It is meant for destroying the elements of a container of owning raw
 * pointers: if \c v is a vector of pointers to objects of type T, the
 * pointed-to objects are destroyed by
 * \code{std::for_each(v.begin(), v.end(), DeleteObject())}
 *
 * The call operator is a template, so a single DeleteObject instance works
 * for any pointee type.
 *
 * The original code and further background on this function object are in
 * "Effective STL", 1st edition, item 7.
 */
struct DeleteObject {
  template <typename Pointee>
  void operator()(const Pointee* victim) const {
    delete victim;
  }
};
} // namespace ext_image
#endif // HSA_RUNTIME_EXT_IMAGE_UTIL_H