diff --git a/runtime/hsa-runtime/image/addrlib/inc/addrinterface.h b/runtime/hsa-runtime/image/addrlib/inc/addrinterface.h index 5260426b68..ba1db8d5e8 100644 --- a/runtime/hsa-runtime/image/addrlib/inc/addrinterface.h +++ b/runtime/hsa-runtime/image/addrlib/inc/addrinterface.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -35,10 +18,13 @@ // Includes should be before extern "C" #include "addrtypes.h" -namespace rocr { +#if defined(__cplusplus) +extern "C" +{ +#endif -#define ADDRLIB_VERSION_MAJOR 6 -#define ADDRLIB_VERSION_MINOR 2 +#define ADDRLIB_VERSION_MAJOR 8 +#define ADDRLIB_VERSION_MINOR 10 #define ADDRLIB_VERSION ((ADDRLIB_VERSION_MAJOR << 16) | ADDRLIB_VERSION_MINOR) /// Virtually all interface functions need ADDR_HANDLE as first parameter @@ -47,6 +33,13 @@ typedef VOID* ADDR_HANDLE; /// Client handle used in callbacks typedef VOID* ADDR_CLIENT_HANDLE; +typedef struct _ADDR_EXTENT3D +{ + UINT_32 width; + UINT_32 height; + UINT_32 depth; // also slices for 2D images +} ADDR_EXTENT3D; + /** * ///////////////////////////////////////////////////////////////////////////////////////////////// * // Callback functions @@ -124,7 +117,7 @@ typedef union _ADDR_CHANNEL_SETTING struct { UINT_8 valid : 1; ///< Indicate whehter this channel setting is valid - UINT_8 channel : 2; ///< 0 for x channel, 1 for y channel, 2 for z channel + UINT_8 channel : 2; ///< 0 for x channel, 1 for y channel, 2 for z channel, 3 for MSAA sample index UINT_8 index : 5; ///< Channel index }; UINT_8 value; ///< Value @@ -158,18 +151,29 @@ typedef union _ADDR_EQUATION_KEY * @brief address equation structure **************************************************************************************************** */ -#define ADDR_MAX_EQUATION_BIT 20u +#define ADDR_MAX_LEGACY_EQUATION_COMP 3u +#define ADDR_MAX_EQUATION_COMP 5u +#define ADDR_MAX_EQUATION_BIT 20u // Invalid equation index #define 
ADDR_INVALID_EQUATION_INDEX 0xFFFFFFFF typedef struct _ADDR_EQUATION { - ADDR_CHANNEL_SETTING addr[ADDR_MAX_EQUATION_BIT]; ///< addr setting - ///< each bit is result of addr ^ xor ^ xor2 - ADDR_CHANNEL_SETTING xor1[ADDR_MAX_EQUATION_BIT]; ///< xor setting - ADDR_CHANNEL_SETTING xor2[ADDR_MAX_EQUATION_BIT]; ///< xor2 setting + union + { + struct { + ADDR_CHANNEL_SETTING addr[ADDR_MAX_EQUATION_BIT]; ///< addr setting + ADDR_CHANNEL_SETTING xor1[ADDR_MAX_EQUATION_BIT]; ///< xor setting + ADDR_CHANNEL_SETTING xor2[ADDR_MAX_EQUATION_BIT]; ///< xor2 setting + ADDR_CHANNEL_SETTING xor3[ADDR_MAX_EQUATION_BIT]; ///< xor3 setting + ADDR_CHANNEL_SETTING xor4[ADDR_MAX_EQUATION_BIT]; ///< xor4 setting + }; + ///< Components showing the sources of each bit; each bit is result of addr ^ xor ^ xor2... + ADDR_CHANNEL_SETTING comps[ADDR_MAX_EQUATION_COMP][ADDR_MAX_EQUATION_BIT]; + }; UINT_32 numBits; ///< The number of bits in equation + UINT_32 numBitComponents; ///< The max number of channels contributing to a bit BOOL_32 stackedDepthSlices; ///< TRUE if depth slices are treated as being ///< stacked vertically prior to swizzling } ADDR_EQUATION; @@ -1723,6 +1727,30 @@ typedef enum _AddrSwizzleGenOption ADDR_SWIZZLE_GEN_LINEAR = 1, ///< Using a linear increment of swizzle } AddrSwizzleGenOption; +/** +**************************************************************************************************** +* AddrBlockType +* +* @brief +* Macro define resource block type +**************************************************************************************************** +*/ +typedef enum +{ + AddrBlockLinear = 0, // Resource uses linear swizzle mode + AddrBlockMicro = 1, // Resource uses 256B block + AddrBlockThin4KB = 2, // Resource uses thin 4KB block + AddrBlockThick4KB = 3, // Resource uses thick 4KB block + AddrBlockThin64KB = 4, // Resource uses thin 64KB block + AddrBlockThick64KB = 5, // Resource uses thick 64KB block + AddrBlockThinVar = 6, // Resource uses thin var block + 
AddrBlockThickVar = 7, // Resource uses thick var block + AddrBlockMaxTiledType, + + AddrBlockThin256KB = AddrBlockThinVar, + AddrBlockThick256KB = AddrBlockThickVar, +} AddrBlockType; + /** **************************************************************************************************** * AddrSwizzleOption @@ -2408,7 +2436,8 @@ typedef union _ADDR2_SURFACE_FLAGS UINT_32 metaRbUnaligned : 1; ///< This resource has rb unaligned metadata UINT_32 metaPipeUnaligned : 1; ///< This resource has pipe unaligned metadata UINT_32 view3dAs2dArray : 1; ///< This resource is a 3D resource viewed as 2D array - UINT_32 reserved : 13; ///< Reserved bits + UINT_32 allowExtEquation : 1; ///< If unset, only legacy DX eqs are allowed (2 XORs) + UINT_32 reserved : 12; ///< Reserved bits }; UINT_32 value; @@ -2585,7 +2614,7 @@ typedef struct _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT { UINT_32 size; ///< Size of this structure in bytes - UINT_64 addr; ///< Byte address + UINT_64 addr; ///< Byte offset from the image starting address UINT_32 bitPosition; ///< Bit position within surfaceAddr, 0-7. /// For surface bpp < 8, e.g. FMT_1. 
UINT_32 prtBlockIndex; ///< Index of a PRT tile (64K block) @@ -3924,6 +3953,20 @@ ADDR_E_RETURNCODE ADDR_API Addr2GetPreferredSurfaceSetting( const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut); +/** +**************************************************************************************************** +* Addr2GetPossibleSwizzleModes +* +* @brief +* Returns a list of swizzle modes that are valid from the hardware's perspective for the +* client to choose from +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2GetPossibleSwizzleModes( + ADDR_HANDLE hLib, + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut); + /** **************************************************************************************************** * Addr2IsValidDisplaySwizzleMode @@ -3938,6 +3981,524 @@ ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode( UINT_32 bpp, BOOL_32 *pResult); -} // rocr +/** +**************************************************************************************************** +* Addr2GetAllowedBlockSet +* +* @brief +* Returns the set of allowed block sizes given the allowed swizzle modes and resource type +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2GetAllowedBlockSet( + ADDR_HANDLE hLib, + ADDR2_SWMODE_SET allowedSwModeSet, + AddrResourceType rsrcType, + ADDR2_BLOCK_SET* pAllowedBlockSet); + +/** +**************************************************************************************************** +* Addr2GetAllowedSwSet +* +* @brief +* Returns the set of allowed swizzle types given the allowed swizzle modes +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2GetAllowedSwSet( + ADDR_HANDLE hLib, + 
ADDR2_SWMODE_SET allowedSwModeSet, + ADDR2_SWTYPE_SET* pAllowedSwSet); + +/** +**************************************************************************************************** +* Addr2IsBlockTypeAvailable +* +* @brief +* Determine whether a block type is allowed in a given blockSet +**************************************************************************************************** +*/ +BOOL_32 Addr2IsBlockTypeAvailable(ADDR2_BLOCK_SET blockSet, AddrBlockType blockType); + +/** +**************************************************************************************************** +* Addr2BlockTypeWithinMemoryBudget +* +* @brief +* Determine whether a new block type is acceptable based on memory waste ratio. Will favor +* larger block types. +**************************************************************************************************** +*/ +BOOL_32 Addr2BlockTypeWithinMemoryBudget( + UINT_64 minSize, + UINT_64 newBlockTypeSize, + UINT_32 ratioLow, + UINT_32 ratioHi, +#if defined(__cplusplus) + DOUBLE memoryBudget = 0.0f, + BOOL_32 newBlockTypeBigger = TRUE); +#else + DOUBLE memoryBudget, + BOOL_32 newBlockTypeBigger); +#endif + +/** +**************************************************************************************************** +* ADDR3_SURFACE_FLAGS +* +* @brief +* Surface flags +**************************************************************************************************** +*/ +typedef union _ADDR3_SURFACE_FLAGS +{ + struct + { + UINT_32 color : 1; ///< This resource is a color buffer, can be used with RTV + UINT_32 depth : 1; ///< This resource is a depth buffer, can be used with DSV + UINT_32 stencil : 1; ///< This resource is a stencil buffer, can be used with DSV + UINT_32 texture : 1; ///< This resource can be used with SRV + UINT_32 unordered : 1; ///< This resource can be used with UAV + UINT_32 hiZHiS : 1; + UINT_32 blockCompressed : 1; + UINT_32 nv12 : 1; + UINT_32 p010 : 1; + UINT_32 view3dAs2dArray : 1; + UINT_32 isVrsImage : 1; ///< 
This resource is a VRS source image + UINT_32 reserved : 21; ///< Reserved bits + }; + + UINT_32 value; +} ADDR3_SURFACE_FLAGS; + +/** +**************************************************************************************************** +* ADDR3_COMPUTE_SURFACE_INFO_INPUT +* +* @brief +* Input structure for Addr3ComputeSurfaceInfo +**************************************************************************************************** +*/ +typedef struct _ADDR3_COMPUTE_SURFACE_INFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR3_SURFACE_FLAGS flags; ///< Surface flags + Addr3SwizzleMode swizzleMode; ///< Swizzle Mode for Gfx12 + AddrResourceType resourceType; ///< Surface type + AddrFormat format; ///< Surface format + UINT_32 bpp; ///< bits per pixel + UINT_32 width; ///< Width (of mip0), in pixels + UINT_32 height; ///< Height (of mip0), in pixels + UINT_32 numSlices; ///< Number surface slice/depth (of mip0), + UINT_32 numMipLevels; ///< Total mipmap levels. + UINT_32 numSamples; ///< Number of samples + UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats) + UINT_32 sliceAlign; ///< Required slice size in bytes +} ADDR3_COMPUTE_SURFACE_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR3_MIP_INFO +* +* @brief +* Structure that contains information for mip level +* +**************************************************************************************************** +*/ +typedef struct _ADDR3_MIP_INFO +{ + UINT_32 pitch; ///< Pitch in elements + UINT_32 height; ///< Padded height in elements + UINT_32 depth; ///< Padded depth + UINT_32 pixelPitch; ///< Pitch in pixels + UINT_32 pixelHeight; ///< Padded height in pixels + UINT_32 equationIndex; ///< Equation index in the equation table + UINT_64 offset; ///< Offset in bytes from mip base, should only be used + ///< to setup vam surface descriptor, can't be used + ///< to setup swizzle pattern + 
UINT_64 macroBlockOffset; ///< macro block offset in bytes from mip base + UINT_32 mipTailOffset; ///< mip tail offset in bytes + UINT_32 mipTailCoordX; ///< mip tail coord x + UINT_32 mipTailCoordY; ///< mip tail coord y + UINT_32 mipTailCoordZ; ///< mip tail coord z +} ADDR3_MIP_INFO; + +/** +**************************************************************************************************** +* ADDR3_COMPUTE_SURFACE_INFO_OUTPUT +* +* @brief +* Output structure for Addr3ComputeSurfaceInfo +* @note + Element: AddrLib unit for computing. e.g. BCn: 4x4 blocks; R32B32B32: 32bit with 3x pitch + Pixel: Original pixel +**************************************************************************************************** +*/ +typedef struct _ADDR3_COMPUTE_SURFACE_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 pitch; ///< Pitch in elements (blocks for compressed formats) + UINT_32 pixelPitch; ///< Pitch in original pixels + UINT_32 pixelHeight; ///< Height in original pixels + UINT_32 pixelBits; ///< Original bits per pixel, passed from input + UINT_32 bpp; ///< Bits per elements + /// (e.g. 
blocks for BCn, 1/3 for 96bit) + UINT_32 numSlices; ///< Padded depth for 3d resource + /// or padded number of slices for 2d array resource + UINT_32 height; ///< Padded height (of mip0) in elements + UINT_64 sliceSize; ///< Slice (total mip chain) size in bytes + UINT_64 surfSize; ///< Surface (total mip chain) size in bytes + UINT_32 baseAlign; ///< Base address alignment + ADDR_EXTENT3D blockExtent; ///< Dimensions in element inside one block + UINT_32 pixelMipChainPitch; ///< Mip chain pitch in original pixels + UINT_32 pixelMipChainHeight; ///< Mip chain height in original pixels + ADDR3_MIP_INFO* pMipInfo; ///< Info regarding the start, sizes of the mip levels + BOOL_32 mipChainInTail; ///< If whole mipchain falls into mip tail block + UINT_32 firstMipIdInTail; ///< The id of first mip in tail, if there is no mip + /// in tail, it will be set to number of mip levels +} ADDR3_COMPUTE_SURFACE_INFO_OUTPUT; + +/** +**************************************************************************************************** +* ADDR3_SWMODE_SET +* +* @brief +* Bit field that defines swizzle type +**************************************************************************************************** +*/ +// The bit order MUST be the same as Addr3SwizzleMode enumerations, otherwise using bitset to enable +// or disable swizzle modes will be problematic. 
+typedef union _ADDR3_SWMODE_SET +{ + struct + { + UINT_32 swLinear : 1; + UINT_32 sw2d256B : 1; + UINT_32 sw2d4kB : 1; + UINT_32 sw2d64kB : 1; + UINT_32 sw2d256kB : 1; + UINT_32 sw3d4kB : 1; + UINT_32 sw3d64kB : 1; + UINT_32 sw3d256kB : 1; + UINT_32 reserved : 24; + }; + + UINT_32 value; +} ADDR3_SWMODE_SET; + +/** +**************************************************************************************************** +* ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT +* +* @brief +* Input structure of Addr3GetPossibleSwizzleModes +**************************************************************************************************** +*/ +typedef struct _ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR3_SURFACE_FLAGS flags; ///< Surface flags + AddrResourceType resourceType; ///< Surface type + UINT_32 bpp; ///< bits per pixel + UINT_32 width; ///< Width (of mip0), in pixels + UINT_32 height; ///< Height (of mip0), in pixels + UINT_32 numSlices; ///< Number surface slice/depth (of mip0), + UINT_32 numMipLevels; ///< Total mipmap levels. + UINT_32 numSamples; ///< Number of samples + UINT_32 maxAlign; ///< maximum base/size alignment requested by client +} ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT; + +/** +**************************************************************************************************** +* ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT +* +* @brief +* Output structure of Addr3GetPossibleSwizzleModes +**************************************************************************************************** +*/ +typedef struct _ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + ADDR3_SWMODE_SET validModes; ///< List of valid swizzle modes for this function. 
+} ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT; + +/** +**************************************************************************************************** +* Addr3ComputeSurfaceInfo +* +* @brief +* Compute surface width/height/slices/alignments and suitable tiling mode +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr3ComputeSurfaceInfo( + ADDR_HANDLE hLib, + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* Addr3GetPossibleSwizzleModes +* +* @brief +* Returns a list of swizzle modes that are valid from the hardware's perspective for the +* client to choose from +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr3GetPossibleSwizzleModes( + ADDR_HANDLE hLib, + const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn, + ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for Addr3ComputeSurfaceAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index + UINT_32 sample; ///< Sample index, use fragment index for EQAA + UINT_32 mipId; ///< the mip ID in mip chain + + Addr3SwizzleMode swizzleMode; ///< Swizzle mode for Gfx12 + ADDR3_SURFACE_FLAGS flags; ///< Surface flags + AddrResourceType resourceType; ///< Surface type + UINT_32 bpp; ///< Bits per pixel + ADDR_EXTENT3D unAlignedDims; ///< Surface original 
dimensions (of mip0) + UINT_32 numMipLevels; ///< Total mipmap levels + UINT_32 numSamples; ///< Number of samples + UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats) +} ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for Addr3ComputeSurfaceAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Byte offset from the image starting address + UINT_32 bitPosition; ///< Bit position within surfaceAddr, 0-7. + /// For surface bpp < 8, e.g. FMT_1. + UINT_32 prtBlockIndex; ///< Index of a PRT tile (64K block) +} ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* Addr3ComputeSurfaceAddrFromCoord +* +* @brief +* Compute surface address from a given coordinate. 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr3ComputeSurfaceAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR3_COMPUTE_PIPEBANKXOR_INPUT +* +* @brief +* Input structure of Addr3ComputePipeBankXor +**************************************************************************************************** +*/ +typedef struct _ADDR3_COMPUTE_PIPEBANKXOR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 surfIndex; ///< Input surface index + Addr3SwizzleMode swizzleMode; ///< Surface swizzle mode +} ADDR3_COMPUTE_PIPEBANKXOR_INPUT; + +/** +**************************************************************************************************** +* ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT +* +* @brief +* Output structure of Addr3ComputePipeBankXor +**************************************************************************************************** +*/ +typedef struct _ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 pipeBankXor; ///< Pipe bank xor +} ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT; + +/** +**************************************************************************************************** +* Addr3ComputePipeBankXor +* +* @brief +* Calculate a valid bank pipe xor value for client to use. 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr3ComputePipeBankXor( + ADDR_HANDLE hLib, + const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT +* +* @brief +* Input structure of Addr3ComputeNonBlockCompressedView +**************************************************************************************************** +*/ +typedef struct _ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + ADDR3_SURFACE_FLAGS flags; ///< Surface flags + Addr3SwizzleMode swizzleMode; ///< Swizzle Mode for Gfx12 + AddrResourceType resourceType; ///< Surface type + AddrFormat format; ///< Surface format + ADDR_EXTENT3D unAlignedDims; ///< Surface original dimensions (of mip0) + UINT_32 numMipLevels; ///< Total mipmap levels. 
+ UINT_32 pipeBankXor; ///< Combined swizzle used to do bank/pipe rotation + UINT_32 slice; ///< Index of slice to view + UINT_32 mipId; ///< Id of mip to view +} ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT; + +/** +**************************************************************************************************** +* ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT +* +* @brief +* Output structure of Addr3ComputeNonBlockCompressedView +**************************************************************************************************** +*/ +typedef struct _ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_64 offset; ///< Offset from resource base for the view + UINT_32 pipeBankXor; ///< Pipe bank xor for the view + ADDR_EXTENT3D unAlignedDims; ///< Mip0 dimens (in element) for the view + UINT_32 numMipLevels; ///< Total mipmap levels for the view + UINT_32 mipId; ///< Mip ID for the view +} ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT; + +/** +**************************************************************************************************** +* Addr3ComputeNonBlockCompressedView +* +* @brief +* Compute non-block-compressed view for a given mipmap level/slice +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr3ComputeNonBlockCompressedView( + ADDR_HANDLE hLib, + const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, + ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT +* +* @brief +* Input structure of Addr3ComputeSubResourceOffsetForSwizzlePattern +**************************************************************************************************** +*/ +typedef struct _ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT +{ + UINT_32 size; ///< Size of this 
structure in bytes + Addr3SwizzleMode swizzleMode; ///< Surface swizzle mode + AddrResourceType resourceType; ///< Surface resource type + UINT_32 pipeBankXor; ///< Per resource xor + UINT_32 slice; ///< Slice id + UINT_64 sliceSize; ///< Slice size of a mip chain + UINT_64 macroBlockOffset; ///< Macro block offset, returned in ADDR3_MIP_INFO + UINT_32 mipTailOffset; ///< Mip tail offset, returned in ADDR3_MIP_INFO +} ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT; + +/** +**************************************************************************************************** +* ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT +* +* @brief +* Output structure of Addr3ComputeSubResourceOffsetForSwizzlePattern +**************************************************************************************************** +*/ +typedef struct _ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_64 offset; ///< offset +} ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT; + +/** +**************************************************************************************************** +* Addr3ComputeSubResourceOffsetForSwizzlePattern +* +* @brief +* Calculate sub resource offset to support swizzle pattern. 
+**************************************************************************************************** +*/ +VOID ADDR_API Addr3ComputeSubResourceOffsetForSwizzlePattern( + ADDR_HANDLE hLib, + const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT +* +* @brief +* Input structure of Addr3ComputeSlicePipeBankXor +**************************************************************************************************** +*/ +typedef struct _ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + Addr3SwizzleMode swizzleMode; ///< Surface swizzle mode + AddrResourceType resourceType; ///< Surface resource type + UINT_32 bpe; ///< bits per element (e.g. block size for BCn format) + UINT_32 basePipeBankXor; ///< Base pipe bank xor + UINT_32 slice; ///< Slice id + UINT_32 numSamples; ///< Number of samples +} ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT; + +/** +**************************************************************************************************** +* ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT +* +* @brief +* Output structure of Addr3ComputeSlicePipeBankXor +**************************************************************************************************** +*/ +typedef struct _ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 pipeBankXor; ///< Pipe bank xor +} ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT; + +/** +**************************************************************************************************** +* Addr3ComputeSlicePipeBankXor +* +* @brief +* Calculate slice pipe bank xor value based on base pipe bank xor and slice id. 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr3ComputeSlicePipeBankXor( + ADDR_HANDLE hLib, + const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut); + + +#if defined(__cplusplus) +} +#endif #endif // __ADDR_INTERFACE_H__ diff --git a/runtime/hsa-runtime/image/addrlib/inc/addrtypes.h b/runtime/hsa-runtime/image/addrlib/inc/addrtypes.h index ccecc2473f..aa1b48873e 100644 --- a/runtime/hsa-runtime/image/addrlib/inc/addrtypes.h +++ b/runtime/hsa-runtime/image/addrlib/inc/addrtypes.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -292,6 +275,26 @@ typedef enum _AddrSwizzleMode ADDR_SW_256KB_R_X = ADDR_SW_MISCDEF31, } AddrSwizzleMode; +/** +**************************************************************************************************** +* @brief +* Neutral enums that define swizzle modes for Gfx12+ ASIC +* +**************************************************************************************************** +*/ +typedef enum _Addr3SwizzleMode +{ + ADDR3_LINEAR = 0, + ADDR3_256B_2D = 1, + ADDR3_4KB_2D = 2, + ADDR3_64KB_2D = 3, + ADDR3_256KB_2D = 4, + ADDR3_4KB_3D = 5, + ADDR3_64KB_3D = 6, + ADDR3_256KB_3D = 7, + ADDR3_MAX_TYPE = 8, +} Addr3SwizzleMode; + /** **************************************************************************************************** * @brief @@ -454,6 +457,7 @@ typedef enum _AddrFormat { ADDR_FMT_ASTC_12x12 = 0x0000004d, ADDR_FMT_ETC2_64BPP = 0x0000004e, ADDR_FMT_ETC2_128BPP = 0x0000004f, + ADDR_FMT_BG_RG_16_16_16_16 = 0x00000050, } AddrFormat; /** diff --git a/runtime/hsa-runtime/image/addrlib/src/addrinterface.cpp b/runtime/hsa-runtime/image/addrlib/src/addrinterface.cpp index d1ebf2680e..80386996a4 100644 --- a/runtime/hsa-runtime/image/addrlib/src/addrinterface.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/addrinterface.cpp @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. 
-* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -32,10 +15,10 @@ #include "addrinterface.h" #include "addrlib1.h" #include "addrlib2.h" +#include "addrlib3.h" #include "addrcommon.h" -namespace rocr { using namespace Addr; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1796,7 +1779,376 @@ ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode( returnCode = ADDR_ERROR; } - return returnCode; + return returnCode; } -} // namespace rocr \ No newline at end of file +/** +**************************************************************************************************** +* Addr2GetPossibleSwizzleModes +* +* @brief +* Returns a list of swizzle modes that are valid from the hardware's perspective for the +* client to choose from 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2GetPossibleSwizzleModes( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->GetPossibleSwizzleModes(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} +/** +**************************************************************************************************** +* Addr2GetAllowedBlockSet +* +* @brief +* Returns the set of allowed block sizes given the allowed swizzle modes and resource type +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2GetAllowedBlockSet( + ADDR_HANDLE hLib, ///< handle of addrlib + ADDR2_SWMODE_SET allowedSwModeSet, ///< [in] allowed swizzle modes + AddrResourceType rsrcType, ///< [in] resource type + ADDR2_BLOCK_SET* pAllowedBlockSet) ///< [out] allowed block sizes +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->GetAllowedBlockSet(allowedSwModeSet, rsrcType, pAllowedBlockSet); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2GetAllowedSwSet +* +* @brief +* Returns the set of allowed swizzle types given the allowed swizzle modes +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2GetAllowedSwSet( + ADDR_HANDLE hLib, ///< handle of addrlib + ADDR2_SWMODE_SET allowedSwModeSet, ///< [in] allowed swizzle modes + ADDR2_SWTYPE_SET* pAllowedSwSet) ///< 
[out] allowed swizzle types +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->GetAllowedSwSet(allowedSwModeSet, pAllowedSwSet); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2IsBlockTypeAvailable +* +* @brief +* Determine whether a block type is allowed in a given blockSet +**************************************************************************************************** +*/ +BOOL_32 Addr2IsBlockTypeAvailable( + ADDR2_BLOCK_SET blockSet, + AddrBlockType blockType) +{ + BOOL_32 avail; + + if (blockType == AddrBlockLinear) + { + avail = blockSet.linear ? TRUE : FALSE; + } + else + { + avail = blockSet.value & (1 << (static_cast<UINT_32>(blockType) - 1)) ? TRUE : FALSE; + } + + return avail; +} + +/** +**************************************************************************************************** +* Addr2BlockTypeWithinMemoryBudget +* +* @brief +* Determine whether a new block type is acceptable based on memory waste ratio. Will favor +* larger block types. 
+**************************************************************************************************** +*/ +BOOL_32 Addr2BlockTypeWithinMemoryBudget( + UINT_64 minSize, + UINT_64 newBlockTypeSize, + UINT_32 ratioLow, + UINT_32 ratioHi, + DOUBLE memoryBudget, + BOOL_32 newBlockTypeBigger) +{ + BOOL_32 accept = FALSE; + + if (memoryBudget >= 1.0) + { + if (newBlockTypeBigger) + { + if ((static_cast<DOUBLE>(newBlockTypeSize) / minSize) <= memoryBudget) + { + accept = TRUE; + } + } + else + { + if ((static_cast<DOUBLE>(minSize) / newBlockTypeSize) > memoryBudget) + { + accept = TRUE; + } + } + } + else + { + if (newBlockTypeBigger) + { + if ((newBlockTypeSize * ratioHi) <= (minSize * ratioLow)) + { + accept = TRUE; + } + } + else + { + if ((newBlockTypeSize * ratioLow) < (minSize * ratioHi)) + { + accept = TRUE; + } + } + } + + return accept; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface functions for Addr3 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Addr3ComputeSurfaceInfo +* +* @brief +* Calculate surface width/height/depth/alignments and suitable tiling mode +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr3ComputeSurfaceInfo( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] surface information + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut) ///< [out] surface parameters and alignments +{ + V3::Lib* pLib = V3::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_ERROR; // reported when GetLib() yields NULL + + if (pLib != NULL) + { + returnCode = pLib->ComputeSurfaceInfo(pIn, pOut); + } + + return returnCode; +} + +/** 
+**************************************************************************************************** +* Addr3GetPossibleSwizzleModes +* +* @brief +* Get valid swizzle mode options given image input for further optimal selection +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr3GetPossibleSwizzleModes( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn, ///< [in] surface information + ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT* pOut) ///< [out] allowable swizzle modes +{ + V3::Lib* pLib = V3::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_ERROR; // reported when GetLib() yields NULL + + if (pLib != NULL) + { + returnCode = pLib->GetPossibleSwizzleModes(pIn, pOut); + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr3ComputeSurfaceAddrFromCoord +* +* @brief +* Compute surface address according to coordinates +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr3ComputeSurfaceAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] surface info and coordinates + ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] surface address +{ + V3::Lib* pLib = V3::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeSurfaceAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr3ComputePipeBankXor +* +* @brief +* Calculate a valid bank 
pipe xor value for client to use. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr3ComputePipeBankXor( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input + ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V3::Lib* pLib = V3::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputePipeBankXor(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr3ComputeNonBlockCompressedView +* +* @brief +* Compute non-block-compressed view for a given mipmap level/slice. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr3ComputeNonBlockCompressedView( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, ///< [in] input + ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V3::Lib* pLib = V3::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputeNonBlockCompressedView(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr3ComputeSubResourceOffsetForSwizzlePattern +* +* @brief +* Calculate sub resource offset for swizzle pattern. 
+**************************************************************************************************** +*/ +VOID ADDR_API Addr3ComputeSubResourceOffsetForSwizzlePattern( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input + ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) ///< [out] output +{ + V3::Lib* pLib = V3::Lib::GetLib(hLib); + + if (pLib != NULL) + { + pLib->ComputeSubResourceOffsetForSwizzlePattern(pIn, pOut); + } +} + +/** +**************************************************************************************************** +* Addr3ComputeSlicePipeBankXor +* +* @brief +* Calculate slice pipe bank xor value based on base pipe bank xor and slice id. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr3ComputeSlicePipeBankXor( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input + ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V3::Lib* pLib = V3::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputeSlicePipeBankXor(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} diff --git a/runtime/hsa-runtime/image/addrlib/src/amdgpu_asic_addr.h b/runtime/hsa-runtime/image/addrlib/src/amdgpu_asic_addr.h index c384c138cb..1909e56cb2 100644 --- a/runtime/hsa-runtime/image/addrlib/src/amdgpu_asic_addr.h +++ b/runtime/hsa-runtime/image/addrlib/src/amdgpu_asic_addr.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2017-2022 Advanced Micro Devices, Inc. All rights reserved. 
-* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -33,24 +16,23 @@ #define AMDGPU_VENDOR_IS_AMD(v) ((v == ATI_VENDOR_ID) || (v == AMD_VENDOR_ID)) #define FAMILY_UNKNOWN 0x00 -#define FAMILY_TN 0x69 -#define FAMILY_SI 0x6E -#define FAMILY_CI 0x78 -#define FAMILY_KV 0x7D -#define FAMILY_VI 0x82 -#define FAMILY_POLARIS 0x82 -#define FAMILY_CZ 0x87 -#define FAMILY_AI 0x8D -#define FAMILY_RV 0x8E -#define FAMILY_NV 0x8F -#define FAMILY_VGH 0x90 -#define FAMILY_GFX1100 0x91 -#define FAMILY_GFX1103 0x94 +#define FAMILY_TN 0x69 //# 105 / Trinity APUs +#define FAMILY_SI 0x6E //# 110 / Southern Islands: Tahiti, Pitcairn, CapeVerde, Oland, Hainan +#define FAMILY_CI 0x78 //# 120 / Sea Islands: Bonaire, Hawaii +#define FAMILY_KV 0x7D //# 125 / Kaveri APUs: Spectre, Spooky, Kalindi, Godavari +#define FAMILY_VI 0x82 
//# 130 / Volcanic Islands: Iceland, Tonga, Fiji +#define FAMILY_CZ 0x87 //# 135 / Carrizo APUs: Carrizo, Stoney +#define FAMILY_AI 0x8D //# 141 / Vega: 10, 20 +#define FAMILY_RV 0x8E //# 142 / Raven +#define FAMILY_NV 0x8F //# 143 / Navi: 10 +#define FAMILY_VGH 0x90 //# 144 / Van Gogh +#define FAMILY_NV3 0x91 //# 145 / Navi: 3x #define FAMILY_GFX1150 0x96 -#define FAMILY_RMB 0x92 -#define FAMILY_GC_10_3_6 0x95 -#define FAMILY_GC_10_3_7 0x97 - +#define FAMILY_GFX1103 0x94 +#define FAMILY_RMB 0x92 //# 146 / Rembrandt +#define FAMILY_RPL 0x95 //# 149 / Raphael +#define FAMILY_MDN 0x97 //# 151 / Mendocino +#define FAMILY_GFX12 0x98 // AMDGPU_FAMILY_IS(familyId, familyName) #define FAMILY_IS(f, fn) (f == FAMILY_##fn) @@ -64,70 +46,72 @@ #define FAMILY_IS_AI(f) FAMILY_IS(f, AI) #define FAMILY_IS_RV(f) FAMILY_IS(f, RV) #define FAMILY_IS_NV(f) FAMILY_IS(f, NV) +#define FAMILY_IS_NV3(f) FAMILY_IS(f, NV3) #define FAMILY_IS_RMB(f) FAMILY_IS(f, RMB) -#define FAMILY_IS_GFX1100(f) FAMILY_IS(f, GFX1100) -#define FAMILY_IS_GFX1103(f) FAMILY_IS(f, GFX1103) -#define FAMILY_IS_GFX1150(f) FAMILY_IS(f, GFX1150) +#define FAMILY_IS_GFX12(f) FAMILY_IS(f, GFX12) #define AMDGPU_UNKNOWN 0xFF -#define AMDGPU_TAHITI_RANGE 0x05, 0x14 -#define AMDGPU_PITCAIRN_RANGE 0x15, 0x28 -#define AMDGPU_CAPEVERDE_RANGE 0x29, 0x3C -#define AMDGPU_OLAND_RANGE 0x3C, 0x46 -#define AMDGPU_HAINAN_RANGE 0x46, 0xFF +#define AMDGPU_TAHITI_RANGE 0x05, 0x14 //# 5 <= x < 20 +#define AMDGPU_PITCAIRN_RANGE 0x15, 0x28 //# 21 <= x < 40 +#define AMDGPU_CAPEVERDE_RANGE 0x29, 0x3C //# 41 <= x < 60 +#define AMDGPU_OLAND_RANGE 0x3C, 0x46 //# 60 <= x < 70 +#define AMDGPU_HAINAN_RANGE 0x46, 0xFF //# 70 <= x < max -#define AMDGPU_BONAIRE_RANGE 0x14, 0x28 -#define AMDGPU_HAWAII_RANGE 0x28, 0x3C +#define AMDGPU_BONAIRE_RANGE 0x14, 0x28 //# 20 <= x < 40 +#define AMDGPU_HAWAII_RANGE 0x28, 0x3C //# 40 <= x < 60 -#define AMDGPU_SPECTRE_RANGE 0x01, 0x41 -#define AMDGPU_SPOOKY_RANGE 0x41, 0x81 -#define AMDGPU_KALINDI_RANGE 0x81, 0xA1 
-#define AMDGPU_GODAVARI_RANGE 0xA1, 0xFF +#define AMDGPU_SPECTRE_RANGE 0x01, 0x41 //# 1 <= x < 65 +#define AMDGPU_SPOOKY_RANGE 0x41, 0x81 //# 65 <= x < 129 +#define AMDGPU_KALINDI_RANGE 0x81, 0xA1 //# 129 <= x < 161 +#define AMDGPU_GODAVARI_RANGE 0xA1, 0xFF //# 161 <= x < max -#define AMDGPU_ICELAND_RANGE 0x01, 0x14 -#define AMDGPU_TONGA_RANGE 0x14, 0x28 -#define AMDGPU_FIJI_RANGE 0x3C, 0x50 +#define AMDGPU_ICELAND_RANGE 0x01, 0x14 //# 1 <= x < 20 +#define AMDGPU_TONGA_RANGE 0x14, 0x28 //# 20 <= x < 40 +#define AMDGPU_FIJI_RANGE 0x3C, 0x50 //# 60 <= x < 80 -#define AMDGPU_POLARIS10_RANGE 0x50, 0x5A -#define AMDGPU_POLARIS11_RANGE 0x5A, 0x64 -#define AMDGPU_POLARIS12_RANGE 0x64, 0x6E -#define AMDGPU_VEGAM_RANGE 0x6E, 0xFF +#define AMDGPU_POLARIS10_RANGE 0x50, 0x5A //# 80 <= x < 90 +#define AMDGPU_POLARIS11_RANGE 0x5A, 0x64 //# 90 <= x < 100 +#define AMDGPU_POLARIS12_RANGE 0x64, 0x6E //# 100 <= x < 110 +#define AMDGPU_VEGAM_RANGE 0x6E, 0xFF //# 110 <= x < max -#define AMDGPU_CARRIZO_RANGE 0x01, 0x21 -#define AMDGPU_STONEY_RANGE 0x61, 0xFF +#define AMDGPU_CARRIZO_RANGE 0x01, 0x21 //# 1 <= x < 33 +#define AMDGPU_BRISTOL_RANGE 0x10, 0x21 //# 16 <= x < 33 +#define AMDGPU_STONEY_RANGE 0x61, 0xFF //# 97 <= x < max -#define AMDGPU_VEGA10_RANGE 0x01, 0x14 -#define AMDGPU_VEGA12_RANGE 0x14, 0x28 -#define AMDGPU_VEGA20_RANGE 0x28, 0x32 -#define AMDGPU_ARCTURUS_RANGE 0x32, 0x3C -#define AMDGPU_ALDEBARAN_RANGE 0x3C, 0xFF +#define AMDGPU_VEGA10_RANGE 0x01, 0x14 //# 1 <= x < 20 +#define AMDGPU_VEGA12_RANGE 0x14, 0x28 //# 20 <= x < 40 +#define AMDGPU_VEGA20_RANGE 0x28, 0xFF //# 40 <= x < max -#define AMDGPU_RAVEN_RANGE 0x01, 0x81 -#define AMDGPU_RAVEN2_RANGE 0x81, 0x91 -#define AMDGPU_RENOIR_RANGE 0x91, 0xFF +#define AMDGPU_RAVEN_RANGE 0x01, 0x81 //# 1 <= x < 129 +#define AMDGPU_RAVEN2_RANGE 0x81, 0x90 //# 129 <= x < 144 +#define AMDGPU_RENOIR_RANGE 0x91, 0xFF //# 145 <= x < max -#define AMDGPU_NAVI10_RANGE 0x01, 0x0A -#define AMDGPU_NAVI12_RANGE 0x0A, 0x14 -#define 
AMDGPU_NAVI14_RANGE 0x14, 0x28 -#define AMDGPU_NAVI21_RANGE 0x28, 0x32 -#define AMDGPU_NAVI22_RANGE 0x32, 0x3C -#define AMDGPU_NAVI23_RANGE 0x3C, 0x46 -#define AMDGPU_NAVI24_RANGE 0x46, 0x50 +#define AMDGPU_NAVI10_RANGE 0x01, 0x0A //# 1 <= x < 10 +#define AMDGPU_NAVI12_RANGE 0x0A, 0x14 //# 10 <= x < 20 +#define AMDGPU_NAVI14_RANGE 0x14, 0x28 //# 20 <= x < 40 +#define AMDGPU_NAVI21_RANGE 0x28, 0x32 //# 40 <= x < 50 +#define AMDGPU_NAVI22_RANGE 0x32, 0x3C //# 50 <= x < 60 +#define AMDGPU_NAVI23_RANGE 0x3C, 0x46 //# 60 <= x < 70 +#define AMDGPU_NAVI24_RANGE 0x46, 0x50 //# 70 <= x < 80 -#define AMDGPU_VANGOGH_RANGE 0x01, 0xFF +#define AMDGPU_VANGOGH_RANGE 0x01, 0xFF //# 1 <= x < max -#define AMDGPU_GFX1100_RANGE 0x01, 0x10 -#define AMDGPU_GFX1101_RANGE 0x20, 0xFF -#define AMDGPU_GFX1102_RANGE 0x10, 0x20 -#define AMDGPU_GFX1103_RANGE 0x01, 0xFF -#define AMDGPU_GFX1150_RANGE 0x01, 0xFF +#define AMDGPU_NAVI31_RANGE 0x01, 0x10 //# 01 <= x < 16 +#define AMDGPU_NAVI32_RANGE 0x20, 0xFF //# 32 <= x < 255 +#define AMDGPU_NAVI33_RANGE 0x10, 0x20 //# 16 <= x < 32 +#define AMDGPU_GFX1103_R1_RANGE 0x01, 0x80 //# 1 <= x < 128 +#define AMDGPU_GFX1103_R2_RANGE 0x80, 0xC0 //# 128 <= x < 192 -#define AMDGPU_REMBRANDT_RANGE 0x01, 0xFF +#define AMDGPU_GFX1150_RANGE 0x01, 0xFF //# 1 <= x < max -#define AMDGPU_GFX1036_RANGE 0x01, 0xFF +#define AMDGPU_REMBRANDT_RANGE 0x01, 0xFF //# 01 <= x < 255 -#define AMDGPU_GFX1037_RANGE 0x01, 0xFF +#define AMDGPU_RAPHAEL_RANGE 0x01, 0xFF //# 1 <= x < max + +#define AMDGPU_MENDOCINO_RANGE 0x01, 0xFF //# 1 <= x < max + +#define AMDGPU_GFX12_TBD1_RANGE 0x40, 0xFF //# 64 <= x < max #define AMDGPU_EXPAND_FIX(x) x #define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max)) @@ -160,6 +144,7 @@ #define ASICREV_IS_VEGAM_P(r) ASICREV_IS(r, VEGAM) #define ASICREV_IS_CARRIZO(r) ASICREV_IS(r, CARRIZO) +#define ASICREV_IS_CARRIZO_BRISTOL(r) ASICREV_IS(r, BRISTOL) #define ASICREV_IS_STONEY(r) ASICREV_IS(r, STONEY) #define ASICREV_IS_VEGA10_M(r) 
ASICREV_IS(r, VEGA10) @@ -167,8 +152,6 @@ #define ASICREV_IS_VEGA12_P(r) ASICREV_IS(r, VEGA12) #define ASICREV_IS_VEGA12_p(r) ASICREV_IS(r, VEGA12) #define ASICREV_IS_VEGA20_P(r) ASICREV_IS(r, VEGA20) -#define ASICREV_IS_ARCTURUS(r) ASICREV_IS(r, ARCTURUS) -#define ASICREV_IS_ALDEBARAN(r) ASICREV_IS(r, ALDEBARAN) #define ASICREV_IS_RAVEN(r) ASICREV_IS(r, RAVEN) #define ASICREV_IS_RAVEN2(r) ASICREV_IS(r, RAVEN2) @@ -190,16 +173,20 @@ #define ASICREV_IS_VANGOGH(r) ASICREV_IS(r, VANGOGH) -#define ASICREV_IS_GFX1100(r) ASICREV_IS(r, GFX1100) -#define ASICREV_IS_GFX1101(r) ASICREV_IS(r, GFX1101) -#define ASICREV_IS_GFX1102(r) ASICREV_IS(r, GFX1102) -#define ASICREV_IS_GFX1103(r) ASICREV_IS(r, GFX1103) +#define ASICREV_IS_NAVI31_P(r) ASICREV_IS(r, NAVI31) +#define ASICREV_IS_NAVI32_P(r) ASICREV_IS(r, NAVI32) +#define ASICREV_IS_NAVI33_P(r) ASICREV_IS(r, NAVI33) +#define ASICREV_IS_GFX1150(r) ASICREV_IS(r, GFX1150) +#define ASICREV_IS_GFX1103_R1(r) ASICREV_IS(r, GFX1103_R1) +#define ASICREV_IS_GFX1103_R2(r) ASICREV_IS(r, GFX1103_R2) #define ASICREV_IS_GFX1150(r) ASICREV_IS(r, GFX1150) #define ASICREV_IS_REMBRANDT(r) ASICREV_IS(r, REMBRANDT) -#define ASICREV_IS_GFX1036(r) ASICREV_IS(r, GFX1036) +#define ASICREV_IS_RAPHAEL(r) ASICREV_IS(r, RAPHAEL) -#define ASICREV_IS_GFX1037(r) ASICREV_IS(r, GFX1037) +#define ASICREV_IS_MENDOCINO(r) ASICREV_IS(r, MENDOCINO) + +#define ASICREV_IS_GFX12_TBD1_P(r) ASICREV_IS(r, GFX12_TBD1) #endif // _AMDGPU_ASIC_ADDR_H diff --git a/runtime/hsa-runtime/image/addrlib/src/chip/gfx10/gfx10_gb_reg.h b/runtime/hsa-runtime/image/addrlib/src/chip/gfx10/gfx10_gb_reg.h index 7383c4e061..9f0521c1f5 100644 --- a/runtime/hsa-runtime/image/addrlib/src/chip/gfx10/gfx10_gb_reg.h +++ b/runtime/hsa-runtime/image/addrlib/src/chip/gfx10/gfx10_gb_reg.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. 
All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ diff --git a/runtime/hsa-runtime/image/addrlib/src/chip/gfx11/gfx11_gb_reg.h b/runtime/hsa-runtime/image/addrlib/src/chip/gfx11/gfx11_gb_reg.h index 99a66c08d3..12ab84da86 100644 --- a/runtime/hsa-runtime/image/addrlib/src/chip/gfx11/gfx11_gb_reg.h +++ b/runtime/hsa-runtime/image/addrlib/src/chip/gfx11/gfx11_gb_reg.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. 
-* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -74,3 +57,4 @@ union GB_ADDR_CONFIG_GFX11 }; #endif + diff --git a/runtime/hsa-runtime/image/addrlib/src/chip/gfx12/gfx12_gb_reg.h b/runtime/hsa-runtime/image/addrlib/src/chip/gfx12/gfx12_gb_reg.h new file mode 100644 index 0000000000..389b3871d9 --- /dev/null +++ b/runtime/hsa-runtime/image/addrlib/src/chip/gfx12/gfx12_gb_reg.h @@ -0,0 +1,57 @@ +/* +************************************************************************************************************************ +* +* Copyright (C) 2007-2023 Advanced Micro Devices, Inc. All rights reserved. 
+* SPDX-License-Identifier: MIT +* +***********************************************************************************************************************/ + +#if !defined (__GFX12_GB_REG_H__) +#define __GFX12_GB_REG_H__ + +/* +* gfx12_gb_reg.h +* +* Register Spec Release: 1.0 +* +*/ + +// +// Make sure the necessary endian defines are there. +// +#if defined(LITTLEENDIAN_CPU) +#elif defined(BIGENDIAN_CPU) +#else +#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" +#endif + +union GB_ADDR_CONFIG_GFX12 { + struct { +#if defined(LITTLEENDIAN_CPU) + unsigned int NUM_PIPES : 3; + unsigned int PIPE_INTERLEAVE_SIZE : 3; + unsigned int MAX_COMPRESSED_FRAGS : 2; + unsigned int NUM_PKRS : 3; + unsigned int : 8; + unsigned int NUM_SHADER_ENGINES : 4; + unsigned int : 3; + unsigned int NUM_RB_PER_SE : 2; + unsigned int : 4; +#elif defined(BIGENDIAN_CPU) + unsigned int : 4; + unsigned int NUM_RB_PER_SE : 2; + unsigned int : 3; + unsigned int NUM_SHADER_ENGINES : 4; + unsigned int : 8; + unsigned int NUM_PKRS : 3; + unsigned int MAX_COMPRESSED_FRAGS : 2; + unsigned int PIPE_INTERLEAVE_SIZE : 3; + unsigned int NUM_PIPES : 3; +#endif + } bitfields, bits; + unsigned int u32All; + int i32All; + float f32All; +}; + +#endif \ No newline at end of file diff --git a/runtime/hsa-runtime/image/addrlib/src/chip/gfx9/gfx9_gb_reg.h b/runtime/hsa-runtime/image/addrlib/src/chip/gfx9/gfx9_gb_reg.h index b0be682ccf..8ff6939ab6 100644 --- a/runtime/hsa-runtime/image/addrlib/src/chip/gfx9/gfx9_gb_reg.h +++ b/runtime/hsa-runtime/image/addrlib/src/chip/gfx9/gfx9_gb_reg.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. 
-* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ diff --git a/runtime/hsa-runtime/image/addrlib/src/chip/r800/si_gb_reg.h b/runtime/hsa-runtime/image/addrlib/src/chip/r800/si_gb_reg.h index 3f5f4071eb..c5bb578f98 100644 --- a/runtime/hsa-runtime/image/addrlib/src/chip/r800/si_gb_reg.h +++ b/runtime/hsa-runtime/image/addrlib/src/chip/r800/si_gb_reg.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. 
-* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -93,9 +76,52 @@ #endif +#if defined(LITTLEENDIAN_CPU) + + typedef struct _GB_ADDR_CONFIG_N { + unsigned int num_pipes : 3; + unsigned int pipe_interleave_size : 3; + unsigned int max_compressed_frags : 2; + unsigned int bank_interleave_size : 3; + unsigned int : 1; + unsigned int num_banks : 3; + unsigned int : 1; + unsigned int shader_engine_tile_size : 3; + unsigned int num_shader_engines : 2; + unsigned int num_gpus : 3; + unsigned int multi_gpu_tile_size : 2; + unsigned int num_rb_per_se : 2; + unsigned int row_size : 2; + unsigned int num_lower_pipes : 1; + unsigned int se_enable : 1; + } GB_ADDR_CONFIG_N; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _GB_ADDR_CONFIG_N { + unsigned int se_enable : 1; + unsigned int num_lower_pipes : 1; + 
unsigned int row_size : 2; + unsigned int num_rb_per_se : 2; + unsigned int multi_gpu_tile_size : 2; + unsigned int num_gpus : 3; + unsigned int num_shader_engines : 2; + unsigned int shader_engine_tile_size : 3; + unsigned int : 1; + unsigned int num_banks : 3; + unsigned int : 1; + unsigned int bank_interleave_size : 3; + unsigned int max_compressed_frags : 2; + unsigned int pipe_interleave_size : 3; + unsigned int num_pipes : 3; + } GB_ADDR_CONFIG_N; + +#endif + typedef union { unsigned int val : 32; GB_ADDR_CONFIG_T f; + GB_ADDR_CONFIG_N n; } GB_ADDR_CONFIG; #if defined(LITTLEENDIAN_CPU) diff --git a/runtime/hsa-runtime/image/addrlib/src/core/addrcommon.h b/runtime/hsa-runtime/image/addrlib/src/core/addrcommon.h index 6b8fa0a5cb..28770bf8fe 100644 --- a/runtime/hsa-runtime/image/addrlib/src/core/addrcommon.h +++ b/runtime/hsa-runtime/image/addrlib/src/core/addrcommon.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -42,9 +25,13 @@ #endif #if defined(__GNUC__) + #include #include #endif +#if defined(_WIN32) +#include +#endif //////////////////////////////////////////////////////////////////////////////////////////////////// // Platform specific debug break defines @@ -89,7 +76,13 @@ #else #define ADDR_ASSERT(__e) if ( !((__e) ? TRUE : FALSE)) { ADDR_DBG_BREAK(); } #endif - #define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK() + + #if ADDR_SILENCE_ASSERT_ALWAYS + #define ADDR_ASSERT_ALWAYS() + #else + #define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK() + #endif + #define ADDR_UNHANDLED_CASE() ADDR_ASSERT(!"Unhandled case") #define ADDR_NOT_IMPLEMENTED() ADDR_ASSERT(!"Not implemented"); #else //DEBUG @@ -191,10 +184,11 @@ } while (0) #endif -namespace rocr { -namespace Addr { -namespace V1 { +namespace Addr +{ +namespace V1 +{ //////////////////////////////////////////////////////////////////////////////////////////////////// // Common constants //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -318,6 +312,49 @@ static inline UINT_32 XorReduce( return result; } +/** +**************************************************************************************************** +* Unset least bit +* +* @brief +* Returns a copy of the value with the least-significant '1' bit unset +**************************************************************************************************** +*/ +static inline UINT_32 UnsetLeastBit( + UINT_32 val) +{ + return val & (val - 1); +} + +/** 
+**************************************************************************************************** +* BitScanForward +* +* @brief +* Returns the index-position of the least-significant '1' bit. Must not be 0. +**************************************************************************************************** +*/ +static inline UINT_32 BitScanForward( + UINT_32 mask) ///< [in] Bitmask to scan +{ + ADDR_ASSERT(mask > 0); + unsigned long out = 0; +#if (defined(_WIN64) && defined(_M_X64)) || (defined(_WIN32) && defined(_M_IX64)) + out = ::_tzcnt_u32(mask); +#elif (defined(_WIN32) || defined(_WIN64)) + ::_BitScanForward(&out, mask); +#elif defined(__GNUC__) + out = __builtin_ctz(mask); +#else + while ((mask & 1) == 0) + { + mask >>= 1; + out++; + } +#endif + return out; +} + /** **************************************************************************************************** * IsPow2 @@ -974,6 +1011,37 @@ static inline UINT_32 GetCoordActiveMask( return mask; } +/** +**************************************************************************************************** +* FillEqBitComponents +* +* @brief +* Fill the 'numBitComponents' field based on the equation. 
+**************************************************************************************************** +*/ +static inline void FillEqBitComponents( + ADDR_EQUATION *pEquation) // [in/out] Equation to calculate bit components for +{ + pEquation->numBitComponents = 1; // We always have at least the address + for (UINT_32 xorN = 1; xorN < ADDR_MAX_EQUATION_COMP; xorN++) + { + for (UINT_32 bit = 0; bit < ADDR_MAX_EQUATION_BIT; bit++) + { + if (pEquation->comps[xorN][bit].valid) + { + pEquation->numBitComponents = xorN + 1; + break; + } + } + + if (pEquation->numBitComponents != (xorN + 1)) + { + // Skip following components if this one wasn't valid + break; + } + } +} + /** **************************************************************************************************** * ShiftCeil @@ -1005,7 +1073,6 @@ static inline UINT_32 ShiftRight( } } // Addr -} // rocr #endif // __ADDR_COMMON_H__ diff --git a/runtime/hsa-runtime/image/addrlib/src/core/addrelemlib.cpp b/runtime/hsa-runtime/image/addrlib/src/core/addrelemlib.cpp index b3bff74fc0..e42d2624ad 100644 --- a/runtime/hsa-runtime/image/addrlib/src/core/addrelemlib.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/core/addrelemlib.cpp @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. 
-* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -33,8 +16,8 @@ #include "addrelemlib.h" #include "addrlib.h" -namespace rocr { -namespace Addr { +namespace Addr +{ /** **************************************************************************************************** @@ -1407,6 +1390,10 @@ UINT_32 ElemLib::GetBitsPerPixel( case ADDR_FMT_24_8: bpp = 32; break; + case ADDR_FMT_BG_RG_16_16_16_16: + elemMode = ADDR_PACKED_BGRG; + bpp = 32; + break; case ADDR_FMT_16_16_16_16: case ADDR_FMT_32_32: case ADDR_FMT_CTX1: @@ -1818,6 +1805,7 @@ BOOL_32 ElemLib::IsMacroPixelPacked( { case ADDR_FMT_BG_RG: case ADDR_FMT_GB_GR: + case ADDR_FMT_BG_RG_16_16_16_16: isMacroPixelPacked = TRUE; break; default: @@ -1827,5 +1815,4 @@ BOOL_32 ElemLib::IsMacroPixelPacked( return isMacroPixelPacked; } -} // Addr -} // rocr +} 
diff --git a/runtime/hsa-runtime/image/addrlib/src/core/addrelemlib.h b/runtime/hsa-runtime/image/addrlib/src/core/addrelemlib.h index 308c9844b3..0d1da9e7f4 100644 --- a/runtime/hsa-runtime/image/addrlib/src/core/addrelemlib.h +++ b/runtime/hsa-runtime/image/addrlib/src/core/addrelemlib.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -38,8 +21,8 @@ #include "addrobject.h" #include "addrcommon.h" -namespace rocr { -namespace Addr { +namespace Addr +{ class Lib; @@ -273,8 +256,7 @@ private: Addr::Lib* const m_pAddrLib; ///< Pointer to parent addrlib instance }; -} // Addr -} // rocr +} //Addr #endif diff --git a/runtime/hsa-runtime/image/addrlib/src/core/addrlib.cpp b/runtime/hsa-runtime/image/addrlib/src/core/addrlib.cpp index a958cd11ed..f67acde32d 100644 --- a/runtime/hsa-runtime/image/addrlib/src/core/addrlib.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/core/addrlib.cpp @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. 
-* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -80,8 +63,8 @@ UINT_32 __umoddi3(UINT_64 n, UINT_32 base) #endif // __APPLE__ -namespace rocr { -namespace Addr { +namespace Addr +{ //////////////////////////////////////////////////////////////////////////////////////////////////// // Constructor/Destructor @@ -228,15 +211,18 @@ ADDR_E_RETURNCODE Lib::Create( case FAMILY_NV: case FAMILY_VGH: case FAMILY_RMB: - case FAMILY_GC_10_3_6: - case FAMILY_GC_10_3_7: + case FAMILY_RPL: + case FAMILY_MDN: pLib = Gfx10HwlInit(&client); break; - case FAMILY_GFX1100: - case FAMILY_GFX1103: + case FAMILY_NV3: case FAMILY_GFX1150: + case FAMILY_GFX1103: pLib = Gfx11HwlInit(&client); break; + case FAMILY_GFX12: + pLib = Gfx12HwlInit(&client); + break; default: ADDR_ASSERT_ALWAYS(); break; @@ -247,7 +233,11 @@ ADDR_E_RETURNCODE Lib::Create( break; } } - + + if(pLib == NULL) + { + returnCode = ADDR_OUTOFMEMORY; + } if (pLib != NULL) { BOOL_32 initValid; @@ -286,6 +276,7 @@ ADDR_E_RETURNCODE Lib::Create( { delete pLib; pLib = NULL; + returnCode = ADDR_OUTOFMEMORY; ADDR_ASSERT_ALWAYS(); } else @@ -305,12 +296,6 @@ ADDR_E_RETURNCODE Lib::Create( pLib->SetMaxAlignments(); } - else if ((pLib == NULL) && - (returnCode == ADDR_OK)) - { - // Unknown failures, we return the general error code - returnCode = ADDR_ERROR; - } return returnCode; } @@ -673,4 +658,3 @@ UINT_32 Lib::GetBpe(AddrFormat format) const 
} } // Addr -} // rocr diff --git a/runtime/hsa-runtime/image/addrlib/src/core/addrlib.h b/runtime/hsa-runtime/image/addrlib/src/core/addrlib.h index 0d16762a55..7a1f59dc1c 100644 --- a/runtime/hsa-runtime/image/addrlib/src/core/addrlib.h +++ b/runtime/hsa-runtime/image/addrlib/src/core/addrlib.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -34,6 +17,7 @@ #define __ADDR_LIB_H__ #include "addrinterface.h" +#include "addrtypes.h" #include "addrobject.h" #include "addrelemlib.h" @@ -55,8 +39,8 @@ #define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D #endif -namespace rocr { -namespace Addr { +namespace Addr +{ /** **************************************************************************************************** @@ -266,7 +250,7 @@ public: } /// Returns asic chip family name defined by AddrLib - ChipFamily GetChipFamily() + ChipFamily GetChipFamily() const { return m_chipFamily; } @@ -316,6 +300,21 @@ protected: #endif } + static BOOL_32 IsTex1d(AddrResourceType resourceType) + { + return (resourceType == ADDR_RSRC_TEX_1D); + } + + static BOOL_32 IsTex2d(AddrResourceType resourceType) + { + return (resourceType == ADDR_RSRC_TEX_2D); + } + + static BOOL_32 IsTex3d(AddrResourceType resourceType) + { + return (resourceType == ADDR_RSRC_TEX_3D); + } + // // Initialization // @@ -408,7 +407,7 @@ Lib* CiHwlInit (const Client* pClient); Lib* Gfx9HwlInit (const Client* pClient); Lib* Gfx10HwlInit(const Client* pClient); Lib* Gfx11HwlInit(const Client* pClient); +Lib* Gfx12HwlInit(const Client* pClient); } // Addr -} // rocr #endif diff --git a/runtime/hsa-runtime/image/addrlib/src/core/addrlib1.cpp b/runtime/hsa-runtime/image/addrlib/src/core/addrlib1.cpp index c6ab5b3dac..34a866c5d7 100644 --- a/runtime/hsa-runtime/image/addrlib/src/core/addrlib1.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/core/addrlib1.cpp @@ -2,24 +2,7 @@ 
************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -34,9 +17,10 @@ #include "addrlib1.h" #include "addrcommon.h" -namespace rocr { -namespace Addr { -namespace V1 { +namespace Addr +{ +namespace V1 +{ //////////////////////////////////////////////////////////////////////////////////////////////////// // Static Const Member @@ -2994,6 +2978,7 @@ ADDR_E_RETURNCODE Lib::ComputeMicroTileEquation( // stackedDepthSlices is used for addressing mode that a tile block contains multiple slices, // which is not supported by our address lib pEquation->stackedDepthSlices = FALSE; + pEquation->numBitComponents = 1; return retCode; } @@ -4070,4 +4055,3 @@ ADDR_E_RETURNCODE Lib::ComputePrtInfo( } // V1 } // Addr -} // namespace rocr diff --git a/runtime/hsa-runtime/image/addrlib/src/core/addrlib1.h b/runtime/hsa-runtime/image/addrlib/src/core/addrlib1.h index a6b7fe32d1..f447767603 100644 --- a/runtime/hsa-runtime/image/addrlib/src/core/addrlib1.h +++ b/runtime/hsa-runtime/image/addrlib/src/core/addrlib1.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. 
-* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -36,9 +19,10 @@ #include "addrlib.h" -namespace rocr { -namespace Addr { -namespace V1 { +namespace Addr +{ +namespace V1 +{ /** **************************************************************************************************** @@ -539,7 +523,6 @@ private: } // V1 } // Addr -} // namespace rocr #endif diff --git a/runtime/hsa-runtime/image/addrlib/src/core/addrlib2.cpp b/runtime/hsa-runtime/image/addrlib/src/core/addrlib2.cpp index e230291005..55a87cdd15 100644 --- a/runtime/hsa-runtime/image/addrlib/src/core/addrlib2.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/core/addrlib2.cpp @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 
Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -35,9 +18,10 @@ #include "addrlib2.h" #include "addrcommon.h" -namespace rocr { -namespace Addr { -namespace V2 { +namespace Addr +{ +namespace V2 +{ //////////////////////////////////////////////////////////////////////////////////////////////////// // Static Const Member @@ -302,6 +286,12 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo( if (localIn.flags.needEquation && (Log2(localIn.numFrags) == 0)) { pOut->equationIndex = GetEquationIndex(&localIn, pOut); + if ((localIn.flags.allowExtEquation == 0) && + (pOut->equationIndex != ADDR_INVALID_EQUATION_INDEX) && + (m_equationTable[pOut->equationIndex].numBitComponents > ADDR_MAX_LEGACY_EQUATION_COMP)) + { + pOut->equationIndex = ADDR_INVALID_EQUATION_INDEX; 
+ } } if (localIn.flags.qbStereo) @@ -1177,6 +1167,7 @@ ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordLinear( ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; ADDR2_MIP_INFO mipInfo[MaxMipLevels]; + ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels); localIn.bpp = pIn->bpp; localIn.flags = pIn->flags; @@ -1852,6 +1843,61 @@ ADDR_E_RETURNCODE Lib::Addr2GetPreferredSurfaceSetting( return returnCode; } +/** +************************************************************************************************************************ +* Lib::GetPossibleSwizzleModes +* +* @brief +* Returns a list of swizzle modes that are valid from the hardware's perspective for the client to choose from +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::GetPossibleSwizzleModes( + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const +{ + return HwlGetPossibleSwizzleModes(pIn, pOut); +} + +/** +************************************************************************************************************************ +* Lib::GetAllowedBlockSet +* +* @brief +* Returns the set of allowed block sizes given the allowed swizzle modes and resource type +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::GetAllowedBlockSet( + ADDR2_SWMODE_SET allowedSwModeSet, + AddrResourceType rsrcType, + ADDR2_BLOCK_SET* pAllowedBlockSet) const +{ + return HwlGetAllowedBlockSet(allowedSwModeSet, rsrcType, pAllowedBlockSet); +} + +/** +************************************************************************************************************************ +* Lib::GetAllowedSwSet +* +* @brief +* Returns the set of allowed swizzle types given the allowed 
swizzle modes +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::GetAllowedSwSet( + ADDR2_SWMODE_SET allowedSwModeSet, + ADDR2_SWTYPE_SET* pAllowedSwSet) const +{ + return HwlGetAllowedSwSet(allowedSwModeSet, pAllowedSwSet); +} + /** ************************************************************************************************************************ * Lib::ComputeBlock256Equation @@ -2000,7 +2046,8 @@ VOID Lib::ComputeQbStereoInfo( VOID Lib::FilterInvalidEqSwizzleMode( ADDR2_SWMODE_SET& allowedSwModeSet, AddrResourceType resourceType, - UINT_32 elemLog2 + UINT_32 elemLog2, + UINT_32 maxComponents ) const { if (resourceType != ADDR_RSRC_TEX_1D) @@ -2013,7 +2060,12 @@ VOID Lib::FilterInvalidEqSwizzleMode( { if (validSwModeSet & 1) { - if (m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] == ADDR_INVALID_EQUATION_INDEX) + UINT_32 equation = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2]; + if (equation == ADDR_INVALID_EQUATION_INDEX) + { + allowedSwModeSetVal &= ~(1u << swModeIdx); + } + else if (m_equationTable[equation].numBitComponents > maxComponents) { allowedSwModeSetVal &= ~(1u << swModeIdx); } @@ -2030,94 +2082,6 @@ VOID Lib::FilterInvalidEqSwizzleMode( } } -/** -************************************************************************************************************************ -* Lib::IsBlockTypeAvaiable -* -* @brief -* Determine whether a block type is allowed in a given blockSet -* -* @return -* N/A -************************************************************************************************************************ -*/ -BOOL_32 Lib::IsBlockTypeAvaiable( - ADDR2_BLOCK_SET blockSet, - AddrBlockType blockType) -{ - BOOL_32 avail; - - if (blockType == AddrBlockLinear) - { - avail = blockSet.linear ? TRUE : FALSE; - } - else - { - avail = blockSet.value & (1 << (static_cast(blockType) - 1)) ? 
TRUE : FALSE; - } - - return avail; -} - -/** -************************************************************************************************************************ -* Lib::BlockTypeWithinMemoryBudget -* -* @brief -* Determine whether a new block type is acceptible based on memory waste ratio -* -* @return -* N/A -************************************************************************************************************************ -*/ -BOOL_32 Lib::BlockTypeWithinMemoryBudget( - UINT_64 minSize, - UINT_64 newBlockTypeSize, - UINT_32 ratioLow, - UINT_32 ratioHi, - DOUBLE memoryBudget, - BOOL_32 newBlockTypeBigger) -{ - BOOL_32 accept = FALSE; - - if (memoryBudget >= 1.0) - { - if (newBlockTypeBigger) - { - if ((static_cast(newBlockTypeSize) / minSize) <= memoryBudget) - { - accept = TRUE; - } - } - else - { - if ((static_cast(minSize) / newBlockTypeSize) > memoryBudget) - { - accept = TRUE; - } - } - } - else - { - if (newBlockTypeBigger) - { - if ((newBlockTypeSize * ratioHi) <= (minSize * ratioLow)) - { - accept = TRUE; - } - } - else - { - if ((newBlockTypeSize * ratioLow) < (minSize * ratioHi)) - { - accept = TRUE; - } - } - } - - return accept; -} - #if DEBUG /** ************************************************************************************************************************ @@ -2195,4 +2159,4 @@ VOID Lib::ValidateStereoInfo( } // V2 } // Addr -} // rocr + diff --git a/runtime/hsa-runtime/image/addrlib/src/core/addrlib2.h b/runtime/hsa-runtime/image/addrlib/src/core/addrlib2.h index 118306674e..d652244ec3 100644 --- a/runtime/hsa-runtime/image/addrlib/src/core/addrlib2.h +++ b/runtime/hsa-runtime/image/addrlib/src/core/addrlib2.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. 
-* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -36,9 +19,10 @@ #include "addrlib.h" -namespace rocr { -namespace Addr { -namespace V2 { +namespace Addr +{ +namespace V2 +{ /** ************************************************************************************************************************ @@ -147,6 +131,8 @@ union ADDR_BIT_SETTING * @brief Swizzle pattern information ************************************************************************************************************************ */ +// Accessed by index representing the logbase2 of (8bpp/16bpp/32bpp/64bpp/128bpp) +// contains the indices which map to 2D arrays SW_PATTERN_NIBBLE[0-9] which contain sections of an index equation. 
They are dependant on pipe# and bpe # struct ADDR_SW_PATINFO { UINT_8 maxItemCount; @@ -305,6 +291,10 @@ public: const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const; + ADDR_E_RETURNCODE GetPossibleSwizzleModes( + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const; + virtual BOOL_32 IsValidDisplaySwizzleMode( const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const { @@ -312,11 +302,21 @@ public: return ADDR_NOTIMPLEMENTED; } + ADDR_E_RETURNCODE GetAllowedBlockSet( + ADDR2_SWMODE_SET allowedSwModeSet, + AddrResourceType rsrcType, + ADDR2_BLOCK_SET* pAllowedBlockSet) const; + + ADDR_E_RETURNCODE GetAllowedSwSet( + ADDR2_SWMODE_SET allowedSwModeSet, + ADDR2_SWTYPE_SET* pAllowedSwSet) const; + protected: Lib(); // Constructor is protected Lib(const Client* pClient); static const UINT_32 MaxNumOfBpp = 5; + static const UINT_32 MaxNumOfBppCMask = 4; static const UINT_32 MaxNumOfAA = 4; static const Dim2d Block256_2d[MaxNumOfBpp]; @@ -669,6 +669,31 @@ protected: return ADDR_NOTSUPPORTED; } + virtual ADDR_E_RETURNCODE HwlGetPossibleSwizzleModes( + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlGetAllowedBlockSet( + ADDR2_SWMODE_SET allowedSwModeSet, + AddrResourceType rsrcType, + ADDR2_BLOCK_SET* pAllowedBlockSet) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTIMPLEMENTED; + } + + virtual ADDR_E_RETURNCODE HwlGetAllowedSwSet( + ADDR2_SWMODE_SET allowedSwModeSet, + ADDR2_SWTYPE_SET* pAllowedSwSet) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTIMPLEMENTED; + } + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck( const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const { @@ -922,17 +947,8 @@ protected: VOID FilterInvalidEqSwizzleMode( ADDR2_SWMODE_SET& allowedSwModeSet, AddrResourceType resourceType, - UINT_32 
elemLog2) const; - - static BOOL_32 IsBlockTypeAvaiable(ADDR2_BLOCK_SET blockSet, AddrBlockType blockType); - - static BOOL_32 BlockTypeWithinMemoryBudget( - UINT_64 minSize, - UINT_64 newBlockTypeSize, - UINT_32 ratioLow, - UINT_32 ratioHi, - DOUBLE memoryBudget = 0.0f, - BOOL_32 newBlockTypeBigger = TRUE); + UINT_32 elemLog2, + UINT_32 maxComponents) const; #if DEBUG VOID ValidateStereoInfo( @@ -982,7 +998,6 @@ private: } // V2 } // Addr -} // rocr #endif diff --git a/runtime/hsa-runtime/image/addrlib/src/core/addrlib3.cpp b/runtime/hsa-runtime/image/addrlib/src/core/addrlib3.cpp new file mode 100644 index 0000000000..db6090b1cb --- /dev/null +++ b/runtime/hsa-runtime/image/addrlib/src/core/addrlib3.cpp @@ -0,0 +1,1071 @@ +/* +************************************************************************************************************************ +* +* Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. +* SPDX-License-Identifier: MIT +* +***********************************************************************************************************************/ + + +/** +************************************************************************************************************************ +* @file addrlib3.cpp +* @brief Contains the implementation for the AddrLib3 base class. 
+************************************************************************************************************************ +*/ + +#include "addrinterface.h" +#include "addrlib3.h" +#include "addrcommon.h" + +namespace Addr +{ +namespace V3 +{ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Static Const Member +//////////////////////////////////////////////////////////////////////////////////////////////////// + +const Dim2d Lib::Block256_2d[] = {{16, 16}, {16, 8}, {8, 8}, {8, 4}, {4, 4}}; + +const ADDR_EXTENT3D Lib::Block1K_3d[] = {{16, 8, 8}, {8, 8, 8}, {8, 8, 4}, {8, 4, 4}, {4, 4, 4}}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Constructor/Destructor +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +************************************************************************************************************************ +* Lib::Lib +* +* @brief +* Constructor for the Addr::V3::Lib class +* +************************************************************************************************************************ +*/ +Lib::Lib() + : + Addr::Lib(), + m_pipesLog2(0), + m_pipeInterleaveLog2(0), + m_numEquations(0) +{ + Init(); +} + +/** +************************************************************************************************************************ +* Lib::Lib +* +* @brief +* Constructor for the AddrLib3 class with hClient as parameter +* +************************************************************************************************************************ +*/ +Lib::Lib( + const Client* pClient) + : + Addr::Lib(pClient), + m_pipesLog2(0), + m_pipeInterleaveLog2(0), + m_numEquations(0) +{ + Init(); +} + +/** +************************************************************************************************************************ +* Lib::Init +* +* @brief +* Initialization of class +* 
+************************************************************************************************************************ +*/ +void Lib::Init() +{ + memset(m_equationTable, 0, sizeof(m_equationTable)); + + // There is no equation table entry for linear, so start at the "next" swizzle mode entry. + for (UINT_32 swizzleModeIdx = ADDR3_LINEAR + 1; swizzleModeIdx < ADDR3_MAX_TYPE; swizzleModeIdx++) + { + for (UINT_32 msaaRateIdx = 0; msaaRateIdx < MaxMsaaRateLog2; msaaRateIdx++) + { + for (UINT_32 log2BytesIdx = 0; log2BytesIdx < MaxElementBytesLog2; log2BytesIdx++) + { + SetEquationTableEntry(static_cast(swizzleModeIdx), + msaaRateIdx, + log2BytesIdx, + ADDR_INVALID_EQUATION_INDEX); + } + } + } +} + +/** +************************************************************************************************************************ +* Lib::~Lib +* +* @brief +* Destructor for the AddrLib2 class +* +************************************************************************************************************************ +*/ +Lib::~Lib() +{ +} + +/** +************************************************************************************************************************ +* Lib::GetLib +* +* @brief +* Get Addr::V3::Lib pointer +* +* @return +* An Addr::V2::Lib class pointer +************************************************************************************************************************ +*/ +Lib* Lib::GetLib( + ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE +{ + Addr::Lib* pAddrLib = Addr::Lib::GetLib(hLib); + + return static_cast(hLib); +} + +/** +************************************************************************************************************************ +* Lib::GetBlockSize +* +* @brief +* Returns the byte size of a block for the swizzle mode. +* +* @return +* Byte size of the block, zero if swizzle mode is invalid. 
+************************************************************************************************************************ +*/ +UINT_32 Lib::GetBlockSize( + Addr3SwizzleMode swizzleMode, + BOOL_32 forPitch + ) const +{ + return (1 << GetBlockSizeLog2(swizzleMode, forPitch)); +} + +/** +************************************************************************************************************************ +* Lib::GetBlockSizeLog2 +* +* @brief +* Returns the log2 of the byte size of a block for the swizzle mode. +* +* @return +* Byte size of the block, zero if swizzle mode is invalid. +************************************************************************************************************************ +*/ +UINT_32 Lib::GetBlockSizeLog2( + Addr3SwizzleMode swizzleMode, + BOOL_32 forPitch + ) const +{ + UINT_32 blockSize = 0; + + switch (swizzleMode) + { + case ADDR3_256B_2D: + blockSize = 8; + break; + case ADDR3_4KB_2D: + case ADDR3_4KB_3D: + blockSize = 12; + break; + case ADDR3_64KB_2D: + case ADDR3_64KB_3D: + blockSize = 16; + break; + case ADDR3_256KB_2D: + case ADDR3_256KB_3D: + blockSize = 18; + break; + case ADDR3_LINEAR: + blockSize = (forPitch ? 7 : 8); + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + return blockSize; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceInfo +* +* @brief +* Interface function stub of ComputeSurfaceInfo. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR3_COMPUTE_SURFACE_INFO_INPUT)) || + (pOut->size != sizeof(ADDR3_COMPUTE_SURFACE_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + // Adjust incoming parameters. + ADDR3_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; + localIn.width = Max(pIn->width, 1u); + localIn.height = Max(pIn->height, 1u); + localIn.numMipLevels = Max(pIn->numMipLevels, 1u); + localIn.numSlices = Max(pIn->numSlices, 1u); + localIn.numSamples = Max(pIn->numSamples, 1u); + + UINT_32 expandX = 1; + UINT_32 expandY = 1; + ElemMode elemMode = ADDR_UNCOMPRESSED; + + if (returnCode == ADDR_OK) + { + // Set format to INVALID will skip this conversion + if (localIn.format != ADDR_FMT_INVALID) + { + // Get compression/expansion factors and element mode which indicates compression/expansion + localIn.bpp = GetElemLib()->GetBitsPerPixel(localIn.format, + &elemMode, + &expandX, + &expandY); + + // Special flag for 96 bit surface. 96 (or 48 if we support) bit surface's width is + // pre-multiplied by 3 and bpp is divided by 3. So pitch alignment for linear- + // aligned does not meet 64-pixel in real. We keep special handling in hwl since hw + // restrictions are different. 
+ // Also Mip 1+ needs an element pitch of 32 bits so we do not need this workaround + // but we use this flag to skip RestoreSurfaceInfo below + if ((elemMode == ADDR_EXPANDED) && (expandX > 1)) + { + ADDR_ASSERT(IsLinear(localIn.swizzleMode)); + } + + UINT_32 basePitch = 0; + GetElemLib()->AdjustSurfaceInfo(elemMode, + expandX, + expandY, + &localIn.bpp, + &basePitch, + &localIn.width, + &localIn.height); + + // Overwrite these parameters if we have a valid format + } + + if (localIn.bpp != 0) + { + localIn.width = Max(localIn.width, 1u); + localIn.height = Max(localIn.height, 1u); + } + else // Rule out some invalid parameters + { + returnCode = ADDR_INVALIDPARAMS; + } + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlComputeSurfaceInfo(&localIn, pOut); + + if (returnCode == ADDR_OK) + { + pOut->bpp = localIn.bpp; + pOut->pixelPitch = pOut->pitch; + pOut->pixelHeight = pOut->height; + + if (localIn.format != ADDR_FMT_INVALID) + { + UINT_32 pixelBits = pOut->pixelBits; + + GetElemLib()->RestoreSurfaceInfo(elemMode, + expandX, + expandY, + &pOut->pixelBits, + &pOut->pixelPitch, + &pOut->pixelHeight); + + GetElemLib()->RestoreSurfaceInfo(elemMode, + expandX, + expandY, + &pixelBits, + &pOut->pixelMipChainPitch, + &pOut->pixelMipChainHeight); + + if ((localIn.numMipLevels > 1) && (pOut->pMipInfo != NULL)) + { + for (UINT_32 i = 0; i < localIn.numMipLevels; i++) + { + pOut->pMipInfo[i].pixelPitch = pOut->pMipInfo[i].pitch; + pOut->pMipInfo[i].pixelHeight = pOut->pMipInfo[i].height; + + GetElemLib()->RestoreSurfaceInfo(elemMode, + expandX, + expandY, + &pixelBits, + &pOut->pMipInfo[i].pixelPitch, + &pOut->pMipInfo[i].pixelHeight); + } + } + } + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::GetPossibleSwizzleModes +* +* @brief +* Interface function stub of AddrComputeSurfaceInfo. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::GetPossibleSwizzleModes( + const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn, ///< [in] input structure + ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT)) || + (pOut->size != sizeof(ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + const ADDR3_SURFACE_FLAGS flags = pIn->flags; + + // VRS images can only be 2D from the client API rules. + ADDR_ASSERT((flags.isVrsImage == 0) || IsTex2d(pIn->resourceType)); + + if (pIn->bpp == 96) + { + pOut->validModes.swLinear = 1; + } + // Depth/Stencil images can't be linear and must be 2D swizzle modes. + // These three are related to DB block that supports only SW_64KB_2D and SW_256KB_2D for DSV. + else if (flags.depth || flags.stencil) + { + pOut->validModes.sw2d64kB = 1; + pOut->validModes.sw2d256kB = 1; + } + // The organization of elements in the hierarchical surface is the same as any other surface, and it can support + // any 2D swizzle mode (SW_256_2D, SW_4KB_2D, SW_64KB_2D, or SW_256KB_2D). The swizzle mode can be selected + // orthogonally to the underlying z or stencil surface. + else if (pIn->flags.hiZHiS) + { + pOut->validModes.sw2d256B = 1; + pOut->validModes.sw2d4kB = 1; + pOut->validModes.sw2d64kB = 1; + pOut->validModes.sw2d256kB = 1; + } + // MSAA can't be linear and must be 2D swizzle modes. + else if (pIn->numSamples > 1) + { + // NOTE: SW_256B_2D still supports MSAA. The removal of 256B for MSAA is reverted in HW Doc. 
+ pOut->validModes.sw2d256B = 1; + pOut->validModes.sw2d4kB = 1; + pOut->validModes.sw2d64kB = 1; + pOut->validModes.sw2d256kB = 1; + } + // Block-compressed images need to be either using 2D or linear swizzle modes. + else if (flags.blockCompressed) + { + pOut->validModes.swLinear = 1; + + // We find cases where Tex3d BlockCompressed image adopts 2D_256B should be prohibited. + if (IsTex3d(pIn->resourceType) == FALSE) + { + pOut->validModes.sw2d256B = 1; + } + pOut->validModes.sw2d4kB = 1; + pOut->validModes.sw2d64kB = 1; + pOut->validModes.sw2d256kB = 1; + } + else if (IsTex1d(pIn->resourceType)) + { + pOut->validModes.swLinear = 1; + pOut->validModes.sw2d256B = 1; + pOut->validModes.sw2d4kB = 1; + pOut->validModes.sw2d64kB = 1; + pOut->validModes.sw2d256kB = 1; + } + else if (flags.nv12 || flags.p010 || IsTex2d(pIn->resourceType) || flags.view3dAs2dArray) + { + // NV12 and P010 support + // SW_LINEAR, SW_256B_2D, SW_4KB_2D, SW_64KB_2D, SW_256KB_2D + // There could be more multimedia formats that require more hw specific tiling modes... + + // The exception is VRS images. + // Linear is not allowed and the VRS surface needs to be 8BPP format. + if (flags.isVrsImage) + { + ADDR_ASSERT(pIn->bpp == 8); + } + else + { + pOut->validModes.swLinear = 1; + } + if (flags.view3dAs2dArray == 0) + { + // ADDR3_256B_2D can't support 3D images. + pOut->validModes.sw2d256B = 1; + } + pOut->validModes.sw2d4kB = 1; + pOut->validModes.sw2d64kB = 1; + pOut->validModes.sw2d256kB = 1; + } + else if (IsTex3d(pIn->resourceType)) + { + // An eventual determination would be based on pal setting of height_watermark and depth_watermark. + // However, we just adopt the simpler logic currently. + // For 3D images w/ view3dAs2dArray = 0, SW_3D is preferred. + // For 3D images w/ view3dAs2dArray = 1, it should go to 2D path above. + // Enable linear since client may force linear tiling for 3D texture that does not set view3dAs2dArray. 
+ pOut->validModes.swLinear = 1; + pOut->validModes.sw3d4kB = 1; + pOut->validModes.sw3d64kB = 1; + pOut->validModes.sw3d256kB = 1; + } + } + + constexpr UINT_32 Size256 = 256u; + constexpr UINT_32 Size4K = 4 * 1024; + constexpr UINT_32 Size64K = 64 * 1024; + constexpr UINT_32 Size256K = 256 * 1024; + + ADDR_ASSERT(pIn->maxAlign != 0); + + if (pIn->maxAlign < Size256K) + { + pOut->validModes.value &= ~Gfx12Blk256KBSwModeMask; + } + + if (pIn->maxAlign < Size64K) + { + pOut->validModes.value &= ~Gfx12Blk64KBSwModeMask; + } + + if (pIn->maxAlign < Size4K) + { + pOut->validModes.value &= ~Gfx12Blk4KBSwModeMask; + } + + if (pIn->maxAlign < Size256) + { + pOut->validModes.value &= ~Gfx12Blk256BSwModeMask; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::HwlConvertChipFamily +* +* @brief +* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision +* @return +* ChipFamily +************************************************************************************************************************ +*/ +ChipFamily Lib::HwlConvertChipFamily( + UINT_32 chipFamily, ///< [in] chip family defined in atiih.h + UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h +{ + return ADDR_CHIP_FAMILY_NAVI; +} + +/** +************************************************************************************************************************ +* Lib::ComputeBlockDimensionForSurf +* +* @brief +* Internal function to get block width/height/depth in element from surface input params. 
+* +* @return +* VOID +************************************************************************************************************************ +*/ +VOID Lib::ComputeBlockDimensionForSurf( + ADDR_EXTENT3D* pExtent, + UINT_32 bpp, + UINT_32 numSamples, + Addr3SwizzleMode swizzleMode + ) const +{ + const UINT_32 eleBytes = bpp >> 3; + const UINT_32 log2EleBytes = Log2(eleBytes); + const UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode); + + if (IsLinear(swizzleMode)) + { + pExtent->width = 1 << (log2BlkSize - log2EleBytes); + pExtent->height = 1; + pExtent->depth = 1; + } + else if (Is3dSwizzle(swizzleMode)) + { + const UINT_32 base = (log2BlkSize / 3) - (log2EleBytes / 3); + const UINT_32 log2BlkSizeMod3 = log2BlkSize % 3; + const UINT_32 log2EleBytesMod3 = log2EleBytes % 3; + + UINT_32 x = base; + UINT_32 y = base; + UINT_32 z = base; + + if (log2BlkSizeMod3 > 0) + { + x++; + } + + if (log2BlkSizeMod3 > 1) + { + z++; + } + + if (log2EleBytesMod3 > 0) + { + x--; + } + + if (log2EleBytesMod3 > 1) + { + z--; + } + + pExtent->width = 1u << x; + pExtent->height = 1u << y; + pExtent->depth = 1u << z; + } + else + { + const UINT_32 log2Samples = Log2(Max(numSamples, 1u)); + const UINT_32 log2Width = (log2BlkSize >> 1) - + (log2EleBytes >> 1) - + (log2Samples >> 1) - + (log2EleBytes & log2Samples & 1); + const UINT_32 log2Height = (log2BlkSize >> 1) - + (log2EleBytes >> 1) - + (log2Samples >> 1) - + ((log2EleBytes | log2Samples) & 1); + + // Return the extent in actual units, not log2 + pExtent->width = 1u << log2Width; + pExtent->height = 1u << log2Height; + pExtent->depth = 1; + } +} + +/** +************************************************************************************************************************ +* Lib::GetMipTailDim +* +* @brief +* Internal function to get out max dimension of first level in mip tail +* +* @return +* Max Width/Height/Depth value of the first mip fitted in mip tail 
+************************************************************************************************************************ +*/ +ADDR_EXTENT3D Lib::GetMipTailDim( + Addr3SwizzleMode swizzleMode, + const ADDR_EXTENT3D& blockDims + ) const +{ + const UINT_32 log2BlkSize = GetBlockSizeLog2(swizzleMode); + + ADDR_EXTENT3D out = blockDims; + + if (Is3dSwizzle(swizzleMode)) + { + const UINT_32 dim = log2BlkSize % 3; + + if (dim == 0) + { + out.height >>= 1; + } + else if (dim == 1) + { + out.width >>= 1; + } + else + { + out.depth >>= 1; + } + } + else + { + if ((log2BlkSize % 2) == 0) + { + out.width >>= 1; + } + else + { + out.height >>= 1; + } + } + + return out; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceAddrFromCoord +* +* @brief +* Interface function stub of ComputeSurfaceAddrFromCoord. +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord( + const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT localIn = *pIn; + localIn.unAlignedDims.width = Max(pIn->unAlignedDims.width, 1u); + localIn.unAlignedDims.height = Max(pIn->unAlignedDims.height, 1u); + localIn.unAlignedDims.depth = Max(pIn->unAlignedDims.depth, 1u); + localIn.numMipLevels = Max(pIn->numMipLevels, 1u); + localIn.numSamples = Max(pIn->numSamples, 1u); + + if ((localIn.bpp < 8) || + (localIn.bpp > 128) || 
+ ((localIn.bpp % 8) != 0) || + (localIn.sample >= localIn.numSamples) || + (localIn.slice >= localIn.unAlignedDims.depth) || + (localIn.mipId >= localIn.numMipLevels) || + (IsTex3d(localIn.resourceType) && + (Valid3DMipSliceIdConstraint(localIn.unAlignedDims.depth, localIn.mipId, localIn.slice) == FALSE))) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if (returnCode == ADDR_OK) + { + if (IsLinear(localIn.swizzleMode)) + { + returnCode = ComputeSurfaceAddrFromCoordLinear(&localIn, pOut); + } + else + { + returnCode = ComputeSurfaceAddrFromCoordTiled(&localIn, pOut); + } + + if (returnCode == ADDR_OK) + { + pOut->prtBlockIndex = static_cast(pOut->addr / (64 * 1024)); + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceAddrFromCoord +* +* @brief +* Interface function stub of Addr3ComputePipeBankXor. +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputePipeBankXor( + const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR3_COMPUTE_PIPEBANKXOR_INPUT)) || + (pOut->size != sizeof(ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputePipeBankXor(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceAddrFromCoordLinear +* +* @brief +* Internal function to calculate address from coord for linear swizzle surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ 
+ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordLinear( + const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + BOOL_32 valid = (pIn->numSamples <= 1); + + if (valid) + { + if (IsTex1d(pIn->resourceType)) + { + valid = (pIn->y == 0); + } + } + + if (valid) + { + ADDR3_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; + ADDR3_MIP_INFO mipInfo[MaxMipLevels]; + ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels); + + localIn.size = sizeof(localIn); + localIn.flags = pIn->flags; + localIn.swizzleMode = ADDR3_LINEAR; + localIn.resourceType = pIn->resourceType; + localIn.format = ADDR_FMT_INVALID; + localIn.bpp = pIn->bpp; + localIn.width = Max(pIn->unAlignedDims.width, 1u); + localIn.height = Max(pIn->unAlignedDims.height, 1u); + localIn.numSlices = Max(pIn->unAlignedDims.depth, 1u); + localIn.numMipLevels = Max(pIn->numMipLevels, 1u); + localIn.numSamples = Max(pIn->numSamples, 1u); + + if (localIn.numMipLevels <= 1) + { + localIn.pitchInElement = pIn->pitchInElement; + } + + localOut.size = sizeof(localOut); + localOut.pMipInfo = mipInfo; + + returnCode = ComputeSurfaceInfo(&localIn, &localOut); + + if (returnCode == ADDR_OK) + { + pOut->addr = (localOut.sliceSize * pIn->slice) + + mipInfo[pIn->mipId].offset + + (pIn->y * mipInfo[pIn->mipId].pitch + pIn->x) * (pIn->bpp >> 3); + pOut->bitPosition = 0; + } + else + { + valid = FALSE; + } + } + + if (valid == FALSE) + { + returnCode = ADDR_INVALIDPARAMS; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceAddrFromCoordTiled +* +* @brief +* Internal function to calculate address from coord for tiled swizzle surface +* +* @return +* ADDR_E_RETURNCODE 
+************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordTiled( + const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + return HwlComputeSurfaceAddrFromCoordTiled(pIn, pOut); +} + +/** +************************************************************************************************************************ +* Lib::ComputeNonBlockCompressedView +* +* @brief +* Interface function stub of Addr3ComputeNonBlockCompressedView. +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeNonBlockCompressedView( + const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, + ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT)) || + (pOut->size != sizeof(ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else if (Is3dSwizzle(pIn->swizzleMode)) + { + // 3D volume images using ADDR3_XX_3D is currently not supported. + returnCode = ADDR_NOTSUPPORTED; + } + else + { + returnCode = HwlComputeNonBlockCompressedView(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSubResourceOffsetForSwizzlePattern +* +* @brief +* Interface function stub of Addr3ComputeSubResourceOffsetForSwizzlePattern. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSubResourceOffsetForSwizzlePattern( + const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + HwlComputeSubResourceOffsetForSwizzlePattern(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSlicePipeBankXor +* +* @brief +* Interface function stub of Addr3ComputeSlicePipeBankXor. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSlicePipeBankXor( + const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT)) || + (pOut->size != sizeof(ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + if ((pIn->bpe != 0) && + (pIn->bpe != 8) && + (pIn->bpe != 16) && + (pIn->bpe != 32) && + (pIn->bpe != 64) && + (pIn->bpe != 128)) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeSlicePipeBankXor(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::UseCustomHeight +* +* @brief +* Determines if the calculations for this surface should use minimal HW values or user-specified values. +* +* @return +* Returns TRUE if the user-specified alignment should be used +************************************************************************************************************************ +*/ +BOOL_32 Lib::UseCustomHeight( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn + ) const +{ + return ((pIn->numMipLevels <= 1) && + IsLinear(pIn->swizzleMode) && + (pIn->sliceAlign > 0)); +} + +/** +************************************************************************************************************************ +* Lib::UseCustomPitch +* +* @brief +* Determines if the calculations for this surface should use minimal HW values or user-specified values. 
+* +* @return +* Returns TRUE if the user-specified pitch should be used +************************************************************************************************************************ +*/ +BOOL_32 Lib::UseCustomPitch( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn + ) const +{ + return ((pIn->numMipLevels <= 1) && + IsLinear(pIn->swizzleMode) && + (pIn->pitchInElement > 0)); +} + +/** +************************************************************************************************************************ +* Lib::CanTrimLinearPadding +* +* @brief +* Determines if the calculations for this surface can omit extra trailing padding for linear surfaces. +* +* @return +* Returns TRUE if the trailing padding can be omitted. +************************************************************************************************************************ +*/ +BOOL_32 Lib::CanTrimLinearPadding( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn + ) const +{ + return ((IsTex3d(pIn->resourceType) == FALSE) && + (pIn->numSlices <= 1) && + IsLinear(pIn->swizzleMode)); +} + +/** +************************************************************************************************************************ +* Lib::ApplyCustomizedPitchHeight +* +* @brief +* Helper function to override hw required row pitch/slice pitch by customrized one +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ApplyCustomizedPitchHeight( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + const UINT_32 elementBytes = pIn->bpp >> 3; + + // Calculate the default pitch/height without any user inputs + pOut->pitch = PowTwoAlign(pIn->width, pOut->blockExtent.width); + pOut->height = PowTwoAlign(pIn->height, pOut->blockExtent.height); + + // Custom pitches / alignments 
are only possible with single mip level / linear images; otherwise, + // ignore those parameters. + if (UseCustomPitch(pIn)) + { + const UINT_32 pitchAlignmentBytes = 1 << GetBlockSizeLog2(pIn->swizzleMode, TRUE); + const UINT_32 pitchAlignmentElements = pitchAlignmentBytes / elementBytes; + + // Their requested pitch has to meet the pitch alignment constraints applied by the HW. + if ((pIn->pitchInElement % pitchAlignmentElements) != 0) + { + returnCode = ADDR_INVALIDPARAMS; + } + // And their pitch can't be less than the minimum + else if (pIn->pitchInElement < pOut->pitch) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + pOut->pitch = pIn->pitchInElement; + } + } + + if ((returnCode == ADDR_OK) && UseCustomHeight(pIn)) + { + UINT_32 customizedHeight = pIn->sliceAlign / elementBytes / pOut->pitch; + + if (customizedHeight * elementBytes * pOut->pitch != pIn->sliceAlign) + { + returnCode = ADDR_INVALIDPARAMS; + } + else if ((pIn->numSlices > 1) && (pOut->height != customizedHeight)) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + pOut->height = customizedHeight; + } + } + + return returnCode; +} + +} // V3 +} // Addr diff --git a/runtime/hsa-runtime/image/addrlib/src/core/addrlib3.h b/runtime/hsa-runtime/image/addrlib/src/core/addrlib3.h new file mode 100644 index 0000000000..f57125501b --- /dev/null +++ b/runtime/hsa-runtime/image/addrlib/src/core/addrlib3.h @@ -0,0 +1,415 @@ +/* +************************************************************************************************************************ +* +* Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. +* SPDX-License-Identifier: MIT +* +***********************************************************************************************************************/ + + +/** +************************************************************************************************************************ +* @file addrlib3.h +* @brief Contains the Addr::V3::Lib class definition. 
+************************************************************************************************************************ +*/ + +#ifndef __ADDR3_LIB3_H__ +#define __ADDR3_LIB3_H__ + +#include "addrlib.h" + +namespace Addr +{ +namespace V3 +{ + +/** +************************************************************************************************************************ +* @brief Bitmasks for swizzle mode determination on GFX12 +************************************************************************************************************************ +*/ +const UINT_32 Gfx12Blk256KBSwModeMask = (1u << ADDR3_256KB_2D) | + (1u << ADDR3_256KB_3D); + +const UINT_32 Gfx12Blk64KBSwModeMask = (1u << ADDR3_64KB_2D) | + (1u << ADDR3_64KB_3D); + +const UINT_32 Gfx12Blk4KBSwModeMask = (1u << ADDR3_4KB_2D) | + (1u << ADDR3_4KB_3D); + +const UINT_32 Gfx12Blk256BSwModeMask = (1u << ADDR3_256B_2D); + +/** +************************************************************************************************************************ +* @brief Bit setting for swizzle pattern +************************************************************************************************************************ +*/ +union ADDR_BIT_SETTING +{ + struct + { + UINT_16 x; + UINT_16 y; + UINT_16 z; + UINT_16 s; + }; + UINT_64 value; +}; + +/** +************************************************************************************************************************ +* @brief Flags for SwizzleModeTable +************************************************************************************************************************ +*/ +union SwizzleModeFlags +{ + struct + { + // Swizzle mode + UINT_32 isLinear : 1; // Linear + UINT_32 is2d : 1; // 2d mode + UINT_32 is3d : 1; // 3d mode + + // Block size + UINT_32 is256b : 1; // Block size is 256B + UINT_32 is4kb : 1; // Block size is 4KB + UINT_32 is64kb : 1; // Block size is 64KB + UINT_32 is256kb : 1; // Block size is 256KB + + UINT_32 reserved : 25; // Reserved bits + }; + + 
UINT_32 u32All; +}; + +struct Dim2d +{ + UINT_32 w; + UINT_32 h; +}; + +const UINT_32 Log2Size256 = 8u; +const UINT_32 Log2Size4K = 12u; +const UINT_32 Log2Size64K = 16u; +const UINT_32 Log2Size256K = 18u; + +/** +************************************************************************************************************************ +* @brief Swizzle pattern information +************************************************************************************************************************ +*/ +// Accessed by index representing the logbase2 of (8bpp/16bpp/32bpp/64bpp/128bpp) +// contains the indices which map to 2D arrays SW_PATTERN_NIBBLE[1-4] which contain sections of an index equation. +struct ADDR_SW_PATINFO +{ + UINT_8 nibble1Idx; + UINT_8 nibble2Idx; + UINT_8 nibble3Idx; + UINT_8 nibble4Idx; +}; + +/** +************************************************************************************************************************ +* InitBit +* +* @brief +* Initialize bit setting value via a return value +************************************************************************************************************************ +*/ +#define InitBit(c, index) (1ull << ((c << 4) + index)) + +const UINT_64 X0 = InitBit(0, 0); +const UINT_64 X1 = InitBit(0, 1); +const UINT_64 X2 = InitBit(0, 2); +const UINT_64 X3 = InitBit(0, 3); +const UINT_64 X4 = InitBit(0, 4); +const UINT_64 X5 = InitBit(0, 5); +const UINT_64 X6 = InitBit(0, 6); +const UINT_64 X7 = InitBit(0, 7); +const UINT_64 X8 = InitBit(0, 8); + +const UINT_64 Y0 = InitBit(1, 0); +const UINT_64 Y1 = InitBit(1, 1); +const UINT_64 Y2 = InitBit(1, 2); +const UINT_64 Y3 = InitBit(1, 3); +const UINT_64 Y4 = InitBit(1, 4); +const UINT_64 Y5 = InitBit(1, 5); +const UINT_64 Y6 = InitBit(1, 6); +const UINT_64 Y7 = InitBit(1, 7); +const UINT_64 Y8 = InitBit(1, 8); + +const UINT_64 Z0 = InitBit(2, 0); +const UINT_64 Z1 = InitBit(2, 1); +const UINT_64 Z2 = InitBit(2, 2); +const UINT_64 Z3 = InitBit(2, 3); +const UINT_64 Z4 = 
InitBit(2, 4); +const UINT_64 Z5 = InitBit(2, 5); + +const UINT_64 S0 = InitBit(3, 0); +const UINT_64 S1 = InitBit(3, 1); +const UINT_64 S2 = InitBit(3, 2); + +/** +************************************************************************************************************************ +* @brief Bit setting for swizzle pattern +************************************************************************************************************************ +*/ + +/** +************************************************************************************************************************ +* @brief This class contains asic independent address lib functionalities +************************************************************************************************************************ +*/ +class Lib : public Addr::Lib +{ +public: + virtual ~Lib(); + + static Lib* GetLib( + ADDR_HANDLE hLib); + + // + // Interface stubs + // + + // For data surface + ADDR_E_RETURNCODE ComputeSurfaceInfo( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE GetPossibleSwizzleModes( + const ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT* pIn, + ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord( + const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + // Misc + ADDR_E_RETURNCODE ComputePipeBankXor( + const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeNonBlockCompressedView( + const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, + ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeSubResourceOffsetForSwizzlePattern( + const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeSlicePipeBankXor( + const 
ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut); + +protected: + Lib(); // Constructor is protected + Lib(const Client* pClient); + + static const UINT_32 MaxImageDim = 65536; + static const UINT_32 MaxMipLevels = 17; // Max image size is 64k + static const UINT_32 MaxNumOfBpp = 5; + static const UINT_32 MaxNumOfAA = 4; + UINT_32 m_pipesLog2; ///< Number of pipe per shader engine Log2 + UINT_32 m_pipeInterleaveLog2; ///< Log2 of pipe interleave bytes + + static const Dim2d Block256_2d[MaxNumOfBpp]; + static const ADDR_EXTENT3D Block1K_3d[MaxNumOfBpp]; + SwizzleModeFlags m_swizzleModeTable[ADDR3_MAX_TYPE]; ///< Swizzle mode table + + // Number of unique MSAA sample rates (1/2/4/8) + static const UINT_32 MaxMsaaRateLog2 = 4; + // Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp) + static const UINT_32 MaxElementBytesLog2 = 5; + // Number of unique swizzle patterns (one entry per swizzle mode + MSAA + bpp configuration) + static const UINT_32 NumSwizzlePatterns = 19 * MaxElementBytesLog2; + + // Number of equation entries in the table + UINT_32 m_numEquations; + // Equation lookup table according to swizzle mode, MSAA sample rate, and bpp + UINT_32 m_equationLookupTable[ADDR3_MAX_TYPE - 1][MaxMsaaRateLog2][MaxElementBytesLog2]; + + // Equation table + ADDR_EQUATION m_equationTable[NumSwizzlePatterns]; + + void SetEquationTableEntry( + Addr3SwizzleMode addrType, + UINT_32 msaaLog2, + UINT_32 elementLog2, + UINT_32 value) + { + m_equationLookupTable[addrType - 1][msaaLog2][elementLog2] = value; + } + + const UINT_32 GetEquationTableEntry( + Addr3SwizzleMode addrType, + UINT_32 msaaLog2, + UINT_32 elementLog2) const + { + return m_equationLookupTable[addrType - 1][msaaLog2][elementLog2]; + } + + static BOOL_32 Valid3DMipSliceIdConstraint( + UINT_32 numSlices, + UINT_32 mipId, + UINT_32 slice) + { + return (Max((numSlices >> mipId), 1u) > slice); + } + + UINT_32 GetBlockSize( + Addr3SwizzleMode swizzleMode, + BOOL_32 forPitch = 
FALSE) const; + + UINT_32 GetBlockSizeLog2( + Addr3SwizzleMode swizzleMode, + BOOL_32 forPitch = FALSE) const; + + BOOL_32 IsValidSwMode(Addr3SwizzleMode swizzleMode) const + { + return (m_swizzleModeTable[swizzleMode].u32All != 0); + } + + UINT_32 IsLinear(Addr3SwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isLinear; + } + + // Checking block size + BOOL_32 IsBlock256b(Addr3SwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].is256b; + } + + // Checking block size + BOOL_32 IsBlock4kb(Addr3SwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].is4kb; + } + + // Checking block size + BOOL_32 IsBlock64kb(Addr3SwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].is64kb; + } + + // Checking block size + BOOL_32 IsBlock256kb(Addr3SwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].is256kb; + } + + BOOL_32 Is2dSwizzle(Addr3SwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].is2d; + } + + BOOL_32 Is3dSwizzle(Addr3SwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].is3d; + } + + virtual UINT_32 HwlComputeMaxBaseAlignments() const { return 256 * 1024; } + + virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn) + { + ADDR_NOT_IMPLEMENTED(); + // Although GFX12 addressing should be consistent regardless of the configuration, we still need to + // call some initialization for member variables. 
+ return TRUE; + } + + virtual ChipFamily HwlConvertChipFamily( + UINT_32 chipFamily, + UINT_32 chipRevision); + + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const { return 0; } + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputePipeBankXor( + const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + VOID ComputeBlockDimensionForSurf( + ADDR_EXTENT3D* pExtent, + UINT_32 bpp, + UINT_32 numSamples, + Addr3SwizzleMode swizzleMode) const; + + ADDR_EXTENT3D GetMipTailDim( + Addr3SwizzleMode swizzleMode, + const ADDR_EXTENT3D& blockDims) const; + + ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordLinear( + const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordTiled( + const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled( + const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTIMPLEMENTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeNonBlockCompressedView( + const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, + ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual VOID HwlComputeSubResourceOffsetForSwizzlePattern( + const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + } + + virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor( + const 
ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + ADDR_E_RETURNCODE ApplyCustomizedPitchHeight( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + BOOL_32 UseCustomHeight(const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + BOOL_32 UseCustomPitch(const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + BOOL_32 CanTrimLinearPadding(const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + +private: + // Disallow the copy constructor + Lib(const Lib& a); + + // Disallow the assignment operator + Lib& operator=(const Lib& a); + + void Init(); +}; + +} // V3 +} // Addr + +#endif \ No newline at end of file diff --git a/runtime/hsa-runtime/image/addrlib/src/core/addrobject.cpp b/runtime/hsa-runtime/image/addrlib/src/core/addrobject.cpp index 2a08b0ae04..82a94d3d23 100644 --- a/runtime/hsa-runtime/image/addrlib/src/core/addrobject.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/core/addrobject.cpp @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. 
-* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -34,8 +17,8 @@ #include "addrinterface.h" #include "addrobject.h" -namespace rocr { -namespace Addr { +namespace Addr +{ /** **************************************************************************************************** @@ -237,4 +220,3 @@ VOID Object::DebugPrint( } } // Addr -} // rocr diff --git a/runtime/hsa-runtime/image/addrlib/src/core/addrobject.h b/runtime/hsa-runtime/image/addrlib/src/core/addrobject.h index 0d270789a2..db76cb6ca3 100644 --- a/runtime/hsa-runtime/image/addrlib/src/core/addrobject.h +++ b/runtime/hsa-runtime/image/addrlib/src/core/addrobject.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. 
-* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -37,8 +20,8 @@ #include "addrtypes.h" #include "addrcommon.h" -namespace rocr { -namespace Addr { +namespace Addr +{ /** **************************************************************************************************** @@ -91,6 +74,4 @@ private: }; } // Addr -} // rocr - #endif diff --git a/runtime/hsa-runtime/image/addrlib/src/core/coord.cpp b/runtime/hsa-runtime/image/addrlib/src/core/coord.cpp index f371458f4e..068a991947 100644 --- a/runtime/hsa-runtime/image/addrlib/src/core/coord.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/core/coord.cpp @@ -3,24 +3,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. 
All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -28,9 +11,10 @@ #include "addrcommon.h" #include "coord.h" -namespace rocr { -namespace Addr { -namespace V2 { +namespace Addr +{ +namespace V2 +{ Coordinate::Coordinate() { @@ -600,4 +584,3 @@ BOOL_32 CoordEq::operator!=(const CoordEq& b) } // V2 } // Addr -} // rocr diff --git a/runtime/hsa-runtime/image/addrlib/src/core/coord.h b/runtime/hsa-runtime/image/addrlib/src/core/coord.h index 490823f3ff..2e8011550e 100644 --- a/runtime/hsa-runtime/image/addrlib/src/core/coord.h +++ b/runtime/hsa-runtime/image/addrlib/src/core/coord.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. 
All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -28,9 +11,10 @@ #ifndef __COORD_H #define __COORD_H -namespace rocr { -namespace Addr { -namespace V2 { +namespace Addr +{ +namespace V2 +{ #if defined(__cplusplus) #if defined(_MSC_VER) #if _MSC_VER >= 1900 @@ -140,7 +124,6 @@ private: } // V2 } // Addr -} // rocr #endif diff --git a/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10SwizzlePattern.h b/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10SwizzlePattern.h index 3a783bb4b3..0b6b1e1cc1 100644 --- a/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10SwizzlePattern.h +++ b/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10SwizzlePattern.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * 
Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -33,10 +16,11 @@ #ifndef __GFX10_SWIZZLE_PATTERN_H__ #define __GFX10_SWIZZLE_PATTERN_H__ -namespace rocr { -namespace Addr { -namespace V2 { +namespace Addr +{ +namespace V2 +{ const ADDR_SW_PATINFO GFX10_SW_256_S_PATINFO[] = { { 1, 0, 0, 0, 0, } , // 1 pipes 1 bpe @ SW_256_S @ Navi1x @@ -6031,7 +6015,7 @@ const UINT_64 GFX10_CMASK_SW_PATTERN[][17] = {X3, Y3, X7, Y7, X8, Y8, X9, Y9, X10, Y4^X9^Y9, Z3^X4^Y4, Z2^Y5^X8, Z1^X5^Y8, Y6^X7, Z0^X6^Y7, 0, 0, }, //34 }; -} // V2 +}// V2 } // Addr -} // rocr + #endif diff --git a/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10addrlib.cpp b/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10addrlib.cpp index 733252f884..c1db4128b7 100644 --- 
a/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10addrlib.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10addrlib.cpp @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -31,6 +14,7 @@ */ #include "gfx10addrlib.h" +#include "addrcommon.h" #include "gfx10_gb_reg.h" #include "amdgpu_asic_addr.h" @@ -38,8 +22,8 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -namespace rocr { -namespace Addr { +namespace Addr +{ /** ************************************************************************************************************************ * Gfx10HwlInit @@ -93,7 +77,7 @@ const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved {{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_S_X {{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR_SW_4KB_D_X - {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, // Reserved + {{0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, // ADDR_SW_4KB_R_X {{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_Z_X {{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR_SW_64KB_S_X @@ -128,6 +112,7 @@ Gfx10Lib::Gfx10Lib(const Client* pClient) m_numSaLog2(0), m_colorBaseIndex(0), m_xmaskBaseIndex(0), + m_htileBaseIndex(0), m_dccBaseIndex(0) { memset(&m_settings, 0, sizeof(m_settings)); @@ -675,7 +660,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord( { const UINT_32 numSampleLog2 = Log2(pIn->numSamples); const UINT_32 pipeMask = (1 << m_pipesLog2) - 1; - const UINT_32 index = m_xmaskBaseIndex + numSampleLog2; + const UINT_32 index = 
m_htileBaseIndex + numSampleLog2; const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX; const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4; @@ -948,9 +933,11 @@ BOOL_32 Gfx10Lib::HwlInitGlobalParams( { // Skip unaligned case - m_xmaskBaseIndex += MaxNumOfAA; + m_xmaskBaseIndex += MaxNumOfBppCMask; + m_htileBaseIndex += MaxNumOfAA; - m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA; + m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfBppCMask; + m_htileBaseIndex += m_pipesLog2 * MaxNumOfAA; m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp; if (m_settings.supportRbPlus) @@ -966,7 +953,8 @@ BOOL_32 Gfx10Lib::HwlInitGlobalParams( if (m_numPkrLog2 >= 2) { m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp; - m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA; + m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfBppCMask; + m_htileBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA; } } else @@ -976,9 +964,8 @@ BOOL_32 Gfx10Lib::HwlInitGlobalParams( 1; ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA); - - ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == - sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0])); + ADDR_C_ASSERT(sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]) == + (numPipeType + 1) * MaxNumOfBppCMask); } } @@ -1083,15 +1070,15 @@ ChipFamily Gfx10Lib::HwlConvertChipFamily( ADDR_ASSERT(!"Unknown chip revision"); } break; - case FAMILY_GC_10_3_6: - if (ASICREV_IS_GFX1036(chipRevision)) + case FAMILY_RPL: + if (ASICREV_IS_RAPHAEL(chipRevision)) { m_settings.supportRbPlus = 1; m_settings.dccUnsup3DSwDis = 0; } break; - case FAMILY_GC_10_3_7: - if (ASICREV_IS_GFX1037(chipRevision)) + case FAMILY_MDN: + if (ASICREV_IS_MENDOCINO(chipRevision)) { m_settings.supportRbPlus = 1; m_settings.dccUnsup3DSwDis = 0; @@ -1460,13 +1447,15 @@ VOID 
Gfx10Lib::ConvertSwizzlePatternToEquation( ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern const { - ADDR_BIT_SETTING fullSwizzlePattern[20]; + // Get full swizzle pattern and store it as an ADDR_BIT_SETTING list + ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT]; GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern); const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern; const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode); - + memset(pEquation, 0, sizeof(ADDR_EQUATION)); pEquation->numBits = blockSizeLog2; + pEquation->numBitComponents = pPatInfo->maxItemCount; pEquation->stackedDepthSlices = FALSE; for (UINT_32 i = 0; i < elemLog2; i++) @@ -1994,37 +1983,45 @@ VOID Gfx10Lib::InitEquationTable() { memset(m_equationTable, 0, sizeof(m_equationTable)); + // Iterate through resourceTypes, up to MaxRsrcType where a "resourceType" refers to AddrResourceType (1D/2D/3D) + // resources. This starts with rsrcTypeIdx = 0, however there is an offset added that will start us off at + // computing 2D resources. for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++) { + // Add offset. Start iterating from ADDR_RSRC_TEX_2D const AddrResourceType rsrcType = static_cast(rsrcTypeIdx + ADDR_RSRC_TEX_2D); + // Iterate through the maximum number of swizzlemodes a type can hold for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++) { const AddrSwizzleMode swMode = static_cast(swModeIdx); + // Iterate through the different bits-per-pixel settings (8bpp/16bpp/32bpp/64bpp/128bpp) for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++) { UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX; + // May or may not return a ADDR_SW_PATINFO for a completely different swizzle mode, essentially + // overwriting the choice. 
const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1); if (pPatInfo != NULL) { ADDR_ASSERT(IsValidSwMode(swMode)); - - if (pPatInfo->maxItemCount <= 3) + if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex { ADDR_EQUATION equation = {}; + // Passing in pPatInfo to get the addr equation ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation); equationIndex = m_numEquations; ADDR_ASSERT(equationIndex < EquationTableSize); - + // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo m_equationTable[equationIndex] = equation; - + // Increment m_numEquations m_numEquations++; } - else + else // There is no equationIndex { // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4)); @@ -2033,7 +2030,8 @@ VOID Gfx10Lib::InitEquationTable() ADDR_ASSERT(m_settings.supportRbPlus == 1); } } - + // equationIndex, which is used to look up equations in m_equationTable, will be cached for every + // iteration in this nested for-loop m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex; } } @@ -2318,9 +2316,9 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView( { ADDR_E_RETURNCODE returnCode = ADDR_OK; - if (pIn->resourceType != ADDR_RSRC_TEX_2D) + if (IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE) { - // Only 2D resource can have a NonBC view... + // Only thin swizzle mode can have a NonBC view... 
returnCode = ADDR_INVALIDPARAMS; } else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) && @@ -2347,6 +2345,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView( infoIn.numFrags = 1; ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {}; + ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels); ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {}; infoOut.pMipInfo = mipInfo; @@ -2597,6 +2596,7 @@ BOOL_32 Gfx10Lib::ValidateSwModeParams( const BOOL_32 linear = IsLinear(swizzle); const BOOL_32 blk256B = IsBlock256b(swizzle); const BOOL_32 blkVar = IsBlockVariable(swizzle); + const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle); const BOOL_32 prt = flags.prt; const BOOL_32 fmask = flags.fmask; @@ -2755,7 +2755,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck( * Gfx10Lib::HwlGetPreferredSurfaceSetting * * @brief -* Internal function to get suggested surface information for cliet to use +* Internal function to get suggested surface information for client to use * * @return * ADDR_E_RETURNCODE @@ -2824,7 +2824,7 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement); } - if (BlockTypeWithinMemoryBudget(padSize[0], + if (Addr2BlockTypeWithinMemoryBudget(padSize[0], padSize[1], ratioLow, ratioHi, @@ -3057,7 +3057,9 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( if (pIn->flags.needEquation) { - FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3)); + UINT_32 components = pIn->flags.allowExtEquation ? ADDR_MAX_EQUATION_COMP : + ADDR_MAX_LEGACY_EQUATION_COMP; + FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components); } if (allowedSwModeSet.value == Gfx10LinearSwModeMask) @@ -3076,11 +3078,13 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( allowedSwModeSet.swLinear = 0; } + // A bitfield where each bit represents a block type. Each swizzle mode maps to a block. 
ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType); // Determine block size if there are 2 or more block type candidates if (IsPow2(allowedBlockSet.value) == FALSE) { + // Tracks a valid SwizzleMode for each valid block type AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {}; swMode[AddrBlockLinear] = ADDR_SW_LINEAR; @@ -3103,18 +3107,21 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S; } + // Tracks the size of each valid swizzle mode's surface in bytes UINT_64 padSize[AddrBlockMaxTiledType] = {}; const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2); const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1); - UINT_32 minSizeBlk = AddrBlockMicro; - UINT_64 minSize = 0; + const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u); + UINT_32 minSizeBlk = AddrBlockMicro; // Tracks the most optimal block to use + UINT_64 minSize = 0; // Tracks the minimum acceptable block type ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {}; + // Iterate through all block types for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++) { - if (IsBlockTypeAvaiable(allowedBlockSet, static_cast(i))) + if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i))) { localIn.swizzleMode = swMode[i]; @@ -3138,7 +3145,8 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( } else { - if (BlockTypeWithinMemoryBudget( + // Checks if the block type is within the memory budget but favors larger blocks + if (Addr2BlockTypeWithinMemoryBudget( minSize, padSize[i], ratioLow, @@ -3187,9 +3195,9 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting( for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) { if ((i != minSizeBlk) && - IsBlockTypeAvaiable(allowedBlockSet, static_cast(i))) + Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i))) { - if 
(BlockTypeWithinMemoryBudget( + if (Addr2BlockTypeWithinMemoryBudget( minSize, padSize[i], 0, @@ -3679,6 +3687,7 @@ ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled( UINT_64 mipSize[MaxMipLevels]; UINT_64 mipSliceSize[MaxMipLevels]; + ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels); Dim3d fixedTailMaxDim = tailMaxDim; if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1)) @@ -3895,54 +3904,23 @@ UINT_32 Gfx10Lib::ComputeOffsetFromEquation( { UINT_32 v = 0; - if (pEq->addr[i].valid) + for (UINT_32 c = 0; c < pEq->numBitComponents; c++) { - if (pEq->addr[i].channel == 0) + if (pEq->comps[c][i].valid) { - v ^= (x >> pEq->addr[i].index) & 1; - } - else if (pEq->addr[i].channel == 1) - { - v ^= (y >> pEq->addr[i].index) & 1; - } - else - { - ADDR_ASSERT(pEq->addr[i].channel == 2); - v ^= (z >> pEq->addr[i].index) & 1; - } - } - - if (pEq->xor1[i].valid) - { - if (pEq->xor1[i].channel == 0) - { - v ^= (x >> pEq->xor1[i].index) & 1; - } - else if (pEq->xor1[i].channel == 1) - { - v ^= (y >> pEq->xor1[i].index) & 1; - } - else - { - ADDR_ASSERT(pEq->xor1[i].channel == 2); - v ^= (z >> pEq->xor1[i].index) & 1; - } - } - - if (pEq->xor2[i].valid) - { - if (pEq->xor2[i].channel == 0) - { - v ^= (x >> pEq->xor2[i].index) & 1; - } - else if (pEq->xor2[i].channel == 1) - { - v ^= (y >> pEq->xor2[i].index) & 1; - } - else - { - ADDR_ASSERT(pEq->xor2[i].channel == 2); - v ^= (z >> pEq->xor2[i].index) & 1; + if (pEq->comps[c][i].channel == 0) + { + v ^= (x >> pEq->comps[c][i].index) & 1; + } + else if (pEq->comps[c][i].channel == 1) + { + v ^= (y >> pEq->comps[c][i].index) & 1; + } + else + { + ADDR_ASSERT(pEq->comps[c][i].channel == 2); + v ^= (z >> pEq->comps[c][i].index) & 1; + } } } @@ -4071,6 +4049,8 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( UINT_32 numFrag ///< Number of fragment ) const { + // Now elemLog2 is going to be used to access the correct index insode of the pPatInfo array so we will start from + // the right 
location const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2; const ADDR_SW_PATINFO* patInfo = NULL; const UINT_32 swizzleMask = 1 << swizzleMode; @@ -4133,8 +4113,15 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( { if (IsRtOptSwizzle(swizzleMode)) { - patInfo = m_settings.supportRbPlus ? - GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO; + if (swizzleMode == ADDR_SW_4KB_R_X) + { + patInfo = NULL; + } + else + { + patInfo = m_settings.supportRbPlus ? + GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO; + } } else if (IsZOrderSwizzle(swizzleMode)) { @@ -4228,6 +4215,10 @@ const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo( patInfo = m_settings.supportRbPlus ? GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO; } + else if (swizzleMode == ADDR_SW_4KB_R_X) + { + patInfo = NULL; + } else { ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X); @@ -4351,6 +4342,7 @@ ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled( ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {}; ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {}; ADDR2_MIP_INFO mipInfo[MaxMipLevels]; + ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels); localIn.swizzleMode = pIn->swizzleMode; localIn.flags = pIn->flags; @@ -4417,6 +4409,7 @@ ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled( ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {}; ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {}; ADDR2_MIP_INFO mipInfo[MaxMipLevels]; + ADDR_ASSERT(pIn->numMipLevels <= MaxMipLevels); localIn.swizzleMode = pIn->swizzleMode; localIn.flags = pIn->flags; @@ -4809,4 +4802,3 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear( } // V2 } // Addr -} // rocr diff --git a/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10addrlib.h b/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10addrlib.h index 542d51a437..f13dcb2f39 100644 --- a/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10addrlib.h +++ 
b/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10addrlib.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -37,9 +20,10 @@ #include "coord.h" #include "gfx10SwizzlePattern.h" -namespace rocr { -namespace Addr { -namespace V2 { +namespace Addr +{ +namespace V2 +{ /** ************************************************************************************************************************ @@ -57,8 +41,8 @@ struct Gfx10ChipSettings UINT_32 supportRbPlus : 1; UINT_32 dsMipmapHtileFix : 1; UINT_32 dccUnsup3DSwDis : 1; - UINT_32 : 2; - UINT_32 reserved2 : 26; + UINT_32 : 4; + UINT_32 reserved2 : 24; }; }; @@ -158,7 +142,9 @@ const UINT_32 Gfx10Rsrc3dPrtSwModeMask = Gfx10Rsrc2dPrtSwModeMask & ~Gfx10Displa const UINT_32 Gfx10Rsrc3dThin64KBSwModeMask = (1u << ADDR_SW_64KB_Z_X) | (1u << ADDR_SW_64KB_R_X); -const UINT_32 Gfx10Rsrc3dThinSwModeMask = Gfx10Rsrc3dThin64KBSwModeMask | Gfx10BlkVarSwModeMask; + +const UINT_32 Gfx10Rsrc3dThinSwModeMask = Gfx10Rsrc3dThin64KBSwModeMask | + Gfx10BlkVarSwModeMask; const UINT_32 Gfx10Rsrc3dThickSwModeMask = Gfx10Rsrc3dSwModeMask & ~(Gfx10Rsrc3dThinSwModeMask | Gfx10LinearSwModeMask); @@ -166,8 +152,9 @@ const UINT_32 Gfx10Rsrc3dThick4KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10 const UINT_32 Gfx10Rsrc3dThick64KBSwModeMask = Gfx10Rsrc3dThickSwModeMask & Gfx10Blk64KBSwModeMask; -const UINT_32 Gfx10MsaaSwModeMask = Gfx10ZSwModeMask | - Gfx10RenderSwModeMask; +const UINT_32 Gfx10MsaaSwModeMask = (Gfx10ZSwModeMask | + Gfx10RenderSwModeMask) + ; const UINT_32 Dcn20NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR) | (1u << ADDR_SW_4KB_S) | @@ -396,6 +383,12 @@ private: UINT_32 log2Elem, UINT_32 numFrag) const; + /** + * Will use 
the indices, "nibbles", to build an index equation inside pSwizzle + * + * @param pPatInfo Pointer to a patInfo. Contains indices mapping to the 2D nibble arrays which will be used to build an index equation. + * @param pSwizzle Array to write the index equation to. + */ VOID GetSwizzlePatternFromPatternInfo( const ADDR_SW_PATINFO* pPatInfo, ADDR_BIT_SETTING (&pSwizzle)[20]) const @@ -570,12 +563,12 @@ private: UINT_32 m_colorBaseIndex; UINT_32 m_xmaskBaseIndex; + UINT_32 m_htileBaseIndex; UINT_32 m_dccBaseIndex; }; } // V2 } // Addr -} // rocr #endif diff --git a/runtime/hsa-runtime/image/addrlib/src/gfx11/gfx11SwizzlePattern.h b/runtime/hsa-runtime/image/addrlib/src/gfx11/gfx11SwizzlePattern.h index c9f92bbead..e8adff6ac4 100644 --- a/runtime/hsa-runtime/image/addrlib/src/gfx11/gfx11SwizzlePattern.h +++ b/runtime/hsa-runtime/image/addrlib/src/gfx11/gfx11SwizzlePattern.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -33,10 +16,11 @@ #ifndef __GFX11_SWIZZLE_PATTERN_H__ #define __GFX11_SWIZZLE_PATTERN_H__ -namespace rocr { -namespace Addr { -namespace V2 { +namespace Addr +{ +namespace V2 +{ const ADDR_SW_PATINFO GFX11_SW_256_D_PATINFO[] = { { 1, 0, 0, 0, 0, } , // 1 pipes (1 PKRs) 1 bpe @ SW_256_D @@ -3050,7 +3034,7 @@ const UINT_64 GFX11_HTILE_SW_PATTERN[][18] = {0, 0, 0, X3, Y3, X7, Y7, X8, Y8, Y4^X9^Y9, Z1^X4^Y4, Z0^Y5^X8, X5^Y8, Y6^X7, X6^Y7, X9, Y9, X10, }, //17 }; -} // V2 +}// V2 } // Addr -} // rocr + #endif diff --git a/runtime/hsa-runtime/image/addrlib/src/gfx11/gfx11addrlib.cpp b/runtime/hsa-runtime/image/addrlib/src/gfx11/gfx11addrlib.cpp index c56be1a575..e5206be496 100644 --- a/runtime/hsa-runtime/image/addrlib/src/gfx11/gfx11addrlib.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/gfx11/gfx11addrlib.cpp @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. 
-* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -38,8 +21,8 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -namespace rocr { -namespace Addr { +namespace Addr +{ /** ************************************************************************************************************************ * Gfx11HwlInit @@ -289,18 +272,23 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlComputeDccInfo( } else { - const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); + const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); + const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u)); + Dim3d compBlock = {}; - const BOOL_32 isThick = IsThick(pIn->resourceType, 
pIn->swizzleMode); - - pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w; - pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h; - pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1; + GetCompressedBlockSizeLog2(Gfx11DataColor, + pIn->resourceType, + pIn->swizzleMode, + elemLog2, + numFragLog2, + &compBlock); + pOut->compressBlkWidth = 1 << compBlock.w; + pOut->compressBlkHeight = 1 << compBlock.h; + pOut->compressBlkDepth = 1 << compBlock.d; if (ret == ADDR_OK) { Dim3d metaBlk = {}; - const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u)); const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx11DataColor, pIn->resourceType, pIn->swizzleMode, @@ -386,6 +374,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlComputeDccInfo( // Get the DCC address equation (copied from DccAddrFromCoord) const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); + const UINT_32 numPipeLog2 = m_pipesLog2; UINT_32 index = m_dccBaseIndex + elemLog2; const UINT_8* patIdxTable = (pIn->swizzleMode == ADDR_SW_64KB_R_X) ? 
GFX11_DCC_64K_R_X_PATIDX : GFX11_DCC_256K_R_X_PATIDX; @@ -739,27 +728,26 @@ ChipFamily Gfx11Lib::HwlConvertChipFamily( switch (chipFamily) { - case FAMILY_GFX1100: - if (ASICREV_IS_GFX1100(chipRevision)) + case FAMILY_NV3: + if (ASICREV_IS_NAVI31_P(chipRevision)) { } - if (ASICREV_IS_GFX1101(chipRevision)) + if (ASICREV_IS_NAVI32_P(chipRevision)) { } - if (ASICREV_IS_GFX1102(chipRevision)) - { - } - break; - case FAMILY_GFX1103: - if (ASICREV_IS_GFX1103(chipRevision)) + if (ASICREV_IS_NAVI33_P(chipRevision)) { } break; case FAMILY_GFX1150: if (ASICREV_IS_GFX1150(chipRevision)) { + m_settings.isGfx1150 = 1; } break; + case FAMILY_GFX1103: + m_settings.isGfx1103 = 1; + break; default: ADDR_ASSERT(!"Unknown chip family"); break; @@ -1103,13 +1091,14 @@ VOID Gfx11Lib::ConvertSwizzlePatternToEquation( ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern const { - ADDR_BIT_SETTING fullSwizzlePattern[20]; + ADDR_BIT_SETTING fullSwizzlePattern[ADDR_MAX_EQUATION_BIT]; GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern); const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern; const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode); - + memset(pEquation, 0, sizeof(ADDR_EQUATION)); pEquation->numBits = blockSizeLog2; + pEquation->numBitComponents = pPatInfo->maxItemCount; pEquation->stackedDepthSlices = FALSE; for (UINT_32 i = 0; i < elemLog2; i++) @@ -1658,20 +1647,21 @@ VOID Gfx11Lib::InitEquationTable() { ADDR_ASSERT(IsValidSwMode(swMode)); - if (pPatInfo->maxItemCount <= 3) + if (pPatInfo->maxItemCount <= 3) // Get a valid equationIndex { ADDR_EQUATION equation = {}; + // Passing in pPatInfo to get the addr equation ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation); equationIndex = m_numEquations; ADDR_ASSERT(equationIndex < EquationTableSize); - + // Updates m_equationTable[m_numEquations] to be the addr equation for this PatInfo m_equationTable[equationIndex] = equation; - + // Increment m_numEquations 
m_numEquations++; } - else + else // There is no equationIndex { // We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4)); @@ -1744,7 +1734,19 @@ UINT_32 Gfx11Lib::GetValidDisplaySwizzleModes( if (bpp <= 64) { + const ChipFamily family = GetChipFamily(); + swModeMask = Dcn32SwModeMask; + + if (false + || (m_settings.isGfx1103) + || (m_settings.isGfx1150) + ) + { + // Not all GPUs support displaying with 256kB swizzle modes. + swModeMask &= ~((1u << ADDR_SW_256KB_D_X) | + (1u << ADDR_SW_256KB_R_X)); + } } return swModeMask; @@ -1936,15 +1938,15 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlComputeNonBlockCompressedView( { ADDR_E_RETURNCODE returnCode = ADDR_OK; - if (pIn->resourceType != ADDR_RSRC_TEX_2D) + if (IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE) { - // Only 2D resource can have a NonBC view... + // Only thin swizzle mode can have a NonBC view... returnCode = ADDR_INVALIDPARAMS; } - else if ((pIn->format != ADDR_FMT_ASTC_8x8) && + else if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) && ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7))) { - // Only support BC1~BC7 or ASTC_8x8 for now... + // Only support BC1~BC7, ASTC, or ETC2 for now... 
returnCode = ADDR_NOTSUPPORTED; } else @@ -1957,8 +1959,8 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlComputeNonBlockCompressedView( infoIn.swizzleMode = pIn->swizzleMode; infoIn.resourceType = pIn->resourceType; infoIn.bpp = bpp; - infoIn.width = PowTwoAlign(pIn->width, bcWidth) / bcWidth; - infoIn.height = PowTwoAlign(pIn->height, bcHeight) / bcHeight; + infoIn.width = RoundUpQuotient(pIn->width, bcWidth); + infoIn.height = RoundUpQuotient(pIn->height, bcHeight); infoIn.numSlices = pIn->numSlices; infoIn.numMipLevels = pIn->numMipLevels; infoIn.numSamples = 1; @@ -2010,8 +2012,8 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlComputeNonBlockCompressedView( pOut->pipeBankXor = slicePbXorOut.pipeBankXor; const BOOL_32 inTail = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE; - const UINT_32 requestMipWidth = PowTwoAlign(Max(pIn->width >> pIn->mipId, 1u), bcWidth) / bcWidth; - const UINT_32 requestMipHeight = PowTwoAlign(Max(pIn->height >> pIn->mipId, 1u), bcHeight) / bcHeight; + const UINT_32 requestMipWidth = RoundUpQuotient(Max(pIn->width >> pIn->mipId, 1u), bcWidth); + const UINT_32 requestMipHeight = RoundUpQuotient(Max(pIn->height >> pIn->mipId, 1u), bcHeight); if (inTail) { @@ -2061,10 +2063,8 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlComputeNonBlockCompressedView( pOut->mipId = 1; pOut->numMipLevels = 2; - const UINT_32 upperMipWidth = - PowTwoAlign(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth) / bcWidth; - const UINT_32 upperMipHeight = - PowTwoAlign(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight) / bcHeight; + const UINT_32 upperMipWidth = RoundUpQuotient(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth); + const UINT_32 upperMipHeight = RoundUpQuotient(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight); const BOOL_32 needToAvoidInTail = tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ? 
@@ -2226,6 +2226,7 @@ BOOL_32 Gfx11Lib::ValidateSwModeParams( const BOOL_32 thin3d = flags.view3dAs2dArray; const BOOL_32 linear = IsLinear(swizzle); const BOOL_32 blk256B = IsBlock256b(swizzle); + const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle); const BOOL_32 prt = flags.prt; // Misc check @@ -2571,8 +2572,9 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting( pOut->resourceType = pIn->resourceType; pOut->validSwModeSet = allowedSwModeSet; pOut->canXor = (allowedSwModeSet.value & Gfx11XorSwModeMask) ? TRUE : FALSE; - pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType); - pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet); + + GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType, &(pOut->validBlockSet)); + GetAllowedSwSet(allowedSwModeSet, &(pOut->validSwTypeSet)); pOut->clientPreferredSwSet = pIn->preferredSwSet; @@ -2584,7 +2586,9 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting( // Apply optional restrictions if (pIn->flags.needEquation) { - FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3)); + UINT_32 components = pIn->flags.allowExtEquation ? ADDR_MAX_EQUATION_COMP : + ADDR_MAX_LEGACY_EQUATION_COMP; + FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components); } if (allowedSwModeSet.value == Gfx11LinearSwModeMask) @@ -2603,7 +2607,8 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting( allowedSwModeSet.swLinear = 0; } - ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType); + ADDR2_BLOCK_SET allowedBlockSet = {}; + GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType, &allowedBlockSet); // Determine block size if there are 2 or more block type candidates if (IsPow2(allowedBlockSet.value) == FALSE) @@ -2632,6 +2637,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting( const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2); const UINT_32 ratioHi = computeMinSize ? 
1 : (pIn->flags.opt4space ? 2 : 1); + const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u); UINT_32 minSizeBlk = AddrBlockMicro; UINT_64 minSize = 0; @@ -2639,7 +2645,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting( for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++) { - if (IsBlockTypeAvaiable(allowedBlockSet, static_cast(i))) + if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i))) { localIn.swizzleMode = swMode[i]; @@ -2657,7 +2663,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting( padSize[i] = localOut.surfSize; if ((minSize == 0) || - BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi)) + Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi)) { minSize = padSize[i]; minSizeBlk = i; @@ -2702,9 +2708,9 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting( for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) { if ((i != minSizeBlk) && - IsBlockTypeAvaiable(allowedBlockSet, static_cast(i))) + Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i))) { - if (BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE) + if (Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE) { // Clear the block type if the memory waste is unacceptable allowedBlockSet.value &= ~(1u << (i - 1)); @@ -2776,9 +2782,11 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting( } // Block type should be determined. 
- ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value)); + GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType, &allowedBlockSet); + ADDR_ASSERT(IsPow2(allowedBlockSet.value)); - ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet); + ADDR2_SWTYPE_SET allowedSwSet = {}; + GetAllowedSwSet(allowedSwModeSet, &allowedSwSet); // Determine swizzle type if there are 2 or more swizzle type candidates if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE)) @@ -2819,7 +2827,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting( { if (pIn->flags.color && allowedSwSet.sw_R) { - allowedSwModeSet.value &= Gfx11DisplaySwModeMask; + allowedSwModeSet.value &= Gfx11RenderSwModeMask; } else if (allowedSwSet.sw_S) { @@ -2827,7 +2835,7 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting( } else if (allowedSwSet.sw_D) { - allowedSwModeSet.value &= Gfx11RenderSwModeMask; + allowedSwModeSet.value &= Gfx11DisplaySwModeMask; } else { @@ -2856,7 +2864,8 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting( } // Swizzle type should be determined. - ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value)); + GetAllowedSwSet(allowedSwModeSet, &allowedSwSet); + ADDR_ASSERT(IsPow2(allowedSwSet.value)); } // Determine swizzle mode now. 
Always select the "largest" swizzle mode for a given block type + @@ -2883,6 +2892,271 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlGetPreferredSurfaceSetting( return returnCode; } +/** +************************************************************************************************************************ +* Gfx11Lib::HwlGetPossibleSwizzleModes +* +* @brief +* Returns a list of swizzle modes that are valid from the hardware's perspective for the client to choose from +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx11Lib::HwlGetPossibleSwizzleModes( + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pIn->flags.fmask) + { + // There is no FMASK for GFX11 ASICs. + ADDR_ASSERT_ALWAYS(); + + returnCode = ADDR_INVALIDPARAMS; + } + else + { + UINT_32 bpp = pIn->bpp; + UINT_32 width = Max(pIn->width, 1u); + UINT_32 height = Max(pIn->height, 1u); + + // Set format to INVALID will skip this conversion + if (pIn->format != ADDR_FMT_INVALID) + { + ElemMode elemMode = ADDR_UNCOMPRESSED; + UINT_32 expandX, expandY; + + // Get compression/expansion factors and element mode which indicates compression/expansion + bpp = GetElemLib()->GetBitsPerPixel(pIn->format, + &elemMode, + &expandX, + &expandY); + + UINT_32 basePitch = 0; + GetElemLib()->AdjustSurfaceInfo(elemMode, + expandX, + expandY, + &bpp, + &basePitch, + &width, + &height); + } + + const UINT_32 numSlices = Max(pIn->numSlices, 1u); + const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u); + const UINT_32 numSamples = Max(pIn->numSamples, 1u); + const BOOL_32 msaa = numSamples > 1; + + // Pre sanity check on non swizzle mode parameters + ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {}; + localIn.flags = pIn->flags; + localIn.resourceType 
= pIn->resourceType; + localIn.format = pIn->format; + localIn.bpp = bpp; + localIn.width = width; + localIn.height = height; + localIn.numSlices = numSlices; + localIn.numMipLevels = numMipLevels; + localIn.numSamples = numSamples; + localIn.numFrags = numSamples; + + if (ValidateNonSwModeParams(&localIn)) + { + // Allow appropriate swizzle modes by default + ADDR2_SWMODE_SET allowedSwModeSet = {}; + allowedSwModeSet.value |= Gfx11LinearSwModeMask | Gfx11Blk256BSwModeMask; + if (pIn->resourceType == ADDR_RSRC_TEX_3D) + { + allowedSwModeSet.value |= Gfx11Rsrc3dThick4KBSwModeMask | + Gfx11Rsrc3dThin64KBSwModeMask | + Gfx11Rsrc3dThick64KBSwModeMask | + Gfx11Rsrc3dThin256KBSwModeMask | + Gfx11Rsrc3dThick256KBSwModeMask; + } + else + { + allowedSwModeSet.value |= Gfx11Blk4KBSwModeMask | Gfx11Blk64KBSwModeMask | Gfx11Blk256KBSwModeMask; + } + + // Filter out invalid swizzle mode(s) by image attributes and HW restrictions + switch (pIn->resourceType) + { + case ADDR_RSRC_TEX_1D: + allowedSwModeSet.value &= Gfx11Rsrc1dSwModeMask; + break; + + case ADDR_RSRC_TEX_2D: + allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc2dPrtSwModeMask : Gfx11Rsrc2dSwModeMask; + break; + + case ADDR_RSRC_TEX_3D: + allowedSwModeSet.value &= pIn->flags.prt ? Gfx11Rsrc3dPrtSwModeMask : Gfx11Rsrc3dSwModeMask; + + if (pIn->flags.view3dAs2dArray) + { + allowedSwModeSet.value &= Gfx11Rsrc3dThinSwModeMask; + } + break; + + default: + ADDR_ASSERT_ALWAYS(); + allowedSwModeSet.value = 0; + break; + } + + // TODO: figure out if following restrictions are correct on GFX11... 
+ if (ElemLib::IsBlockCompressed(pIn->format) || + ElemLib::IsMacroPixelPacked(pIn->format) || + (bpp > 64) || + (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered))) + { + allowedSwModeSet.value &= ~Gfx11ZSwModeMask; + } + + if (pIn->format == ADDR_FMT_32_32_32) + { + allowedSwModeSet.value &= Gfx11LinearSwModeMask; + } + + if (msaa) + { + allowedSwModeSet.value &= Gfx11MsaaSwModeMask; + } + + if (pIn->flags.depth || pIn->flags.stencil) + { + allowedSwModeSet.value &= Gfx11ZSwModeMask; + } + + if (pIn->flags.display) + { + allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp); + } + + if (allowedSwModeSet.value != 0) + { +#if DEBUG + // Post sanity check, at least AddrLib should accept the output generated by its own + UINT_32 validateSwModeSet = allowedSwModeSet.value; + + for (UINT_32 i = 0; validateSwModeSet != 0; i++) + { + if (validateSwModeSet & 1) + { + localIn.swizzleMode = static_cast(i); + ADDR_ASSERT(ValidateSwModeParams(&localIn)); + } + + validateSwModeSet >>= 1; + } +#endif + + pOut->resourceType = pIn->resourceType; + pOut->clientPreferredSwSet = pIn->preferredSwSet; + + if (pOut->clientPreferredSwSet.value == 0) + { + pOut->clientPreferredSwSet.value = AddrSwSetAll; + } + + if (pIn->flags.needEquation) + { + UINT_32 components = pIn->flags.allowExtEquation ? ADDR_MAX_EQUATION_COMP : + ADDR_MAX_LEGACY_EQUATION_COMP; + FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components); + } + + pOut->validSwModeSet = allowedSwModeSet; + pOut->canXor = (allowedSwModeSet.value & Gfx11XorSwModeMask) ? TRUE : FALSE; + } + else + { + // Invalid combination... + ADDR_ASSERT_ALWAYS(); + returnCode = ADDR_INVALIDPARAMS; + } + } + else + { + // Invalid combination... 
+ ADDR_ASSERT_ALWAYS(); + returnCode = ADDR_INVALIDPARAMS; + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx11Lib::HwlGetAllowedBlockSet +* +* @brief +* Returns the set of allowed block sizes given the allowed swizzle modes and resource type +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx11Lib::HwlGetAllowedBlockSet( + ADDR2_SWMODE_SET allowedSwModeSet, ///< [in] allowed swizzle modes + AddrResourceType rsrcType, ///< [in] resource type + ADDR2_BLOCK_SET* pAllowedBlockSet ///< [out] allowed block sizes + ) const +{ + ADDR2_BLOCK_SET allowedBlockSet = {}; + + allowedBlockSet.micro = (allowedSwModeSet.value & Gfx11Blk256BSwModeMask) ? TRUE : FALSE; + allowedBlockSet.linear = (allowedSwModeSet.value & Gfx11LinearSwModeMask) ? TRUE : FALSE; + + if (rsrcType == ADDR_RSRC_TEX_3D) + { + allowedBlockSet.macroThick4KB = (allowedSwModeSet.value & Gfx11Rsrc3dThick4KBSwModeMask) ? TRUE : FALSE; + allowedBlockSet.macroThin64KB = (allowedSwModeSet.value & Gfx11Rsrc3dThin64KBSwModeMask) ? TRUE : FALSE; + allowedBlockSet.macroThick64KB = (allowedSwModeSet.value & Gfx11Rsrc3dThick64KBSwModeMask) ? TRUE : FALSE; + allowedBlockSet.gfx11.thin256KB = (allowedSwModeSet.value & Gfx11Rsrc3dThin256KBSwModeMask) ? TRUE : FALSE; + allowedBlockSet.gfx11.thick256KB = (allowedSwModeSet.value & Gfx11Rsrc3dThick256KBSwModeMask) ? TRUE : FALSE; + } + else + { + allowedBlockSet.macroThin4KB = (allowedSwModeSet.value & Gfx11Blk4KBSwModeMask) ? TRUE : FALSE; + allowedBlockSet.macroThin64KB = (allowedSwModeSet.value & Gfx11Blk64KBSwModeMask) ? TRUE : FALSE; + allowedBlockSet.gfx11.thin256KB = (allowedSwModeSet.value & Gfx11Blk256KBSwModeMask) ? 
TRUE : FALSE; + } + + *pAllowedBlockSet = allowedBlockSet; + return ADDR_OK; +} + +/** +************************************************************************************************************************ +* Gfx11Lib::HwlGetAllowedSwSet +* +* @brief +* Returns the set of allowed swizzle types given the allowed swizzle modes +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx11Lib::HwlGetAllowedSwSet( + ADDR2_SWMODE_SET allowedSwModeSet, ///< [in] allowed swizzle modes + ADDR2_SWTYPE_SET* pAllowedSwSet ///< [out] allowed swizzle types + ) const +{ + ADDR2_SWTYPE_SET allowedSwSet = {}; + + allowedSwSet.sw_Z = (allowedSwModeSet.value & Gfx11ZSwModeMask) ? TRUE : FALSE; + allowedSwSet.sw_S = (allowedSwModeSet.value & Gfx11StandardSwModeMask) ? TRUE : FALSE; + allowedSwSet.sw_D = (allowedSwModeSet.value & Gfx11DisplaySwModeMask) ? TRUE : FALSE; + allowedSwSet.sw_R = (allowedSwModeSet.value & Gfx11RenderSwModeMask) ? 
TRUE : FALSE; + + *pAllowedSwSet = allowedSwSet; + return ADDR_OK; +} + /** ************************************************************************************************************************ * Gfx11Lib::ComputeStereoInfo @@ -3182,12 +3456,12 @@ ADDR_E_RETURNCODE Gfx11Lib::ComputeSurfaceInfoMacroTiled( UINT_64 mipSize[MaxMipLevels]; UINT_64 mipSliceSize[MaxMipLevels]; + // For htile, we need to make z16 and stencil enter the mip tail at the same time as z32 would Dim3d fixedTailMaxDim = tailMaxDim; - - if ((IsZOrderSwizzle(pIn->swizzleMode) || IsRtOptSwizzle(pIn->swizzleMode)) && (index <= 1)) + if (IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1)) { fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w; - fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h; + fixedTailMaxDim.h /= Block256_2d[index].w / Block256_2d[2].w; } for (UINT_32 i = 0; i < pIn->numMipLevels; i++) @@ -3400,54 +3674,23 @@ UINT_32 Gfx11Lib::ComputeOffsetFromEquation( { UINT_32 v = 0; - if (pEq->addr[i].valid) + for (UINT_32 c = 0; c < pEq->numBitComponents; c++) { - if (pEq->addr[i].channel == 0) + if (pEq->comps[c][i].valid) { - v ^= (x >> pEq->addr[i].index) & 1; - } - else if (pEq->addr[i].channel == 1) - { - v ^= (y >> pEq->addr[i].index) & 1; - } - else - { - ADDR_ASSERT(pEq->addr[i].channel == 2); - v ^= (z >> pEq->addr[i].index) & 1; - } - } - - if (pEq->xor1[i].valid) - { - if (pEq->xor1[i].channel == 0) - { - v ^= (x >> pEq->xor1[i].index) & 1; - } - else if (pEq->xor1[i].channel == 1) - { - v ^= (y >> pEq->xor1[i].index) & 1; - } - else - { - ADDR_ASSERT(pEq->xor1[i].channel == 2); - v ^= (z >> pEq->xor1[i].index) & 1; - } - } - - if (pEq->xor2[i].valid) - { - if (pEq->xor2[i].channel == 0) - { - v ^= (x >> pEq->xor2[i].index) & 1; - } - else if (pEq->xor2[i].channel == 1) - { - v ^= (y >> pEq->xor2[i].index) & 1; - } - else - { - ADDR_ASSERT(pEq->xor2[i].channel == 2); - v ^= (z >> pEq->xor2[i].index) & 1; + if (pEq->comps[c][i].channel == 0) + { + v ^= (x >> 
pEq->comps[c][i].index) & 1; + } + else if (pEq->comps[c][i].channel == 1) + { + v ^= (y >> pEq->comps[c][i].index) & 1; + } + else + { + ADDR_ASSERT(pEq->comps[c][i].channel == 2); + v ^= (z >> pEq->comps[c][i].index) & 1; + } } } @@ -4033,6 +4276,7 @@ UINT_32 Gfx11Lib::HwlComputeMaxMetaBaseAlignments() const } // Max base alignment for 2D Dcc + // swizzle mode support DCC... const AddrSwizzleMode ValidSwizzleModeForDcc2D[] = { ADDR_SW_64KB_R_X, @@ -4250,4 +4494,3 @@ ADDR_E_RETURNCODE Gfx11Lib::HwlComputeSurfaceInfoLinear( } // V2 } // Addr -} // rocr diff --git a/runtime/hsa-runtime/image/addrlib/src/gfx11/gfx11addrlib.h b/runtime/hsa-runtime/image/addrlib/src/gfx11/gfx11addrlib.h index 391eda2cbc..b07c989126 100644 --- a/runtime/hsa-runtime/image/addrlib/src/gfx11/gfx11addrlib.h +++ b/runtime/hsa-runtime/image/addrlib/src/gfx11/gfx11addrlib.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -37,9 +20,10 @@ #include "coord.h" #include "gfx11SwizzlePattern.h" -namespace rocr { -namespace Addr { -namespace V2 { +namespace Addr +{ +namespace V2 +{ /** ************************************************************************************************************************ @@ -50,7 +34,9 @@ struct Gfx11ChipSettings { struct { - UINT_32 reserved1 : 32; + UINT_32 isGfx1150 : 1; + UINT_32 isGfx1103 : 1; + UINT_32 reserved1 : 30; // Misc configuration bits UINT_32 reserved2 : 32; @@ -285,6 +271,19 @@ protected: const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const; + virtual ADDR_E_RETURNCODE HwlGetPossibleSwizzleModes( + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlGetAllowedBlockSet( + ADDR2_SWMODE_SET allowedSwModeSet, + AddrResourceType rsrcType, + ADDR2_BLOCK_SET* pAllowedBlockSet) const; + + virtual ADDR_E_RETURNCODE HwlGetAllowedSwSet( + ADDR2_SWMODE_SET allowedSwModeSet, + ADDR2_SWTYPE_SET* pAllowedSwSet) const; + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck( const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; @@ -467,43 +466,6 @@ private: UINT_32 GetMaxNumMipsInTail(UINT_32 blockSizeLog2, BOOL_32 isThin) const; - static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet, AddrResourceType rsrcType) - { - ADDR2_BLOCK_SET allowedBlockSet = {}; - - allowedBlockSet.micro = (allowedSwModeSet.value & Gfx11Blk256BSwModeMask) ? 
TRUE : FALSE; - allowedBlockSet.linear = (allowedSwModeSet.value & Gfx11LinearSwModeMask) ? TRUE : FALSE; - - if (rsrcType == ADDR_RSRC_TEX_3D) - { - allowedBlockSet.macroThick4KB = (allowedSwModeSet.value & Gfx11Rsrc3dThick4KBSwModeMask) ? TRUE : FALSE; - allowedBlockSet.macroThin64KB = (allowedSwModeSet.value & Gfx11Rsrc3dThin64KBSwModeMask) ? TRUE : FALSE; - allowedBlockSet.macroThick64KB = (allowedSwModeSet.value & Gfx11Rsrc3dThick64KBSwModeMask) ? TRUE : FALSE; - allowedBlockSet.gfx11.thin256KB = (allowedSwModeSet.value & Gfx11Rsrc3dThin256KBSwModeMask) ? TRUE : FALSE; - allowedBlockSet.gfx11.thick256KB = (allowedSwModeSet.value & Gfx11Rsrc3dThick256KBSwModeMask) ? TRUE : FALSE; - } - else - { - allowedBlockSet.macroThin4KB = (allowedSwModeSet.value & Gfx11Blk4KBSwModeMask) ? TRUE : FALSE; - allowedBlockSet.macroThin64KB = (allowedSwModeSet.value & Gfx11Blk64KBSwModeMask) ? TRUE : FALSE; - allowedBlockSet.gfx11.thin256KB = (allowedSwModeSet.value & Gfx11Blk256KBSwModeMask) ? TRUE : FALSE; - } - - return allowedBlockSet; - } - - static ADDR2_SWTYPE_SET GetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet) - { - ADDR2_SWTYPE_SET allowedSwSet = {}; - - allowedSwSet.sw_Z = (allowedSwModeSet.value & Gfx11ZSwModeMask) ? TRUE : FALSE; - allowedSwSet.sw_S = (allowedSwModeSet.value & Gfx11StandardSwModeMask) ? TRUE : FALSE; - allowedSwSet.sw_D = (allowedSwModeSet.value & Gfx11DisplaySwModeMask) ? TRUE : FALSE; - allowedSwSet.sw_R = (allowedSwModeSet.value & Gfx11RenderSwModeMask) ? 
TRUE : FALSE; - - return allowedSwSet; - } - BOOL_32 IsInMipTail( Dim3d mipTailDim, UINT_32 maxNumMipsInTail, @@ -555,5 +517,6 @@ private: } // V2 } // Addr -} // rocr + #endif + diff --git a/runtime/hsa-runtime/image/addrlib/src/gfx12/gfx12SwizzlePattern.h b/runtime/hsa-runtime/image/addrlib/src/gfx12/gfx12SwizzlePattern.h new file mode 100644 index 0000000000..b29fbb8ef4 --- /dev/null +++ b/runtime/hsa-runtime/image/addrlib/src/gfx12/gfx12SwizzlePattern.h @@ -0,0 +1,279 @@ +/* +************************************************************************************************************************ +* +* Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. +* SPDX-License-Identifier: MIT +* +***********************************************************************************************************************/ + +/** +************************************************************************************************************************ +* @file gfx12SwizzlePattern.h +* @brief swizzle pattern for gfx12. 
+************************************************************************************************************************ +*/ + +#ifndef __GFX12_SWIZZLE_PATTERN_H__ +#define __GFX12_SWIZZLE_PATTERN_H__ + +namespace Addr +{ +namespace V3 +{ + const ADDR_SW_PATINFO GFX12_SW_256B_2D_1xAA_PATINFO[] = + { + { 0, 0, 0, 0, } , // 1 BPE @ SW_256B_2D_1xAA + { 1, 0, 0, 0, } , // 2 BPE @ SW_256B_2D_1xAA + { 2, 0, 0, 0, } , // 4 BPE @ SW_256B_2D_1xAA + { 3, 0, 0, 0, } , // 8 BPE @ SW_256B_2D_1xAA + { 4, 0, 0, 0, } , // 16 BPE @ SW_256B_2D_1xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_256B_2D_2xAA_PATINFO[] = + { + { 5, 0, 0, 0, } , // 1 BPE @ SW_256B_2D_2xAA + { 6, 0, 0, 0, } , // 2 BPE @ SW_256B_2D_2xAA + { 7, 0, 0, 0, } , // 4 BPE @ SW_256B_2D_2xAA + { 8, 0, 0, 0, } , // 8 BPE @ SW_256B_2D_2xAA + { 9, 0, 0, 0, } , // 16 BPE @ SW_256B_2D_2xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_256B_2D_4xAA_PATINFO[] = + { + { 10, 0, 0, 0, } , // 1 BPE @ SW_256B_2D_4xAA + { 11, 0, 0, 0, } , // 2 BPE @ SW_256B_2D_4xAA + { 12, 0, 0, 0, } , // 4 BPE @ SW_256B_2D_4xAA + { 13, 0, 0, 0, } , // 8 BPE @ SW_256B_2D_4xAA + { 14, 0, 0, 0, } , // 16 BPE @ SW_256B_2D_4xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_256B_2D_8xAA_PATINFO[] = + { + { 15, 0, 0, 0, } , // 1 BPE @ SW_256B_2D_8xAA + { 16, 0, 0, 0, } , // 2 BPE @ SW_256B_2D_8xAA + { 17, 0, 0, 0, } , // 4 BPE @ SW_256B_2D_8xAA + { 18, 0, 0, 0, } , // 8 BPE @ SW_256B_2D_8xAA + { 19, 0, 0, 0, } , // 16 BPE @ SW_256B_2D_8xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_4KB_2D_1xAA_PATINFO[] = + { + { 0, 1, 0, 0, } , // 1 BPE @ SW_4KB_2D_1xAA + { 1, 2, 0, 0, } , // 2 BPE @ SW_4KB_2D_1xAA + { 2, 3, 0, 0, } , // 4 BPE @ SW_4KB_2D_1xAA + { 3, 4, 0, 0, } , // 8 BPE @ SW_4KB_2D_1xAA + { 4, 5, 0, 0, } , // 16 BPE @ SW_4KB_2D_1xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_4KB_2D_2xAA_PATINFO[] = + { + { 5, 2, 0, 0, } , // 1 BPE @ SW_4KB_2D_2xAA + { 6, 3, 0, 0, } , // 2 BPE @ SW_4KB_2D_2xAA + { 7, 4, 0, 0, } , // 4 BPE @ SW_4KB_2D_2xAA + { 8, 5, 0, 0, } , // 8 BPE @ 
SW_4KB_2D_2xAA + { 9, 6, 0, 0, } , // 16 BPE @ SW_4KB_2D_2xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_4KB_2D_4xAA_PATINFO[] = + { + { 10, 3, 0, 0, } , // 1 BPE @ SW_4KB_2D_4xAA + { 11, 4, 0, 0, } , // 2 BPE @ SW_4KB_2D_4xAA + { 12, 5, 0, 0, } , // 4 BPE @ SW_4KB_2D_4xAA + { 13, 6, 0, 0, } , // 8 BPE @ SW_4KB_2D_4xAA + { 14, 7, 0, 0, } , // 16 BPE @ SW_4KB_2D_4xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_4KB_2D_8xAA_PATINFO[] = + { + { 15, 4, 0, 0, } , // 1 BPE @ SW_4KB_2D_8xAA + { 16, 5, 0, 0, } , // 2 BPE @ SW_4KB_2D_8xAA + { 17, 6, 0, 0, } , // 4 BPE @ SW_4KB_2D_8xAA + { 18, 7, 0, 0, } , // 8 BPE @ SW_4KB_2D_8xAA + { 19, 8, 0, 0, } , // 16 BPE @ SW_4KB_2D_8xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_64KB_2D_1xAA_PATINFO[] = + { + { 0, 1, 1, 0, } , // 1 BPE @ SW_64KB_2D_1xAA + { 1, 2, 2, 0, } , // 2 BPE @ SW_64KB_2D_1xAA + { 2, 3, 3, 0, } , // 4 BPE @ SW_64KB_2D_1xAA + { 3, 4, 4, 0, } , // 8 BPE @ SW_64KB_2D_1xAA + { 4, 5, 5, 0, } , // 16 BPE @ SW_64KB_2D_1xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_64KB_2D_2xAA_PATINFO[] = + { + { 5, 2, 2, 0, } , // 1 BPE @ SW_64KB_2D_2xAA + { 6, 3, 3, 0, } , // 2 BPE @ SW_64KB_2D_2xAA + { 7, 4, 4, 0, } , // 4 BPE @ SW_64KB_2D_2xAA + { 8, 5, 5, 0, } , // 8 BPE @ SW_64KB_2D_2xAA + { 9, 6, 6, 0, } , // 16 BPE @ SW_64KB_2D_2xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_64KB_2D_4xAA_PATINFO[] = + { + { 10, 3, 3, 0, } , // 1 BPE @ SW_64KB_2D_4xAA + { 11, 4, 4, 0, } , // 2 BPE @ SW_64KB_2D_4xAA + { 12, 5, 5, 0, } , // 4 BPE @ SW_64KB_2D_4xAA + { 13, 6, 6, 0, } , // 8 BPE @ SW_64KB_2D_4xAA + { 14, 7, 7, 0, } , // 16 BPE @ SW_64KB_2D_4xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_64KB_2D_8xAA_PATINFO[] = + { + { 15, 4, 4, 0, } , // 1 BPE @ SW_64KB_2D_8xAA + { 16, 5, 5, 0, } , // 2 BPE @ SW_64KB_2D_8xAA + { 17, 6, 6, 0, } , // 4 BPE @ SW_64KB_2D_8xAA + { 18, 7, 7, 0, } , // 8 BPE @ SW_64KB_2D_8xAA + { 19, 8, 8, 0, } , // 16 BPE @ SW_64KB_2D_8xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_256KB_2D_1xAA_PATINFO[] = + { + { 0, 1, 1, 1, } , // 1 BPE 
@ SW_256KB_2D_1xAA + { 1, 2, 2, 2, } , // 2 BPE @ SW_256KB_2D_1xAA + { 2, 3, 3, 3, } , // 4 BPE @ SW_256KB_2D_1xAA + { 3, 4, 4, 4, } , // 8 BPE @ SW_256KB_2D_1xAA + { 4, 5, 5, 5, } , // 16 BPE @ SW_256KB_2D_1xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_256KB_2D_2xAA_PATINFO[] = + { + { 5, 2, 2, 2, } , // 1 BPE @ SW_256KB_2D_2xAA + { 6, 3, 3, 3, } , // 2 BPE @ SW_256KB_2D_2xAA + { 7, 4, 4, 4, } , // 4 BPE @ SW_256KB_2D_2xAA + { 8, 5, 5, 5, } , // 8 BPE @ SW_256KB_2D_2xAA + { 9, 6, 6, 6, } , // 16 BPE @ SW_256KB_2D_2xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_256KB_2D_4xAA_PATINFO[] = + { + { 10, 3, 3, 3, } , // 1 BPE @ SW_256KB_2D_4xAA + { 11, 4, 4, 4, } , // 2 BPE @ SW_256KB_2D_4xAA + { 12, 5, 5, 5, } , // 4 BPE @ SW_256KB_2D_4xAA + { 13, 6, 6, 6, } , // 8 BPE @ SW_256KB_2D_4xAA + { 14, 7, 7, 7, } , // 16 BPE @ SW_256KB_2D_4xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_256KB_2D_8xAA_PATINFO[] = + { + { 15, 4, 4, 4, } , // 1 BPE @ SW_256KB_2D_8xAA + { 16, 5, 5, 5, } , // 2 BPE @ SW_256KB_2D_8xAA + { 17, 6, 6, 6, } , // 4 BPE @ SW_256KB_2D_8xAA + { 18, 7, 7, 7, } , // 8 BPE @ SW_256KB_2D_8xAA + { 19, 8, 8, 8, } , // 16 BPE @ SW_256KB_2D_8xAA + }; + + const ADDR_SW_PATINFO GFX12_SW_4KB_3D_PATINFO[] = + { + { 20, 9, 0, 0, } , // 1 BPE @ SW_4KB_3D + { 21, 10, 0, 0, } , // 2 BPE @ SW_4KB_3D + { 22, 11, 0, 0, } , // 4 BPE @ SW_4KB_3D + { 23, 12, 0, 0, } , // 8 BPE @ SW_4KB_3D + { 24, 13, 0, 0, } , // 16 BPE @ SW_4KB_3D + }; + + const ADDR_SW_PATINFO GFX12_SW_64KB_3D_PATINFO[] = + { + { 20, 9, 9, 0, } , // 1 BPE @ SW_64KB_3D + { 21, 10, 10, 0, } , // 2 BPE @ SW_64KB_3D + { 22, 11, 11, 0, } , // 4 BPE @ SW_64KB_3D + { 23, 12, 12, 0, } , // 8 BPE @ SW_64KB_3D + { 24, 13, 13, 0, } , // 16 BPE @ SW_64KB_3D + }; + + const ADDR_SW_PATINFO GFX12_SW_256KB_3D_PATINFO[] = + { + { 20, 9, 9, 9, } , // 1 BPE @ SW_256KB_3D + { 21, 10, 10, 9, } , // 2 BPE @ SW_256KB_3D + { 22, 11, 11, 10, } , // 4 BPE @ SW_256KB_3D + { 23, 12, 12, 11, } , // 8 BPE @ SW_256KB_3D + { 24, 13, 13, 11, } , // 16 
BPE @ SW_256KB_3D + }; + + + const UINT_64 GFX12_SW_PATTERN_NIBBLE1[][8] = + { + {X0, X1, Y0, X2, Y1, Y2, X3, Y3, }, // 0 + {0, X0, Y0, X1, Y1, X2, Y2, X3, }, // 1 + {0, 0, X0, Y0, X1, Y1, X2, Y2, }, // 2 + {0, 0, 0, X0, Y0, X1, X2, Y1, }, // 3 + {0, 0, 0, 0, X0, Y0, X1, Y1, }, // 4 + {S0, X0, Y0, X1, Y1, X2, Y2, X3, }, // 5 + {0, S0, X0, Y0, X1, Y1, X2, Y2, }, // 6 + {0, 0, S0, X0, Y0, X1, Y1, X2, }, // 7 + {0, 0, 0, S0, X0, Y0, X1, Y1, }, // 8 + {0, 0, 0, 0, S0, X0, Y0, X1, }, // 9 + {S0, S1, X0, Y0, X1, Y1, X2, Y2, }, // 10 + {0, S0, S1, X0, Y0, X1, Y1, X2, }, // 11 + {0, 0, S0, S1, X0, Y0, X1, Y1, }, // 12 + {0, 0, 0, S0, S1, X0, Y0, X1, }, // 13 + {0, 0, 0, 0, S0, S1, X0, Y0, }, // 14 + {S0, S1, S2, X0, Y0, X1, Y1, X2, }, // 15 + {0, S0, S1, S2, X0, Y0, X1, Y1, }, // 16 + {0, 0, S0, S1, S2, X0, Y0, X1, }, // 17 + {0, 0, 0, S0, S1, S2, X0, Y0, }, // 18 + {0, 0, 0, 0, S0, S1, S2, X0, }, // 19 + {X0, X1, Z0, Y0, Y1, Z1, X2, Z2, }, // 20 + {0, X0, Z0, Y0, X1, Z1, Y1, Z2, }, // 21 + {0, 0, X0, Y0, X1, Z0, Y1, Z1, }, // 22 + {0, 0, 0, X0, Y0, Z0, X1, Z1, }, // 23 + {0, 0, 0, 0, X0, Z0, Y0, Z1, }, // 24 + }; + + const UINT_64 GFX12_SW_PATTERN_NIBBLE2[][4] = + { + {0, 0, 0, 0, }, // 0 + {Y4, X4, Y5, X5, }, // 1 + {Y3, X4, Y4, X5, }, // 2 + {Y3, X3, Y4, X4, }, // 3 + {Y2, X3, Y3, X4, }, // 4 + {Y2, X2, Y3, X3, }, // 5 + {Y1, X2, Y2, X3, }, // 6 + {Y1, X1, Y2, X2, }, // 7 + {Y0, X1, Y1, X2, }, // 8 + {Y2, X3, Z3, Y3, }, // 9 + {Y2, X2, Z3, Y3, }, // 10 + {Y2, X2, Z2, Y3, }, // 11 + {Y1, X2, Z2, Y2, }, // 12 + {Y1, X1, Z2, Y2, }, // 13 + }; + + const UINT_64 GFX12_SW_PATTERN_NIBBLE3[][4] = + { + {0, 0, 0, 0, }, // 0 + {Y6, X6, Y7, X7, }, // 1 + {Y5, X6, Y6, X7, }, // 2 + {Y5, X5, Y6, X6, }, // 3 + {Y4, X5, Y5, X6, }, // 4 + {Y4, X4, Y5, X5, }, // 5 + {Y3, X4, Y4, X5, }, // 6 + {Y3, X3, Y4, X4, }, // 7 + {Y2, X3, Y3, X4, }, // 8 + {X4, Z4, Y4, X5, }, // 9 + {X3, Z4, Y4, X4, }, // 10 + {X3, Z3, Y4, X4, }, // 11 + {X3, Z3, Y3, X4, }, // 12 + {X2, Z3, Y3, X3, }, // 13 + }; + 
+ const UINT_64 GFX12_SW_PATTERN_NIBBLE4[][2] = + { + {0, 0, }, // 0 + {Y8, X8, }, // 1 + {Y7, X8, }, // 2 + {Y7, X7, }, // 3 + {Y6, X7, }, // 4 + {Y6, X6, }, // 5 + {Y5, X6, }, // 6 + {Y5, X5, }, // 7 + {Y4, X5, }, // 8 + {Z5, Y5, }, // 9 + {Z4, Y5, }, // 10 + {Z4, Y4, }, // 11 + }; + +} // V3 +} // Addr + +#endif diff --git a/runtime/hsa-runtime/image/addrlib/src/gfx12/gfx12addrlib.cpp b/runtime/hsa-runtime/image/addrlib/src/gfx12/gfx12addrlib.cpp new file mode 100644 index 0000000000..509ede1e15 --- /dev/null +++ b/runtime/hsa-runtime/image/addrlib/src/gfx12/gfx12addrlib.cpp @@ -0,0 +1,1312 @@ +/* +************************************************************************************************************************ +* +* Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. +* SPDX-License-Identifier: MIT +* +***********************************************************************************************************************/ + +/** +************************************************************************************************************************ +* @file gfx12addrlib.cpp +* @brief Contain the implementation for the Gfx12Lib class. +************************************************************************************************************************ +*/ + +#include "gfx12addrlib.h" +#include "gfx12_gb_reg.h" + +#include "amdgpu_asic_addr.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace Addr +{ +/** +************************************************************************************************************************ +* Gfx12HwlInit +* +* @brief +* Creates an Gfx12Lib object. +* +* @return +* Returns an Gfx12Lib object pointer. 
+************************************************************************************************************************ +*/ +Addr::Lib* Gfx12HwlInit( + const Client* pClient) +{ + return V3::Gfx12Lib::CreateObj(pClient); +} + +namespace V3 +{ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Static Const Member +//////////////////////////////////////////////////////////////////////////////////////////////////// +const SwizzleModeFlags Gfx12Lib::SwizzleModeTable[ADDR3_MAX_TYPE] = +{//Linear 2d 3d 256B 4KB 64KB 256KB Reserved + {{1, 0, 0, 0, 0, 0, 0, 0}}, // ADDR3_LINEAR + {{0, 1, 0, 1, 0, 0, 0, 0}}, // ADDR3_256B_2D + {{0, 1, 0, 0, 1, 0, 0, 0}}, // ADDR3_4KB_2D + {{0, 1, 0, 0, 0, 1, 0, 0}}, // ADDR3_64KB_2D + {{0, 1, 0, 0, 0, 0, 1, 0}}, // ADDR3_256KB_2D + {{0, 0, 1, 0, 1, 0, 0, 0}}, // ADDR3_4KB_3D + {{0, 0, 1, 0, 0, 1, 0, 0}}, // ADDR3_64KB_3D + {{0, 0, 1, 0, 0, 0, 1, 0}}, // ADDR3_256KB_3D +}; + +const ADDR_EXTENT3D Gfx12Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}}; +const ADDR_EXTENT3D Gfx12Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}}; +const ADDR_EXTENT3D Gfx12Lib::Block256K_Log2_3d[] = {{6, 6, 6}, {5, 6, 6}, {5, 6, 5}, {5, 5, 5}, {4, 5, 5}}; + +/** +************************************************************************************************************************ +* Gfx12Lib::Gfx12Lib +* +* @brief +* Constructor +* +************************************************************************************************************************ +*/ +Gfx12Lib::Gfx12Lib( + const Client* pClient) + : + Lib(pClient), + m_numSwizzleBits(0) +{ + memset(&m_settings, 0, sizeof(m_settings)); + memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable)); +} + +/** +************************************************************************************************************************ +* Gfx12Lib::~Gfx12Lib +* +* @brief +* Destructor 
+************************************************************************************************************************ +*/ +Gfx12Lib::~Gfx12Lib() +{ +} + +/** +************************************************************************************************************************ +* Gfx12Lib::ConvertSwizzlePatternToEquation +* +* @brief +* Convert swizzle pattern to equation. +* +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx12Lib::ConvertSwizzlePatternToEquation( + UINT_32 elemLog2, ///< [in] element bytes log2 + Addr3SwizzleMode swMode, ///< [in] swizzle mode + const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern info + ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern + const +{ + ADDR_BIT_SETTING fullSwizzlePattern[Log2Size256K]; + GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern); + + const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern; + const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode, TRUE); + + pEquation->numBits = blockSizeLog2; + pEquation->stackedDepthSlices = FALSE; + + for (UINT_32 i = 0; i < elemLog2; i++) + { + pEquation->addr[i].channel = 0; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = i; + } + + for (UINT_32 i = elemLog2; i < blockSizeLog2; i++) + { + ADDR_ASSERT(IsPow2(pSwizzle[i].value)); + + if (pSwizzle[i].x != 0) + { + ADDR_ASSERT(IsPow2(static_cast(pSwizzle[i].x))); + + pEquation->addr[i].channel = 0; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2; + } + else if (pSwizzle[i].y != 0) + { + ADDR_ASSERT(IsPow2(static_cast(pSwizzle[i].y))); + + pEquation->addr[i].channel = 1; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = Log2(pSwizzle[i].y); + } + else if (pSwizzle[i].z != 0) + { + ADDR_ASSERT(IsPow2(static_cast(pSwizzle[i].z))); + + pEquation->addr[i].channel = 2; + pEquation->addr[i].valid = 1; + 
pEquation->addr[i].index = Log2(pSwizzle[i].z); + } + else if (pSwizzle[i].s != 0) + { + ADDR_ASSERT(IsPow2(static_cast(pSwizzle[i].s))); + + pEquation->addr[i].channel = 3; + pEquation->addr[i].valid = 1; + pEquation->addr[i].index = Log2(pSwizzle[i].s); + } + else + { + ADDR_ASSERT_ALWAYS(); + } + } +} + +/** +************************************************************************************************************************ +* Gfx12Lib::InitEquationTable +* +* @brief +* Initialize Equation table. +* +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx12Lib::InitEquationTable() +{ + memset(m_equationTable, 0, sizeof(m_equationTable)); + + for (UINT_32 swModeIdx = 0; swModeIdx < ADDR3_MAX_TYPE; swModeIdx++) + { + const Addr3SwizzleMode swMode = static_cast(swModeIdx); + + if (IsLinear(swMode)) + { + // Skip linear equation (data table is not useful for 2D/3D images-- only contains x-coordinate bits) + continue; + } + + const UINT_32 maxMsaa = Is2dSwizzle(swMode) ? 
MaxMsaaRateLog2 : 1; + + for (UINT_32 msaaIdx = 0; msaaIdx < maxMsaa; msaaIdx++) + { + for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++) + { + UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX; + const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, elemLog2, 1 << msaaIdx); + + if (pPatInfo != NULL) + { + ADDR_ASSERT(IsValidSwMode(swMode)); + + ADDR_EQUATION equation = {}; + + ConvertSwizzlePatternToEquation(elemLog2, swMode, pPatInfo, &equation); + + equationIndex = m_numEquations; + ADDR_ASSERT(equationIndex < NumSwizzlePatterns); + + m_equationTable[equationIndex] = equation; + m_numEquations++; + } + SetEquationTableEntry(swMode, msaaIdx, elemLog2, equationIndex); + } + } + } +} + +/** +************************************************************************************************************************ +* Gfx12Lib::GetBlockPixelDimensions +* +* @brief +* Returns the pixel dimensions of one block. +* +************************************************************************************************************************ +*/ +ADDR_EXTENT3D Gfx12Lib::GetBlockPixelDimensions( + Addr3SwizzleMode swizzleMode, + UINT_32 log2BytesPerPixel + ) const +{ + ADDR_EXTENT3D log2Dim = {}; + + switch (swizzleMode) + { + case ADDR3_4KB_3D: + log2Dim = Block4K_Log2_3d[log2BytesPerPixel]; + break; + case ADDR3_64KB_3D: + log2Dim = Block64K_Log2_3d[log2BytesPerPixel]; + break; + case ADDR3_256KB_3D: + log2Dim = Block256K_Log2_3d[log2BytesPerPixel]; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + return { 1u << log2Dim.width, 1u << log2Dim.height, 1u << log2Dim.depth }; +} + +/** +************************************************************************************************************************ +* Gfx12Lib::GetMipOrigin +* +* @brief +* Internal function to calculate origins of the mip levels +* +* @return +* ADDR_E_RETURNCODE 
+************************************************************************************************************************ +*/ +VOID Gfx12Lib::GetMipOrigin( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + const ADDR_EXTENT3D& mipExtentFirstInTail, + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + const BOOL_32 is3d = Is3dSwizzle(pIn->swizzleMode); + const UINT_32 bytesPerPixel = pIn->bpp >> 3; + const UINT_32 log2Bpp = Log2(bytesPerPixel); + const ADDR_EXTENT3D pixelBlockDims = GetBlockPixelDimensions(ADDR3_4KB_3D, log2Bpp); + const ADDR_EXTENT3D tailMaxDim = GetMipTailDim(pIn->swizzleMode, + pOut->blockExtent); + const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode); + const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(pIn->swizzleMode, blockSizeLog2); + + UINT_32 pitch = tailMaxDim.width; + UINT_32 height = tailMaxDim.height; + + UINT_32 depth = (is3d ? PowTwoAlign(mipExtentFirstInTail.depth, pixelBlockDims.depth) : 1); + + const UINT_32 tailMaxDepth = (is3d ? (depth / pixelBlockDims.depth) : 1); + + for (UINT_32 i = pOut->firstMipIdInTail; i < pIn->numMipLevels; i++) + { + INT_32 mipInTail = static_cast(i) - static_cast(pOut->firstMipIdInTail); + if ((mipInTail < 0) || (pIn->numMipLevels == 1)) + { + mipInTail = MaxMipLevels; + } + + // "m" can be negative + const INT_32 signedM = static_cast(maxMipsInTail) - static_cast(1) - mipInTail; + const UINT_32 m = Max(0, signedM); + const UINT_32 mipOffset = (m > 6) ? 
(16 << m) : (m << 8); + + pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth; + pOut->pMipInfo[i].mipTailOffset = mipOffset; + pOut->pMipInfo[i].macroBlockOffset = 0; + + pOut->pMipInfo[i].pitch = pitch; + pOut->pMipInfo[i].height = height; + pOut->pMipInfo[i].depth = depth; + + if (IsLinear(pIn->swizzleMode)) + { + pOut->pMipInfo[i].mipTailCoordX = mipOffset >> 8; + pOut->pMipInfo[i].mipTailCoordY = 0; + pOut->pMipInfo[i].mipTailCoordZ = 0; + + pitch = Max(pitch >> 1, 1u); + } + else + { + UINT_32 mipX = ((mipOffset >> 9) & 1) | + ((mipOffset >> 10) & 2) | + ((mipOffset >> 11) & 4) | + ((mipOffset >> 12) & 8) | + ((mipOffset >> 13) & 16) | + ((mipOffset >> 14) & 32); + UINT_32 mipY = ((mipOffset >> 8) & 1) | + ((mipOffset >> 9) & 2) | + ((mipOffset >> 10) & 4) | + ((mipOffset >> 11) & 8) | + ((mipOffset >> 12) & 16) | + ((mipOffset >> 13) & 32); + + if (is3d == FALSE) + { + pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[log2Bpp].w; + pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[log2Bpp].h; + pOut->pMipInfo[i].mipTailCoordZ = 0; + + pitch = Max(pitch >> 1, Block256_2d[log2Bpp].w); + height = Max(height >> 1, Block256_2d[log2Bpp].h); + depth = 1; + } + else + { + pOut->pMipInfo[i].mipTailCoordX = mipX * pixelBlockDims.width; + pOut->pMipInfo[i].mipTailCoordY = mipY * pixelBlockDims.height; + pOut->pMipInfo[i].mipTailCoordZ = 0; + + pitch = Max(pitch >> 1, pixelBlockDims.width); + height = Max(height >> 1, pixelBlockDims.height); + depth = PowTwoAlign(Max(depth >> 1, 1u), pixelBlockDims.depth); + } + } + } +} + +/** +************************************************************************************************************************ +* Gfx12Lib::GetMipOffset +* +* @brief +* Internal function to calculate alignment for a surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +VOID Gfx12Lib::GetMipOffset( + const 
ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + const UINT_32 bytesPerPixel = pIn->bpp >> 3; + const UINT_32 log2Bpp = Log2(bytesPerPixel); + const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode); + const UINT_32 blockSize = 1 << blockSizeLog2; + const ADDR_EXTENT3D tailMaxDim = GetMipTailDim(pIn->swizzleMode, + pOut->blockExtent); + const ADDR_EXTENT3D mip0Dims = GetBaseMipExtents(pIn); + const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(pIn->swizzleMode, blockSizeLog2); + + UINT_32 firstMipInTail = pIn->numMipLevels; + UINT_64 mipChainSliceSize = 0; + UINT_64 mipSize[MaxMipLevels]; + UINT_64 mipSliceSize[MaxMipLevels]; + + const ADDR_EXTENT3D fixedTailMaxDim = tailMaxDim; + + for (UINT_32 mipIdx = 0; mipIdx < pIn->numMipLevels; mipIdx++) + { + const ADDR_EXTENT3D mipExtents = GetMipExtent(mip0Dims, mipIdx); + + if (SupportsMipTail(pIn->swizzleMode) && + IsInMipTail(fixedTailMaxDim, mipExtents, maxMipsInTail, pIn->numMipLevels - mipIdx)) + { + firstMipInTail = mipIdx; + mipChainSliceSize += blockSize / pOut->blockExtent.depth; + break; + } + else + { + const UINT_32 pitch = UseCustomPitch(pIn) + ? pOut->pitch + : ((mipIdx == 0) && CanTrimLinearPadding(pIn)) + ? PowTwoAlign(mipExtents.width, 128u / bytesPerPixel) + : PowTwoAlign(mipExtents.width, pOut->blockExtent.width); + const UINT_32 height = UseCustomHeight(pIn) + ? pOut->height + : PowTwoAlign(mipExtents.height, pOut->blockExtent.height); + const UINT_32 depth = PowTwoAlign(mipExtents.depth, pOut->blockExtent.depth); + + // The original "blockExtent" calculation does subtraction of logs (i.e., division) to get the + // sizes. We aligned our pitch and height to those sizes, which means we need to multiply the various + // factors back together to get back to the slice size. 
+ const UINT_64 sliceSize = static_cast(pitch) * height * pIn->numSamples * (pIn->bpp >> 3); + + mipSize[mipIdx] = sliceSize * depth; + mipSliceSize[mipIdx] = sliceSize * pOut->blockExtent.depth; + mipChainSliceSize += sliceSize; + + if (pOut->pMipInfo != NULL) + { + pOut->pMipInfo[mipIdx].pitch = pitch; + pOut->pMipInfo[mipIdx].height = height; + pOut->pMipInfo[mipIdx].depth = depth; + + // The slice size of a linear image was calculated above as if the "pitch" is 256 byte aligned. + // However, the rendering pitch is aligned to 128 bytes, and that is what needs to be reported + // to our clients. + if (IsLinear(pIn->swizzleMode)) + { + pOut->pMipInfo[mipIdx].pitch = PowTwoAlign(mipExtents.width, 128u / bytesPerPixel); + } + } + } + } + + pOut->sliceSize = mipChainSliceSize; + pOut->surfSize = mipChainSliceSize * pOut->numSlices; + pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE; + pOut->firstMipIdInTail = firstMipInTail; + + if (pOut->pMipInfo != NULL) + { + if (IsLinear(pIn->swizzleMode)) + { + // 1. Linear swizzle mode doesn't have miptails. + // 2. The organization of a linear 3D mipmap resource is the same as on GFX11, so we use the mip slice size to + // calculate the mip offset. 
+ ADDR_ASSERT(firstMipInTail == pIn->numMipLevels); + + UINT_64 sliceSize = 0; + + for (INT_32 i = static_cast(pIn->numMipLevels) - 1; i >= 0; i--) + { + pOut->pMipInfo[i].offset = sliceSize; + pOut->pMipInfo[i].macroBlockOffset = sliceSize; + pOut->pMipInfo[i].mipTailOffset = 0; + + sliceSize += mipSliceSize[i]; + } + } + else + { + UINT_64 offset = 0; + UINT_64 macroBlkOffset = 0; + UINT_32 tailMaxDepth = 0; + + ADDR_EXTENT3D mipExtentFirstInTail = {}; + if (firstMipInTail != pIn->numMipLevels) + { + mipExtentFirstInTail = GetMipExtent(mip0Dims, firstMipInTail); + + offset = blockSize * + PowTwoAlign(mipExtentFirstInTail.depth, + pOut->blockExtent.depth) / pOut->blockExtent.depth; + macroBlkOffset = blockSize; + } + + for (INT_32 i = firstMipInTail - 1; i >= 0; i--) + { + pOut->pMipInfo[i].offset = offset; + pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset; + pOut->pMipInfo[i].mipTailOffset = 0; + + offset += mipSize[i]; + macroBlkOffset += mipSliceSize[i]; + } + + GetMipOrigin(pIn, mipExtentFirstInTail, pOut); + } + } +} + +/** +************************************************************************************************************************ +* Gfx12Lib::HwlComputeSurfaceInfo +* +* @brief +* Internal function to calculate alignment for a surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceInfo( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ComputeBlockDimensionForSurf(&pOut->blockExtent, + pIn->bpp, + pIn->numSamples, + pIn->swizzleMode); + + ADDR_E_RETURNCODE returnCode = ApplyCustomizedPitchHeight(pIn, pOut); + + if (returnCode == ADDR_OK) + { + pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockExtent.depth); + pOut->baseAlign = 1 << GetBlockSizeLog2(pIn->swizzleMode); + + 
GetMipOffset(pIn, pOut); + + SanityCheckSurfSize(pIn, pOut); + + // Slices must be exact multiples of the block sizes. However: + // - with 3D images, one block will contain multiple slices, so that needs to be taken into account. + // - with linear images that have only one slice, we may trim and use the pitch alignment for size. + ADDR_ASSERT(((pOut->sliceSize * pOut->blockExtent.depth) % + GetBlockSize(pIn->swizzleMode, CanTrimLinearPadding(pIn))) == 0); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx12Lib::GetBaseMipExtents +* +* @brief +* Return the size of the base mip level in a nice cozy little structure. +* +************************************************************************************************************************ +*/ +ADDR_EXTENT3D Gfx12Lib::GetBaseMipExtents( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn + ) const +{ + return { pIn->width, + pIn->height, + (IsTex3d(pIn->resourceType) ? pIn->numSlices : 1) }; // slices is depth for 3d +} + +/** +************************************************************************************************************************ +* Gfx12Lib::GetMaxNumMipsInTail +* +* @brief +* Return max number of mips in tails +* +* @return +* Max number of mips in tails +************************************************************************************************************************ +*/ +UINT_32 Gfx12Lib::GetMaxNumMipsInTail( + Addr3SwizzleMode swizzleMode, + UINT_32 blockSizeLog2 ///< block size log2 + ) const +{ + UINT_32 effectiveLog2 = blockSizeLog2; + UINT_32 mipsInTail = 1; + + if (Is3dSwizzle(swizzleMode)) + { + effectiveLog2 -= (blockSizeLog2 - 8) / 3; + } + + if (effectiveLog2 > 8) + { + mipsInTail = (effectiveLog2 <= 11) ? 
(1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4); + } + + return mipsInTail; +} + +/** +************************************************************************************************************************ +* Gfx12Lib::HwlComputeSurfaceAddrFromCoordTiled +* +* @brief +* Internal function to calculate address from coord for tiled swizzle surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSurfaceAddrFromCoordTiled( + const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + // 256B block cannot support 3D image. + ADDR_ASSERT((IsTex3d(pIn->resourceType) && IsBlock256b(pIn->swizzleMode)) == FALSE); + + ADDR3_COMPUTE_SURFACE_INFO_INPUT localIn = {}; + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {}; + ADDR3_MIP_INFO mipInfo[MaxMipLevels]; + + localIn.size = sizeof(localIn); + localIn.flags = pIn->flags; + localIn.swizzleMode = pIn->swizzleMode; + localIn.resourceType = pIn->resourceType; + localIn.format = ADDR_FMT_INVALID; + localIn.bpp = pIn->bpp; + localIn.width = Max(pIn->unAlignedDims.width, 1u); + localIn.height = Max(pIn->unAlignedDims.height, 1u); + localIn.numSlices = Max(pIn->unAlignedDims.depth, 1u); + localIn.numMipLevels = Max(pIn->numMipLevels, 1u); + localIn.numSamples = Max(pIn->numSamples, 1u); + + localOut.size = sizeof(localOut); + localOut.pMipInfo = mipInfo; + + ADDR_E_RETURNCODE ret = ComputeSurfaceInfo(&localIn, &localOut); + + if (ret == ADDR_OK) + { + const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); + const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode); + const UINT_32 eqIndex = GetEquationTableEntry(pIn->swizzleMode, Log2(localIn.numSamples), elemLog2); + + if (eqIndex != ADDR_INVALID_EQUATION_INDEX) + { + const BOOL_32 inTail = 
((mipInfo[pIn->mipId].mipTailOffset != 0) && (blkSizeLog2 != Log2Size256)); + const BOOL_32 is3dNoMsaa = ((IsTex3d(pIn->resourceType) == TRUE) && (localIn.numSamples == 1)); + const UINT_64 sliceSize = is3dNoMsaa ? (localOut.sliceSize * localOut.blockExtent.depth) + : localOut.sliceSize; + const UINT_32 sliceId = is3dNoMsaa ? (pIn->slice / localOut.blockExtent.depth) : pIn->slice; + const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x; + const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y; + const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice; + const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockExtent.width; + const UINT_32 yb = pIn->y / localOut.blockExtent.height; + const UINT_32 xb = pIn->x / localOut.blockExtent.width; + const UINT_64 blkIdx = yb * pb + xb; + const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex], + x << elemLog2, + y, + z, + pIn->sample); + pOut->addr = sliceSize * sliceId + + mipInfo[pIn->mipId].macroBlockOffset + + (blkIdx << blkSizeLog2) + + blkOffset; + } + else + { + ret = ADDR_INVALIDPARAMS; + } + } + + return ret; +} + +/** +************************************************************************************************************************ +* Gfx12Lib::HwlComputePipeBankXor +* +* @brief +* Generate a PipeBankXor value to be ORed into bits above numSwizzleBits of address +* +* @return +* PipeBankXor value +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx12Lib::HwlComputePipeBankXor( + const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure + ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure + ) const +{ + if ((m_numSwizzleBits != 0) && // does this configuration support swizzling + // base address XOR in GFX12 will be applied to all blk_size = 4KB, 64KB, or 256KB swizzle modes, + 
// Note that Linear and 256B are excluded. + (IsLinear(pIn->swizzleMode) == FALSE) && + (IsBlock256b(pIn->swizzleMode) == FALSE)) + { + pOut->pipeBankXor = pIn->surfIndex % (1 << m_numSwizzleBits); + } + else + { + pOut->pipeBankXor = 0; + } + + return ADDR_OK; +} + +/** +************************************************************************************************************************ +* Gfx12Lib::ComputeOffsetFromEquation +* +* @brief +* Compute offset from equation +* +* @return +* Offset +************************************************************************************************************************ +*/ +UINT_32 Gfx12Lib::ComputeOffsetFromEquation( + const ADDR_EQUATION* pEq, ///< Equation + UINT_32 x, ///< x coord in bytes + UINT_32 y, ///< y coord in pixel + UINT_32 z, ///< z coord in slice + UINT_32 s ///< MSAA sample index + ) const +{ + UINT_32 offset = 0; + + for (UINT_32 i = 0; i < pEq->numBits; i++) + { + UINT_32 v = 0; + + if (pEq->addr[i].valid) + { + if (pEq->addr[i].channel == 0) + { + v ^= (x >> pEq->addr[i].index) & 1; + } + else if (pEq->addr[i].channel == 1) + { + v ^= (y >> pEq->addr[i].index) & 1; + } + else if (pEq->addr[i].channel == 2) + { + v ^= (z >> pEq->addr[i].index) & 1; + } + else if (pEq->addr[i].channel == 3) + { + v ^= (s >> pEq->addr[i].index) & 1; + } + else + { + ADDR_ASSERT_ALWAYS(); + } + } + + offset |= (v << i); + } + + return offset; +} + +/** +************************************************************************************************************************ +* Gfx12Lib::GetSwizzlePatternInfo +* +* @brief +* Get swizzle pattern +* +* @return +* Swizzle pattern information +************************************************************************************************************************ +*/ +const ADDR_SW_PATINFO* Gfx12Lib::GetSwizzlePatternInfo( + Addr3SwizzleMode swizzleMode, ///< Swizzle mode + UINT_32 elemLog2, ///< Element size in bytes log2 + UINT_32 numFrag ///< Number of fragment + ) const +{ 
+ const ADDR_SW_PATINFO* patInfo = NULL; + + if (Is2dSwizzle(swizzleMode) == FALSE) + { + ADDR_ASSERT(numFrag == 1); + } + + switch (swizzleMode) + { + case ADDR3_256KB_2D: + switch (numFrag) + { + case 1: + patInfo = GFX12_SW_256KB_2D_1xAA_PATINFO; + break; + case 2: + patInfo = GFX12_SW_256KB_2D_2xAA_PATINFO; + break; + case 4: + patInfo = GFX12_SW_256KB_2D_4xAA_PATINFO; + break; + case 8: + patInfo = GFX12_SW_256KB_2D_8xAA_PATINFO; + break; + default: + ADDR_ASSERT_ALWAYS(); + } + break; + case ADDR3_256KB_3D: + patInfo = GFX12_SW_256KB_3D_PATINFO; + break; + case ADDR3_64KB_2D: + switch (numFrag) + { + case 1: + patInfo = GFX12_SW_64KB_2D_1xAA_PATINFO; + break; + case 2: + patInfo = GFX12_SW_64KB_2D_2xAA_PATINFO; + break; + case 4: + patInfo = GFX12_SW_64KB_2D_4xAA_PATINFO; + break; + case 8: + patInfo = GFX12_SW_64KB_2D_8xAA_PATINFO; + break; + default: + ADDR_ASSERT_ALWAYS(); + } + break; + case ADDR3_64KB_3D: + patInfo = GFX12_SW_64KB_3D_PATINFO; + break; + case ADDR3_4KB_2D: + switch (numFrag) + { + case 1: + patInfo = GFX12_SW_4KB_2D_1xAA_PATINFO; + break; + case 2: + patInfo = GFX12_SW_4KB_2D_2xAA_PATINFO; + break; + case 4: + patInfo = GFX12_SW_4KB_2D_4xAA_PATINFO; + break; + case 8: + patInfo = GFX12_SW_4KB_2D_8xAA_PATINFO; + break; + default: + ADDR_ASSERT_ALWAYS(); + } + break; + case ADDR3_4KB_3D: + patInfo = GFX12_SW_4KB_3D_PATINFO; + break; + case ADDR3_256B_2D: + switch (numFrag) + { + case 1: + patInfo = GFX12_SW_256B_2D_1xAA_PATINFO; + break; + case 2: + patInfo = GFX12_SW_256B_2D_2xAA_PATINFO; + break; + case 4: + patInfo = GFX12_SW_256B_2D_4xAA_PATINFO; + break; + case 8: + patInfo = GFX12_SW_256B_2D_8xAA_PATINFO; + break; + default: + break; + } + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + return (patInfo != NULL) ? 
&patInfo[elemLog2] : NULL; +} +/** +************************************************************************************************************************ +* Gfx12Lib::HwlInitGlobalParams +* +* @brief +* Initializes global parameters +* +* @return +* TRUE if all settings are valid +* +************************************************************************************************************************ +*/ +BOOL_32 Gfx12Lib::HwlInitGlobalParams( + const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input +{ + BOOL_32 valid = TRUE; + GB_ADDR_CONFIG_GFX12 gbAddrConfig; + + gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig; + + switch (gbAddrConfig.bits.NUM_PIPES) + { + case ADDR_CONFIG_1_PIPE: + m_pipesLog2 = 0; + break; + case ADDR_CONFIG_2_PIPE: + m_pipesLog2 = 1; + break; + case ADDR_CONFIG_4_PIPE: + m_pipesLog2 = 2; + break; + case ADDR_CONFIG_8_PIPE: + m_pipesLog2 = 3; + break; + case ADDR_CONFIG_16_PIPE: + m_pipesLog2 = 4; + break; + case ADDR_CONFIG_32_PIPE: + m_pipesLog2 = 5; + break; + case ADDR_CONFIG_64_PIPE: + m_pipesLog2 = 6; + break; + default: + ADDR_ASSERT_ALWAYS(); + valid = FALSE; + break; + } + + switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE) + { + case ADDR_CONFIG_PIPE_INTERLEAVE_256B: + m_pipeInterleaveLog2 = 8; + break; + case ADDR_CONFIG_PIPE_INTERLEAVE_512B: + m_pipeInterleaveLog2 = 9; + break; + case ADDR_CONFIG_PIPE_INTERLEAVE_1KB: + m_pipeInterleaveLog2 = 10; + break; + case ADDR_CONFIG_PIPE_INTERLEAVE_2KB: + m_pipeInterleaveLog2 = 11; + break; + default: + ADDR_ASSERT_ALWAYS(); + valid = FALSE; + break; + } + + m_numSwizzleBits = ((m_pipesLog2 >= 3) ? m_pipesLog2 - 2 : 0); + + if (valid) + { + InitEquationTable(); + } + + return valid; +} + +/** +************************************************************************************************************************ +* Gfx12Lib::HwlComputeNonBlockCompressedView +* +* @brief +* Compute non-block-compressed view for a given mipmap level/slice. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx12Lib::HwlComputeNonBlockCompressedView( + const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, ///< [in] input structure + ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (((pIn->format < ADDR_FMT_ASTC_4x4) || (pIn->format > ADDR_FMT_ETC2_128BPP)) && + ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7))) + { + // Only support BC1~BC7, ASTC, or ETC2 for now... + returnCode = ADDR_NOTSUPPORTED; + } + else + { + UINT_32 bcWidth, bcHeight; + const UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight); + + ADDR3_COMPUTE_SURFACE_INFO_INPUT infoIn = {}; + infoIn.size = sizeof(infoIn); + infoIn.flags = pIn->flags; + infoIn.swizzleMode = pIn->swizzleMode; + infoIn.resourceType = pIn->resourceType; + infoIn.format = pIn->format; + infoIn.bpp = bpp; + infoIn.width = RoundUpQuotient(pIn->unAlignedDims.width, bcWidth); + infoIn.height = RoundUpQuotient(pIn->unAlignedDims.height, bcHeight); + infoIn.numSlices = pIn->unAlignedDims.depth; + infoIn.numMipLevels = pIn->numMipLevels; + infoIn.numSamples = 1; + + ADDR3_MIP_INFO mipInfo[MaxMipLevels] = {}; + + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {}; + infoOut.size = sizeof(infoOut); + infoOut.pMipInfo = mipInfo; + + returnCode = HwlComputeSurfaceInfo(&infoIn, &infoOut); + + if (returnCode == ADDR_OK) + { + ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {}; + subOffIn.size = sizeof(subOffIn); + subOffIn.swizzleMode = infoIn.swizzleMode; + subOffIn.resourceType = infoIn.resourceType; + subOffIn.pipeBankXor = pIn->pipeBankXor; + subOffIn.slice = pIn->slice; + subOffIn.sliceSize = infoOut.sliceSize; + subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset; + subOffIn.mipTailOffset = 
mipInfo[pIn->mipId].mipTailOffset; + + ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {}; + subOffOut.size = sizeof(subOffOut); + + // For any mipmap level, move nonBc view base address by offset + HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut); + pOut->offset = subOffOut.offset; + + ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {}; + slicePbXorIn.size = sizeof(slicePbXorIn); + slicePbXorIn.swizzleMode = infoIn.swizzleMode; + slicePbXorIn.resourceType = infoIn.resourceType; + slicePbXorIn.bpe = infoIn.bpp; + slicePbXorIn.basePipeBankXor = pIn->pipeBankXor; + slicePbXorIn.slice = pIn->slice; + slicePbXorIn.numSamples = 1; + + ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {}; + slicePbXorOut.size = sizeof(slicePbXorOut); + + // For any mipmap level, nonBc view should use computed pbXor + HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut); + pOut->pipeBankXor = slicePbXorOut.pipeBankXor; + + const BOOL_32 tiled = (pIn->swizzleMode != ADDR3_LINEAR); + const BOOL_32 inTail = tiled && (pIn->mipId >= infoOut.firstMipIdInTail); + const UINT_32 requestMipWidth = + RoundUpQuotient(Max(pIn->unAlignedDims.width >> pIn->mipId, 1u), bcWidth); + const UINT_32 requestMipHeight = + RoundUpQuotient(Max(pIn->unAlignedDims.height >> pIn->mipId, 1u), bcHeight); + + if (inTail) + { + // For mipmap level that is in mip tail block, hack a lot of things... + // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels + // are fit in tail block: + + // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain) + pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail; + + // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!) 
+ pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u); + + // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold + pOut->unAlignedDims.width = Min(requestMipWidth << pOut->mipId, infoOut.blockExtent.width / 2); + + // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold + pOut->unAlignedDims.height = Min(requestMipHeight << pOut->mipId, infoOut.blockExtent.height); + } + // This check should cover at least mipId == 0 + else if ((requestMipWidth << pIn->mipId) == infoIn.width) + { + // For mipmap level [N] that is not in mip tail block and downgraded without losing element: + // - only one mipmap level and mipId = 0 + pOut->mipId = 0; + pOut->numMipLevels = 1; + + // (mip0) width = requestMipWidth + pOut->unAlignedDims.width = requestMipWidth; + + // (mip0) height = requestMipHeight + pOut->unAlignedDims.height = requestMipHeight; + } + else + { + // For mipmap level [N] that is not in mip tail block and downgraded with element losing, + // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed, + // because single mip view may have different pitch value than original (multiple) mip view... + // A simple case would be: + // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40] + // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view + // mip0 width = 0x101/mip1 width = 0x80 + // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in + // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes. 
+ + // - 2 levels and mipId = 1 + pOut->mipId = 1; + pOut->numMipLevels = 2; + + const UINT_32 upperMipWidth = + RoundUpQuotient(Max(pIn->unAlignedDims.width >> (pIn->mipId - 1), 1u), bcWidth); + const UINT_32 upperMipHeight = + RoundUpQuotient(Max(pIn->unAlignedDims.height >> (pIn->mipId - 1), 1u), bcHeight); + + const BOOL_32 needToAvoidInTail = tiled && + (requestMipWidth <= infoOut.blockExtent.width / 2) && + (requestMipHeight <= infoOut.blockExtent.height); + + const UINT_32 hwMipWidth = + PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockExtent.width); + const UINT_32 hwMipHeight = + PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockExtent.height); + + const BOOL_32 needExtraWidth = + ((upperMipWidth < requestMipWidth * 2) || + ((upperMipWidth == requestMipWidth * 2) && + ((needToAvoidInTail == TRUE) || + (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockExtent.width))))); + + const BOOL_32 needExtraHeight = + ((upperMipHeight < requestMipHeight * 2) || + ((upperMipHeight == requestMipHeight * 2) && + ((needToAvoidInTail == TRUE) || + (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockExtent.height))))); + + // (mip0) width = requestLastMipLevelWidth + pOut->unAlignedDims.width = upperMipWidth + (needExtraWidth ? 1: 0); + + // (mip0) height = requestLastMipLevelHeight + pOut->unAlignedDims.height = upperMipHeight + (needExtraHeight ? 
1: 0); + } + + // Assert the downgrading from this mip[0] width would still generate correct mip[N] width + ADDR_ASSERT(ShiftRight(pOut->unAlignedDims.width, pOut->mipId) == requestMipWidth); + // Assert the downgrading from this mip[0] height would still generate correct mip[N] height + ADDR_ASSERT(ShiftRight(pOut->unAlignedDims.height, pOut->mipId) == requestMipHeight); + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx12Lib::HwlComputeSubResourceOffsetForSwizzlePattern +* +* @brief +* Compute sub resource offset to support swizzle pattern +* +* @return +* VOID +************************************************************************************************************************ +*/ +VOID Gfx12Lib::HwlComputeSubResourceOffsetForSwizzlePattern( + const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure + ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure + ) const +{ + pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset; +} + +/** +************************************************************************************************************************ +* Gfx12Lib::HwlComputeSlicePipeBankXor +* +* @brief +* Generate slice PipeBankXor value based on base PipeBankXor value and slice id +* +* @return +* PipeBankXor value +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx12Lib::HwlComputeSlicePipeBankXor( + const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure + ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + // PipeBankXor is only applied to 4KB, 64KB and 256KB on GFX12. 
+ if ((IsLinear(pIn->swizzleMode) == FALSE) && (IsBlock256b(pIn->swizzleMode) == FALSE)) + { + if (pIn->bpe == 0) + { + // Require a valid bytes-per-element value passed from client... + returnCode = ADDR_INVALIDPARAMS; + } + else + { + const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode, + Log2(pIn->bpe >> 3), + 1); + + if (pPatInfo != NULL) + { + const UINT_32 elemLog2 = Log2(pIn->bpe >> 3); + const UINT_32 eqIndex = GetEquationTableEntry(pIn->swizzleMode, Log2(pIn->numSamples), elemLog2); + + const UINT_32 pipeBankXorOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex], + 0, + 0, + pIn->slice, + 0); + + const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2; + + // Should have no bit set under pipe interleave + ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset); + + pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor; + } + else + { + // Should never come here... + ADDR_NOT_IMPLEMENTED(); + + returnCode = ADDR_NOTSUPPORTED; + } + } + } + else + { + pOut->pipeBankXor = 0; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx12Lib::SanityCheckSurfSize +* +* @brief +* Calculate the surface size via the exact hardware algorithm to see if it matches. +* +* @return +************************************************************************************************************************ +*/ +void Gfx12Lib::SanityCheckSurfSize( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, + const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut + ) const +{ +#if DEBUG + // Verify that the requested image size is valid for the below algorithm. The below code includes + // implicit assumptions about the surface dimensions being less than "MaxImageDim"; otherwise, it can't + // calculate "firstMipInTail" accurately and the below assertion will trip incorrectly. 
+ // + // Surfaces destined for use only on the SDMA engine can exceed the gfx-engine-imposed limitations of + // the "maximum" image dimensions. + if ((pIn->width <= MaxImageDim) && + (pIn->height <= MaxImageDim) && + (pIn->numMipLevels <= MaxMipLevels) && + (UseCustomPitch(pIn) == FALSE) && + (UseCustomHeight(pIn) == FALSE) && + // HiZS surfaces have a reduced image size (i.e,. each pixel represents an 8x8 region of the parent + // image, at least for single samples) but they still have the same number of mip levels as the + // parent image. This disconnect produces false assertions below as the image size doesn't apparently + // support the specified number of mip levels. + ((pIn->flags.hiZHiS == 0) || (pIn->numMipLevels == 1))) + { + UINT_32 lastMipSize = 1; + UINT_32 dataChainSize = 0; + + const ADDR_EXTENT3D mip0Dims = GetBaseMipExtents(pIn); + const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode); + const ADDR_EXTENT3D tailMaxDim = GetMipTailDim(pIn->swizzleMode, pOut->blockExtent); + const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(pIn->swizzleMode, blockSizeLog2); + + UINT_32 firstMipInTail = 0; + for (INT_32 mipIdx = MaxMipLevels - 1; mipIdx >= 0; mipIdx--) + { + const ADDR_EXTENT3D mipExtents = GetMipExtent(mip0Dims, mipIdx); + + if ((mipExtents.width <= tailMaxDim.width) && + (mipExtents.height <= tailMaxDim.height) && + ((static_cast(pIn->numMipLevels) - mipIdx) < static_cast(maxMipsInTail))) + { + firstMipInTail = mipIdx; + } + } + + for (INT_32 mipIdx = firstMipInTail - 1; mipIdx >= -1; mipIdx--) + { + const ADDR_EXTENT3D mipExtents = GetMipExtent(mip0Dims, mipIdx); + const UINT_32 mipBlockWidth = ShiftCeil(mipExtents.width, Log2(pOut->blockExtent.width)); + const UINT_32 mipBlockHeight = ShiftCeil(mipExtents.height, Log2(pOut->blockExtent.height)); + + if (mipIdx < (static_cast(pIn->numMipLevels) - 1)) + { + dataChainSize += lastMipSize; + } + + if (mipIdx >= 0) + { + lastMipSize = 4 * lastMipSize + - ((mipBlockWidth & 1) ? 
mipBlockHeight : 0) + - ((mipBlockHeight & 1) ? mipBlockWidth : 0) + - ((mipBlockWidth & mipBlockHeight & 1) ? 1 : 0); + } + } + + if (CanTrimLinearPadding(pIn)) + { + ADDR_ASSERT((pOut->sliceSize * pOut->blockExtent.depth) <= (dataChainSize << blockSizeLog2)); + } + else + { + ADDR_ASSERT((pOut->sliceSize * pOut->blockExtent.depth) == (dataChainSize << blockSizeLog2)); + } + } +#endif +} + +} // V3 +} // Addr diff --git a/runtime/hsa-runtime/image/addrlib/src/gfx12/gfx12addrlib.h b/runtime/hsa-runtime/image/addrlib/src/gfx12/gfx12addrlib.h new file mode 100644 index 0000000000..c56fe719b0 --- /dev/null +++ b/runtime/hsa-runtime/image/addrlib/src/gfx12/gfx12addrlib.h @@ -0,0 +1,217 @@ +/* +************************************************************************************************************************ +* +* Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. +* SPDX-License-Identifier: MIT +* +***********************************************************************************************************************/ + +/** +************************************************************************************************************************ +* @file gfx12addrlib.h +* @brief Contains the Gfx12Lib class definition. +************************************************************************************************************************ +*/ + +#ifndef __GFX12_ADDR_LIB_H__ +#define __GFX12_ADDR_LIB_H__ + +#include "addrlib3.h" +#include "coord.h" +#include "gfx12SwizzlePattern.h" + +namespace Addr +{ +namespace V3 +{ + +/** +************************************************************************************************************************ +* @brief GFX12 specific settings structure. 
+************************************************************************************************************************ +*/ +struct Gfx12ChipSettings +{ + struct + { + // Misc configuration bits + UINT_32 reserved : 32; + }; +}; + +/** +************************************************************************************************************************ +* @brief GFX12 data surface type. +************************************************************************************************************************ +*/ + +/** +************************************************************************************************************************ +* @brief This class is the GFX12 specific address library +* function set. +************************************************************************************************************************ +*/ +class Gfx12Lib : public Lib +{ +public: + /// Creates Gfx12Lib object + static Addr::Lib* CreateObj(const Client* pClient) + { + VOID* pMem = Object::ClientAlloc(sizeof(Gfx12Lib), pClient); + return (pMem != NULL) ? new (pMem) Gfx12Lib(pClient) : NULL; + } + +protected: + Gfx12Lib(const Client* pClient); + virtual ~Gfx12Lib(); + + // Meta surfaces such as Hi-S/Z are essentially images on GFX12, so just return the max + // image alignment. 
+ virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const { return 256 * 1024; } + + UINT_32 GetMaxNumMipsInTail( + Addr3SwizzleMode swizzleMode, + UINT_32 blockSizeLog2) const; + + BOOL_32 IsInMipTail( + const ADDR_EXTENT3D& mipTailDim, + const ADDR_EXTENT3D& mipDims, + UINT_32 maxNumMipsInTail, + UINT_32 numMipsToTheEnd) const + { + BOOL_32 inTail = ((mipDims.width <= mipTailDim.width) && + (mipDims.height <= mipTailDim.height) && + (numMipsToTheEnd <= maxNumMipsInTail)); + + return inTail; + } + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled( + const ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR3_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeNonBlockCompressedView( + const ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, + ADDR3_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut) const; + + virtual VOID HwlComputeSubResourceOffsetForSwizzlePattern( + const ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR3_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor( + const ADDR3_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR3_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const; + + virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const + { + *ppEquationTable = m_equationTable; + + return m_numEquations; + } + +private: + Gfx12ChipSettings m_settings; + static const SwizzleModeFlags SwizzleModeTable[ADDR3_MAX_TYPE]; + + virtual ADDR_E_RETURNCODE HwlComputePipeBankXor( + const ADDR3_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR3_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const override; + + virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn) override; + + void SanityCheckSurfSize( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, + const ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + UINT_32 m_numSwizzleBits; + + static const ADDR_EXTENT3D Block4K_Log2_3d[]; + static const ADDR_EXTENT3D 
Block64K_Log2_3d[]; + static const ADDR_EXTENT3D Block256K_Log2_3d[]; + + // Initialize equation table + VOID InitEquationTable(); + + VOID GetSwizzlePatternFromPatternInfo( + const ADDR_SW_PATINFO* pPatInfo, + ADDR_BIT_SETTING (&pSwizzle)[Log2Size256K]) const + { + memcpy(pSwizzle, + GFX12_SW_PATTERN_NIBBLE1[pPatInfo->nibble1Idx], + sizeof(GFX12_SW_PATTERN_NIBBLE1[pPatInfo->nibble1Idx])); + + memcpy(&pSwizzle[8], + GFX12_SW_PATTERN_NIBBLE2[pPatInfo->nibble2Idx], + sizeof(GFX12_SW_PATTERN_NIBBLE2[pPatInfo->nibble2Idx])); + + memcpy(&pSwizzle[12], + GFX12_SW_PATTERN_NIBBLE3[pPatInfo->nibble3Idx], + sizeof(GFX12_SW_PATTERN_NIBBLE3[pPatInfo->nibble3Idx])); + + memcpy(&pSwizzle[16], + GFX12_SW_PATTERN_NIBBLE4[pPatInfo->nibble4Idx], + sizeof(GFX12_SW_PATTERN_NIBBLE4[pPatInfo->nibble4Idx])); + } + + VOID ConvertSwizzlePatternToEquation( + UINT_32 elemLog2, + Addr3SwizzleMode swMode, + const ADDR_SW_PATINFO* pPatInfo, + ADDR_EQUATION* pEquation) const; + + ADDR_EXTENT3D GetBaseMipExtents( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + + ADDR_EXTENT3D GetBlockPixelDimensions( + Addr3SwizzleMode swizzleMode, + UINT_32 log2BytesPerPixel) const; + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const override; + + static ADDR_EXTENT3D GetMipExtent( + const ADDR_EXTENT3D& mip0, + UINT_32 mipId) + { + return { + ShiftCeil(Max(mip0.width, 1u), mipId), + ShiftCeil(Max(mip0.height, 1u), mipId), + ShiftCeil(Max(mip0.depth, 1u), mipId) + }; + } + + //# See 6.3 in //gfxip/gfx10/doc/architecture/ImageAddressing/gfx10_image_addressing.docx + // miptail is applied to only larger block size (4kb, 64kb, 256kb), so there is no miptail in linear and + // 256b_2d addressing since they are both 256b block. 
+ BOOL_32 SupportsMipTail(Addr3SwizzleMode swizzleMode) const + { + return GetBlockSize(swizzleMode) > 256u; + } + + UINT_32 ComputeOffsetFromEquation( + const ADDR_EQUATION* pEq, + UINT_32 x, + UINT_32 y, + UINT_32 z, + UINT_32 s) const; + + const ADDR_SW_PATINFO* GetSwizzlePatternInfo( + Addr3SwizzleMode swizzleMode, + UINT_32 log2Elem, + UINT_32 numFrag) const; + + VOID GetMipOffset( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + VOID GetMipOrigin( + const ADDR3_COMPUTE_SURFACE_INFO_INPUT* pIn, + const ADDR_EXTENT3D& mipExtentFirstInTail, + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; +}; + +} // V3 +} // Addr + +#endif diff --git a/runtime/hsa-runtime/image/addrlib/src/gfx9/gfx9addrlib.cpp b/runtime/hsa-runtime/image/addrlib/src/gfx9/gfx9addrlib.cpp index 3b62d2d78a..65e491ff4a 100644 --- a/runtime/hsa-runtime/image/addrlib/src/gfx9/gfx9addrlib.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/gfx9/gfx9addrlib.cpp @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. 
-* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -39,8 +22,8 @@ //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// -namespace rocr { -namespace Addr { +namespace Addr +{ /** ************************************************************************************************************************ @@ -363,6 +346,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo( // Generate the CMASK address equation. pOut->equation.gfx9.num_bits = Min(32u, eq->getsize()); + bool checked = false; for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) { CoordTerm &bit = (*eq)[b]; @@ -727,6 +711,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo( // Generate the DCC address equation. 
pOut->equation.gfx9.num_bits = Min(32u, eq->getsize()); + bool checked = false; for (unsigned b = 0; b < pOut->equation.gfx9.num_bits; b++) { CoordTerm &bit = (*eq)[b]; @@ -2409,6 +2394,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation( ADDR_E_RETURNCODE ret = ADDR_OK; pEquation->numBits = 8; + pEquation->numBitComponents = 1; UINT_32 i = 0; for (; i < elementBytesLog2; i++) @@ -2735,6 +2721,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation( } } + FillEqBitComponents(pEquation); pEquation->numBits = blockSizeLog2; } @@ -3012,6 +2999,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation( } } + FillEqBitComponents(pEquation); pEquation->numBits = blockSizeLog2; } @@ -3722,7 +3710,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( // Apply optional restrictions if (pIn->flags.needEquation) { - FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3)); + UINT_32 components = pIn->flags.allowExtEquation ? ADDR_MAX_EQUATION_COMP : + ADDR_MAX_LEGACY_EQUATION_COMP; + FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3), components); } if (allowedSwModeSet.value == Gfx9LinearSwModeMask) @@ -3763,6 +3753,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2); const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 
2 : 1); + const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u); UINT_32 minSizeBlk = AddrBlockMicro; UINT_64 minSize = 0; @@ -3770,7 +3761,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++) { - if (IsBlockTypeAvaiable(allowedBlockSet, static_cast(i))) + if (Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i))) { localIn.swizzleMode = swMode[i]; @@ -3788,7 +3779,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( padSize[i] = localOut.surfSize; if ((minSize == 0) || - BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi)) + Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], ratioLow, ratioHi)) { minSize = padSize[i]; minSizeBlk = i; @@ -3829,9 +3820,9 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) { if ((i != minSizeBlk) && - IsBlockTypeAvaiable(allowedBlockSet, static_cast(i))) + Addr2IsBlockTypeAvailable(allowedBlockSet, static_cast<::AddrBlockType>(i))) { - if (BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE) + if (Addr2BlockTypeWithinMemoryBudget(minSize, padSize[i], 0, 0, pIn->memoryBudget) == FALSE) { // Clear the block type if the memory waste is unacceptable allowedBlockSet.value &= ~(1u << (i - 1)); @@ -5227,4 +5218,3 @@ VOID Gfx9Lib::ComputeThinBlockDimension( } // V2 } // Addr -} // rocr \ No newline at end of file diff --git a/runtime/hsa-runtime/image/addrlib/src/gfx9/gfx9addrlib.h b/runtime/hsa-runtime/image/addrlib/src/gfx9/gfx9addrlib.h index 10ea35139c..6a0fa661ac 100644 --- a/runtime/hsa-runtime/image/addrlib/src/gfx9/gfx9addrlib.h +++ b/runtime/hsa-runtime/image/addrlib/src/gfx9/gfx9addrlib.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro 
Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -36,9 +19,10 @@ #include "addrlib2.h" #include "coord.h" -namespace rocr { -namespace Addr { -namespace V2 { +namespace Addr +{ +namespace V2 +{ /** ************************************************************************************************************************ @@ -647,7 +631,6 @@ private: } // V2 } // Addr -} // rocr #endif diff --git a/runtime/hsa-runtime/image/addrlib/src/r800/ciaddrlib.cpp b/runtime/hsa-runtime/image/addrlib/src/r800/ciaddrlib.cpp index 200f71589e..aedfb0acb6 100644 --- a/runtime/hsa-runtime/image/addrlib/src/r800/ciaddrlib.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/r800/ciaddrlib.cpp @@ -2,24 +2,7 @@ 
************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -38,8 +21,9 @@ //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// -namespace rocr { -namespace Addr { + +namespace Addr +{ /** **************************************************************************************************** @@ -2351,4 +2335,3 @@ BOOL_32 CiLib::CheckTcCompatibility( } // V1 } // Addr -} // rocr \ No newline at end of file diff --git a/runtime/hsa-runtime/image/addrlib/src/r800/ciaddrlib.h b/runtime/hsa-runtime/image/addrlib/src/r800/ciaddrlib.h index 894ddd321c..060d30e343 100644 --- a/runtime/hsa-runtime/image/addrlib/src/r800/ciaddrlib.h +++ b/runtime/hsa-runtime/image/addrlib/src/r800/ciaddrlib.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. 
-* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -36,9 +19,10 @@ #include "addrlib1.h" #include "siaddrlib.h" -namespace rocr { -namespace Addr { -namespace V1 { +namespace Addr +{ +namespace V1 +{ /** **************************************************************************************************** @@ -204,5 +188,7 @@ private: } // V1 } // Addr -} // rocr + #endif + + diff --git a/runtime/hsa-runtime/image/addrlib/src/r800/egbaddrlib.cpp b/runtime/hsa-runtime/image/addrlib/src/r800/egbaddrlib.cpp index c762ab9347..48e2440874 100644 --- a/runtime/hsa-runtime/image/addrlib/src/r800/egbaddrlib.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/r800/egbaddrlib.cpp @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * 
Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ /** @@ -31,9 +14,10 @@ #include "egbaddrlib.h" -namespace rocr { -namespace Addr { -namespace V1 { +namespace Addr +{ +namespace V1 +{ /** **************************************************************************************************** @@ -1558,6 +1542,8 @@ ADDR_E_RETURNCODE EgBasedLib::ComputeMacroTileEquation( pEquation->xor2[bankBitStart + i] = equation.xor2[i]; pEquation->numBits++; } + + FillEqBitComponents(pEquation); } } } @@ -3133,6 +3119,7 @@ UINT_32 EgBasedLib::ComputePipeRotation( } + /** **************************************************************************************************** * EgBasedLib::ComputeBankRotation @@ -4156,4 +4143,3 @@ UINT_32 
EgBasedLib::HwlStereoCheckRightOffsetPadding( } // V1 } // Addr -} // rocr diff --git a/runtime/hsa-runtime/image/addrlib/src/r800/egbaddrlib.h b/runtime/hsa-runtime/image/addrlib/src/r800/egbaddrlib.h index ebae1ad6dd..e855585048 100644 --- a/runtime/hsa-runtime/image/addrlib/src/r800/egbaddrlib.h +++ b/runtime/hsa-runtime/image/addrlib/src/r800/egbaddrlib.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -35,9 +18,10 @@ #include "addrlib1.h" -namespace rocr { -namespace Addr { -namespace V1 { +namespace Addr +{ +namespace V1 +{ /// Structures for functions struct CoordFromBankPipe { @@ -423,7 +407,6 @@ protected: } // V1 } // Addr -} // rocr #endif diff --git a/runtime/hsa-runtime/image/addrlib/src/r800/siaddrlib.cpp b/runtime/hsa-runtime/image/addrlib/src/r800/siaddrlib.cpp index cb37605597..afe00ae81a 100644 --- a/runtime/hsa-runtime/image/addrlib/src/r800/siaddrlib.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/r800/siaddrlib.cpp @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -37,8 +20,8 @@ //////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// -namespace rocr { -namespace Addr { +namespace Addr +{ /** **************************************************************************************************** @@ -419,6 +402,7 @@ ADDR_E_RETURNCODE SiLib::ComputeBankEquation( } } } + FillEqBitComponents(pEquation); if ((pTileInfo->bankWidth == 1) && ((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) || @@ -1661,7 +1645,9 @@ UINT_32 SiLib::HwlGetPitchAlignmentLinear( } else { - pitchAlign = Max(8u, 64 / BITS_TO_BYTES(bpp)); + { + pitchAlign = Max(8u, 64 / BITS_TO_BYTES(bpp)); + } } return pitchAlign; @@ -2279,7 +2265,10 @@ BOOL_32 SiLib::DecodeGbRegs( reg.val = pRegValue->gbAddrConfig; - switch (reg.f.pipe_interleave_size) + UINT_32 pipe_interleave_size = reg.f.pipe_interleave_size; + UINT_32 row_size = reg.f.row_size; + + switch (pipe_interleave_size) { case ADDR_CONFIG_PIPE_INTERLEAVE_256B: m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B; @@ -2293,7 +2282,7 @@ BOOL_32 SiLib::DecodeGbRegs( break; } - switch (reg.f.row_size) + switch (row_size) { case ADDR_CONFIG_1KB_ROW: m_rowSize = ADDR_ROWSIZE_1KB; @@ -3869,4 +3858,3 @@ BOOL_32 SiLib::IsEquationSupported( } // V1 } // Addr -} // rocr diff --git a/runtime/hsa-runtime/image/addrlib/src/r800/siaddrlib.h b/runtime/hsa-runtime/image/addrlib/src/r800/siaddrlib.h index d5f23d80a7..d0dd841ceb 100644 --- 
a/runtime/hsa-runtime/image/addrlib/src/r800/siaddrlib.h +++ b/runtime/hsa-runtime/image/addrlib/src/r800/siaddrlib.h @@ -2,24 +2,7 @@ ************************************************************************************************************************ * * Copyright (C) 2007-2022 Advanced Micro Devices, Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE +* SPDX-License-Identifier: MIT * ***********************************************************************************************************************/ @@ -36,9 +19,10 @@ #include "addrlib1.h" #include "egbaddrlib.h" -namespace rocr { -namespace Addr { -namespace V1 { +namespace Addr +{ +namespace V1 +{ /** **************************************************************************************************** @@ -84,8 +68,11 @@ struct SiChipSettings UINT_32 isPolaris10 : 1; UINT_32 isPolaris11 : 1; UINT_32 isPolaris12 : 1; + // VI fusion UINT_32 isVegaM : 1; UINT_32 isCarrizo : 1; + + UINT_32 : 2; }; /** @@ -339,6 +326,6 @@ private: } // V1 } // Addr -} // rocr + #endif