diff --git a/runtime/hsa-runtime/image/addrlib/inc/addrinterface.h b/runtime/hsa-runtime/image/addrlib/inc/addrinterface.h index 5fb3c46e48..c0e370f2be 100644 --- a/runtime/hsa-runtime/image/addrlib/inc/addrinterface.h +++ b/runtime/hsa-runtime/image/addrlib/inc/addrinterface.h @@ -3349,17 +3349,23 @@ typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT UINT_32 mipId; ///< mipmap level id ADDR2_META_FLAGS dccKeyFlags; ///< DCC flags - ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags AddrResourceType resourceType; ///< Color surface type AddrSwizzleMode swizzleMode; ///< Color surface swizzle mode UINT_32 bpp; ///< Color surface bits per pixel - UINT_32 unalignedWidth; ///< Color surface original width (of mip0) - UINT_32 unalignedHeight; ///< Color surface original height (of mip0) UINT_32 numSlices; ///< Color surface original slices (of mip0) UINT_32 numMipLevels; ///< Color surface mipmap levels UINT_32 numFrags; ///< Color surface fragment number UINT_32 pipeXor; ///< pipe Xor setting + UINT_32 pitch; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::pitch + UINT_32 height; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::height + UINT_32 compressBlkWidth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkWidth + UINT_32 compressBlkHeight; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkHeight + UINT_32 compressBlkDepth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::compressBlkDepth + UINT_32 metaBlkWidth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkWidth + UINT_32 metaBlkHeight; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkHeight + UINT_32 metaBlkDepth; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::metaBlkDepth + UINT_32 dccRamSliceSize; ///< ADDR2_COMPUTE_DCC_INFO_OUTPUT::dccRamSliceSize } ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT; /** diff --git a/runtime/hsa-runtime/image/addrlib/src/amdgpu_asic_addr.h b/runtime/hsa-runtime/image/addrlib/src/amdgpu_asic_addr.h index 6b598a39df..3307f1941a 100644 --- a/runtime/hsa-runtime/image/addrlib/src/amdgpu_asic_addr.h +++ 
b/runtime/hsa-runtime/image/addrlib/src/amdgpu_asic_addr.h @@ -97,6 +97,7 @@ #define AMDGPU_NAVI10_RANGE 0x01, 0x0A #define AMDGPU_NAVI12_RANGE 0x0A, 0x14 #define AMDGPU_NAVI14_RANGE 0x14, 0x28 +#define AMDGPU_SIENNA_RANGE 0x28, 0x32 #define AMDGPU_EXPAND_FIX(x) x #define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max)) @@ -144,5 +145,6 @@ #define ASICREV_IS_NAVI10_P(r) ASICREV_IS(r, NAVI10) #define ASICREV_IS_NAVI12(r) ASICREV_IS(r, NAVI12) #define ASICREV_IS_NAVI14(r) ASICREV_IS(r, NAVI14) +#define ASICREV_IS_SIENNA_M(r) ASICREV_IS(r, SIENNA) #endif // _AMDGPU_ASIC_ADDR_H diff --git a/runtime/hsa-runtime/image/addrlib/src/chip/gfx9/gfx9_gb_reg.h b/runtime/hsa-runtime/image/addrlib/src/chip/gfx9/gfx9_gb_reg.h index 42f24ca46d..6d0475c4ae 100644 --- a/runtime/hsa-runtime/image/addrlib/src/chip/gfx9/gfx9_gb_reg.h +++ b/runtime/hsa-runtime/image/addrlib/src/chip/gfx9/gfx9_gb_reg.h @@ -34,7 +34,7 @@ * */ -union GB_ADDR_CONFIG { +union GB_ADDR_CONFIG_gfx9 { struct { #if defined(LITTLEENDIAN_CPU) unsigned int NUM_PIPES : 3; diff --git a/runtime/hsa-runtime/image/addrlib/src/core/coord.cpp b/runtime/hsa-runtime/image/addrlib/src/core/coord.cpp index b46149e7f8..3cf066daa9 100644 --- a/runtime/hsa-runtime/image/addrlib/src/core/coord.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/core/coord.cpp @@ -83,11 +83,11 @@ BOOL_32 Coordinate::operator<(const Coordinate& b) } else { - if (dim == 's' || b.dim == 'm') + if (dim == DIM_S || b.dim == DIM_M) { ret = TRUE; } - else if (b.dim == 's' || dim == 'm') + else if (b.dim == DIM_S || dim == DIM_M) { ret = FALSE; } diff --git a/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10addrlib.cpp b/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10addrlib.cpp index eea3deefff..4033c2398d 100644 --- a/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10addrlib.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/gfx10/gfx10addrlib.cpp @@ -673,88 +673,67 @@ ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccAddrFromCoord( } else { - 
ADDR2_COMPUTE_DCCINFO_INPUT input = {0}; - input.size = sizeof(input); - input.dccKeyFlags = pIn->dccKeyFlags; - input.colorFlags = pIn->colorFlags; - input.swizzleMode = pIn->swizzleMode; - input.resourceType = pIn->resourceType; - input.bpp = pIn->bpp; - input.unalignedWidth = Max(pIn->unalignedWidth, 1u); - input.unalignedHeight = Max(pIn->unalignedHeight, 1u); - input.numSlices = Max(pIn->numSlices, 1u); - input.numFrags = Max(pIn->numFrags, 1u); - input.numMipLevels = Max(pIn->numMipLevels, 1u); + const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); + const UINT_32 numPipeLog2 = m_pipesLog2; + const UINT_32 pipeMask = (1 << numPipeLog2) - 1; + UINT_32 index = m_dccBaseIndex + elemLog2; + const UINT_8* patIdxTable; - ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0}; - output.size = sizeof(output); - - returnCode = ComputeDccInfo(&input, &output); - - if (returnCode == ADDR_OK) + if (m_settings.supportRbPlus) { - const UINT_32 elemLog2 = Log2(pIn->bpp >> 3); - const UINT_32 numPipeLog2 = m_pipesLog2; - const UINT_32 pipeMask = (1 << numPipeLog2) - 1; - UINT_32 index = m_dccBaseIndex + elemLog2; - const UINT_8* patIdxTable; + patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX; - if (m_settings.supportRbPlus) + if (pIn->dccKeyFlags.pipeAligned) { - patIdxTable = DCC_64K_R_X_RBPLUS_PATIDX; + index += MaxNumOfBpp; - if (pIn->dccKeyFlags.pipeAligned) + if (m_numPkrLog2 < 2) { - index += MaxNumOfBpp; - - if (m_numPkrLog2 < 2) - { - index += m_pipesLog2 * MaxNumOfBpp; - } - else - { - // 4 groups for "m_numPkrLog2 < 2" case - index += 4 * MaxNumOfBpp; - - const UINT_32 dccPipePerPkr = 3; - - index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp + - (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp; - } - } - } - else - { - patIdxTable = DCC_64K_R_X_PATIDX; - - if (pIn->dccKeyFlags.pipeAligned) - { - index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp; + index += m_pipesLog2 * MaxNumOfBpp; } else { - index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp; + // 4 groups for "m_numPkrLog2 
< 2" case + index += 4 * MaxNumOfBpp; + + const UINT_32 dccPipePerPkr = 3; + + index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp + + (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp; } } - - const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) + elemLog2 - 8; - const UINT_32 blkMask = (1 << blkSizeLog2) - 1; - const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]], - blkSizeLog2 + 1, // +1 for nibble offset - pIn->x, - pIn->y, - pIn->slice, - 0); - const UINT_32 xb = pIn->x / output.metaBlkWidth; - const UINT_32 yb = pIn->y / output.metaBlkHeight; - const UINT_32 pb = output.pitch / output.metaBlkWidth; - const UINT_32 blkIndex = (yb * pb) + xb; - const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask; - - pOut->addr = (static_cast<UINT_64>(output.dccRamSliceSize) * pIn->slice) + - (blkIndex * (1 << blkSizeLog2)) + - ((blkOffset >> 1) ^ pipeXor); } + else + { + patIdxTable = DCC_64K_R_X_PATIDX; + + if (pIn->dccKeyFlags.pipeAligned) + { + index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp; + } + else + { + index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp; + } + } + + const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8; + const UINT_32 blkMask = (1 << blkSizeLog2) - 1; + const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(DCC_64K_R_X_SW_PATTERN[patIdxTable[index]], + blkSizeLog2 + 1, // +1 for nibble offset + pIn->x, + pIn->y, + pIn->slice, + 0); + const UINT_32 xb = pIn->x / pIn->metaBlkWidth; + const UINT_32 yb = pIn->y / pIn->metaBlkHeight; + const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth; + const UINT_32 blkIndex = (yb * pb) + xb; + const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask; + + pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) + + (blkIndex * (1 << blkSizeLog2)) + + ((blkOffset >> 1) ^ pipeXor); } return returnCode; @@ -943,6 +922,12 @@ 
ChipFamily Gfx10Lib::HwlConvertChipFamily( { case FAMILY_NV: m_settings.isDcn2 = 1; + + if (ASICREV_IS_SIENNA_M(chipRevision)) + { + m_settings.supportRbPlus = 1; + m_settings.dccUnsup3DSwDis = 0; + } break; default: ADDR_ASSERT(!"Unknown chip family"); diff --git a/runtime/hsa-runtime/image/addrlib/src/gfx9/gfx9addrlib.cpp b/runtime/hsa-runtime/image/addrlib/src/gfx9/gfx9addrlib.cpp index ece83592fc..dcc080cbec 100644 --- a/runtime/hsa-runtime/image/addrlib/src/gfx9/gfx9addrlib.cpp +++ b/runtime/hsa-runtime/image/addrlib/src/gfx9/gfx9addrlib.cpp @@ -987,62 +987,41 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord( } else { - ADDR2_COMPUTE_DCCINFO_INPUT input = {0}; - input.size = sizeof(input); - input.dccKeyFlags = pIn->dccKeyFlags; - input.colorFlags = pIn->colorFlags; - input.swizzleMode = pIn->swizzleMode; - input.resourceType = pIn->resourceType; - input.bpp = pIn->bpp; - input.unalignedWidth = Max(pIn->unalignedWidth, 1u); - input.unalignedHeight = Max(pIn->unalignedHeight, 1u); - input.numSlices = Max(pIn->numSlices, 1u); - input.numFrags = Max(pIn->numFrags, 1u); - input.numMipLevels = Max(pIn->numMipLevels, 1u); + UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); + UINT_32 numSamplesLog2 = Log2(pIn->numFrags); + UINT_32 metaBlkWidthLog2 = Log2(pIn->metaBlkWidth); + UINT_32 metaBlkHeightLog2 = Log2(pIn->metaBlkHeight); + UINT_32 metaBlkDepthLog2 = Log2(pIn->metaBlkDepth); + UINT_32 compBlkWidthLog2 = Log2(pIn->compressBlkWidth); + UINT_32 compBlkHeightLog2 = Log2(pIn->compressBlkHeight); + UINT_32 compBlkDepthLog2 = Log2(pIn->compressBlkDepth); - ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0}; - output.size = sizeof(output); + MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, + Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, + metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, + compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2}; - returnCode = ComputeDccInfo(&input, &output); + const CoordEq* pMetaEq 
= GetMetaEquation(metaEqParams); - if (returnCode == ADDR_OK) - { - UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); - UINT_32 numSamplesLog2 = Log2(pIn->numFrags); - UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); - UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); - UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth); - UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth); - UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight); - UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth); + UINT_32 xb = pIn->x / pIn->metaBlkWidth; + UINT_32 yb = pIn->y / pIn->metaBlkHeight; + UINT_32 zb = pIn->slice / pIn->metaBlkDepth; - MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, - Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, - metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, - compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2}; + UINT_32 pitchInBlock = pIn->pitch / pIn->metaBlkWidth; + UINT_32 sliceSizeInBlock = (pIn->height / pIn->metaBlkHeight) * pitchInBlock; + UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; - const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); + UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex }; + UINT_64 address = pMetaEq->solve(coords); - UINT_32 xb = pIn->x / output.metaBlkWidth; - UINT_32 yb = pIn->y / output.metaBlkHeight; - UINT_32 zb = pIn->slice / output.metaBlkDepth; + pOut->addr = address >> 1; - UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; - UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; - UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; + UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned, + pIn->swizzleMode); - UINT_32 coords[] = { pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex }; - UINT_64 address = pMetaEq->solve(coords); + UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1)); - pOut->addr 
= address >> 1; - - UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned, - pIn->swizzleMode); - - UINT_64 pipeXor = static_cast<UINT_64>(pIn->pipeXor & ((1 << numPipeBits) - 1)); - - pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); - } + pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); } return returnCode; @@ -1067,7 +1046,7 @@ BOOL_32 Gfx9Lib::HwlInitGlobalParams( if (m_settings.isArcticIsland) { - GB_ADDR_CONFIG gbAddrConfig; + GB_ADDR_CONFIG_gfx9 gbAddrConfig; gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;