Implement AddrLib support for GFX12
Add new files image_manager_gfx12.{h,cpp}.
Implement BUF/IMG/SAMP desc changes for GFX12.
Implement compute surface info code using AddrLib3 API (new starting
from GFX12).
Implement an algorithm for choosing the "best" swizzle mode. Starting with
AddrLib3/GFX12, AddrLib only provides the list of suitable swizzle modes; it is
up to the client (ROCr) to choose the best one. The algorithm implemented
follows the behaviour in GFX11 and the behaviour for GFX12 on other platforms.
Signed-off-by: David Belanger <david.belanger@amd.com>
Change-Id: Ib344c86228a98bbac5acdab421ee2ef9b1e84eef
Signed-off-by: Chris Freehill <cfreehil@amd.com>
This commit is contained in:
zatwierdzone przez
Chris Freehill
rodzic
def4a6c326
commit
f8a015f53e
@@ -248,6 +248,7 @@ if(${IMAGE_SUPPORT})
|
||||
image/image_manager_ai.cpp
|
||||
image/image_manager_nv.cpp
|
||||
image/image_manager_gfx11.cpp
|
||||
image/image_manager_gfx12.cpp
|
||||
image/image_lut_kv.cpp
|
||||
image/image_lut_gfx11.cpp
|
||||
image/blit_object_gfx7xx.cpp
|
||||
|
||||
@@ -0,0 +1,896 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define NOMINMAX
|
||||
#include "image_manager_gfx12.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <climits>
|
||||
|
||||
#include "inc/hsa_ext_amd.h"
|
||||
#include "core/inc/hsa_internal.h"
|
||||
#include "core/util/utils.h"
|
||||
#include "addrlib/src/core/addrlib.h"
|
||||
#include "image_runtime.h"
|
||||
#include "resource.h"
|
||||
#include "resource_gfx12.h"
|
||||
#include "util.h"
|
||||
#include "device_info.h"
|
||||
|
||||
namespace rocr {
|
||||
namespace image {
|
||||
|
||||
static_assert(sizeof(SQ_BUF_RSRC_WORD0) == sizeof(uint32_t));
|
||||
static_assert(sizeof(SQ_BUF_RSRC_WORD1) == sizeof(uint32_t));
|
||||
static_assert(sizeof(SQ_BUF_RSRC_WORD2) == sizeof(uint32_t));
|
||||
static_assert(sizeof(SQ_BUF_RSRC_WORD3) == sizeof(uint32_t));
|
||||
|
||||
static_assert(sizeof(SQ_IMG_RSRC_WORD0) == sizeof(uint32_t));
|
||||
static_assert(sizeof(SQ_IMG_RSRC_WORD1) == sizeof(uint32_t));
|
||||
static_assert(sizeof(SQ_IMG_RSRC_WORD2) == sizeof(uint32_t));
|
||||
static_assert(sizeof(SQ_IMG_RSRC_WORD3) == sizeof(uint32_t));
|
||||
static_assert(sizeof(SQ_IMG_RSRC_WORD4) == sizeof(uint32_t));
|
||||
static_assert(sizeof(SQ_IMG_RSRC_WORD5) == sizeof(uint32_t));
|
||||
static_assert(sizeof(SQ_IMG_RSRC_WORD6) == sizeof(uint32_t));
|
||||
static_assert(sizeof(SQ_IMG_RSRC_WORD7) == sizeof(uint32_t));
|
||||
|
||||
static_assert(sizeof(SQ_IMG_SAMP_WORD0) == sizeof(uint32_t));
|
||||
static_assert(sizeof(SQ_IMG_SAMP_WORD1) == sizeof(uint32_t));
|
||||
static_assert(sizeof(SQ_IMG_SAMP_WORD2) == sizeof(uint32_t));
|
||||
static_assert(sizeof(SQ_IMG_SAMP_WORD3) == sizeof(uint32_t));
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Workaround switch to combined format/type codes and missing gfx11
|
||||
// specific look up table. Only covers types used in image_lut_gfx11.cpp.
|
||||
//-----------------------------------------------------------------------------
|
||||
struct formatconverstion_t {
|
||||
FMT fmt;
|
||||
type type;
|
||||
FORMAT format;
|
||||
};
|
||||
|
||||
// Format/Type to combined format code table.
|
||||
// Sorted and indexed to allow fast searches.
|
||||
static const formatconverstion_t FormatLUT[] = {
|
||||
{FMT_1_5_5_5, TYPE_UNORM, CFMT_1_5_5_5_UNORM}, // 0
|
||||
{FMT_10_10_10_2, TYPE_UNORM, CFMT_10_10_10_2_UNORM}, // 1
|
||||
{FMT_10_10_10_2, TYPE_SNORM, CFMT_10_10_10_2_SNORM}, // 2
|
||||
{FMT_10_10_10_2, TYPE_UINT, CFMT_10_10_10_2_UINT}, // 3
|
||||
{FMT_10_10_10_2, TYPE_SINT, CFMT_10_10_10_2_SINT}, // 4
|
||||
{FMT_16, TYPE_UNORM, CFMT_16_UNORM}, // 5
|
||||
{FMT_16, TYPE_SNORM, CFMT_16_SNORM}, // 6
|
||||
{FMT_16, TYPE_UINT, CFMT_16_UINT}, // 7
|
||||
{FMT_16, TYPE_SINT, CFMT_16_SINT}, // 8
|
||||
{FMT_16, TYPE_FLOAT, CFMT_16_FLOAT}, // 9
|
||||
{FMT_16, TYPE_USCALED, CFMT_16_USCALED}, // 10
|
||||
{FMT_16, TYPE_SSCALED, CFMT_16_SSCALED}, // 11
|
||||
{FMT_16_16, TYPE_UNORM, CFMT_16_16_UNORM}, // 12
|
||||
{FMT_16_16, TYPE_SNORM, CFMT_16_16_SNORM}, // 13
|
||||
{FMT_16_16, TYPE_UINT, CFMT_16_16_UINT}, // 14
|
||||
{FMT_16_16, TYPE_SINT, CFMT_16_16_SINT}, // 15
|
||||
{FMT_16_16, TYPE_FLOAT, CFMT_16_16_FLOAT}, // 16
|
||||
{FMT_16_16, TYPE_USCALED, CFMT_16_16_USCALED}, // 17
|
||||
{FMT_16_16, TYPE_SSCALED, CFMT_16_16_SSCALED}, // 18
|
||||
{FMT_16_16_16_16, TYPE_UNORM, CFMT_16_16_16_16_UNORM}, // 19
|
||||
{FMT_16_16_16_16, TYPE_SNORM, CFMT_16_16_16_16_SNORM}, // 20
|
||||
{FMT_16_16_16_16, TYPE_UINT, CFMT_16_16_16_16_UINT}, // 21
|
||||
{FMT_16_16_16_16, TYPE_SINT, CFMT_16_16_16_16_SINT}, // 22
|
||||
{FMT_16_16_16_16, TYPE_FLOAT, CFMT_16_16_16_16_FLOAT}, // 23
|
||||
{FMT_16_16_16_16, TYPE_USCALED, CFMT_16_16_16_16_USCALED}, // 24
|
||||
{FMT_16_16_16_16, TYPE_SSCALED, CFMT_16_16_16_16_SSCALED}, // 25
|
||||
{FMT_2_10_10_10, TYPE_UNORM, CFMT_2_10_10_10_UNORM}, // 26
|
||||
{FMT_2_10_10_10, TYPE_SNORM, CFMT_2_10_10_10_SNORM}, // 27
|
||||
{FMT_2_10_10_10, TYPE_UINT, CFMT_2_10_10_10_UINT}, // 28
|
||||
{FMT_2_10_10_10, TYPE_SINT, CFMT_2_10_10_10_SINT}, // 29
|
||||
{FMT_2_10_10_10, TYPE_USCALED, CFMT_2_10_10_10_USCALED}, // 30
|
||||
{FMT_2_10_10_10, TYPE_SSCALED, CFMT_2_10_10_10_SSCALED}, // 31
|
||||
{FMT_24_8, TYPE_UNORM, CFMT_24_8_UNORM}, // 32
|
||||
{FMT_24_8, TYPE_UINT, CFMT_24_8_UINT}, // 33
|
||||
{FMT_32, TYPE_UINT, CFMT_32_UINT}, // 34
|
||||
{FMT_32, TYPE_SINT, CFMT_32_SINT}, // 35
|
||||
{FMT_32, TYPE_FLOAT, CFMT_32_FLOAT}, // 36
|
||||
{FMT_32_32, TYPE_UINT, CFMT_32_32_UINT}, // 37
|
||||
{FMT_32_32, TYPE_SINT, CFMT_32_32_SINT}, // 38
|
||||
{FMT_32_32, TYPE_FLOAT, CFMT_32_32_FLOAT}, // 39
|
||||
{FMT_32_32_32, TYPE_UINT, CFMT_32_32_32_UINT}, // 40
|
||||
{FMT_32_32_32, TYPE_SINT, CFMT_32_32_32_SINT}, // 41
|
||||
{FMT_32_32_32, TYPE_FLOAT, CFMT_32_32_32_FLOAT}, // 42
|
||||
{FMT_32_32_32_32, TYPE_UINT, CFMT_32_32_32_32_UINT}, // 43
|
||||
{FMT_32_32_32_32, TYPE_SINT, CFMT_32_32_32_32_SINT}, // 44
|
||||
{FMT_32_32_32_32, TYPE_FLOAT, CFMT_32_32_32_32_FLOAT}, // 45
|
||||
{FMT_5_5_5_1, TYPE_UNORM, CFMT_5_5_5_1_UNORM}, // 46
|
||||
{FMT_5_6_5, TYPE_UNORM, CFMT_5_6_5_UNORM}, // 47
|
||||
{FMT_8, TYPE_UNORM, CFMT_8_UNORM}, // 48
|
||||
{FMT_8, TYPE_SNORM, CFMT_8_SNORM}, // 49
|
||||
{FMT_8, TYPE_UINT, CFMT_8_UINT}, // 50
|
||||
{FMT_8, TYPE_SINT, CFMT_8_SINT}, // 51
|
||||
{FMT_8, TYPE_SRGB, CFMT_8_SRGB}, // 52
|
||||
{FMT_8, TYPE_USCALED, CFMT_8_USCALED}, // 53
|
||||
{FMT_8, TYPE_SSCALED, CFMT_8_SSCALED}, // 54
|
||||
{FMT_8_24, TYPE_UNORM, CFMT_8_24_UNORM}, // 55
|
||||
{FMT_8_24, TYPE_UINT, CFMT_8_24_UINT}, // 56
|
||||
{FMT_8_8, TYPE_UNORM, CFMT_8_8_UNORM}, // 57
|
||||
{FMT_8_8, TYPE_SNORM, CFMT_8_8_SNORM}, // 58
|
||||
{FMT_8_8, TYPE_UINT, CFMT_8_8_UINT}, // 59
|
||||
{FMT_8_8, TYPE_SINT, CFMT_8_8_SINT}, // 60
|
||||
{FMT_8_8, TYPE_SRGB, CFMT_8_8_SRGB}, // 61
|
||||
{FMT_8_8, TYPE_USCALED, CFMT_8_8_USCALED}, // 62
|
||||
{FMT_8_8, TYPE_SSCALED, CFMT_8_8_SSCALED}, // 63
|
||||
{FMT_8_8_8_8, TYPE_UNORM, CFMT_8_8_8_8_UNORM}, // 64
|
||||
{FMT_8_8_8_8, TYPE_SNORM, CFMT_8_8_8_8_SNORM}, // 65
|
||||
{FMT_8_8_8_8, TYPE_UINT, CFMT_8_8_8_8_UINT}, // 66
|
||||
{FMT_8_8_8_8, TYPE_SINT, CFMT_8_8_8_8_SINT}, // 67
|
||||
{FMT_8_8_8_8, TYPE_SRGB, CFMT_8_8_8_8_SRGB}, // 68
|
||||
{FMT_8_8_8_8, TYPE_USCALED, CFMT_8_8_8_8_USCALED}, // 69
|
||||
{FMT_8_8_8_8, TYPE_SSCALED, CFMT_8_8_8_8_SSCALED} // 70
|
||||
};
|
||||
static const int FormatLUTSize = sizeof(FormatLUT)/sizeof(formatconverstion_t);
|
||||
|
||||
// For each FMT enum value (used as the array index), the index of the first
// FormatLUT row for that format. The value 71 is one past the last row of
// FormatLUT and marks a format that has no rows in the table.
static const int FormatEntryPoint[] = {
    71,  // FMT_INVALID
    48,  // FMT_8
    5,   // FMT_16
    57,  // FMT_8_8
    34,  // FMT_32
    12,  // FMT_16_16
    71,  // FMT_10_11_11
    71,  // FMT_11_11_10
    1,   // FMT_10_10_10_2
    26,  // FMT_2_10_10_10
    64,  // FMT_8_8_8_8
    37,  // FMT_32_32
    19,  // FMT_16_16_16_16
    40,  // FMT_32_32_32
    43,  // FMT_32_32_32_32
    71,  // RESERVED
    47,  // FMT_5_6_5
    0,   // FMT_1_5_5_5
    46,  // FMT_5_5_5_1
    71,  // FMT_4_4_4_4
    55,  // FMT_8_24
    32   // FMT_24_8
};
|
||||
|
||||
// Translates a separate (data format, numeric type) pair into the combined
// format code stored in the descriptor FORMAT fields.
//
// @param fmt   Data format; used as an index into FormatEntryPoint.
// @param type  Numeric type to match within that format's group of rows.
// @return The matching combined format code, or CFMT_INVALID when `fmt` is
//         out of range or the table has no row for the pair.
static FORMAT GetCombinedFormat(uint8_t fmt, uint8_t type) {
  constexpr auto kNumEntryPoints = sizeof(FormatEntryPoint) / sizeof(FormatEntryPoint[0]);

  assert(fmt < kNumEntryPoints && "FMT out of range.");
  // The assert vanishes in release builds; never index past the table.
  if (fmt >= kNumEntryPoints) {
    return CFMT_INVALID;
  }

  // Rows for one FMT are contiguous in FormatLUT, so scan the whole group
  // starting at its entry point. A fixed window of 6 rows is too short: some
  // groups (e.g. FMT_8, FMT_16) have 7 rows and their last one (TYPE_SSCALED)
  // would never be reached. Formats without rows have an entry point equal to
  // FormatLUTSize, so the loop body is skipped for them.
  for (int i = FormatEntryPoint[fmt]; i < FormatLUTSize && FormatLUT[i].fmt == fmt; ++i) {
    if (FormatLUT[i].type == type) {
      return FormatLUT[i].format;
    }
  }

  return CFMT_INVALID;
}
|
||||
//-----------------------------------------------------------------------------
|
||||
// End workaround
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
// Builds the GFX12 image manager on top of the ImageManagerKv base; no
// additional members are initialised here.
ImageManagerGfx12::ImageManagerGfx12()
    : ImageManagerKv() {
}

// Nothing is released explicitly in this destructor.
ImageManagerGfx12::~ImageManagerGfx12() {
}
|
||||
|
||||
// TODO(cfreehil) remove from class, make it a utility function
|
||||
hsa_status_t ImageManagerGfx12::CalculateImageSizeAndAlignment(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
hsa_ext_image_data_layout_t image_data_layout,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
hsa_ext_image_data_info_t& image_info) const {
|
||||
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
|
||||
hsa_profile_t profile;
|
||||
|
||||
hsa_status_t status = HSA::hsa_agent_get_info(component, HSA_AGENT_INFO_PROFILE, &profile);
|
||||
if (status != HSA_STATUS_SUCCESS) return status;
|
||||
|
||||
Image::TileMode tileMode = Image::TileMode::LINEAR;
|
||||
if (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE) {
|
||||
tileMode = (profile == HSA_PROFILE_BASE &&
|
||||
desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)?
|
||||
Image::TileMode::TILED : Image::TileMode::LINEAR;
|
||||
}
|
||||
if (GetAddrlibSurfaceInfoNv(component, desc, tileMode,
|
||||
image_data_row_pitch, image_data_slice_pitch, out) ==
|
||||
(uint32_t)(-1)) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
size_t rowPitch = (out.bpp >> 3) * out.pitch;
|
||||
size_t slicePitch = rowPitch * out.height;
|
||||
if (desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB &&
|
||||
image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR &&
|
||||
((image_data_row_pitch && (rowPitch != image_data_row_pitch)) ||
|
||||
(image_data_slice_pitch && (slicePitch != image_data_slice_pitch)))) {
|
||||
return static_cast<hsa_status_t>(
|
||||
HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED);
|
||||
}
|
||||
|
||||
image_info.size = out.surfSize;
|
||||
assert(image_info.size != 0);
|
||||
image_info.alignment = out.baseAlign;
|
||||
assert(image_info.alignment != 0);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Tells the SRD builders whether an image address must be rebased against
// local_memory_base_address_. This implementation does not inspect the
// address and answers true for every input.
bool ImageManagerGfx12::IsLocalMemory(const void* /*address*/) const {
  return true;
}
|
||||
|
||||
// Builds the image SRD from vendor metadata supplied by the caller.
//
// The eight descriptor words are first copied verbatim from `descriptor`
// (interpreted as metadata_amd_gfx12_t); the base address, format, channel
// swizzle and, for 1D/1DA images, the resource type are then patched to match
// `image`.
//
// @param image       Image whose srd[], row_pitch and slice_pitch are written.
// @param descriptor  Source metadata for descriptor words 0-7.
// @return HSA_STATUS_SUCCESS, or
//         HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the format is not
//         supported or its element size differs from the hardware pixel size.
//         In the latter case srd[0..7] have already been overwritten with the
//         metadata words.
hsa_status_t ImageManagerGfx12::PopulateImageSrd(Image& image,
                                                 const metadata_amd_t* descriptor) const {
  const ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  const bool format_unsupported = (image_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED) ||
                                  (image_prop.element_size == 0);
  if (format_unsupported) {
    return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  }

  const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);

  // Local memory is addressed relative to the local memory base.
  const void* image_data_addr = image.data;
  if (IsLocalMemory(image.data)) {
    image_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);
  }

  // Start from the descriptor words carried by the metadata.
  const metadata_amd_gfx12_t* desc = reinterpret_cast<const metadata_amd_gfx12_t*>(descriptor);
  image.srd[0] = desc->word0.u32All;
  image.srd[1] = desc->word1.u32All;
  image.srd[2] = desc->word2.u32All;
  image.srd[3] = desc->word3.u32All;
  image.srd[4] = desc->word4.u32All;
  image.srd[5] = desc->word5.u32All;
  image.srd[6] = desc->word6.u32All;
  image.srd[7] = desc->word7.u32All;

  const auto combined_format = GetCombinedFormat(image_prop.data_format, image_prop.data_type);

  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    // Buffer resource: word0 is rebuilt from scratch, words 1 and 3 are
    // patched on top of the metadata values, word2 is left as supplied.
    SQ_BUF_RSRC_WORD0 word0;
    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow32(image_data_addr);

    SQ_BUF_RSRC_WORD1 word1;
    word1.val = image.srd[1];
    word1.f.BASE_ADDRESS_HI = PtrHigh32(image_data_addr);
    word1.f.STRIDE = image_prop.element_size;

    SQ_BUF_RSRC_WORD3 word3;
    word3.val = image.srd[3];
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.FORMAT = combined_format;
    word3.f.INDEX_STRIDE = image_prop.element_size;

    // New to GFX12
    //word3.f.WRITE_COMPRESS_ENABLE = 0;
    //word3.f.COMPRESSION_EN = 0;
    //word3.f.COMPRESSION_ACCESS_MODE = 0;

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[3] = word3.val;
  } else {
    const uint32_t hwPixelSize =
        ImageLut().GetPixelSize(image_prop.data_format, image_prop.data_type);
    if (image_prop.element_size != hwPixelSize) {
      return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
    }

    // Image resource: patch the words in place inside image.srd[].
    SQ_IMG_RSRC_WORD0* const img_word0 = reinterpret_cast<SQ_IMG_RSRC_WORD0*>(&image.srd[0]);
    SQ_IMG_RSRC_WORD1* const img_word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image.srd[1]);
    SQ_IMG_RSRC_WORD3* const img_word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3]);

    img_word0->bits.BASE_ADDRESS = PtrLow40Shift8(image_data_addr);
    img_word1->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(image_data_addr);

    // New to GFX12...
    //img_word1->bits.MAX_MIP = 0;

    img_word1->bits.FORMAT = combined_format;

    img_word3->bits.DST_SEL_X = swizzle.x;
    img_word3->bits.DST_SEL_Y = swizzle.y;
    img_word3->bits.DST_SEL_Z = swizzle.z;
    img_word3->bits.DST_SEL_W = swizzle.w;

    // Only the 1D flavours have their resource type rewritten here.
    const bool is_1d = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA) ||
                       (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D);
    if (is_1d) {
      img_word3->bits.TYPE = ImageLut().MapGeometry(image.desc.geometry);
    }
  }

  // Looks like this is only used for CPU copies.
  image.row_pitch = 0;
  image.slice_pitch = 0;

  // Used by HSAIL shader ABI
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Chooses the border-colour swizzle that matches a channel swizzle.
//
// The decision is driven by which destination channel (alpha first, then
// red, green, blue) selects source component X.
//
// @param swizzle  Channel selects for the x/y/z/w (r/g/b/a) destinations.
// @return The TEX_BC_Swizzle_* value to program; TEX_BC_Swizzle_XYZW when no
//         destination channel selects X.
static TEX_BC_SWIZZLE GetBcSwizzle(const Swizzle& swizzle) {
  const SEL red = static_cast<SEL>(swizzle.x);
  const SEL green = static_cast<SEL>(swizzle.y);
  const SEL blue = static_cast<SEL>(swizzle.z);
  const SEL alpha = static_cast<SEL>(swizzle.w);

  if (alpha == SEL_X) {
    // Have to use either TEX_BC_Swizzle_WZYX or TEX_BC_Swizzle_WXYZ
    //
    // For the pre-defined border color values (white, opaque black,
    // transparent black), only the position of the alpha channel matters
    // (the RGB channels are all the same), so either enumeration works.
    // Not sure what happens with border color palettes.
    if (blue == SEL_Y) {
      return TEX_BC_Swizzle_WZYX;  // ABGR
    }
    const bool rgb_all_from_x = (red == SEL_X) && (green == SEL_X) && (blue == SEL_X);
    return rgb_all_from_x ? TEX_BC_Swizzle_XYZW   // RGBA
                          : TEX_BC_Swizzle_WXYZ;  // ARGB
  }

  if (red == SEL_X) {
    // Have to use either TEX_BC_Swizzle_XYZW or TEX_BC_Swizzle_XWYZ
    const bool keeps_rgba_order =
        (green == SEL_Y) || ((green == SEL_X) && (blue == SEL_X) && (alpha == SEL_W));
    return keeps_rgba_order ? TEX_BC_Swizzle_XYZW   // RGBA
                            : TEX_BC_Swizzle_XWYZ;  // RAGB
  }

  if (green == SEL_X) {
    return TEX_BC_Swizzle_YXWZ;  // GRAB
  }

  if (blue == SEL_X) {
    return TEX_BC_Swizzle_ZYXW;  // BGRA
  }

  return TEX_BC_Swizzle_XYZW;
}
|
||||
|
||||
|
||||
// Builds the image SRD from scratch for `image`.
//
// 1D buffer images (HSA_EXT_IMAGE_GEOMETRY_1DB) get a four-word buffer
// resource descriptor; all other geometries get an eight-word image resource
// descriptor whose swizzle mode and pitch come from AddrLib. In both cases
// srd[8..10] receive the channel type, channel order and width, and
// image.row_pitch / image.slice_pitch are updated.
//
// @param image  Image to describe; its desc, data, component, tile_mode and
//               current pitches are read, its srd[] and pitches are written.
// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR when no surface info could
//         be computed for the image.
hsa_status_t ImageManagerGfx12::PopulateImageSrd(Image& image) const {
  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);

  // Local memory is addressed relative to the local memory base.
  const void* image_data_addr = image.data;
  if (IsLocalMemory(image.data)) {
    image_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(image.data) - local_memory_base_address_);
  }

  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    SQ_BUF_RSRC_WORD0 word0;
    SQ_BUF_RSRC_WORD1 word1;
    SQ_BUF_RSRC_WORD2 word2;
    SQ_BUF_RSRC_WORD3 word3;

    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow32(image_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh32(image_data_addr);
    word1.f.STRIDE = image_prop.element_size;
    word1.f.SWIZZLE_ENABLE = 0;

    // Zero the whole dword before the bitfield write so that no indeterminate
    // bits can reach the descriptor, as is done for every other word.
    word2.val = 0;
    word2.f.NUM_RECORDS = image.desc.width * image_prop.element_size;

    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    word3.f.INDEX_STRIDE = image_prop.element_size;

    // New to GFX12
    //word3.f.WRITE_COMPRESS_ENABLE = 0;
    //word3.f.COMPRESSION_EN = 0;
    //word3.f.COMPRESSION_ACCESS_MODE = 0;

    word3.f.TYPE = ImageLut().MapGeometry(image.desc.geometry);

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;

    image.row_pitch = image.desc.width * image_prop.element_size;
    image.slice_pitch = image.row_pitch;
  } else {
    SQ_IMG_RSRC_WORD0 word0;
    SQ_IMG_RSRC_WORD1 word1;
    SQ_IMG_RSRC_WORD2 word2;
    SQ_IMG_RSRC_WORD3 word3;
    SQ_IMG_RSRC_WORD4 word4;

    ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0};

    const uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(
        image.component, image.desc, image.tile_mode,
        image.row_pitch, image.slice_pitch, out);
    if (swizzleMode == static_cast<uint32_t>(-1)) {
      return HSA_STATUS_ERROR;
    }

    assert((out.bpp / 8) == image_prop.element_size);

    const size_t row_pitch_size = out.pitch * image_prop.element_size;

    // Zero the whole dword before the bitfield write so that no indeterminate
    // bits can reach the descriptor, as is done for every other word.
    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow40Shift8(image_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh64Shift40(image_data_addr);

    // New to GFX12
    //word1.f.MAX_MIP = 0;
    //word1.f.BASE_LEVEL = 0;

    word1.f.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    // Only take the lowest 2 bits of (image.desc.width - 1)
    word1.f.WIDTH = BitSelect<0, 1>(image.desc.width - 1);

    word2.val = 0;
    // Take the high 14 bits of (image.desc.width - 1)
    word2.f.WIDTH_HI = BitSelect<2, 15>(image.desc.width - 1);
    word2.f.HEIGHT = image.desc.height ? image.desc.height - 1 : 0;

    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    //word3.f.NO_EDGE_CLAMP = 0; // New to GFX12
    //word3.f.LAST_LEVEL = 0; // New to GFX12
    word3.f.SW_MODE = swizzleMode;
    word3.f.BC_SWIZZLE = GetBcSwizzle(swizzle);
    word3.f.TYPE = ImageLut().MapGeometry(image.desc.geometry);

    const bool image_array =
        (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
         image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool image_3d = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    word4.val = 0;
    word4.f.DEPTH =
        (image_array)  // Doesn't hurt but isn't array_size already >0?
            ? std::max(image.desc.array_size, static_cast<size_t>(1)) - 1
            : (image_3d) ? image.desc.depth - 1 : 0;

    // For 1d, 2d and 2d-msaa this is pitch-1
    if (!image_array && !image_3d) {
      const uint32_t encPitch = out.pitch - 1;
      word4.f.DEPTH = encPitch & 0x1fff;           // 13 bits
      word4.f.PITCH_MSB = (encPitch >> 13) & 0x3;  // last 2 bits
    }

    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;
    image.srd[4] = word4.val;
    // Words 5-7 carry no settings here and are written as zero.
    image.srd[5] = 0;
    image.srd[6] = 0;
    image.srd[7] = 0;

    image.row_pitch = row_pitch_size;
    image.slice_pitch = out.sliceSize;
  }

  // Used by HSAIL shader ABI
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Re-targets an already populated SRD to a new image format.
//
// Stores `new_format` in image.desc, then rewrites only the format code and
// channel swizzle fields of the existing descriptor words (buffer word3 for
// 1DB images, image word1 and word3 otherwise) plus srd[8..10].
//
// @param image       Image whose descriptor is patched in place.
// @param new_format  Format to switch the image to.
// @return HSA_STATUS_SUCCESS (an unsupported format is only caught by the
//         asserts).
hsa_status_t ImageManagerGfx12::ModifyImageSrd(
    Image& image, hsa_ext_image_format_t& new_format) const {
  image.desc.format = new_format;

  const ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);

  // Both descriptor flavours need the same two lookups.
  const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
  const auto combined_format = GetCombinedFormat(image_prop.data_format, image_prop.data_type);

  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    // Buffer descriptor: swizzle and format both live in word3.
    SQ_BUF_RSRC_WORD3& buf_word3 = *reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&image.srd[3]);
    buf_word3.bits.DST_SEL_X = swizzle.x;
    buf_word3.bits.DST_SEL_Y = swizzle.y;
    buf_word3.bits.DST_SEL_Z = swizzle.z;
    buf_word3.bits.DST_SEL_W = swizzle.w;
    buf_word3.bits.FORMAT = combined_format;
  } else {
    // Image descriptor: format lives in word1, swizzle in word3.
    SQ_IMG_RSRC_WORD1& img_word1 = *reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&image.srd[1]);
    img_word1.bits.FORMAT = combined_format;

    SQ_IMG_RSRC_WORD3& img_word3 = *reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3]);
    img_word3.bits.DST_SEL_X = swizzle.x;
    img_word3.bits.DST_SEL_Y = swizzle.y;
    img_word3.bits.DST_SEL_Z = swizzle.z;
    img_word3.bits.DST_SEL_W = swizzle.w;
  }

  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast<uint32_t>(image.desc.width);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Builds the four-word sampler SRD from sampler.desc.
//
// The addressing mode is applied identically to the X, Y and Z clamp fields,
// the filter mode to both the magnification and minification filters; Z and
// mip filtering are disabled and the border colour is transparent black.
//
// @param sampler  Sampler whose desc is read and whose srd[0..3] is written.
// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT when the
//         addressing or filter mode is not one of the handled values (the
//         sampler is left untouched in that case).
hsa_status_t ImageManagerGfx12::PopulateSamplerSrd(Sampler& sampler) const {
  const hsa_ext_sampler_descriptor_t& sampler_descriptor = sampler.desc;

  // Translate the HSA addressing mode to the hardware clamp mode.
  int clamp_mode = 0;
  switch (sampler_descriptor.address_mode) {
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE:
      clamp_mode = static_cast<int>(SQ_TEX_CLAMP_LAST_TEXEL);
      break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER:
      clamp_mode = static_cast<int>(SQ_TEX_CLAMP_BORDER);
      break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT:
      clamp_mode = static_cast<int>(SQ_TEX_MIRROR);
      break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED:
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT:
      clamp_mode = static_cast<int>(SQ_TEX_WRAP);
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Translate the HSA filter mode to the hardware XY filter.
  int xy_filter = 0;
  switch (sampler_descriptor.filter_mode) {
    case HSA_EXT_SAMPLER_FILTER_MODE_NEAREST:
      xy_filter = static_cast<int>(SQ_TEX_XY_FILTER_POINT);
      break;
    case HSA_EXT_SAMPLER_FILTER_MODE_LINEAR:
      xy_filter = static_cast<int>(SQ_TEX_XY_FILTER_BILINEAR);
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  SQ_IMG_SAMP_WORD0 word0;
  word0.u32All = 0;
  word0.bits.CLAMP_X = clamp_mode;
  word0.bits.CLAMP_Y = word0.bits.CLAMP_X;
  word0.bits.CLAMP_Z = word0.bits.CLAMP_X;
  word0.bits.FORCE_UNNORMALIZED = (sampler_descriptor.coordinate_mode ==
                                   HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED);

  SQ_IMG_SAMP_WORD1 word1;
  word1.u32All = 0;
  word1.bits.MAX_LOD = 4095;

  SQ_IMG_SAMP_WORD2 word2;
  word2.u32All = 0;
  word2.bits.XY_MAG_FILTER = xy_filter;
  word2.bits.XY_MIN_FILTER = word2.bits.XY_MAG_FILTER;
  word2.bits.Z_FILTER = SQ_TEX_Z_FILTER_NONE;
  word2.bits.MIP_FILTER = SQ_TEX_MIP_FILTER_NONE;

  SQ_IMG_SAMP_WORD3 word3;
  word3.u32All = 0;
  // TODO: check this bit with HSAIL spec.
  word3.bits.BORDER_COLOR_TYPE = SQ_TEX_BORDER_COLOR_TRANS_BLACK;

  sampler.srd[0] = word0.u32All;
  sampler.srd[1] = word1.u32All;
  sampler.srd[2] = word2.u32All;
  sampler.srd[3] = word3.u32All;

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
Image::TileMode tileMode,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT& out) const {
|
||||
const ImageProperty image_prop =
|
||||
GetImageProperty(component, desc.format, desc.geometry);
|
||||
|
||||
const AddrFormat addrlib_format = GetAddrlibFormat(image_prop);
|
||||
|
||||
const uint32_t width = static_cast<uint32_t>(desc.width);
|
||||
const uint32_t height = static_cast<uint32_t>(desc.height);
|
||||
static const size_t kMinNumSlice = 1;
|
||||
const uint32_t num_slice = static_cast<uint32_t>(
|
||||
std::max(kMinNumSlice, std::max(desc.array_size, desc.depth)));
|
||||
|
||||
ADDR3_COMPUTE_SURFACE_INFO_INPUT in = {0};
|
||||
in.size = sizeof(ADDR3_COMPUTE_SURFACE_INFO_INPUT);
|
||||
in.format = addrlib_format;
|
||||
in.bpp = static_cast<unsigned int>(image_prop.element_size) * 8;
|
||||
in.width = width;
|
||||
in.height = height;
|
||||
in.numSlices = num_slice;
|
||||
in.pitchInElement = image_data_row_pitch / image_prop.element_size;
|
||||
|
||||
switch (desc.geometry) {
|
||||
case HSA_EXT_IMAGE_GEOMETRY_1D:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_1DB:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_1DA:
|
||||
in.resourceType = ADDR_RSRC_TEX_1D;
|
||||
break;
|
||||
|
||||
case HSA_EXT_IMAGE_GEOMETRY_2D:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_2DA:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
|
||||
in.resourceType = ADDR_RSRC_TEX_2D;
|
||||
break;
|
||||
|
||||
case HSA_EXT_IMAGE_GEOMETRY_3D:
|
||||
in.resourceType = ADDR_RSRC_TEX_3D;
|
||||
break;
|
||||
}
|
||||
in.flags.texture = 1;
|
||||
|
||||
if (tileMode == Image::TileMode::LINEAR)
|
||||
{
|
||||
in.swizzleMode = ADDR3_LINEAR;
|
||||
} else {
|
||||
|
||||
/*
|
||||
* AddrLib3 does not provide the best swizzle mode (unlike AddrLib2).
|
||||
* Instead, client has to request the list of possible swizzle mode and
|
||||
* then pick the best one for its needs (i.e. performance/space tradeoffs).
|
||||
*
|
||||
*/
|
||||
ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT swOut = { 0 };
|
||||
swOut.size = sizeof(ADDR3_GET_POSSIBLE_SWIZZLE_MODE_OUTPUT);
|
||||
|
||||
ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT swIn = { 0 };
|
||||
swIn.size = sizeof(ADDR3_GET_POSSIBLE_SWIZZLE_MODE_INPUT);
|
||||
swIn.flags = in.flags;
|
||||
swIn.resourceType = in.resourceType;
|
||||
swIn.bpp = in.bpp;
|
||||
swIn.width = in.width;
|
||||
swIn.height = in.height;
|
||||
swIn.numSlices = in.numSlices;
|
||||
swIn.numMipLevels = in.numMipLevels;
|
||||
swIn.numSamples = in.numSamples;
|
||||
/*
|
||||
* Cannot leave it to 0 like GFX11 Addr2GetPreferredSurfaceSetting method
|
||||
* as it triggers an ASSERT in AddrLib3 code.
|
||||
*
|
||||
* Setting it to 256K to allow for maximum number of swizzle mode in set
|
||||
* returned (similar behaviour as GFX11).
|
||||
*
|
||||
*/
|
||||
swIn.maxAlign = 256 * 1024;
|
||||
|
||||
|
||||
if (ADDR_OK != Addr3GetPossibleSwizzleModes(addr_lib_, &swIn, &swOut)) {
|
||||
debug_print("Addr3GetPossibleSwizzleModes failed!\n");
|
||||
return (uint32_t) -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove any modes that the client does not want (if any).
|
||||
*/
|
||||
//swOut.validModes.sw***** = 0;
|
||||
|
||||
|
||||
/*
|
||||
* Pick the "best" swizzle mode.
|
||||
*
|
||||
* This algorithm is based on behaviour in GFX11 AddrLib and on
|
||||
* GFX12 code in PAL (that is also based on the GFX11 behaviour).
|
||||
*
|
||||
* Ratio variables control the extra space that can be used to get a larger
|
||||
* swizzle mode.
|
||||
*
|
||||
* ratioLow:ratioHi meanings:
|
||||
*
|
||||
* 2:1 ratio - same behaviour as GFX11.
|
||||
* 3:2 ratio - would be equivalent if flag opt4space in GFX11 (not used in ROCr)
|
||||
* 1:1 ratio - minimum size, not necessarily best for performance
|
||||
*
|
||||
*/
|
||||
const UINT_32 ratioLow = 2;
|
||||
const UINT_32 ratioHigh = 1;
|
||||
|
||||
// Same behaviour as GFX11, remove linear if height is greater than 1.
|
||||
if (in.height > 1) {
|
||||
swOut.validModes.swLinear = 0;
|
||||
}
|
||||
|
||||
UINT_64 minSize = 0;
|
||||
Addr3SwizzleMode bestSwizzle = ADDR3_MAX_TYPE;
|
||||
|
||||
for (uint32_t i = ADDR3_LINEAR; i < ADDR3_MAX_TYPE; i++) {
|
||||
|
||||
if (swOut.validModes.value & (1 << i)) {
|
||||
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
|
||||
localOut.size = sizeof(ADDR3_COMPUTE_SURFACE_INFO_OUTPUT);
|
||||
|
||||
in.swizzleMode = (Addr3SwizzleMode) i;
|
||||
|
||||
if (ADDR_OK != Addr3ComputeSurfaceInfo(addr_lib_, &in, &localOut)) {
|
||||
// Should not happen, if it does, ignore this swizzle mode.
|
||||
debug_print("Addr3ComputeSurfaceInfo failed!\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
UINT_64 surfaceSize = localOut.surfSize;
|
||||
|
||||
if (bestSwizzle == ADDR3_MAX_TYPE) {
|
||||
minSize = surfaceSize;
|
||||
bestSwizzle = (Addr3SwizzleMode) i;
|
||||
} else if ((surfaceSize * ratioHigh) <= (minSize * ratioLow)) {
|
||||
bestSwizzle = (Addr3SwizzleMode) i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bestSwizzle < ADDR3_MAX_TYPE) {
|
||||
in.swizzleMode = (Addr3SwizzleMode) bestSwizzle;
|
||||
} else {
|
||||
debug_print("Unable to find a valid swizzleMode for the surface!\n");
|
||||
return (uint32_t) -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
out.size = sizeof(ADDR3_COMPUTE_SURFACE_INFO_OUTPUT);
|
||||
|
||||
if (ADDR_OK != Addr3ComputeSurfaceInfo(addr_lib_, &in, &out)) {
|
||||
return (uint32_t)(-1);
|
||||
}
|
||||
if (out.surfSize == 0) {
|
||||
return (uint32_t)(-1);
|
||||
}
|
||||
|
||||
return in.swizzleMode;
|
||||
}
|
||||
|
||||
/// @brief Fill a region of an image with a pattern by running the blit kernel.
///
/// Some formats need the image SRD to be patched before the blit kernel runs.
/// Those SRD words are modified in place and written back to their original
/// values before this function returns.
///
/// @param image   Image to fill; its SRD words may be modified temporarily.
/// @param pattern Fill value. For the sRGB channel orders it is read as four
///                floats.
/// @param region  Region passed through to the blit kernel.
/// @return HSA_STATUS_ERROR_OUT_OF_RESOURCES when the blit queue is not
///         available, otherwise the status reported by the blit kernel.
hsa_status_t ImageManagerGfx12::FillImage(const Image& image, const void* pattern,
                                          const hsa_ext_image_region_t& region) {
  if (BlitQueueInit().queue_ == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // The SRD is patched through this mutable alias of the caller's image.
  Image* target = const_cast<Image*>(&image);

  // UNORM_SHORT_101010: make the GPU ignore the last two bits (alpha bits)
  // by forcing the W destination select to zero. The SRD word layout depends
  // on whether the image is a 1D buffer or a regular image resource.
  SQ_BUF_RSRC_WORD3* buffer_word3 = NULL;
  SQ_IMG_RSRC_WORD3* image_word3 = NULL;
  uint32_t saved_dst_sel_w = 0;

  const bool is_unorm_101010 =
      target->desc.format.channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010;
  if (is_unorm_101010) {
    if (target->desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB) {
      image_word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&target->srd[3]);
      saved_dst_sel_w = image_word3->bits.DST_SEL_W;
      image_word3->bits.DST_SEL_W = SEL_0;
    } else {
      buffer_word3 = reinterpret_cast<SQ_BUF_RSRC_WORD3*>(&target->srd[3]);
      saved_dst_sel_w = buffer_word3->bits.DST_SEL_W;
      buffer_word3->bits.DST_SEL_W = SEL_0;
    }
  }

  // sRGB channel orders: there is no write support for SRGBA images, so the
  // pattern is converted to standard form and the image is treated as a
  // plain UNORM image for the duration of the fill.
  const auto order = target->desc.format.channel_order;
  const bool is_srgb_order = (order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA) ||
                             (order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB) ||
                             (order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX) ||
                             (order == HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA);

  SQ_IMG_RSRC_WORD1* image_word1 = NULL;
  uint32_t saved_format = 0;
  float converted_pattern[4] = {0};
  const void* effective_pattern = pattern;

  if (is_srgb_order) {
    const float* linear = reinterpret_cast<const float*>(pattern);
    // Only the three colour channels are converted; the fourth is copied as is.
    for (int channel = 0; channel < 3; ++channel) {
      converted_pattern[channel] = LinearToStandardRGB(linear[channel]);
    }
    converted_pattern[3] = linear[3];
    effective_pattern = converted_pattern;

    ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);

    image_word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&target->srd[1]);
    saved_format = image_word1->bits.FORMAT;
    image_word1->bits.FORMAT = GetCombinedFormat(image_prop.data_format, TYPE_UNORM);
  }

  const hsa_status_t status = ImageRuntime::instance()->blit_kernel().FillImage(
      blit_queue_, blit_code_catalog_, *target, effective_pattern, region);

  // Put back every SRD field that was overridden above.
  if (buffer_word3 != NULL) {
    buffer_word3->bits.DST_SEL_W = saved_dst_sel_w;
  }

  if (image_word3 != NULL) {
    image_word3->bits.DST_SEL_W = saved_dst_sel_w;
  }

  if (image_word1 != NULL) {
    image_word1->bits.FORMAT = saved_format;
  }

  return status;
}
|
||||
|
||||
} // namespace image
|
||||
} // namespace rocr
|
||||
+101
@@ -0,0 +1,101 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef EXT_IMAGE_IMAGE_MANAGER_GFX12_H_
|
||||
#define EXT_IMAGE_IMAGE_MANAGER_GFX12_H_
|
||||
|
||||
#include "addrlib/inc/addrinterface.h"
|
||||
#include "image_lut_gfx11.h"
|
||||
#include "image_manager_kv.h"
|
||||
|
||||
namespace rocr {
|
||||
namespace image {
|
||||
|
||||
class ImageManagerGfx12 : public ImageManagerKv {
|
||||
public:
|
||||
ImageManagerGfx12();
|
||||
virtual ~ImageManagerGfx12();
|
||||
|
||||
/// @brief Calculate the size and alignment of the backing storage of an
|
||||
/// image.
|
||||
virtual hsa_status_t CalculateImageSizeAndAlignment(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
hsa_ext_image_data_layout_t image_data_layout,
|
||||
size_t image_data_row_pitch, size_t image_data_slice_pitch,
|
||||
hsa_ext_image_data_info_t& image_info) const;
|
||||
|
||||
/// @brief Fill image structure with device specific image object.
|
||||
virtual hsa_status_t PopulateImageSrd(Image& image) const;
|
||||
|
||||
/// @brief Fill image structure with device specific image object using the given format.
|
||||
virtual hsa_status_t PopulateImageSrd(Image& image, const metadata_amd_t* desc) const;
|
||||
|
||||
/// @brief Modify device specific image object according to the specified
|
||||
/// new format.
|
||||
virtual hsa_status_t ModifyImageSrd(Image& image,
|
||||
hsa_ext_image_format_t& new_format) const;
|
||||
|
||||
/// @brief Fill sampler structure with device specific sampler object.
|
||||
virtual hsa_status_t PopulateSamplerSrd(Sampler& sampler) const;
|
||||
|
||||
/// @brief Fill image backing storage using agent copy.
|
||||
virtual hsa_status_t FillImage(const Image& image, const void* pattern,
|
||||
const hsa_ext_image_region_t& region);
|
||||
protected:
|
||||
uint32_t GetAddrlibSurfaceInfoNv(hsa_agent_t component,
|
||||
const hsa_ext_image_descriptor_t& desc,
|
||||
Image::TileMode tileMode,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT& out) const;
|
||||
|
||||
bool IsLocalMemory(const void* address) const;
|
||||
virtual const ImageLutGfx11& ImageLut() const { return image_lut_gfx11; };
|
||||
|
||||
private:
|
||||
ImageLutGfx11 image_lut_gfx11;
|
||||
DISALLOW_COPY_AND_ASSIGN(ImageManagerGfx12);
|
||||
};
|
||||
|
||||
} // namespace image
|
||||
} // namespace rocr
|
||||
#endif // EXT_IMAGE_IMAGE_MANAGER_GFX12_H_
|
||||
@@ -55,6 +55,7 @@
|
||||
#include "image_manager_ai.h"
|
||||
#include "image_manager_nv.h"
|
||||
#include "image_manager_gfx11.h"
|
||||
#include "image_manager_gfx12.h"
|
||||
#include "device_info.h"
|
||||
|
||||
namespace rocr {
|
||||
@@ -110,14 +111,22 @@ hsa_status_t ImageRuntime::CreateImageManager(hsa_agent_t agent, void* data) {
|
||||
|
||||
ImageManager* image_manager;
|
||||
|
||||
if (major_ver >= 11) {
|
||||
switch (major_ver) {
|
||||
case 12:
|
||||
image_manager = new ImageManagerGfx12();
|
||||
break;
|
||||
case 11:
|
||||
image_manager = new ImageManagerGfx11();
|
||||
} else if (major_ver >= 10) {
|
||||
break;
|
||||
case 10:
|
||||
image_manager = new ImageManagerNv();
|
||||
} else if (major_ver >= 9) {
|
||||
break;
|
||||
case 9:
|
||||
image_manager = new ImageManagerAi();
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
image_manager = new ImageManagerKv();
|
||||
break;
|
||||
}
|
||||
hsa_error_code = image_manager->Initialize(agent);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user