rocr: Add support for Mipmapped Array (#1847)

SWDEV-539526 - Add support for Mipmapped Array in Rocr

Add support for Mipmapped Array functionality in the Rocr Runtime, enabling GPU applications to work with multi-level texture mipmaps. The implementation introduces new public APIs for creating, querying, and managing mipmapped arrays across different GPU architectures.

Signed-off-by: Apurv Mishra <Apurv.Mishra@amd.com>
Co-authored-by: Shweta Khatri <shweta.khatri@amd.com>
Co-authored-by: taosang2 <tao.sang@amd.com>
Этот коммит содержится в:
Apurv Mishra
2026-01-08 18:14:39 -05:00
коммит произвёл GitHub
родитель 8b529e7b29
Коммит be375c2dbf
21 изменённых файлов: 2918 добавлений и 101 удалений
+5
Просмотреть файл
@@ -55,6 +55,11 @@ namespace rocr {
namespace core {
// Internal image-extension dispatch table: inherits every slot of the public
// ImageExtTable and adds function-pointer slots for further entry points.
// Each slot's pointer type is derived from the matching public declaration
// via decltype, so the slot signature cannot drift from the API prototype.
struct ImageExtTableInternal : public ImageExtTable {
// Slot for hsa_amd_image_get_info_max_dim.
decltype(::hsa_amd_image_get_info_max_dim)* hsa_amd_image_get_info_max_dim_fn;
// V2 unified APIs for images and mipmaps
// Slot for hsa_ext_image_data_get_info_v2.
decltype(::hsa_ext_image_data_get_info_v2)* hsa_ext_image_data_get_info_v2_fn;
// Slot for hsa_ext_image_create_v2.
decltype(::hsa_ext_image_create_v2)* hsa_ext_image_create_v2_fn;
// Slot for hsa_ext_image_destroy_v2.
decltype(::hsa_ext_image_destroy_v2)* hsa_ext_image_destroy_v2_fn;
// Slot for hsa_ext_image_mipmap_array_get_level.
decltype(::hsa_ext_image_mipmap_array_get_level)* hsa_ext_image_mipmap_array_get_level_fn;
};
struct PcSamplingExtTableInternal : public PcSamplingExtTable {};
+33
Просмотреть файл
@@ -476,6 +476,39 @@ hsa_status_t hsa_ext_image_create_with_layout(
image);
}
/// Public entry point for hsa_ext_image_data_get_info_v2.
/// Forwards all arguments unchanged to the hsa_ext_image_data_get_info_v2_fn
/// slot of the runtime singleton's image extension table and returns the
/// status produced by that slot.
hsa_status_t hsa_ext_image_data_get_info_v2(
    hsa_agent_t agent, const hsa_ext_image_descriptor_v2_t* image_descriptor,
    hsa_access_permission_t access_permission,
    hsa_ext_image_data_info_t* image_data_info) {
  auto& image_table = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return image_table.hsa_ext_image_data_get_info_v2_fn(agent, image_descriptor,
                                                       access_permission, image_data_info);
}
/// Public entry point for hsa_ext_image_create_v2.
/// Forwards all arguments unchanged to the hsa_ext_image_create_v2_fn slot of
/// the runtime singleton's image extension table and returns its status.
hsa_status_t hsa_ext_image_create_v2(hsa_agent_t agent,
                                     const hsa_ext_image_descriptor_v2_t* image_descriptor,
                                     const void* image_data,
                                     hsa_access_permission_t access_permission,
                                     hsa_ext_image_t* image) {
  auto& image_table = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return image_table.hsa_ext_image_create_v2_fn(agent, image_descriptor, image_data,
                                                access_permission, image);
}
/// Public entry point for hsa_ext_image_destroy_v2.
/// Forwards both arguments unchanged to the hsa_ext_image_destroy_v2_fn slot
/// of the runtime singleton's image extension table and returns its status.
hsa_status_t hsa_ext_image_destroy_v2(hsa_agent_t agent, hsa_ext_image_t image) {
  auto& image_table = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return image_table.hsa_ext_image_destroy_v2_fn(agent, image);
}
/// Public entry point for hsa_ext_image_mipmap_array_get_level.
/// Forwards all arguments unchanged to the
/// hsa_ext_image_mipmap_array_get_level_fn slot of the runtime singleton's
/// image extension table and returns its status.
hsa_status_t hsa_ext_image_mipmap_array_get_level(hsa_agent_t agent,
                                                  const hsa_ext_image_t* mipmap_array,
                                                  uint32_t mip_level,
                                                  hsa_ext_image_t* level_view) {
  auto& image_table = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return image_table.hsa_ext_image_mipmap_array_get_level_fn(agent, mipmap_array, mip_level,
                                                             level_view);
}
hsa_status_t HSA_API hsa_ven_amd_pcs_iterate_configuration(
hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
void* callback_data) {
+5 -1
Просмотреть файл
@@ -223,4 +223,8 @@ EXPORTS
hsa_amd_queue_get_info
hsa_amd_enable_logging
hsa_amd_signal_wait_all
hsa_amd_portable_export_dmabuf_v2
hsa_amd_portable_export_dmabuf_v2
hsa_ext_image_mipmap_array_get_level
hsa_ext_image_create_v2
hsa_ext_image_data_get_info_v2
hsa_ext_image_destroy_v2
+4
Просмотреть файл
@@ -262,6 +262,10 @@ global:
hsa_amd_portable_export_dmabuf_v2;
hsa_amd_ais_file_write;
hsa_amd_ais_file_read;
hsa_ext_image_mipmap_array_get_level;
hsa_ext_image_create_v2;
hsa_ext_image_data_get_info_v2;
hsa_ext_image_destroy_v2;
local:
*;
};
+195
Просмотреть файл
@@ -369,6 +369,48 @@ hsa_status_t hsa_ext_image_create_with_layout(
CATCH;
}
/// @brief Query size/alignment for a single-level image described by a V2
///        descriptor with an explicit (linear) data layout.
///
/// A mipmap_levels value of 0 is treated as 1; descriptors requesting more
/// than one level are rejected because this entry point only accepts
/// HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR.
///
/// @return HSA_STATUS_ERROR_INVALID_AGENT when agent.handle is 0;
///         HSA_STATUS_ERROR_INVALID_ARGUMENT when a pointer is NULL, the
///         access permission is outside [RO, RW], the layout is not LINEAR,
///         or more than one mip level is requested; otherwise the status of
///         ImageRuntime::GetImageSizeAndAlignment.
hsa_status_t hsa_ext_image_data_get_info_with_layout_v2(
    hsa_agent_t agent, const hsa_ext_image_descriptor_v2_t* image_descriptor,
    hsa_access_permission_t access_permission, hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch, size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t* image_data_info) {
  TRY;
  if (agent.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  const bool pointers_present = (image_descriptor != NULL) && (image_data_info != NULL);
  const bool permission_in_range = (access_permission >= HSA_ACCESS_PERMISSION_RO) &&
                                   (access_permission <= HSA_ACCESS_PERMISSION_RW);
  const bool layout_is_linear = (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR);
  if (!(pointers_present && permission_in_range && layout_is_linear)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Zero means "unspecified" and is normalised to a single level.
  uint32_t level_count = image_descriptor->mipmap_levels;
  if (image_descriptor->mipmap_levels == 0) {
    level_count = 1;
  }
  if (level_count > 1) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // The internal runtime works on the V1 descriptor; copy the shared fields.
  hsa_ext_image_descriptor_t legacy_desc = {};
  legacy_desc.format = image_descriptor->format;
  legacy_desc.array_size = image_descriptor->array_size;
  legacy_desc.depth = image_descriptor->depth;
  legacy_desc.height = image_descriptor->height;
  legacy_desc.width = image_descriptor->width;
  legacy_desc.geometry = image_descriptor->geometry;

  enforceDefaultPitch(agent, &legacy_desc, image_data_row_pitch, image_data_slice_pitch);

  ImageRuntime* const runtime = ImageRuntime::instance();
  return runtime->GetImageSizeAndAlignment(agent, legacy_desc, image_data_layout,
                                           image_data_row_pitch, image_data_slice_pitch,
                                           *image_data_info);
  CATCH;
}
hsa_status_t hsa_amd_image_create(hsa_agent_t agent,
const hsa_ext_image_descriptor_t* image_descriptor,
const hsa_amd_image_descriptor_t* image_layout,
@@ -388,6 +430,153 @@ hsa_status_t hsa_amd_image_create(hsa_agent_t agent,
CATCH;
}
/// @brief Create an image or a mipmapped array from a V2 descriptor together
///        with a vendor (AMD) layout descriptor.
///
/// A mipmap_levels value of 0 is treated as 1. More than one level dispatches
/// to ImageRuntime::CreateMipmapArrayHandleWithLayout; otherwise the call goes
/// to ImageRuntime::CreateImageHandleWithLayout.
///
/// @param agent             Agent the image is created for; handle must be non-zero.
/// @param image_descriptor  V2 image descriptor; must not be NULL.
/// @param image_layout      Vendor layout descriptor; must not be NULL.
/// @param image_data        Backing storage for the image; must not be NULL.
/// @param access_permission Must lie in [HSA_ACCESS_PERMISSION_RO, HSA_ACCESS_PERMISSION_RW].
/// @param image             Receives the created handle; must not be NULL.
/// @return HSA_STATUS_ERROR_INVALID_AGENT or HSA_STATUS_ERROR_INVALID_ARGUMENT
///         on failed validation, otherwise the status of the ImageRuntime call.
hsa_status_t hsa_amd_image_create_v2(hsa_agent_t agent,
                                     const hsa_ext_image_descriptor_v2_t* image_descriptor,
                                     const hsa_amd_image_descriptor_t* image_layout,
                                     const void* image_data,
                                     hsa_access_permission_t access_permission,
                                     hsa_ext_image_t* image) {
  TRY;
  if (agent.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  // image_layout is handed straight to the runtime, so reject NULL here; the
  // permission range check mirrors hsa_ext_image_data_get_info_with_layout_v2.
  if (image_descriptor == NULL || image_layout == NULL || image_data == NULL ||
      image == NULL ||
      access_permission < HSA_ACCESS_PERMISSION_RO ||
      access_permission > HSA_ACCESS_PERMISSION_RW) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Convert V2 descriptor to V1 for internal use
  hsa_ext_image_descriptor_t desc_v1 = {};
  desc_v1.geometry = image_descriptor->geometry;
  desc_v1.width = image_descriptor->width;
  desc_v1.height = image_descriptor->height;
  desc_v1.depth = image_descriptor->depth;
  desc_v1.array_size = image_descriptor->array_size;
  desc_v1.format = image_descriptor->format;

  // Zero levels means "unspecified" and is treated as a single level.
  const uint32_t mipmap_levels =
      (image_descriptor->mipmap_levels == 0) ? 1 : image_descriptor->mipmap_levels;

  if (mipmap_levels > 1) {
    // Mipmapped array path with AMD layout
    return ImageRuntime::instance()->CreateMipmapArrayHandleWithLayout(
        agent, desc_v1, image_layout, image_data, access_permission, mipmap_levels, *image);
  }

  // Regular single-level image path with AMD layout
  return ImageRuntime::instance()->CreateImageHandleWithLayout(
      agent, desc_v1, image_layout, image_data, access_permission, *image);
  CATCH;
}
//---------------------------------------------------------------------------//
// V2 API Implementations (Unified Mipmap Support)
//---------------------------------------------------------------------------//
/// @brief Query the storage size and alignment for an image or mipmapped
///        array described by a V2 descriptor, using the opaque data layout.
///
/// A mipmap_levels value of 0 is treated as 1. For more than one level the
/// result comes from ImageRuntime::GetMipmapArraySizeAndAlignment, which is
/// handed only the size and alignment members of @p image_data_info; for a
/// single level ImageRuntime::GetImageSizeAndAlignment fills the structure.
///
/// @param agent             Agent to query; handle must be non-zero.
/// @param image_descriptor  V2 image descriptor; must not be NULL.
/// @param access_permission Must lie in [HSA_ACCESS_PERMISSION_RO, HSA_ACCESS_PERMISSION_RW].
/// @param image_data_info   Receives the result; must not be NULL.
/// @return HSA_STATUS_ERROR_INVALID_AGENT or HSA_STATUS_ERROR_INVALID_ARGUMENT
///         on failed validation, otherwise the status of the ImageRuntime call.
hsa_status_t hsa_ext_image_data_get_info_v2(hsa_agent_t agent,
                                            const hsa_ext_image_descriptor_v2_t* image_descriptor,
                                            hsa_access_permission_t access_permission,
                                            hsa_ext_image_data_info_t* image_data_info) {
  TRY;
  if (agent.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  // Same permission range check as hsa_ext_image_data_get_info_with_layout_v2,
  // so both query entry points reject out-of-range values identically.
  if (image_descriptor == NULL || image_data_info == NULL ||
      access_permission < HSA_ACCESS_PERMISSION_RO ||
      access_permission > HSA_ACCESS_PERMISSION_RW) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Check if this is a mipmap request (0 is treated as a single level)
  const uint32_t mipmap_levels =
      (image_descriptor->mipmap_levels == 0) ? 1 : image_descriptor->mipmap_levels;

  // Convert V2 descriptor to V1 for internal use
  hsa_ext_image_descriptor_t desc_v1 = {};
  desc_v1.geometry = image_descriptor->geometry;
  desc_v1.width = image_descriptor->width;
  desc_v1.height = image_descriptor->height;
  desc_v1.depth = image_descriptor->depth;
  desc_v1.array_size = image_descriptor->array_size;
  desc_v1.format = image_descriptor->format;

  if (mipmap_levels > 1) {
    // Mipmapped array path
    return ImageRuntime::instance()->GetMipmapArraySizeAndAlignment(
        agent, desc_v1, mipmap_levels, HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0,
        image_data_info->size, image_data_info->alignment);
  }

  // Regular image path (single level)
  return ImageRuntime::instance()->GetImageSizeAndAlignment(
      agent, desc_v1, HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0, *image_data_info);
  CATCH;
}
/// @brief Create an image or a mipmapped array from a V2 descriptor using the
///        opaque data layout.
///
/// A mipmap_levels value of 0 is treated as 1. More than one level dispatches
/// to ImageRuntime::CreateMipmapArrayHandle; otherwise the call goes to
/// ImageRuntime::CreateImageHandle.
///
/// @param agent             Agent the image is created for; handle must be non-zero.
/// @param image_descriptor  V2 image descriptor; must not be NULL.
/// @param image_data        Backing storage for the image; must not be NULL.
/// @param access_permission Must lie in [HSA_ACCESS_PERMISSION_RO, HSA_ACCESS_PERMISSION_RW].
/// @param image             Receives the created handle; must not be NULL.
/// @return HSA_STATUS_ERROR_INVALID_AGENT or HSA_STATUS_ERROR_INVALID_ARGUMENT
///         on failed validation, otherwise the status of the ImageRuntime call.
hsa_status_t hsa_ext_image_create_v2(hsa_agent_t agent,
                                     const hsa_ext_image_descriptor_v2_t* image_descriptor,
                                     const void* image_data,
                                     hsa_access_permission_t access_permission,
                                     hsa_ext_image_t* image) {
  TRY;
  if (agent.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  // Same permission range check as hsa_ext_image_data_get_info_with_layout_v2.
  if (image_descriptor == NULL || image_data == NULL || image == NULL ||
      access_permission < HSA_ACCESS_PERMISSION_RO ||
      access_permission > HSA_ACCESS_PERMISSION_RW) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Check if this is a mipmap request (0 is treated as a single level)
  const uint32_t mipmap_levels =
      (image_descriptor->mipmap_levels == 0) ? 1 : image_descriptor->mipmap_levels;

  // Convert V2 descriptor to V1 for internal use
  hsa_ext_image_descriptor_t desc_v1 = {};
  desc_v1.geometry = image_descriptor->geometry;
  desc_v1.width = image_descriptor->width;
  desc_v1.height = image_descriptor->height;
  desc_v1.depth = image_descriptor->depth;
  desc_v1.array_size = image_descriptor->array_size;
  desc_v1.format = image_descriptor->format;

  if (mipmap_levels > 1) {
    // Mipmapped array path
    return ImageRuntime::instance()->CreateMipmapArrayHandle(
        agent, desc_v1, image_data, access_permission, mipmap_levels,
        HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0, *image);
  }

  // Regular image path (single level)
  return ImageRuntime::instance()->CreateImageHandle(
      agent, desc_v1, image_data, access_permission,
      HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0, *image);
  CATCH;
}
/// @brief Destroy a handle created through the V2 image API.
///
/// The same ImageRuntime::DestroyImageHandle call is used whether the handle
/// refers to a regular image or to a mipmapped array.
///
/// @return HSA_STATUS_ERROR_INVALID_AGENT when agent.handle is 0, otherwise
///         the status of ImageRuntime::DestroyImageHandle.
hsa_status_t hsa_ext_image_destroy_v2(hsa_agent_t agent, hsa_ext_image_t image) {
  TRY;
  const bool agent_is_valid = (agent.handle != 0);
  if (agent_is_valid) {
    ImageRuntime* const runtime = ImageRuntime::instance();
    return runtime->DestroyImageHandle(image);
  }
  return HSA_STATUS_ERROR_INVALID_AGENT;
  CATCH;
}
// per-level view retrieval implementation
/// @brief Retrieve a handle that views a single level of a mipmapped array.
///
/// @param agent            Agent owning the array; handle must be non-zero.
/// @param mipmapped_array  Handle of the parent mipmapped array; must not be NULL.
/// @param mip_level        Level to view; passed through to the runtime unchanged.
/// @param level_image_out  Receives the per-level handle; must not be NULL.
/// @return HSA_STATUS_ERROR_INVALID_AGENT when agent.handle is 0,
///         HSA_STATUS_ERROR_INVALID_ARGUMENT when either pointer is NULL,
///         otherwise the status of ImageRuntime::GetMipmapArrayLevelHandle.
hsa_status_t HSA_API hsa_ext_image_mipmap_array_get_level(hsa_agent_t agent,
                                                          const hsa_ext_image_t* mipmapped_array,
                                                          uint32_t mip_level,
                                                          hsa_ext_image_t* level_image_out) {
  TRY;
  // Reject a null agent up front, as the other V2 entry points do.
  if (agent.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  if (mipmapped_array == NULL || level_image_out == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return ImageRuntime::instance()->GetMipmapArrayLevelHandle(agent, *mipmapped_array, mip_level,
                                                             *level_image_out);
  CATCH;
}
void LoadImage(core::ImageExtTableInternal* image_api,
decltype(::hsa_amd_image_create)** interface_api) {
image_api->hsa_ext_image_get_capability_fn = hsa_ext_image_get_capability;
@@ -420,6 +609,12 @@ void LoadImage(core::ImageExtTableInternal* image_api,
image_api->hsa_ext_sampler_create_v2_fn = hsa_ext_sampler_create_v2;
// V2 unified APIs for images and mipmaps
image_api->hsa_ext_image_data_get_info_v2_fn = hsa_ext_image_data_get_info_v2;
image_api->hsa_ext_image_create_v2_fn = hsa_ext_image_create_v2;
image_api->hsa_ext_image_destroy_v2_fn = hsa_ext_image_destroy_v2;
image_api->hsa_ext_image_mipmap_array_get_level_fn = hsa_ext_image_mipmap_array_get_level;
*interface_api = hsa_amd_image_create;
}
+32
Просмотреть файл
@@ -118,6 +118,38 @@ void Sampler::Destroy(const Sampler* sampler) {
assert(status == HSA_STATUS_SUCCESS);
}
/// @brief Allocate and construct a MipmappedArray in the image runtime's
///        kernarg pool and grant @p agent access to it.
///
/// @return Pointer to the constructed object, or nullptr when the pool
///         allocation or the allow-access call fails (in the latter case the
///         object is destroyed before returning).
MipmappedArray* MipmappedArray::Create(hsa_agent_t agent) {
  void* storage = NULL;
  hsa_status_t status = AMD::hsa_amd_memory_pool_allocate(
      ImageRuntime::instance()->kernarg_pool(), sizeof(MipmappedArray), 0, &storage);
  assert(status == HSA_STATUS_SUCCESS);
  if (status != HSA_STATUS_SUCCESS) {
    return nullptr;
  }

  // Construct the object in the pool memory just obtained.
  MipmappedArray* const created = new (storage) MipmappedArray();

  // Allow agent access to the image data
  status = AMD::hsa_amd_agents_allow_access(1, &agent, nullptr, created);
  if (status == HSA_STATUS_SUCCESS) {
    return created;
  }

  MipmappedArray::Destroy(created);
  return nullptr;
}
void MipmappedArray::Destroy(const MipmappedArray* mipmapped_array) {
assert(mipmapped_array != NULL);
mipmapped_array->~MipmappedArray();
hsa_status_t status = AMD::hsa_amd_memory_pool_free(
const_cast<MipmappedArray*>(mipmapped_array));
assert(status == HSA_STATUS_SUCCESS);
}
// Out-of-line default constructor; the body is intentionally empty.
ImageManager::ImageManager() {}
// Out-of-line destructor; the body is intentionally empty.
ImageManager::~ImageManager() {}
+20
Просмотреть файл
@@ -48,6 +48,7 @@
#include "inc/hsa_ext_image.h"
#include "resource.h"
#include "util.h"
#include "image/addrlib/inc/addrinterface.h"
namespace rocr {
namespace image {
@@ -82,6 +83,7 @@ class ImageManager {
virtual hsa_status_t CalculateImageSizeAndAlignment(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
// Number of mip levels of the surface. The ImageManagerAi implementation
// forwards it to GetAddrlibSurfaceInfoAi, which stores it in the addrlib
// input's numMipLevels field.
uint32_t num_mipmap_levels,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info) const = 0;
@@ -120,6 +122,24 @@ class ImageManager {
virtual hsa_status_t FillImage(const Image& image, const void* pattern,
const hsa_ext_image_region_t& region);
/// @brief Get the address library handle
virtual ADDR_HANDLE GetAddrLib() const = 0;
/// @brief Fill mipmap structure with device specific mipmapped array object.
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const = 0;
/// @brief Fill mipmap structure with pre-computed AMD metadata descriptor.
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const = 0;
/// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields
virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view,
const MipmappedArray& mipmap_array, uint32_t mip_level) const = 0;
/// @brief Debug helper: dump the SRD words pointed to by @p srd.
/// The ImageManagerAi implementation prints 12 words via printf and
/// tolerates a null pointer.
virtual void printSRDDetailed(const uint32_t* srd) const = 0;
/// @brief Debug helper: print a label for a DST_SEL channel selector value.
virtual void printChannelSelect(uint32_t sel) const = 0;
/// @brief Debug helper: print a label for an SRD resource TYPE value.
virtual void printResourceType(uint32_t type) const = 0;
/// @brief Debug helper: print a label for an SRD SW_MODE (swizzle mode) value.
virtual void printSwizzleMode(uint32_t sw_mode) const = 0;
protected:
static uint16_t FloatToHalf(float in);
+411 -7
Просмотреть файл
@@ -87,6 +87,7 @@ ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD3)
hsa_status_t ImageManagerAi::CalculateImageSizeAndAlignment(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
uint32_t num_mipmap_levels,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info) const {
@@ -102,8 +103,8 @@ hsa_status_t ImageManagerAi::CalculateImageSizeAndAlignment(
desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)?
Image::TileMode::TILED : Image::TileMode::LINEAR;
}
if (GetAddrlibSurfaceInfoAi(component, desc, tileMode,
image_data_row_pitch, image_data_slice_pitch, out) == (uint32_t)(-1)) {
if (GetAddrlibSurfaceInfoAi(component, desc, num_mipmap_levels, tileMode,
image_data_row_pitch, image_data_slice_pitch, out) == (uint32_t)(-1)) {
return HSA_STATUS_ERROR;
}
@@ -196,7 +197,7 @@ hsa_status_t ImageManagerAi::PopulateImageSrd(Image& image, const metadata_amd_t
((SQ_IMG_RSRC_WORD3*)(&image.srd[3]))->bits.TYPE =
ImageLut().MapGeometry(image.desc.geometry);
}
// Imported metadata holds the offset to metadata, add the image base address.
uintptr_t meta = uintptr_t(((SQ_IMG_RSRC_WORD5*)(&image.srd[5]))->bits.META_DATA_ADDRESS_HI) << 40;
meta |= uintptr_t(((SQ_IMG_RSRC_WORD7*)(&image.srd[7]))->bits.META_DATA_ADDRESS) << 8;
@@ -341,8 +342,8 @@ hsa_status_t ImageManagerAi::PopulateImageSrd(Image& image) const {
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
uint32_t swizzleMode = GetAddrlibSurfaceInfoAi(image.component, image.desc, image.tile_mode,
image.row_pitch, image.slice_pitch, out);
uint32_t swizzleMode = GetAddrlibSurfaceInfoAi(image.component, image.desc,
1, image.tile_mode, image.row_pitch, image.slice_pitch, out);
if (swizzleMode == (uint32_t)(-1)) {
return HSA_STATUS_ERROR;
}
@@ -499,6 +500,7 @@ hsa_status_t ImageManagerAi::PopulateSamplerSrd(Sampler& sampler) const {
uint32_t ImageManagerAi::GetAddrlibSurfaceInfoAi(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
uint32_t num_mipmap_levels,
Image::TileMode tileMode,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
@@ -521,7 +523,8 @@ uint32_t ImageManagerAi::GetAddrlibSurfaceInfoAi(
in.width = width;
in.height = height;
in.numSlices = num_slice;
in.pitchInElement = image_data_row_pitch / image_prop.element_size;
in.numMipLevels = num_mipmap_levels;
switch(desc.geometry) {
case HSA_EXT_IMAGE_GEOMETRY_1D:
case HSA_EXT_IMAGE_GEOMETRY_1DB:
@@ -583,7 +586,7 @@ uint32_t ImageManagerAi::GetAddrlibSurfaceInfoAi(
prefSettingsInput.resourceType = in.resourceType;
// Disallow all swizzles but linear.
if (tileMode == Image::TileMode::LINEAR)
if (tileMode == Image::TileMode::LINEAR)
{
prefSettingsInput.forbiddenBlock.macroThin4KB = 1;
prefSettingsInput.forbiddenBlock.macroThick4KB = 1;
@@ -611,5 +614,406 @@ uint32_t ImageManagerAi::GetAddrlibSurfaceInfoAi(
return in.swizzleMode;
}
/// @brief Build the shader resource descriptor (SRD) words for a mipmapped array.
///
/// HSA_EXT_IMAGE_GEOMETRY_1DB is encoded as a 4-word buffer resource
/// (sq_buf_rsrc_*); every other geometry is encoded as an 8-word image
/// resource (sq_img_rsrc_*) whose layout is obtained from
/// GetAddrlibSurfaceInfoAi. In both cases srd[8..11] receive the channel
/// type, channel order, width and level count, and row_pitch/slice_pitch of
/// @p mipmap are updated.
///
/// @param mipmap Array to populate; desc, data, component, tile_mode and
///               num_levels are read, srd/size/pitches/addr_output are written.
/// @return HSA_STATUS_ERROR when GetAddrlibSurfaceInfoAi reports failure
///         ((uint32_t)-1), HSA_STATUS_SUCCESS otherwise.
hsa_status_t ImageManagerAi::PopulateMipmapSrd(MipmappedArray& mipmap) const {
ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap.desc.format, mipmap.desc.geometry);
// Format support and a non-empty level count are enforced by assert only.
assert(mipmap_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
assert(mipmap_prop.element_size != 0);
assert(mipmap.num_levels >= 1);
// For addresses classified as local memory the SRD stores the address
// relative to local_memory_base_address_.
const void* mipmap_data_addr = mipmap.data;
if (IsLocalMemory(mipmap.data))
mipmap_data_addr = reinterpret_cast<const void*>(
reinterpret_cast<uintptr_t>(mipmap.data) - local_memory_base_address_);
if (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
// Buffer-resource encoding: four words, no addrlib query.
sq_buf_rsrc_word0_u word0;
sq_buf_rsrc_word1_u word1;
sq_buf_rsrc_word2_u word2;
sq_buf_rsrc_word3_u word3;
word0.val = 0;
word0.f.base_address = PtrLow32(mipmap_data_addr);
word1.val = 0;
word1.f.base_address_hi = PtrHigh32(mipmap_data_addr);
word1.f.stride = mipmap_prop.element_size;
word1.f.swizzle_enable = false;
word1.f.cache_swizzle = false;
word2.val = 0;
// num_records is set to the byte size of one row (width * element size).
word2.f.num_records = mipmap.desc.width * mipmap_prop.element_size;
const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);
word3.val = 0;
word3.f.dst_sel_x = swizzle.x;
word3.f.dst_sel_y = swizzle.y;
word3.f.dst_sel_z = swizzle.z;
word3.f.dst_sel_w = swizzle.w;
word3.f.num_format = mipmap_prop.data_type;
word3.f.data_format = mipmap_prop.data_format;
word3.f.index_stride = mipmap_prop.element_size;
word3.f.type = ImageLut().MapGeometry(mipmap.desc.geometry);
mipmap.srd[0] = word0.val;
mipmap.srd[1] = word1.val;
mipmap.srd[2] = word2.val;
mipmap.srd[3] = word3.val;
mipmap.row_pitch = mipmap.desc.width * mipmap_prop.element_size;
mipmap.slice_pitch = mipmap.row_pitch;
} else {
// Image-resource encoding: eight words, layout computed by addrlib.
sq_img_rsrc_word0_u word0;
sq_img_rsrc_word1_u word1;
sq_img_rsrc_word2_u word2;
sq_img_rsrc_word3_u word3;
sq_img_rsrc_word4_u word4;
sq_img_rsrc_word5_u word5;
sq_img_rsrc_word6_u word6;
sq_img_rsrc_word7_u word7;
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
// pMipInfo not needed - set to nullptr and AddrLib will ignore it
out.pMipInfo = nullptr;
// mipmap.row_pitch / mipmap.slice_pitch are passed in with their current
// values here and overwritten with the computed pitches further down.
uint32_t swizzleMode = GetAddrlibSurfaceInfoAi(
mipmap.component, mipmap.desc, mipmap.num_levels,
mipmap.tile_mode, mipmap.row_pitch, mipmap.slice_pitch, out);
if (swizzleMode == (uint32_t)(-1)) {
return HSA_STATUS_ERROR;
}
// Keep the addrlib result (with a null pMipInfo) on the array and take the
// total surface size from it.
mipmap.addr_output.addr2 = out;
mipmap.size = out.surfSize;
assert((out.bpp / 8) == mipmap_prop.element_size);
const size_t row_pitch_size = out.pitch * mipmap_prop.element_size;
// NOTE(review): word0.val is not zeroed first, unlike the other words;
// presumably base_address covers the whole word - confirm.
word0.f.base_address = PtrLow40Shift8(mipmap_data_addr);
word1.val = 0;
word1.f.base_address_hi = PtrHigh64Shift40(mipmap_data_addr);
word1.f.min_lod = 0;
word1.f.data_format = mipmap_prop.data_format;
word1.f.num_format = mipmap_prop.data_type;
word2.val = 0;
// Width and height are stored minus one.
word2.f.width = mipmap.desc.width - 1;
word2.f.height = mipmap.desc.height - 1;
word2.f.perf_mod = 0;
const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);
word3.val = 0;
word3.f.dst_sel_x = swizzle.x;
word3.f.dst_sel_y = swizzle.y;
word3.f.dst_sel_z = swizzle.z;
word3.f.dst_sel_w = swizzle.w;
word3.f.sw_mode = swizzleMode;
// The descriptor exposes the full chain: levels 0 .. num_levels - 1.
word3.f.base_level = 0;
word3.f.last_level = mipmap.num_levels - 1;
word3.f.type = ImageLut().MapGeometry(mipmap.desc.geometry);
const bool mipmap_array =
(mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
const bool mipmap_3d = (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);
word4.val = 0;
// depth field: array_size - 1 (at least 0) for array geometries,
// depth - 1 for 3D, and 0 for everything else.
word4.f.depth =
(mipmap_array)
? std::max(mipmap.desc.array_size, static_cast<size_t>(1)) - 1
: (mipmap_3d) ? mipmap.desc.depth - 1 : 0;
word4.f.pitch = out.pitch - 1;
word4.f.bc_swizzle = GetBcSwizzle(swizzle);
word5.val = 0;
word5.f.max_mip = mipmap.num_levels - 1;
word6.val = 0;
word7.val = 0;
mipmap.srd[0] = word0.val;
mipmap.srd[1] = word1.val;
mipmap.srd[2] = word2.val;
mipmap.srd[3] = word3.val;
mipmap.srd[4] = word4.val;
mipmap.srd[5] = word5.val;
mipmap.srd[6] = word6.val;
mipmap.srd[7] = word7.val;
mipmap.row_pitch = row_pitch_size;
mipmap.slice_pitch = out.sliceSize;
}
// Words 8-10 carry format and width information for both encodings.
mipmap.srd[8] = mipmap.desc.format.channel_type;
mipmap.srd[9] = mipmap.desc.format.channel_order;
mipmap.srd[10] = static_cast<uint32_t>(mipmap.desc.width);
// Mipmap-specific
mipmap.srd[11] = mipmap.num_levels;
return HSA_STATUS_SUCCESS;
}
/// @brief Build the SRD of a mipmapped array from a pre-computed AMD metadata
///        descriptor (GFX9 / ADDR2 flavour).
///
/// SRD words 0-7 are copied from the metadata, then base address, format,
/// channel selects and MAX_MIP are overridden from @p mipmap_array. Words
/// 8-11 receive channel type, channel order, width and level count. A
/// per-level ADDR2_MIP_INFO table is allocated and stored in
/// mipmap_array.addr_output.addr2.pMipInfo, and mipmap_array.size is derived
/// from the last level's offset plus its size.
///
/// @param mipmap_array Array to populate; num_levels must be at least 1.
/// @param desc         Metadata descriptor, reinterpreted as metadata_amd_ai_t;
///                     must not be NULL.
/// @return HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the format is
///         unsupported or its element size disagrees with the hardware pixel
///         size; HSA_STATUS_ERROR_INVALID_ARGUMENT when @p desc is NULL or
///         num_levels is 0; HSA_STATUS_SUCCESS otherwise.
hsa_status_t ImageManagerAi::PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const {
  const ImageProperty mipmap_prop =
      ImageLut().MapFormat(mipmap_array.desc.format, mipmap_array.desc.geometry);
  if (mipmap_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED || mipmap_prop.element_size == 0) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  // Without this guard a NULL descriptor is dereferenced below, and a zero
  // level count makes "num_levels - 1" wrap around and index outside the
  // zero-length mip table.
  if (desc == nullptr || mipmap_array.num_levels == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  const metadata_amd_ai_t* desc_ai = reinterpret_cast<const metadata_amd_ai_t*>(desc);

  const Swizzle swizzle = ImageLut().MapSwizzle(mipmap_array.desc.format.channel_order);

  // For addresses classified as local memory the SRD stores the address
  // relative to local_memory_base_address_.
  const void* mipmap_data_addr = mipmap_array.data;
  if (IsLocalMemory(mipmap_array.data)) {
    mipmap_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(mipmap_array.data) - local_memory_base_address_);
  }

  // Copy the pre-computed SRD words 0-7 from metadata
  mipmap_array.srd[0] = desc_ai->word0.u32All;
  mipmap_array.srd[1] = desc_ai->word1.u32All;
  mipmap_array.srd[2] = desc_ai->word2.u32All;
  mipmap_array.srd[3] = desc_ai->word3.u32All;
  mipmap_array.srd[4] = desc_ai->word4.u32All;
  mipmap_array.srd[5] = desc_ai->word5.u32All;
  mipmap_array.srd[6] = desc_ai->word6.u32All;
  mipmap_array.srd[7] = desc_ai->word7.u32All;

  // Override specific fields after copying
  const uint32_t hwPixelSize =
      ImageLut().GetPixelSize(mipmap_prop.data_format, mipmap_prop.data_type);
  if (mipmap_prop.element_size != hwPixelSize) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  SQ_IMG_RSRC_WORD0* const word0 = reinterpret_cast<SQ_IMG_RSRC_WORD0*>(&mipmap_array.srd[0]);
  SQ_IMG_RSRC_WORD1* const word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&mipmap_array.srd[1]);
  SQ_IMG_RSRC_WORD3* const word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3]);
  SQ_IMG_RSRC_WORD5* const word5 = reinterpret_cast<SQ_IMG_RSRC_WORD5*>(&mipmap_array.srd[5]);

  word0->bits.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr);
  word1->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr);
  word1->bits.DATA_FORMAT = mipmap_prop.data_format;
  word1->bits.NUM_FORMAT = mipmap_prop.data_type;
  word3->bits.DST_SEL_X = swizzle.x;
  word3->bits.DST_SEL_Y = swizzle.y;
  word3->bits.DST_SEL_Z = swizzle.z;
  word3->bits.DST_SEL_W = swizzle.w;
  word5->bits.MAX_MIP = mipmap_array.num_levels - 1;

  // Only the 1D / 1D-array geometries have their resource TYPE re-derived;
  // other geometries keep the TYPE found in the metadata.
  if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
      mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D) {
    word3->bits.TYPE = ImageLut().MapGeometry(mipmap_array.desc.geometry);
  }

  // Looks like this is only used for CPU copies.
  mipmap_array.row_pitch = 0;
  mipmap_array.slice_pitch = 0;

  // Store mipmap-specific metadata
  mipmap_array.srd[8] = mipmap_array.desc.format.channel_type;
  mipmap_array.srd[9] = mipmap_array.desc.format.channel_order;
  mipmap_array.srd[10] = static_cast<uint32_t>(mipmap_array.desc.width);
  mipmap_array.srd[11] = mipmap_array.num_levels;

  // Allocate and populate pMipInfo from metadata mip_offsets (ADDR2 for Ai/GFX9).
  // The trailing "()" value-initialises (zeroes) every entry.
  // NOTE(review): no matching delete[] is visible in this function; confirm
  // that whoever owns addr_output releases this array.
  ADDR2_MIP_INFO* mip_info_storage = new ADDR2_MIP_INFO[mipmap_array.num_levels]();

  // Extract per-level information from mip_offsets array
  for (uint32_t level = 0; level < mipmap_array.num_levels; ++level) {
    ADDR2_MIP_INFO& info = mip_info_storage[level];
    // mip_offsets contains offset bits [39:8], shift left by 8 to get actual byte offset
    info.offset = static_cast<uint64_t>(desc_ai->mip_offsets[level]) << 8;
    // Calculate dimensions for this level (halve at each level, never below 1)
    info.pitch = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.width >> level));
    info.height = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.height >> level));
    info.depth = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.depth >> level));
  }

  // Store pMipInfo in addr_output for later use by PopulateMipLevelSrd
  mipmap_array.addr_output.addr2.pMipInfo = mip_info_storage;

  // Total size calculation from metadata: end of the last level. The product
  // is widened to 64 bits before multiplying so it cannot wrap in 32 bits.
  const ADDR2_MIP_INFO& last_info = mip_info_storage[mipmap_array.num_levels - 1];
  const uint64_t last_level_size = static_cast<uint64_t>(last_info.pitch) *
                                   last_info.height *
                                   last_info.depth *
                                   mipmap_prop.element_size;
  mipmap_array.size = last_info.offset + last_level_size;

  return HSA_STATUS_SUCCESS;
}
/// @brief Debug helper: print a field-by-field dump of a GFX9 image SRD to stdout.
///
/// Twelve 32-bit words are read from @p srd: words 0-7 are decoded as image
/// resource words, words 8-11 are printed as the extra channel type, channel
/// order, original width and level count values. A null @p srd prints an
/// error banner and returns.
///
/// @param srd Pointer to at least 12 SRD words, or NULL.
void ImageManagerAi::printSRDDetailed(const uint32_t* srd) const {
  printf("\n========== Image SRD (GFX9/AI) - Detailed ==========\n");
  if (!srd) {
    printf("ERROR: No SRD data provided.\n");
    printf("===============================================\n\n");
    return;
  }

  // Print all 12 words with bit field annotations
  for (int i = 0; i < 12; i++) {
    printf("WORD %d: 0x%08x ", i, srd[i]);

    // Binary representation, nibbles separated by '_'
    printf("(");
    for (int bit = 31; bit >= 0; bit--) {
      // The extracted bit is unsigned, so print it with %u.
      printf("%u", (srd[i] >> bit) & 1u);
      if (bit % 4 == 0 && bit != 0) printf("_");
    }
    printf(")\n");
  }

  // WORD 0: BASE_ADDRESS (bits 39:8)
  sq_img_rsrc_word0_u word0;
  word0.val = srd[0];
  printf("\nWORD 0: BASE_ADDRESS (bits 39:8) = 0x%08x\n", word0.f.base_address);

  // WORD 1: Contains BASE_ADDRESS_HI, MIN_LOD, DATA_FORMAT, NUM_FORMAT
  sq_img_rsrc_word1_u word1;
  word1.val = srd[1];
  printf("WORD 1: BASE_ADDRESS_HI = 0x%02x\n", word1.f.base_address_hi);
  printf(" MIN_LOD = %u\n", word1.f.min_lod);
  printf(" DATA_FORMAT = %u\n", word1.f.data_format);
  printf(" NUM_FORMAT = %u\n", word1.f.num_format);

  // Rebuild the full address. The low field holds address bits 39:8 and the
  // high field holds the bits from 40 upwards (it is written with
  // PtrHigh64Shift40), so the high part is shifted by 40, not 32.
  const uint64_t base_addr = (static_cast<uint64_t>(word1.f.base_address_hi) << 40) |
                             (static_cast<uint64_t>(word0.f.base_address) << 8);
  // %llx with an explicit cast is correct wherever uint64_t is not "unsigned long".
  printf(" → Full Base Address = 0x%016llx\n", static_cast<unsigned long long>(base_addr));

  // WORD 2: WIDTH, HEIGHT, PERF_MOD
  sq_img_rsrc_word2_u word2;
  word2.val = srd[2];
  printf("WORD 2: WIDTH = %u (actual: %u)\n", word2.f.width, word2.f.width + 1);
  printf(" HEIGHT = %u (actual: %u)\n", word2.f.height, word2.f.height + 1);
  printf(" PERF_MOD = %u\n", word2.f.perf_mod);

  // WORD 3: Channel selectors, SW_MODE, BASE_LEVEL, LAST_LEVEL, TYPE
  sq_img_rsrc_word3_u word3;
  word3.val = srd[3];
  printf("WORD 3: DST_SEL_X = %u ", word3.f.dst_sel_x);
  printChannelSelect(word3.f.dst_sel_x);
  printf(" DST_SEL_Y = %u ", word3.f.dst_sel_y);
  printChannelSelect(word3.f.dst_sel_y);
  printf(" DST_SEL_Z = %u ", word3.f.dst_sel_z);
  printChannelSelect(word3.f.dst_sel_z);
  printf(" DST_SEL_W = %u ", word3.f.dst_sel_w);
  printChannelSelect(word3.f.dst_sel_w);
  printf(" BASE_LEVEL = %u ◄──── Current base level\n", word3.f.base_level);
  printf(" LAST_LEVEL = %u ◄──── Current last level\n", word3.f.last_level);
  printf(" SW_MODE = %u ", word3.f.sw_mode);
  printSwizzleMode(word3.f.sw_mode);
  printf(" TYPE = %u ", word3.f.type);
  printResourceType(word3.f.type);

  // WORD 4: DEPTH, PITCH, BC_SWIZZLE
  sq_img_rsrc_word4_u word4;
  word4.val = srd[4];
  printf("WORD 4: DEPTH = %u\n", word4.f.depth);
  printf(" PITCH = %u (actual: %u)\n", word4.f.pitch, word4.f.pitch + 1);
  printf(" BC_SWIZZLE = %u\n", word4.f.bc_swizzle);

  // Interpret the depth field according to the resource type
  const uint32_t type = word3.f.type;
  if (type == 10) {  // 3D
    printf(" → 3D Depth = %u (actual: %u)\n", word4.f.depth, word4.f.depth + 1);
  } else if (type == 13 || type == 12) {  // Arrays
    printf(" → Array Size = %u (actual: %u)\n", word4.f.depth, word4.f.depth + 1);
  }

  // WORD 5-7: Usually zero for basic images, but may contain metadata addresses
  printf("WORD 5: META_DATA_ADDRESS_HI = 0x%08x\n", srd[5]);
  printf("WORD 6: Reserved = 0x%08x\n", srd[6]);
  printf("WORD 7: META_DATA_ADDRESS = 0x%08x\n", srd[7]);

  // Additional mipmap information
  printf("WORD 8: CHANNEL_TYPE = 0x%08x\n", srd[8]);
  printf("WORD 9: CHANNEL_ORDER = 0x%08x\n", srd[9]);
  printf("WORD 10: WIDTH_ORIGINAL = 0x%08x\n", srd[10]);
  printf("WORD 11: NUM_LEVELS = 0x%08x\n", srd[11]);

  // Mipmap analysis
  if (word3.f.last_level > word3.f.base_level || word3.f.last_level > 0) {
    printf("\nMIPMAP ANALYSIS:\n");
    printf(" Total Levels = %u\n", srd[11]);
    printf(" Active Range = [%u, %u]\n", word3.f.base_level, word3.f.last_level);

    if (word3.f.base_level == word3.f.last_level) {
      printf(" Mode = SINGLE LEVEL VIEW ◄──── Mip level view\n");
      const uint32_t level = word3.f.base_level;
      const uint32_t level_width =
          std::max(1u, static_cast<uint32_t>((word2.f.width + 1) >> level));
      const uint32_t level_height =
          std::max(1u, static_cast<uint32_t>((word2.f.height + 1) >> level));
      printf(" Effective Dimensions = %ux%u (level %u)\n", level_width, level_height, level);
    } else {
      printf(" Mode = FULL MIPMAP CHAIN\n");
    }
  }

  printf("===============================================\n\n");
}
/// @brief Debug helper: print a parenthesised label for a DST_SEL value,
///        followed by a newline. Values other than 0, 1, 4, 5, 6 and 7 print
///        "(UNKNOWN)".
void ImageManagerAi::printChannelSelect(uint32_t sel) const {
  const char* label = "(UNKNOWN)\n";
  if (sel == 0) {
    label = "(SEL_0)\n";
  } else if (sel == 1) {
    label = "(SEL_1)\n";
  } else if (sel == 4) {
    label = "(SEL_X/R)\n";
  } else if (sel == 5) {
    label = "(SEL_Y/G)\n";
  } else if (sel == 6) {
    label = "(SEL_Z/B)\n";
  } else if (sel == 7) {
    label = "(SEL_W/A)\n";
  }
  printf("%s", label);
}
/// @brief Debug helper: print a parenthesised label for an SRD resource TYPE
///        value, followed by a newline. Values 8-15 have names; anything
///        else prints "(UNKNOWN=<value>)".
void ImageManagerAi::printResourceType(uint32_t type) const {
  // Labels for the contiguous range 8..15, indexed by (type - 8).
  static const char* const kTypeLabels[] = {
      "(1D)\n",           "(2D)\n",       "(3D)\n",      "(CUBE)\n",
      "(1D_ARRAY/1DB)\n", "(2D_ARRAY)\n", "(2D_MSAA)\n", "(2D_MSAA_ARRAY)\n",
  };

  if (type >= 8 && type <= 15) {
    printf("%s", kTypeLabels[type - 8]);
    return;
  }
  printf("(UNKNOWN=%u)\n", type);
}
/// @brief Debug helper: print a parenthesised label for an SRD SW_MODE value,
///        followed by a newline.
///
/// Values are bucketed: 0 is LINEAR, 1-4 print SW_256B_n, 5-8 SW_4KB_n,
/// 9-12 SW_64KB_n, 13-21 SW_VAR_n, and anything larger prints UNKNOWN.
/// NOTE(review): these bucket boundaries may not line up with the address
/// library's swizzle-mode enumeration - confirm before relying on the labels.
void ImageManagerAi::printSwizzleMode(uint32_t sw_mode) const {
  // GFX9 swizzle modes
  if (sw_mode == 0) {
    printf("(LINEAR)\n");
    return;
  }
  if (sw_mode < 5) {
    printf("(SW_256B_%u)\n", sw_mode);
    return;
  }
  if (sw_mode < 9) {
    printf("(SW_4KB_%u)\n", sw_mode - 4);
    return;
  }
  if (sw_mode < 13) {
    printf("(SW_64KB_%u)\n", sw_mode - 8);
    return;
  }
  if (sw_mode < 22) {
    printf("(SW_VAR_%u)\n", sw_mode - 12);
    return;
  }
  printf("(UNKNOWN=%u)\n", sw_mode);
}
/// @brief Turn an SRD copy into a single-level view by setting BASE_LEVEL
///        and LAST_LEVEL of SRD word 3 to @p mip_level.
///
/// @p level_view is expected to already hold a copy of the parent's SRD; only
/// word 3 is modified here.
///
/// @param level_view   Object whose SRD is patched in place.
/// @param mipmap_array Parent array; its num_levels bounds @p mip_level.
/// @param mip_level    Level to select; must be less than mipmap_array.num_levels.
/// @return HSA_STATUS_ERROR_INVALID_ARGUMENT when @p mip_level is out of
///         range, HSA_STATUS_SUCCESS otherwise.
hsa_status_t ImageManagerAi::PopulateMipLevelSrd(
    MipmappedArray& level_view,
    const MipmappedArray& mipmap_array,
    uint32_t mip_level) const {
  // Reject levels the parent does not have; otherwise the value would be
  // written unchecked into the narrow BASE_LEVEL/LAST_LEVEL bitfields and the
  // view could select a level outside the allocated chain.
  if (mip_level >= mipmap_array.num_levels) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // SRD already copied from parent, just modify BASE_LEVEL/LAST_LEVEL fields
  uint32_t* srd_words = reinterpret_cast<uint32_t*>(level_view.srd);

  // SRD WORD3 has BASE_LEVEL and LAST_LEVEL fields
  sq_img_rsrc_word3_u* word3 = reinterpret_cast<sq_img_rsrc_word3_u*>(&srd_words[3]);

  // Set both to same value - hardware samples only this level
  word3->f.base_level = mip_level;
  word3->f.last_level = mip_level;

  debug_print("Set SRD mip selection: BASE_LEVEL=%u, LAST_LEVEL=%u", mip_level, mip_level);
  return HSA_STATUS_SUCCESS;
}
} // namespace image
} // namespace rocr
+22 -5
Просмотреть файл
@@ -59,6 +59,7 @@ class ImageManagerAi : public ImageManagerKv {
virtual hsa_status_t CalculateImageSizeAndAlignment(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
uint32_t num_mipmap_levels,
size_t image_data_row_pitch, size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info) const;
@@ -76,13 +77,29 @@ class ImageManagerAi : public ImageManagerKv {
/// @brief Fill sampler structure with device specific sampler object.
virtual hsa_status_t PopulateSamplerSrd(Sampler& sampler) const;
/// @brief Fill mipmap structure with device specific mipmapped array object.
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const;
/// @brief Fill mipmap structure with pre-computed AMD metadata descriptor.
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const;
/// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields
virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view,
const MipmappedArray& mipmap_array, uint32_t mip_level) const;
/// @brief Debug helper: print a field-by-field dump of the 12 SRD words at
/// @p srd via printf; a null pointer prints an error banner instead.
virtual void printSRDDetailed(const uint32_t* srd) const;
/// @brief Debug helper: print a label for a DST_SEL channel selector value.
virtual void printChannelSelect(uint32_t sel) const;
/// @brief Debug helper: print a label for an SRD resource TYPE value.
virtual void printResourceType(uint32_t type) const;
/// @brief Debug helper: print a label for an SRD SW_MODE (swizzle mode) value.
virtual void printSwizzleMode(uint32_t sw_mode) const;
protected:
uint32_t GetAddrlibSurfaceInfoAi(hsa_agent_t component,
const hsa_ext_image_descriptor_t& desc,
Image::TileMode tileMode,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const;
const hsa_ext_image_descriptor_t& desc,
uint32_t num_mipmap_levels,
Image::TileMode tileMode,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const;
bool IsLocalMemory(const void* address) const;
+413 -8
Просмотреть файл
@@ -215,6 +215,7 @@ ImageManagerGfx11::~ImageManagerGfx11() {}
hsa_status_t ImageManagerGfx11::CalculateImageSizeAndAlignment(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
uint32_t num_mipmap_levels,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info) const {
@@ -230,9 +231,8 @@ hsa_status_t ImageManagerGfx11::CalculateImageSizeAndAlignment(
desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)?
Image::TileMode::TILED : Image::TileMode::LINEAR;
}
if (GetAddrlibSurfaceInfoNv(component, desc, tileMode,
image_data_row_pitch, image_data_slice_pitch, out) ==
(uint32_t)(-1)) {
if (GetAddrlibSurfaceInfoNv(component, desc, num_mipmap_levels, tileMode,
image_data_row_pitch, image_data_slice_pitch, out) == (uint32_t)(-1)) {
return HSA_STATUS_ERROR;
}
@@ -332,7 +332,7 @@ hsa_status_t ImageManagerGfx11::PopulateImageSrd(Image& image,
reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3])->bits.TYPE =
ImageLut().MapGeometry(image.desc.geometry);
}
// Imported metadata holds the offset to metadata, add the image base address.
uintptr_t meta = uintptr_t(((SQ_IMG_RSRC_WORD7*)(&image.srd[7]))->bits.META_DATA_ADDRESS_HI) << 16;
meta |= uintptr_t(((SQ_IMG_RSRC_WORD6*)(&image.srd[6]))->bits.META_DATA_ADDRESS) << 8;
@@ -460,9 +460,8 @@ hsa_status_t ImageManagerGfx11::PopulateImageSrd(Image& image) const {
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(
image.component, image.desc, image.tile_mode,
image.row_pitch, image.slice_pitch, out);
uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(image.component, image.desc,
1, image.tile_mode, image.row_pitch, image.slice_pitch, out);
if (swizzleMode == (uint32_t)(-1)) {
return HSA_STATUS_ERROR;
}
@@ -619,6 +618,7 @@ hsa_status_t ImageManagerGfx11::PopulateSamplerSrd(Sampler& sampler) const {
uint32_t ImageManagerGfx11::GetAddrlibSurfaceInfoNv(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
uint32_t num_mipmap_levels,
Image::TileMode tileMode,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
@@ -641,7 +641,7 @@ uint32_t ImageManagerGfx11::GetAddrlibSurfaceInfoNv(
in.width = width;
in.height = height;
in.numSlices = num_slice;
in.pitchInElement = image_data_row_pitch / image_prop.element_size;
in.numMipLevels = num_mipmap_levels;
switch (desc.geometry) {
case HSA_EXT_IMAGE_GEOMETRY_1D:
@@ -810,5 +810,410 @@ hsa_status_t ImageManagerGfx11::FillImage(const Image& image, const void* patter
return status;
}
/// @brief Build the GFX11 shader resource descriptor (SRD) for a mipmapped array.
///
/// For HSA_EXT_IMAGE_GEOMETRY_1DB a buffer descriptor (words 0-3) is written;
/// for every other geometry an image descriptor (words 0-7) is written using
/// the swizzle mode and pitch returned by GetAddrlibSurfaceInfoNv(). In both
/// cases words 8-11 carry the channel type, channel order, width and level
/// count, and mipmap.row_pitch / mipmap.slice_pitch are updated. The image
/// path additionally updates mipmap.size and mipmap.addr_output.addr2.
///
/// @param mipmap Mipmapped array to populate; desc, data, num_levels,
///               component, tile_mode and the pitch fields are read.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR if no surface layout could
///         be computed.
hsa_status_t ImageManagerGfx11::PopulateMipmapSrd(MipmappedArray& mipmap) const {
  ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap.desc.format, mipmap.desc.geometry);
  assert(mipmap_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(mipmap_prop.element_size != 0);
  assert(mipmap.num_levels >= 1);

  // Addresses in local memory are stored in the SRD relative to the local
  // memory base.
  const void* mipmap_data_addr = mipmap.data;
  if (IsLocalMemory(mipmap.data)) {
    mipmap_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(mipmap.data) - local_memory_base_address_);
  }

  if (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    SQ_BUF_RSRC_WORD0 word0;
    SQ_BUF_RSRC_WORD1 word1;
    SQ_BUF_RSRC_WORD2 word2;
    SQ_BUF_RSRC_WORD3 word3;

    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow32(mipmap_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh32(mipmap_data_addr);
    word1.f.STRIDE = mipmap_prop.element_size;
    word1.f.SWIZZLE_ENABLE = 0;

    // Zero the whole word first so no indeterminate bits reach the SRD
    // (matches the GFX12 implementation).
    word2.val = 0;
    word2.f.NUM_RECORDS = mipmap.desc.width * mipmap_prop.element_size;

    const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);
    word3.val = 0;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
    word3.f.INDEX_STRIDE = mipmap_prop.element_size;
    word3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry);

    mipmap.srd[0] = word0.val;
    mipmap.srd[1] = word1.val;
    mipmap.srd[2] = word2.val;
    mipmap.srd[3] = word3.val;

    // The buffer descriptor only uses words 0-3; clear the rest so stale
    // contents are never observed (matches the GFX12 implementation).
    mipmap.srd[4] = 0;
    mipmap.srd[5] = 0;
    mipmap.srd[6] = 0;
    mipmap.srd[7] = 0;

    mipmap.row_pitch = mipmap.desc.width * mipmap_prop.element_size;
    mipmap.slice_pitch = mipmap.row_pitch;
  } else {
    SQ_IMG_RSRC_WORD0 word0;
    SQ_IMG_RSRC_WORD1 word1;
    SQ_IMG_RSRC_WORD2 word2;
    SQ_IMG_RSRC_WORD3 word3;
    SQ_IMG_RSRC_WORD4 word4;

    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
    // pMipInfo not needed - set to nullptr and AddrLib will ignore it
    out.pMipInfo = nullptr;

    uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(
        mipmap.component, mipmap.desc, mipmap.num_levels,
        mipmap.tile_mode, mipmap.row_pitch, mipmap.slice_pitch, out);
    if (swizzleMode == (uint32_t)(-1)) {
      return HSA_STATUS_ERROR;
    }

    mipmap.addr_output.addr2 = out;
    mipmap.size = out.surfSize;

    assert((out.bpp / 8) == mipmap_prop.element_size);
    const size_t row_pitch_size = out.pitch * mipmap_prop.element_size;

    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr);
    word1.f.MAX_MIP = mipmap.num_levels - 1;
    word1.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
    // Only take the lowest 2 bits of (mipmap.desc.width - 1)
    word1.f.WIDTH = BitSelect<0, 1>(mipmap.desc.width - 1);

    word2.val = 0;
    // Take the high 12 bits of (mipmap.desc.width - 1)
    word2.f.WIDTH_HI = BitSelect<2, 13>(mipmap.desc.width - 1);
    word2.f.HEIGHT = mipmap.desc.height ? mipmap.desc.height - 1 : 0;

    const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);
    word3.val = 0;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.SW_MODE = swizzleMode;
    // Expose the full chain: levels [0, num_levels - 1].
    word3.f.BASE_LEVEL = 0;
    word3.f.LAST_LEVEL = mipmap.num_levels - 1;
    word3.f.BC_SWIZZLE = GetBcSwizzle(swizzle);
    word3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry);

    const bool mipmap_array =
        (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
         mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
         mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool mipmap_3d = (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    word4.val = 0;
    word4.f.DEPTH =
        (mipmap_array)  // Doesn't hurt but isn't array_size already >0?
            ? std::max(mipmap.desc.array_size, static_cast<size_t>(1)) - 1
            : (mipmap_3d) ? mipmap.desc.depth - 1 : 0;

    // For 1d, 2d and 2d-msaa in gfx11 this is pitch-1
    if (!mipmap_array && !mipmap_3d) {
      word4.f.PITCH = out.pitch - 1;
    }

    mipmap.srd[0] = word0.val;
    mipmap.srd[1] = word1.val;
    mipmap.srd[2] = word2.val;
    mipmap.srd[3] = word3.val;
    mipmap.srd[4] = word4.val;
    // Words 5-7 are left zero here (the original declared them through
    // mistyped WORD5 temporaries that were only ever zeroed).
    mipmap.srd[5] = 0;
    mipmap.srd[6] = 0;
    mipmap.srd[7] = 0;

    mipmap.row_pitch = row_pitch_size;
    mipmap.slice_pitch = out.sliceSize;
  }

  // Auxiliary words appended after the hardware descriptor.
  mipmap.srd[8] = mipmap.desc.format.channel_type;
  mipmap.srd[9] = mipmap.desc.format.channel_order;
  mipmap.srd[10] = static_cast<uint32_t>(mipmap.desc.width);

  // Mipmap-specific auxiliary fields
  mipmap.srd[11] = mipmap.num_levels;

  return HSA_STATUS_SUCCESS;
}
/// @brief Build the GFX11 SRD for a mipmapped array from a pre-computed AMD
///        metadata descriptor.
///
/// SRD words 0-7 are copied from the metadata and then the base address,
/// format, MAX_MIP and destination swizzle are overridden from
/// @p mipmap_array. Words 8-11 receive channel type, channel order, width and
/// level count. A per-level ADDR2_MIP_INFO table is allocated, filled from the
/// metadata's mip_offsets, and stored in
/// mipmap_array.addr_output.addr2.pMipInfo; mipmap_array.size is derived from
/// the last level's offset and extent.
///
/// @param mipmap_array Mipmapped array to populate.
/// @param desc         Metadata descriptor, reinterpreted as metadata_amd_gfx11_t.
/// @return HSA_STATUS_SUCCESS on success;
///         HSA_STATUS_ERROR_INVALID_ARGUMENT if @p desc is null or the array
///         has no levels; HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED if the
///         format is unsupported or its element size disagrees with the
///         hardware pixel size.
hsa_status_t ImageManagerGfx11::PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const {
  // With zero levels, "num_levels - 1" below would wrap and index far outside
  // the mip table; a null descriptor cannot be copied from.
  if (desc == nullptr || mipmap_array.num_levels == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const metadata_amd_gfx11_t* desc_gfx11 = reinterpret_cast<const metadata_amd_gfx11_t*>(desc);

  ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap_array.desc.format, mipmap_array.desc.geometry);
  if (mipmap_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED || mipmap_prop.element_size == 0) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  // Validate before touching the SRD so a failure leaves it unmodified.
  uint32_t hwPixelSize = ImageLut().GetPixelSize(mipmap_prop.data_format, mipmap_prop.data_type);
  if (mipmap_prop.element_size != hwPixelSize) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  const Swizzle swizzle = ImageLut().MapSwizzle(mipmap_array.desc.format.channel_order);

  // Addresses in local memory are stored relative to the local memory base.
  const void* mipmap_data_addr = mipmap_array.data;
  if (IsLocalMemory(mipmap_array.data)) {
    mipmap_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(mipmap_array.data) - local_memory_base_address_);
  }

  // Copy the pre-computed SRD words 0-7 from metadata
  mipmap_array.srd[0] = desc_gfx11->word0.u32All;
  mipmap_array.srd[1] = desc_gfx11->word1.u32All;
  mipmap_array.srd[2] = desc_gfx11->word2.u32All;
  mipmap_array.srd[3] = desc_gfx11->word3.u32All;
  mipmap_array.srd[4] = desc_gfx11->word4.u32All;
  mipmap_array.srd[5] = desc_gfx11->word5.u32All;
  mipmap_array.srd[6] = desc_gfx11->word6.u32All;
  mipmap_array.srd[7] = desc_gfx11->word7.u32All;

  // Override specific fields after copying
  SQ_IMG_RSRC_WORD0* word0 = reinterpret_cast<SQ_IMG_RSRC_WORD0*>(&mipmap_array.srd[0]);
  SQ_IMG_RSRC_WORD1* word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&mipmap_array.srd[1]);
  SQ_IMG_RSRC_WORD3* word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3]);

  word0->bits.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr);
  word1->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr);
  word1->bits.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
  word1->bits.MAX_MIP = mipmap_array.num_levels - 1;
  word3->bits.DST_SEL_X = swizzle.x;
  word3->bits.DST_SEL_Y = swizzle.y;
  word3->bits.DST_SEL_Z = swizzle.z;
  word3->bits.DST_SEL_W = swizzle.w;
  if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
      mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D) {
    word3->bits.TYPE = ImageLut().MapGeometry(mipmap_array.desc.geometry);
  }

  // Looks like this is only used for CPU copies.
  mipmap_array.row_pitch = 0;
  mipmap_array.slice_pitch = 0;

  // Store mipmap-specific metadata
  mipmap_array.srd[8] = mipmap_array.desc.format.channel_type;
  mipmap_array.srd[9] = mipmap_array.desc.format.channel_order;
  mipmap_array.srd[10] = static_cast<uint32_t>(mipmap_array.desc.width);
  mipmap_array.srd[11] = mipmap_array.num_levels;

  // Allocate and populate pMipInfo from metadata mip_offsets (ADDR2 for GFX11).
  // The trailing "()" value-initializes (zeroes) every entry.
  // NOTE(review): ownership of this array passes to mipmap_array via
  // addr_output.addr2.pMipInfo; the matching delete[] is not visible here -
  // confirm it is released when the MipmappedArray is destroyed.
  ADDR2_MIP_INFO* mip_info_storage = new ADDR2_MIP_INFO[mipmap_array.num_levels]();

  // Extract per-level information from mip_offsets array
  for (uint32_t level = 0; level < mipmap_array.num_levels; level++) {
    // mip_offsets contains offset bits [39:8], shift left by 8 to get actual byte offset
    mip_info_storage[level].offset = static_cast<uint64_t>(desc_gfx11->mip_offsets[level]) << 8;

    // Calculate dimensions for this level (halve at each level)
    mip_info_storage[level].pitch = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.width >> level));
    mip_info_storage[level].height = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.height >> level));
    mip_info_storage[level].depth = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.depth >> level));
  }

  // Store pMipInfo in addr_output for later use by PopulateMipLevelSrd
  mipmap_array.addr_output.addr2.pMipInfo = mip_info_storage;

  // Total size calculation from metadata: end of the last level.
  // Widen before multiplying so large single-level surfaces cannot wrap
  // a 32-bit intermediate product.
  // NOTE(review): array_size is not factored in here - confirm this is
  // intended for array geometries.
  const ADDR2_MIP_INFO& last_mip = mip_info_storage[mipmap_array.num_levels - 1];
  const uint64_t last_level_size = static_cast<uint64_t>(last_mip.pitch) *
                                   static_cast<uint64_t>(last_mip.height) *
                                   static_cast<uint64_t>(last_mip.depth) *
                                   static_cast<uint64_t>(mipmap_prop.element_size);
  mipmap_array.size = last_mip.offset + last_level_size;

  return HSA_STATUS_SUCCESS;
}
/// @brief Dump a 12-word GFX11 image SRD to stdout with decoded fields.
///
/// Prints each word in hex and binary, then decodes words 0-4 as an image
/// resource descriptor, prints words 5-11 raw, and finally summarizes the
/// active mip range when LAST_LEVEL is non-zero.
///
/// @param srd Pointer to at least 12 SRD words; a null pointer prints an
///            error banner instead.
void ImageManagerGfx11::printSRDDetailed(const uint32_t* srd) const {
  printf("\n========== Image SRD (GFX11) - Detailed ==========\n");
  if (!srd) {
    printf("ERROR: No SRD data provided.\n");
    printf("===============================================\n\n");
    return;
  }

  // Print all 12 words with bit field annotations
  for (int i = 0; i < 12; i++) {
    printf("WORD %d: 0x%08x ", i, srd[i]);

    // Binary representation, grouped in nibbles
    printf("(");
    for (int bit = 31; bit >= 0; bit--) {
      printf("%u", (srd[i] >> bit) & 1u);
      if (bit % 4 == 0 && bit != 0) printf("_");
    }
    printf(")\n");
  }

  // WORD 0: BASE_ADDRESS (bits 39:8)
  SQ_IMG_RSRC_WORD0 word0;
  word0.val = srd[0];
  printf("\nWORD 0: BASE_ADDRESS (bits 39:8) = 0x%08x\n", word0.f.BASE_ADDRESS);

  // WORD 1: Contains BASE_ADDRESS_HI, FORMAT, WIDTH (bits 1:0)
  SQ_IMG_RSRC_WORD1 word1;
  word1.val = srd[1];
  printf("WORD 1: BASE_ADDRESS_HI = 0x%02x\n", word1.f.BASE_ADDRESS_HI);
  printf(" FORMAT = %u\n", word1.f.FORMAT);
  printf(" WIDTH (bits 1:0) = %u\n", word1.f.WIDTH);

  // Reassemble the full address. BASE_ADDRESS holds bits 39:8, so the HI
  // field must start at bit 40 (it is written via PtrHigh64Shift40());
  // shifting it by 32 would overlap bits 39:32 of the low part.
  const uint64_t base_addr = (static_cast<uint64_t>(word1.f.BASE_ADDRESS_HI) << 40) |
                             (static_cast<uint64_t>(word0.f.BASE_ADDRESS) << 8);
  // Cast so the format specifier is correct regardless of how uint64_t is defined.
  printf(" → Full Base Address = 0x%016llx\n", static_cast<unsigned long long>(base_addr));

  // WORD 2: WIDTH_HI, HEIGHT
  SQ_IMG_RSRC_WORD2 word2;
  word2.val = srd[2];
  printf("WORD 2: WIDTH_HI (bits 13:2) = %u\n", word2.f.WIDTH_HI);
  printf(" HEIGHT = %u\n", word2.f.HEIGHT);

  // Calculate full width (GFX11 uses 14 bits split: 2 in WORD1 + 12 in WORD2)
  uint32_t full_width = word1.f.WIDTH | (word2.f.WIDTH_HI << 2);
  printf(" → Full Width = %u (actual: %u)\n", full_width, full_width + 1);
  printf(" → Full Height = %u (actual: %u)\n", word2.f.HEIGHT, word2.f.HEIGHT + 1);

  // WORD 3: Channel selectors, SW_MODE, BASE_LEVEL, LAST_LEVEL, BC_SWIZZLE, TYPE
  SQ_IMG_RSRC_WORD3 word3;
  word3.val = srd[3];
  printf("WORD 3: DST_SEL_X = %u ", word3.f.DST_SEL_X);
  printChannelSelect(word3.f.DST_SEL_X);
  printf(" DST_SEL_Y = %u ", word3.f.DST_SEL_Y);
  printChannelSelect(word3.f.DST_SEL_Y);
  printf(" DST_SEL_Z = %u ", word3.f.DST_SEL_Z);
  printChannelSelect(word3.f.DST_SEL_Z);
  printf(" DST_SEL_W = %u ", word3.f.DST_SEL_W);
  printChannelSelect(word3.f.DST_SEL_W);
  printf(" BASE_LEVEL = %u ◄──── Current base level\n", word3.f.BASE_LEVEL);
  printf(" LAST_LEVEL = %u ◄──── Current last level\n", word3.f.LAST_LEVEL);
  printf(" SW_MODE = %u ", word3.f.SW_MODE);
  printSwizzleMode(word3.f.SW_MODE);
  printf(" BC_SWIZZLE = %u\n", word3.f.BC_SWIZZLE);
  printf(" TYPE = %u ", word3.f.TYPE);
  printResourceType(word3.f.TYPE);

  // WORD 4: DEPTH, PITCH
  SQ_IMG_RSRC_WORD4 word4;
  word4.val = srd[4];
  printf("WORD 4: DEPTH = %u\n", word4.f.DEPTH);
  printf(" PITCH = %u (actual: %u)\n", word4.f.PITCH, word4.f.PITCH + 1);

  // Calculate effective depth based on geometry
  uint32_t type = word3.f.TYPE;
  if (type == 10) {  // 3D
    printf(" → 3D Depth = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1);
  } else if (type == 13 || type == 12) {  // Arrays
    printf(" → Array Size = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1);
  }

  // WORD 5-7: Usually zero for basic images, but may contain metadata addresses
  printf("WORD 5: Reserved = 0x%08x\n", srd[5]);
  printf("WORD 6: META_DATA_ADDRESS = 0x%08x\n", srd[6]);
  printf("WORD 7: META_DATA_ADDRESS_HI = 0x%08x\n", srd[7]);

  // Additional mipmap information (software-defined auxiliary words)
  printf("WORD 8: CHANNEL_TYPE = 0x%08x\n", srd[8]);
  printf("WORD 9: CHANNEL_ORDER = 0x%08x\n", srd[9]);
  printf("WORD 10: WIDTH_ORIGINAL = 0x%08x\n", srd[10]);
  printf("WORD 11: NUM_LEVELS = 0x%08x\n", srd[11]);

  // Mipmap analysis
  if (word3.f.LAST_LEVEL > word3.f.BASE_LEVEL || word3.f.LAST_LEVEL > 0) {
    printf("\nMIPMAP ANALYSIS:\n");
    printf(" Total Levels = %u\n", srd[11]);
    printf(" Active Range = [%u, %u]\n", word3.f.BASE_LEVEL, word3.f.LAST_LEVEL);

    if (word3.f.BASE_LEVEL == word3.f.LAST_LEVEL) {
      printf(" Mode = SINGLE LEVEL VIEW ◄──── Mip level view\n");
      // Each level halves the base dimensions, clamped to 1.
      uint32_t level = word3.f.BASE_LEVEL;
      uint32_t level_width = std::max(1u, (full_width + 1) >> level);
      uint32_t level_height = std::max(1u, static_cast<uint32_t>((word2.f.HEIGHT + 1) >> level));
      printf(" Effective Dimensions = %ux%u (level %u)\n", level_width, level_height, level);
    } else {
      printf(" Mode = FULL MIPMAP CHAIN\n");
    }
  }

  printf("===============================================\n\n");
}
/// @brief Print the symbolic name of an SRD DST_SEL_* value, followed by a newline.
///
/// Values 0, 1 and 4-7 have names; anything else prints "(UNKNOWN)".
///
/// @param sel Raw destination-select field value.
void ImageManagerGfx11::printChannelSelect(uint32_t sel) const {
  // Indexed by selector value; gaps (2, 3) have no name.
  static const char* const kSelectNames[8] = {
      "(SEL_0)\n",   "(SEL_1)\n",   nullptr,       nullptr,
      "(SEL_X/R)\n", "(SEL_Y/G)\n", "(SEL_Z/B)\n", "(SEL_W/A)\n",
  };

  const char* name = nullptr;
  if (sel < 8) {
    name = kSelectNames[sel];
  }
  if (name == nullptr) {
    name = "(UNKNOWN)\n";
  }
  printf("%s", name);
}
/// @brief Print the symbolic name of an SRD TYPE value, followed by a newline.
///
/// Values 8-15 have names; any other value is printed as "(UNKNOWN=<value>)".
///
/// @param type Raw resource-type field value.
void ImageManagerGfx11::printResourceType(uint32_t type) const {
  // Names for the contiguous range [kFirstNamed, kFirstNamed + 7].
  static const uint32_t kFirstNamed = 8;
  static const char* const kTypeNames[8] = {
      "(1D)\n",           "(2D)\n",       "(3D)\n",      "(CUBE)\n",
      "(1D_ARRAY/1DB)\n", "(2D_ARRAY)\n", "(2D_MSAA)\n", "(2D_MSAA_ARRAY)\n",
  };

  const bool is_named = (type >= kFirstNamed) && (type < kFirstNamed + 8);
  if (!is_named) {
    printf("(UNKNOWN=%u)\n", type);
    return;
  }
  printf("%s", kTypeNames[type - kFirstNamed]);
}
/// @brief Print a human-readable label for an SRD SW_MODE value, followed by a newline.
///
/// Debug helper used by printSRDDetailed(). The value is decoded using the
/// AddrLib swizzle-mode numbering, where each aligned group of four modes
/// shares a block size and the position inside the group selects the Z/S/D/R
/// variant (0 = LINEAR, 1-3 = 256B, 4-7 = 4KB, 8-11 = 64KB, 16-19 = 64KB *_T,
/// 20-23 = 4KB *_X, 24-27 = 64KB *_X).
/// NOTE(review): numbering taken from AddrLib's AddrSwizzleMode enum - confirm
/// against the addrinterface.h used by this tree.
///
/// @param sw_mode Raw SW_MODE field value read from the SRD.
void ImageManagerGfx11::printSwizzleMode(uint32_t sw_mode) const {
  // Variant letter is determined by the low two bits of the mode.
  static const char kVariant[4] = {'Z', 'S', 'D', 'R'};
  const char variant = kVariant[sw_mode & 0x3u];

  if (sw_mode == 0) {
    printf("(LINEAR)\n");
  } else if (sw_mode < 4) {
    // 256B has no Z variant; value 0 of this group is LINEAR.
    printf("(SW_256B_%c)\n", variant);
  } else if (sw_mode < 8) {
    printf("(SW_4KB_%c)\n", variant);
  } else if (sw_mode < 12) {
    printf("(SW_64KB_%c)\n", variant);
  } else if (sw_mode >= 16 && sw_mode < 20) {
    printf("(SW_64KB_%c_T)\n", variant);
  } else if (sw_mode >= 20 && sw_mode < 24) {
    printf("(SW_4KB_%c_X)\n", variant);
  } else if (sw_mode >= 24 && sw_mode < 28) {
    printf("(SW_64KB_%c_X)\n", variant);
  } else {
    // 12-15 and 28+ are ASIC-specific/reserved encodings; print the raw value
    // rather than guessing a name.
    printf("(UNKNOWN=%u)\n", sw_mode);
  }
}
/// @brief Turn an SRD copied from a mipmapped array into a single-level view.
///
/// The caller is expected to have copied the parent's SRD into
/// @p level_view already; this routine only rewrites the BASE_LEVEL and
/// LAST_LEVEL fields of SRD word 3 so that both name @p mip_level.
///
/// @param level_view   View whose SRD is patched in place.
/// @param mipmap_array Parent mipmapped array; supplies the valid level count.
/// @param mip_level    Zero-based mip level to expose through the view.
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR_INVALID_ARGUMENT if
///         @p mip_level is not a level of @p mipmap_array.
hsa_status_t ImageManagerGfx11::PopulateMipLevelSrd(
    MipmappedArray& level_view,
    const MipmappedArray& mipmap_array,
    uint32_t mip_level) const {
  // An out-of-range level would be silently truncated by the narrow SRD
  // bitfields and select an unrelated level, so reject it up front.
  if (mip_level >= mipmap_array.num_levels) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // SRD already copied from parent, just modify BASE_LEVEL/LAST_LEVEL fields
  uint32_t* srd_words = reinterpret_cast<uint32_t*>(level_view.srd);

  // GFX11 SRD WORD3 has BASE_LEVEL and LAST_LEVEL fields
  SQ_IMG_RSRC_WORD3* word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&srd_words[3]);

  // Set both to same value - hardware samples only this level
  word3->f.BASE_LEVEL = mip_level;
  word3->f.LAST_LEVEL = mip_level;

  debug_print("Set SRD mip selection: BASE_LEVEL=%u, LAST_LEVEL=%u", mip_level, mip_level);
  return HSA_STATUS_SUCCESS;
}
} // namespace image
} // namespace rocr
+18
Просмотреть файл
@@ -60,6 +60,7 @@ class ImageManagerGfx11 : public ImageManagerKv {
virtual hsa_status_t CalculateImageSizeAndAlignment(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
uint32_t num_mipmap_levels,
size_t image_data_row_pitch, size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info) const;
@@ -80,9 +81,26 @@ class ImageManagerGfx11 : public ImageManagerKv {
/// @brief Fill image backing storage using agent copy.
virtual hsa_status_t FillImage(const Image& image, const void* pattern,
const hsa_ext_image_region_t& region);
/// @brief Fill mipmap structure with device specific mipmapped array object.
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const;
/// @brief Fill mipmap structure with pre-computed AMD metadata descriptor.
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const;
/// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields
virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view,
const MipmappedArray& mipmap_array, uint32_t mip_level) const;
virtual void printSRDDetailed(const uint32_t* srd) const;
virtual void printChannelSelect(uint32_t sel) const;
virtual void printResourceType(uint32_t type) const;
virtual void printSwizzleMode(uint32_t sw_mode) const;
protected:
uint32_t GetAddrlibSurfaceInfoNv(hsa_agent_t component,
const hsa_ext_image_descriptor_t& desc,
uint32_t num_mipmap_levels,
Image::TileMode tileMode,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
+511 -44
Просмотреть файл
@@ -216,14 +216,24 @@ ImageManagerGfx12::~ImageManagerGfx12() {}
hsa_status_t ImageManagerGfx12::CalculateImageSizeAndAlignment(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
uint32_t num_mipmap_levels,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info) const {
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
hsa_profile_t profile;
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
// Allocate persistent memory for mip info on the heap
ADDR3_MIP_INFO* mip_info_storage = new ADDR3_MIP_INFO[num_mipmap_levels];
memset(mip_info_storage, 0, sizeof(ADDR3_MIP_INFO) * num_mipmap_levels);
out.pMipInfo = mip_info_storage;
hsa_profile_t profile;
hsa_status_t status = HSA::hsa_agent_get_info(component, HSA_AGENT_INFO_PROFILE, &profile);
if (status != HSA_STATUS_SUCCESS) return status;
if (status != HSA_STATUS_SUCCESS) {
delete[] mip_info_storage;
return status;
}
Image::TileMode tileMode = Image::TileMode::LINEAR;
if (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE) {
@@ -231,9 +241,9 @@ hsa_status_t ImageManagerGfx12::CalculateImageSizeAndAlignment(
desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)?
Image::TileMode::TILED : Image::TileMode::LINEAR;
}
if (GetAddrlibSurfaceInfoNv(component, desc, tileMode,
image_data_row_pitch, image_data_slice_pitch, out) ==
(uint32_t)(-1)) {
if (GetAddrlibSurfaceInfoNv(component, desc, num_mipmap_levels, tileMode,
image_data_row_pitch, image_data_slice_pitch, out) == (uint32_t)(-1)) {
delete[] mip_info_storage;
return HSA_STATUS_ERROR;
}
@@ -243,6 +253,7 @@ hsa_status_t ImageManagerGfx12::CalculateImageSizeAndAlignment(
image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR &&
((image_data_row_pitch && (rowPitch != image_data_row_pitch)) ||
(image_data_slice_pitch && (slicePitch != image_data_slice_pitch)))) {
delete[] mip_info_storage;
return static_cast<hsa_status_t>(
HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED);
}
@@ -252,6 +263,9 @@ hsa_status_t ImageManagerGfx12::CalculateImageSizeAndAlignment(
image_info.alignment = out.baseAlign;
assert(image_info.alignment != 0);
// Clean up temporary mip info storage
delete[] mip_info_storage;
return HSA_STATUS_SUCCESS;
}
@@ -430,7 +444,6 @@ hsa_status_t ImageManagerGfx12::PopulateImageSrd(Image& image) const {
word1.val = 0;
word1.f.BASE_ADDRESS_HI = PtrHigh32(image_data_addr);
word1.f.STRIDE = image_prop.element_size;
word1.f.SWIZZLE_ENABLE = 0;
word2.f.NUM_RECORDS = image.desc.width * image_prop.element_size;
@@ -471,9 +484,8 @@ hsa_status_t ImageManagerGfx12::PopulateImageSrd(Image& image) const {
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(
image.component, image.desc, image.tile_mode,
image.row_pitch, image.slice_pitch, out);
uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(image.component, image.desc,
1, image.tile_mode, image.row_pitch, image.slice_pitch, out);
if (swizzleMode == (uint32_t)(-1)) {
return HSA_STATUS_ERROR;
}
@@ -642,6 +654,7 @@ hsa_status_t ImageManagerGfx12::PopulateSamplerSrd(Sampler& sampler) const {
uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
uint32_t num_mipmap_levels,
Image::TileMode tileMode,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
@@ -664,7 +677,7 @@ uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv(
in.width = width;
in.height = height;
in.numSlices = num_slice;
in.pitchInElement = image_data_row_pitch / image_prop.element_size;
in.numMipLevels = num_mipmap_levels;
switch (desc.geometry) {
case HSA_EXT_IMAGE_GEOMETRY_1D:
@@ -672,46 +685,44 @@ uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv(
case HSA_EXT_IMAGE_GEOMETRY_1DA:
in.resourceType = ADDR_RSRC_TEX_1D;
break;
case HSA_EXT_IMAGE_GEOMETRY_2D:
case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
case HSA_EXT_IMAGE_GEOMETRY_2DA:
case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
in.resourceType = ADDR_RSRC_TEX_2D;
break;
case HSA_EXT_IMAGE_GEOMETRY_3D:
{
in.resourceType = ADDR_RSRC_TEX_3D;
/*
* 3D swizzle modes on GFX12 enforces alignment
* of the number of slices to the block depth.
* If numSlices = 3 then the 3 slices are
* interleaved for 3D locality among the 8 slices
* that make up each block. This causes the memory
* footprint to jump from an ideal size of ~12 GB
* to ~32 GB.
* 'enable3DSwizzleMode' flag tests for env variable
* HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable
* 3D swizzle:
* true: Keep view3dAs2dArray = 0 for real 3D interleaving.
* false: Use view3dAs2dArray = 1 to avoid the alignment
* expansion.
* 2D swizzle modes can lower size overhead but may yield
* suboptimal cache behavior for fully 3D volumetric
* operations.
*/
bool enable3DSwizzleMode = core::Runtime::runtime_singleton_->flag().enable_3d_swizzle();
if (enable3DSwizzleMode)
{
in.resourceType = ADDR_RSRC_TEX_3D;
/*
* 3D swizzle modes on GFX12 enforces alignment
* of the number of slices to the block depth.
* If numSlices = 3 then the 3 slices are
* interleaved for 3D locality among the 8 slices
* that make up each block. This causes the memory
* footprint to jump from an ideal size of ~12 GB
* to ~32 GB.
* 'enable3DSwizzleMode' flag tests for env variable
* HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable
* 3D swizzle:
* true: Keep view3dAs2dArray = 0 for real 3D interleaving.
* false: Use view3dAs2dArray = 1 to avoid the alignment
* expansion.
* 2D swizzle modes can lower size overhead but may yield
* suboptimal cache behavior for fully 3D volumetric
* operations.
*/
bool enable3DSwizzleMode = core::Runtime::runtime_singleton_->flag().enable_3d_swizzle();
if (enable3DSwizzleMode)
{
in.flags.view3dAs2dArray = 0;
}
else
{
in.flags.view3dAs2dArray = 1;
}
break;
in.flags.view3dAs2dArray = 0;
}
else
{
in.flags.view3dAs2dArray = 1;
}
break;
}
}
in.flags.texture = 1;
@@ -781,8 +792,9 @@ uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv(
const UINT_32 ratioLow = 2;
const UINT_32 ratioHigh = 1;
// Same behaviour as GFX11, remove linear if height is 1.
if (in.height > 1) {
// Remove linear swizzle mode for multi-dimensional or mipmapped textures.
// Linear mode is only appropriate for simple 1D single-level textures.
if (in.height > 1 || in.numMipLevels > 1) {
swOut.validModes.swLinear = 0;
}
@@ -793,6 +805,10 @@ uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv(
if (swOut.validModes.value & (1 << i)) {
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
// pMipInfo not needed - set to nullptr and AddrLib will ignore it
localOut.pMipInfo = nullptr;
localOut.size = sizeof(ADDR3_COMPUTE_SURFACE_INFO_OUTPUT);
in.swizzleMode = (Addr3SwizzleMode) i;
@@ -908,5 +924,456 @@ hsa_status_t ImageManagerGfx12::FillImage(const Image& image, const void* patter
return status;
}
/// @brief Build the GFX12 shader resource descriptor (SRD) for a mipmapped array.
///
/// HSA_EXT_IMAGE_GEOMETRY_1DB produces a buffer descriptor in words 0-3 with
/// words 4-7 cleared. Every other geometry produces an image descriptor in
/// words 0-7 based on the layout returned by GetAddrlibSurfaceInfoNv(), and
/// also updates mipmap.size and mipmap.addr_output.addr3. Words 8-11 always
/// carry channel type, channel order, width and level count; row_pitch and
/// slice_pitch are updated in both paths.
///
/// @param mipmap Mipmapped array to populate.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR if no surface layout could
///         be computed.
hsa_status_t ImageManagerGfx12::PopulateMipmapSrd(MipmappedArray& mipmap) const {
  // Translate the HSA format/geometry pair into its hardware encoding.
  ImageProperty prop = ImageLut().MapFormat(mipmap.desc.format, mipmap.desc.geometry);
  assert(prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(prop.element_size != 0);
  assert(mipmap.num_levels >= 1);

  // Local-memory addresses go into the SRD relative to the local memory base.
  const void* base_addr = IsLocalMemory(mipmap.data)
      ? reinterpret_cast<const void*>(
            reinterpret_cast<uintptr_t>(mipmap.data) - local_memory_base_address_)
      : mipmap.data;

  const hsa_ext_image_geometry_t geometry = mipmap.desc.geometry;

  if (geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    // ---- Buffer descriptor path ----
    const Swizzle sel = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);

    SQ_BUF_RSRC_WORD0 buf0;
    buf0.val = 0;
    buf0.f.BASE_ADDRESS = PtrLow32(base_addr);
    mipmap.srd[0] = buf0.val;

    SQ_BUF_RSRC_WORD1 buf1;
    buf1.val = 0;
    buf1.f.BASE_ADDRESS_HI = PtrHigh32(base_addr);
    buf1.f.STRIDE = prop.element_size;
    buf1.f.SWIZZLE_ENABLE = 0;
    mipmap.srd[1] = buf1.val;

    SQ_BUF_RSRC_WORD2 buf2;
    buf2.val = 0;
    buf2.f.NUM_RECORDS = mipmap.desc.width * prop.element_size;
    mipmap.srd[2] = buf2.val;

    // GFX12 compression controls (WRITE_COMPRESS_ENABLE, COMPRESSION_EN,
    // COMPRESSION_ACCESS_MODE) are intentionally left at zero.
    SQ_BUF_RSRC_WORD3 buf3;
    buf3.val = 0;
    buf3.f.DST_SEL_X = sel.x;
    buf3.f.DST_SEL_Y = sel.y;
    buf3.f.DST_SEL_Z = sel.z;
    buf3.f.DST_SEL_W = sel.w;
    buf3.f.FORMAT = GetCombinedFormat(prop.data_format, prop.data_type);
    buf3.f.INDEX_STRIDE = prop.element_size;
    buf3.f.TYPE = ImageLut().MapGeometry(geometry);
    mipmap.srd[3] = buf3.val;

    // Words 4-7 are unused by 1DB mipmaps.
    for (int w = 4; w <= 7; ++w) {
      mipmap.srd[w] = 0;
    }

    mipmap.row_pitch = mipmap.desc.width * prop.element_size;
    mipmap.slice_pitch = mipmap.row_pitch;
  } else {
    // ---- Image descriptor path ----
    // Query ADDR3 for the surface layout; per-mip info is not requested.
    ADDR3_COMPUTE_SURFACE_INFO_OUTPUT surf = {0};
    surf.pMipInfo = nullptr;

    unsigned int sw_mode = GetAddrlibSurfaceInfoNv(mipmap.component,
        mipmap.desc, mipmap.num_levels, mipmap.tile_mode,
        mipmap.row_pitch, mipmap.slice_pitch, surf);
    if (sw_mode == (uint32_t)(-1)) {
      return HSA_STATUS_ERROR;
    }

    mipmap.addr_output.addr3 = surf;
    mipmap.size = surf.surfSize;

    assert((surf.bpp / 8) == prop.element_size);
    const size_t pitch_bytes = surf.pitch * prop.element_size;

    const Swizzle sel = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);

    SQ_IMG_RSRC_WORD0 img0;
    img0.val = 0;
    img0.f.BASE_ADDRESS = PtrLow40Shift8(base_addr);

    SQ_IMG_RSRC_WORD1 img1;
    img1.val = 0;
    img1.f.BASE_ADDRESS_HI = PtrHigh64Shift40(base_addr);
    img1.f.MAX_MIP = mipmap.num_levels - 1;
    img1.f.BASE_LEVEL = 0;  // New to GFX12
    img1.f.FORMAT = GetCombinedFormat(prop.data_format, prop.data_type);
    // Low two bits of (width - 1) live in word 1 ...
    img1.f.WIDTH = BitSelect<0, 1>(mipmap.desc.width - 1);

    SQ_IMG_RSRC_WORD2 img2;
    img2.val = 0;
    // ... and the upper fourteen bits in word 2.
    img2.f.WIDTH_HI = BitSelect<2, 15>(mipmap.desc.width - 1);
    if (mipmap.desc.height) {
      img2.f.HEIGHT = mipmap.desc.height - 1;
    } else {
      img2.f.HEIGHT = 0;
    }

    // NO_EDGE_CLAMP (new to GFX12) is intentionally left at zero.
    SQ_IMG_RSRC_WORD3 img3;
    img3.val = 0;
    img3.f.DST_SEL_X = sel.x;
    img3.f.DST_SEL_Y = sel.y;
    img3.f.DST_SEL_Z = sel.z;
    img3.f.DST_SEL_W = sel.w;
    img3.f.LAST_LEVEL = mipmap.num_levels - 1;
    img3.f.SW_MODE = sw_mode;
    img3.f.BC_SWIZZLE = GetBcSwizzle(sel);
    img3.f.TYPE = ImageLut().MapGeometry(geometry);

    const bool is_layered = (geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
                             geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
                             geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool is_volume = (geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    SQ_IMG_RSRC_WORD4 img4;
    img4.val = 0;
    if (is_layered) {
      // Clamp to at least one layer before encoding size - 1.
      img4.f.DEPTH = std::max(mipmap.desc.array_size, static_cast<size_t>(1)) - 1;
    } else if (is_volume) {
      img4.f.DEPTH = mipmap.desc.depth - 1;
    } else {
      // For 1d, 2d and 2d-msaa, DEPTH + PITCH_MSB together encode pitch - 1.
      const uint32_t pitch_minus_one = surf.pitch - 1;
      img4.f.DEPTH = pitch_minus_one & 0x3fff;            // low 14 bits
      img4.f.PITCH_MSB = (pitch_minus_one >> 14) & 0x3;   // next 2 bits
    }

    mipmap.srd[0] = img0.val;
    mipmap.srd[1] = img1.val;
    mipmap.srd[2] = img2.val;
    mipmap.srd[3] = img3.val;
    mipmap.srd[4] = img4.val;
    // Words 5-7 carry no information for this descriptor.
    mipmap.srd[5] = 0;
    mipmap.srd[6] = 0;
    mipmap.srd[7] = 0;

    mipmap.row_pitch = pitch_bytes;
    mipmap.slice_pitch = surf.sliceSize;
  }

  // Auxiliary words appended after the hardware descriptor.
  mipmap.srd[8] = mipmap.desc.format.channel_type;
  mipmap.srd[9] = mipmap.desc.format.channel_order;
  mipmap.srd[10] = static_cast<uint32_t>(mipmap.desc.width);
  // Mipmap-specific: number of levels in the chain.
  mipmap.srd[11] = mipmap.num_levels;

  return HSA_STATUS_SUCCESS;
}
/// @brief Populate the GFX12 SRD of a mipmapped array from a pre-computed AMD
///        metadata descriptor.
///
/// SRD words 0-7 are copied from @p desc and then patched with the array's base
/// address, format and channel swizzle. Words 8-11 receive the channel type,
/// channel order, base width and level count. A per-level ADDR3_MIP_INFO table
/// is allocated and stored in mipmap_array.addr_output.addr3.pMipInfo, and
/// mipmap_array.size is estimated from the last level.
///
/// @param mipmap_array Mipmapped array to populate; desc, data and num_levels
///                     are read, srd/row_pitch/slice_pitch/addr_output/size
///                     are written.
/// @param desc         Metadata descriptor, reinterpreted as
///                     metadata_amd_gfx12_t.
/// @return HSA_STATUS_SUCCESS on success;
///         HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the format is not
///         supported or its element size differs from the hardware pixel size;
///         HSA_STATUS_ERROR_INVALID_ARGUMENT when @p desc is null or the array
///         has zero mip levels.
hsa_status_t ImageManagerGfx12::PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const {
  ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap_array.desc.format, mipmap_array.desc.geometry);
  if (mipmap_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED || mipmap_prop.element_size == 0) {
    return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  }

  // Without metadata there is nothing to copy, and with zero levels the
  // "last level" index below would wrap around and read out of bounds.
  if (desc == nullptr || mipmap_array.num_levels == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const metadata_amd_gfx12_t* desc_gfx12 = reinterpret_cast<const metadata_amd_gfx12_t*>(desc);
  const Swizzle swizzle = ImageLut().MapSwizzle(mipmap_array.desc.format.channel_order);

  // Addresses in local memory are rebased against the local memory base.
  const void* mipmap_data_addr = mipmap_array.data;
  if (IsLocalMemory(mipmap_array.data)) {
    mipmap_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(mipmap_array.data) - local_memory_base_address_);
  }

  // Copy the pre-computed SRD words 0-7 from metadata
  mipmap_array.srd[0] = desc_gfx12->word0.u32All;
  mipmap_array.srd[1] = desc_gfx12->word1.u32All;
  mipmap_array.srd[2] = desc_gfx12->word2.u32All;
  mipmap_array.srd[3] = desc_gfx12->word3.u32All;
  mipmap_array.srd[4] = desc_gfx12->word4.u32All;
  mipmap_array.srd[5] = desc_gfx12->word5.u32All;
  mipmap_array.srd[6] = desc_gfx12->word6.u32All;
  mipmap_array.srd[7] = desc_gfx12->word7.u32All;

  if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    // 1DB uses buffer descriptors
    SQ_BUF_RSRC_WORD0 word0;
    SQ_BUF_RSRC_WORD1 word1;
    SQ_BUF_RSRC_WORD3 word3;

    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow32(mipmap_data_addr);

    word1.val = mipmap_array.srd[1];
    word1.f.BASE_ADDRESS_HI = PtrHigh32(mipmap_data_addr);
    word1.f.STRIDE = mipmap_prop.element_size;

    word3.val = mipmap_array.srd[3];
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
    word3.f.INDEX_STRIDE = mipmap_prop.element_size;

    mipmap_array.srd[0] = word0.val;
    mipmap_array.srd[1] = word1.val;
    mipmap_array.srd[3] = word3.val;

    // NOTE(review): these two values are overwritten with 0 further down,
    // exactly as in the original code - confirm which value is intended.
    mipmap_array.row_pitch = mipmap_array.desc.width * mipmap_prop.element_size;
    mipmap_array.slice_pitch = mipmap_array.row_pitch;
  } else {
    // Non-1DB uses image descriptors
    uint32_t hwPixelSize = ImageLut().GetPixelSize(mipmap_prop.data_format, mipmap_prop.data_type);
    if (mipmap_prop.element_size != hwPixelSize) {
      return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
    }

    SQ_IMG_RSRC_WORD0* img_word0 = reinterpret_cast<SQ_IMG_RSRC_WORD0*>(&mipmap_array.srd[0]);
    SQ_IMG_RSRC_WORD1* img_word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&mipmap_array.srd[1]);
    SQ_IMG_RSRC_WORD3* img_word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3]);

    img_word0->bits.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr);
    img_word1->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr);
    img_word1->bits.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
    img_word3->bits.DST_SEL_X = swizzle.x;
    img_word3->bits.DST_SEL_Y = swizzle.y;
    img_word3->bits.DST_SEL_Z = swizzle.z;
    img_word3->bits.DST_SEL_W = swizzle.w;

    // Only the 1D geometries have their resource type rewritten here.
    if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
        mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D) {
      img_word3->bits.TYPE = ImageLut().MapGeometry(mipmap_array.desc.geometry);
    }
  }

  // Looks like this is only used for CPU copies.
  mipmap_array.row_pitch = 0;
  mipmap_array.slice_pitch = 0;

  // Store mipmap-specific metadata
  mipmap_array.srd[8] = mipmap_array.desc.format.channel_type;
  mipmap_array.srd[9] = mipmap_array.desc.format.channel_order;
  mipmap_array.srd[10] = static_cast<uint32_t>(mipmap_array.desc.width);
  mipmap_array.srd[11] = mipmap_array.num_levels;

  // Allocate a zero-initialised per-level table and fill it from mip_offsets.
  // NOTE(review): the table is allocated with new[] and handed to
  // addr_output.addr3.pMipInfo - confirm that the destroy path delete[]s it.
  ADDR3_MIP_INFO* mip_info_storage = new ADDR3_MIP_INFO[mipmap_array.num_levels]();

  // NOTE(review): assumes desc_gfx12->mip_offsets holds at least num_levels
  // entries - confirm against the metadata_amd_gfx12_t definition.
  for (uint32_t level = 0; level < mipmap_array.num_levels; level++) {
    // mip_offsets contains offset bits [39:8], shift left by 8 to get actual byte offset
    mip_info_storage[level].offset = static_cast<uint64_t>(desc_gfx12->mip_offsets[level]) << 8;
    // Calculate dimensions for this level (halve at each level, never below 1)
    mip_info_storage[level].pixelPitch = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.width >> level));
    mip_info_storage[level].pixelHeight = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.height >> level));
    mip_info_storage[level].depth = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.depth >> level));
  }

  // Store pMipInfo in addr_output for later use by PopulateMipLevelSrd
  mipmap_array.addr_output.addr3.pMipInfo = mip_info_storage;

  // Total size estimate: offset of the last level plus its unpadded footprint.
  // The product is widened to 64 bits up front; with a single level the last
  // level is the full-size base level and a 32-bit product could overflow.
  const ADDR3_MIP_INFO& last_mip = mip_info_storage[mipmap_array.num_levels - 1];
  const uint64_t last_level_size = static_cast<uint64_t>(last_mip.pixelPitch) *
                                   static_cast<uint64_t>(last_mip.pixelHeight) *
                                   static_cast<uint64_t>(last_mip.depth) *
                                   static_cast<uint64_t>(mipmap_prop.element_size);
  mipmap_array.size = last_mip.offset + last_level_size;

  return HSA_STATUS_SUCCESS;
}
/// @brief Dump a GFX12 image SRD to stdout in annotated, human-readable form.
///
/// Prints the twelve 32-bit words in hex and binary, then decodes words 0-4
/// into their bit fields and, when MAX_MIP is non-zero, adds a short mipmap
/// summary.
///
/// @param srd Pointer to the SRD words; the first 12 words are read. When null
///            only an error banner is printed.
void ImageManagerGfx12::printSRDDetailed(const uint32_t* srd) const {
  printf("\n========== Image SRD (GFX12) - Detailed ==========\n");
  if (!srd) {
    printf("ERROR: No SRD data provided.\n");
    printf("===============================================\n\n");
    return;
  }

  // Print all 12 words with bit field annotations
  for (int i = 0; i < 12; i++) {
    printf("WORD %d: 0x%08x ", i, srd[i]);
    // Binary representation, grouped in nibbles
    printf("(");
    for (int bit = 31; bit >= 0; bit--) {
      printf("%d", (srd[i] >> bit) & 1);
      if (bit % 4 == 0 && bit != 0) printf("_");
    }
    printf(")\n");
  }

  // WORD 0: SQ_IMG_RSRC_WORD0
  SQ_IMG_RSRC_WORD0 word0;
  word0.val = srd[0];
  printf("\nWORD 0: BASE_ADDRESS (bits 39:8) = 0x%08x\n", word0.f.BASE_ADDRESS);

  // WORD 1: SQ_IMG_RSRC_WORD1
  SQ_IMG_RSRC_WORD1 word1;
  word1.val = srd[1];
  printf("WORD 1: BASE_ADDRESS_HI = 0x%08x\n", word1.f.BASE_ADDRESS_HI);
  printf(" MAX_MIP = %u ◄──── Total mip levels - 1\n", word1.f.MAX_MIP);
  printf(" BASE_LEVEL = %u ◄──── Current base level\n", word1.f.BASE_LEVEL);
  printf(" FORMAT = %u\n", word1.f.FORMAT);
  printf(" WIDTH (bits 1:0) = %u\n", word1.f.WIDTH);

  // Calculate full address (GFX12 uses 40-bit shifted by 8)
  uint64_t base_addr = ((uint64_t)word1.f.BASE_ADDRESS_HI << 40) | ((uint64_t)word0.f.BASE_ADDRESS << 8);
  // uint64_t is not 'unsigned long' on every platform; print through
  // 'unsigned long long' so the format specifier always matches.
  printf(" → Full Base Address = 0x%016llx\n", static_cast<unsigned long long>(base_addr));

  // WORD 2: SQ_IMG_RSRC_WORD2
  SQ_IMG_RSRC_WORD2 word2;
  word2.val = srd[2];
  printf("WORD 2: WIDTH_HI (bits 15:2) = %u\n", word2.f.WIDTH_HI);
  printf(" HEIGHT = %u\n", word2.f.HEIGHT);

  // Calculate full width (low 2 bits live in WORD1, the rest in WORD2)
  uint32_t full_width = word1.f.WIDTH | (word2.f.WIDTH_HI << 2);
  printf(" → Full Width = %u (actual: %u)\n", full_width, full_width + 1);
  printf(" → Full Height = %u (actual: %u)\n", word2.f.HEIGHT, word2.f.HEIGHT + 1);

  // WORD 3: SQ_IMG_RSRC_WORD3
  SQ_IMG_RSRC_WORD3 word3;
  word3.val = srd[3];
  printf("WORD 3: DST_SEL_X = %u ", word3.f.DST_SEL_X);
  printChannelSelect(word3.f.DST_SEL_X);
  printf(" DST_SEL_Y = %u ", word3.f.DST_SEL_Y);
  printChannelSelect(word3.f.DST_SEL_Y);
  printf(" DST_SEL_Z = %u ", word3.f.DST_SEL_Z);
  printChannelSelect(word3.f.DST_SEL_Z);
  printf(" DST_SEL_W = %u ", word3.f.DST_SEL_W);
  printChannelSelect(word3.f.DST_SEL_W);
  printf(" LAST_LEVEL = %u ◄──── Current last level (GFX12 NEW)\n", word3.f.LAST_LEVEL);
  printf(" SW_MODE = %u ", word3.f.SW_MODE);
  printSwizzleMode(word3.f.SW_MODE);
  printf(" BC_SWIZZLE = %u\n", word3.f.BC_SWIZZLE);
  printf(" TYPE = %u ", word3.f.TYPE);
  printResourceType(word3.f.TYPE);

  // WORD 4: SQ_IMG_RSRC_WORD4
  SQ_IMG_RSRC_WORD4 word4;
  word4.val = srd[4];
  printf("WORD 4: DEPTH = %u\n", word4.f.DEPTH);
  printf(" PITCH_MSB = %u\n", word4.f.PITCH_MSB);

  // Interpret DEPTH/PITCH_MSB according to the resource type code
  uint32_t type = word3.f.TYPE;
  if (type == 10) {  // 3D
    printf(" → 3D Depth = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1);
  } else if (type == 13 || type == 12) {  // Arrays
    printf(" → Array Size = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1);
  } else {  // 1D/2D - encodes pitch
    uint32_t encoded_pitch = word4.f.DEPTH | (word4.f.PITCH_MSB << 14);
    printf(" → Encoded Pitch = %u (actual: %u)\n", encoded_pitch, encoded_pitch + 1);
  }

  // WORD 5-7: Usually zero for basic images
  printf("WORD 5: Reserved = 0x%08x\n", srd[5]);
  printf("WORD 6: Reserved = 0x%08x\n", srd[6]);
  printf("WORD 7: Reserved = 0x%08x\n", srd[7]);

  // Mipmap analysis
  if (word1.f.MAX_MIP > 0) {
    printf("\nMIPMAP ANALYSIS:\n");
    printf(" Total Levels = %u (MAX_MIP + 1)\n", word1.f.MAX_MIP + 1);
    printf(" Active Range = [%u, %u]\n", word1.f.BASE_LEVEL, word3.f.LAST_LEVEL);
    if (word1.f.BASE_LEVEL == word3.f.LAST_LEVEL) {
      printf(" Mode = SINGLE LEVEL VIEW ◄──── Mip level view\n");
      uint32_t level = word1.f.BASE_LEVEL;
      uint32_t level_width = std::max(1u, (full_width + 1) >> level);
      uint32_t level_height = std::max(1u, static_cast<uint32_t>((word2.f.HEIGHT + 1) >> level));
      printf(" Effective Dimensions = %ux%u (level %u)\n", level_width, level_height, level);
    } else {
      printf(" Mode = FULL MIPMAP CHAIN\n");
    }
  }
  printf("===============================================\n\n");
}
/// @brief Print the symbolic name of a DST_SEL channel selector, followed by a
///        newline.
///
/// Selector values 0, 1 and 4-7 have names; everything else prints "(UNKNOWN)".
///
/// @param sel Raw selector value taken from an SRD DST_SEL_* field.
void ImageManagerGfx12::printChannelSelect(uint32_t sel) const {
  // Indexed by selector value; nullptr marks values without a name.
  static const char* const kSelectorNames[] = {
      "(SEL_0)\n",   "(SEL_1)\n",   nullptr,       nullptr,
      "(SEL_X/R)\n", "(SEL_Y/G)\n", "(SEL_Z/B)\n", "(SEL_W/A)\n",
  };
  const uint32_t known_count = sizeof(kSelectorNames) / sizeof(kSelectorNames[0]);

  const char* name = (sel < known_count) ? kSelectorNames[sel] : nullptr;
  fputs(name != nullptr ? name : "(UNKNOWN)\n", stdout);
}
/// @brief Print the symbolic name of an SRD resource TYPE code, followed by a
///        newline.
///
/// Codes 8-15 have names; any other code prints "(UNKNOWN=<code>)".
///
/// @param type Raw TYPE field value from SRD word 3.
void ImageManagerGfx12::printResourceType(uint32_t type) const {
  const uint32_t kFirstKnown = 8;
  // Entry i names type code (kFirstKnown + i).
  static const char* const kTypeNames[] = {
      "(1D)\n",           "(2D)\n",       "(3D)\n",      "(CUBE)\n",
      "(1D_ARRAY/1DB)\n", "(2D_ARRAY)\n", "(2D_MSAA)\n", "(2D_MSAA_ARRAY)\n",
  };
  const uint32_t known_count = sizeof(kTypeNames) / sizeof(kTypeNames[0]);

  if (type < kFirstKnown || type >= kFirstKnown + known_count) {
    printf("(UNKNOWN=%u)\n", type);
    return;
  }
  fputs(kTypeNames[type - kFirstKnown], stdout);
}
/// @brief Print a label for an SRD SW_MODE value, followed by a newline.
///
/// 0 prints "(LINEAR)"; 1-4, 5-8 and 9-12 print the SW_256B / SW_4KB / SW_64KB
/// families with a 1-based index; 13-21 print SW_VAR with a 1-based index;
/// larger values print "(UNKNOWN=<value>)".
///
/// NOTE(review): the family names follow the grouping coded here; confirm they
/// match the swizzle-mode numbering actually used for GFX12 surfaces.
///
/// @param sw_mode Raw SW_MODE field value from SRD word 3.
void ImageManagerGfx12::printSwizzleMode(uint32_t sw_mode) const {
  if (sw_mode == 0) {
    printf("(LINEAR)\n");
    return;
  }
  if (sw_mode >= 22) {
    printf("(UNKNOWN=%u)\n", sw_mode);
    return;
  }
  // Remaining values are 1..21; test the ranges from the top down.
  if (sw_mode >= 13) {
    printf("(SW_VAR_%u)\n", sw_mode - 12);
    return;
  }
  if (sw_mode >= 9) {
    printf("(SW_64KB_%u)\n", sw_mode - 8);
    return;
  }
  if (sw_mode >= 5) {
    printf("(SW_4KB_%u)\n", sw_mode - 4);
    return;
  }
  printf("(SW_256B_%u)\n", sw_mode);
}
/// @brief Restrict a level view's SRD to a single mip level.
///
/// Sets BASE_LEVEL (SRD word 1) and LAST_LEVEL (SRD word 3) of
/// @p level_view to @p mip_level. Per the original comment the caller is
/// expected to have copied the parent SRD into @p level_view beforehand;
/// nothing else in the SRD is touched here.
///
/// @param level_view   View whose SRD is modified in place.
/// @param mipmap_array Parent mipmapped array; only num_levels is read, to
///                     validate @p mip_level.
/// @param mip_level    Zero-based level to expose.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT when
///         @p mip_level is not below mipmap_array.num_levels.
hsa_status_t ImageManagerGfx12::PopulateMipLevelSrd(
    MipmappedArray& level_view,
    const MipmappedArray& mipmap_array,
    uint32_t mip_level) const {
  // An out-of-range level would otherwise be silently truncated to the width
  // of the BASE_LEVEL/LAST_LEVEL bit fields and select an unrelated level.
  if (mip_level >= mipmap_array.num_levels) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // SRD already copied from parent, just modify BASE_LEVEL/LAST_LEVEL fields
  uint32_t* srd_words = reinterpret_cast<uint32_t*>(level_view.srd);

  // GFX12 SRD words 1 and 3 hold the BASE_LEVEL and LAST_LEVEL fields
  SQ_IMG_RSRC_WORD1* word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&srd_words[1]);
  SQ_IMG_RSRC_WORD3* word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&srd_words[3]);

  // Set both to the same value so only this level is addressed
  word1->f.BASE_LEVEL = mip_level;
  word3->f.LAST_LEVEL = mip_level;

  debug_print("Set SRD mip selection: BASE_LEVEL=%u, LAST_LEVEL=%u", mip_level, mip_level);
  return HSA_STATUS_SUCCESS;
}
} // namespace image
} // namespace rocr
+18
Просмотреть файл
@@ -60,6 +60,7 @@ class ImageManagerGfx12 : public ImageManagerKv {
virtual hsa_status_t CalculateImageSizeAndAlignment(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
uint32_t num_mipmap_levels,
size_t image_data_row_pitch, size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info) const;
@@ -80,9 +81,26 @@ class ImageManagerGfx12 : public ImageManagerKv {
/// @brief Fill image backing storage using agent copy.
virtual hsa_status_t FillImage(const Image& image, const void* pattern,
const hsa_ext_image_region_t& region);
/// @brief Fill mipmap structure with device specific mipmapped array object.
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const;
/// @brief Fill mipmap structure with pre-computed AMD metadata descriptor.
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const;
/// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields
virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view,
const MipmappedArray& mipmap_array, uint32_t mip_level) const;
/// @brief Print an annotated dump of the first 12 SRD words to stdout.
/// A null @p srd prints an error banner only.
virtual void printSRDDetailed(const uint32_t* srd) const;
/// @brief Print the symbolic name of a DST_SEL channel selector value.
virtual void printChannelSelect(uint32_t sel) const;
/// @brief Print the symbolic name of an SRD resource TYPE code.
virtual void printResourceType(uint32_t type) const;
/// @brief Print a label for an SRD SW_MODE (swizzle mode) value.
virtual void printSwizzleMode(uint32_t sw_mode) const;
protected:
uint32_t GetAddrlibSurfaceInfoNv(hsa_agent_t component,
const hsa_ext_image_descriptor_t& desc,
uint32_t num_mipmap_levels,
Image::TileMode tileMode,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
+158 -1
Просмотреть файл
@@ -220,6 +220,7 @@ void ImageManagerKv::GetImageInfoMaxDimension(hsa_agent_t component,
hsa_status_t ImageManagerKv::CalculateImageSizeAndAlignment(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
uint32_t num_mipmap_levels,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info) const {
@@ -719,6 +720,162 @@ hsa_status_t ImageManagerKv::FillImage(const Image& image, const void* pattern,
return status;
}
/// @brief Mipmap SRD population stub for the Kv image manager.
///
/// The argument is never inspected: per the original comment, the Kv (GFX8)
/// architecture does not support mipmaps, so the call always fails.
///
/// @return HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED, unconditionally.
hsa_status_t ImageManagerKv::PopulateMipmapSrd(MipmappedArray& mipmap) const {
  const hsa_status_t unsupported =
      static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  return unsupported;
}
/// @brief Metadata-based mipmap SRD population stub for the Kv image manager.
///
/// Neither argument is inspected: per the original comment, the Kv (GFX8)
/// architecture does not support mipmaps, so the call always fails.
///
/// @return HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED, unconditionally.
hsa_status_t ImageManagerKv::PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const {
  const hsa_status_t unsupported =
      static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  return unsupported;
}
/// @brief Dump a Kv-layout image SRD to stdout in annotated, human-readable
///        form.
///
/// Prints the twelve 32-bit words in hex and binary, decodes words 0-5 into
/// their bit fields, prints words 8-11 as the auxiliary channel type, channel
/// order, width and level-count values, and ends with a short mipmap summary.
///
/// @param srd Pointer to the SRD words; the first 12 words are read. When null
///            only an error banner is printed.
void ImageManagerKv::printSRDDetailed(const uint32_t* srd) const {
  printf("\n========== Image SRD (KV) - Detailed ==========\n");
  if (!srd) {
    printf("ERROR: No SRD data provided.\n");
    printf("===============================================\n\n");
    return;
  }

  // Print all 12 words with bit field annotations
  for (int i = 0; i < 12; i++) {
    printf("WORD %d: 0x%08x ", i, srd[i]);
    // Binary representation, grouped in nibbles
    printf("(");
    for (int bit = 31; bit >= 0; bit--) {
      printf("%d", (srd[i] >> bit) & 1);
      if (bit % 4 == 0 && bit != 0) printf("_");
    }
    printf(")\n");
  }

  // WORD 0: BASE_ADDRESS (bits 39:8)
  SQ_IMG_RSRC_WORD0 word0;
  word0.u32_all = srd[0];
  printf("\nWORD 0: BASE_ADDRESS (bits 39:8) = 0x%08x\n", word0.bits.base_address);

  // WORD 1: Contains BASE_ADDRESS_HI, MIN_LOD, DATA_FORMAT, NUM_FORMAT, MTYPE
  SQ_IMG_RSRC_WORD1 word1;
  word1.u32_all = srd[1];
  printf("WORD 1: BASE_ADDRESS_HI = 0x%02x\n", word1.bits.base_address_hi);
  printf(" MIN_LOD = %u\n", word1.bits.min_lod);
  printf(" DATA_FORMAT = %u\n", word1.bits.data_format);
  printf(" NUM_FORMAT = %u\n", word1.bits.num_format);
  printf(" MTYPE = %u\n", word1.bits.mtype);

  // Calculate full address (KV uses 40-bit shifted by 8)
  uint64_t base_addr = ((uint64_t)word1.bits.base_address_hi << 40) | ((uint64_t)word0.bits.base_address << 8);
  // uint64_t is not 'unsigned long' on every platform; print through
  // 'unsigned long long' so the format specifier always matches.
  printf(" → Full Base Address = 0x%016llx\n", static_cast<unsigned long long>(base_addr));

  // WORD 2: WIDTH, HEIGHT, PERF_MOD, INTERLACED
  SQ_IMG_RSRC_WORD2 word2;
  word2.u32_all = srd[2];
  printf("WORD 2: WIDTH = %u (actual: %u)\n", word2.bits.width, word2.bits.width + 1);
  printf(" HEIGHT = %u (actual: %u)\n", word2.bits.height, word2.bits.height + 1);
  printf(" PERF_MOD = %u\n", word2.bits.perf_mod);
  printf(" INTERLACED = %u\n", word2.bits.interlaced);

  // WORD 3: Channel selectors, TILING_INDEX, POW2_PAD, TYPE, ATC
  SQ_IMG_RSRC_WORD3 word3;
  word3.u32_all = srd[3];
  printf("WORD 3: DST_SEL_X = %u ", word3.bits.dst_sel_x);
  printChannelSelect(word3.bits.dst_sel_x);
  printf(" DST_SEL_Y = %u ", word3.bits.dst_sel_y);
  printChannelSelect(word3.bits.dst_sel_y);
  printf(" DST_SEL_Z = %u ", word3.bits.dst_sel_z);
  printChannelSelect(word3.bits.dst_sel_z);
  printf(" DST_SEL_W = %u ", word3.bits.dst_sel_w);
  printChannelSelect(word3.bits.dst_sel_w);
  printf(" TILING_INDEX = %u ◄──── Tile configuration index\n", word3.bits.tiling_index);
  printf(" POW2_PAD = %u ◄──── Power-of-2 padding\n", word3.bits.pow2_pad);
  printf(" TYPE = %u ", word3.bits.type);
  printResourceType(word3.bits.type);
  printf(" ATC = %u ◄──── Address translation cache\n", word3.bits.atc);

  // WORD 4: DEPTH, PITCH
  SQ_IMG_RSRC_WORD4 word4;
  word4.u32_all = srd[4];
  printf("WORD 4: DEPTH = %u\n", word4.bits.depth);
  printf(" PITCH = %u (actual: %u)\n", word4.bits.pitch, word4.bits.pitch + 1);

  // Interpret DEPTH according to the resource type code
  uint32_t type = word3.bits.type;
  if (type == 10) {  // 3D
    printf(" → 3D Depth = %u (actual: %u)\n", word4.bits.depth, word4.bits.depth + 1);
  } else if (type == 13 || type == 12) {  // Arrays
    printf(" → Array Size = %u (actual: %u)\n", word4.bits.depth, word4.bits.depth + 1);
  }

  // WORD 5: LAST_ARRAY
  SQ_IMG_RSRC_WORD5 word5;
  word5.u32_all = srd[5];
  printf("WORD 5: LAST_ARRAY = %u ◄──── Last array slice\n", word5.bits.last_array);

  // WORD 6-7: Usually zero for basic images
  printf("WORD 6: Reserved = 0x%08x\n", srd[6]);
  printf("WORD 7: Reserved = 0x%08x\n", srd[7]);

  // Additional information (HSA extension fields)
  printf("WORD 8: CHANNEL_TYPE = 0x%08x\n", srd[8]);
  printf("WORD 9: CHANNEL_ORDER = 0x%08x\n", srd[9]);
  printf("WORD 10: WIDTH_ORIGINAL = 0x%08x\n", srd[10]);
  printf("WORD 11: NUM_LEVELS = 0x%08x\n", srd[11]);

  // Mipmap analysis (KV architecture limitations)
  printf("\nMIPMAP ANALYSIS:\n");
  printf(" Total Levels = %u\n", srd[11]);
  printf(" Min LOD = %u ◄──── Minimum detail level\n", word1.bits.min_lod);
  printf(" KV Architecture = LEGACY MIPMAP SUPPORT\n");
  printf(" Note = KV lacks BASE_LEVEL/LAST_LEVEL fields\n");
  printf(" Note = Mip level selection via shader only\n");
  printf("===============================================\n\n");
}
/// @brief Print the symbolic name of a dst_sel channel selector, followed by a
///        newline.
///
/// Selector values 0, 1 and 4-7 have names; everything else prints "(UNKNOWN)".
///
/// @param sel Raw selector value taken from an SRD dst_sel_* field.
void ImageManagerKv::printChannelSelect(uint32_t sel) const {
  const char* text = "(UNKNOWN)\n";
  if (sel == 0) {
    text = "(SEL_0)\n";
  } else if (sel == 1) {
    text = "(SEL_1)\n";
  } else if (sel == 4) {
    text = "(SEL_X/R)\n";
  } else if (sel == 5) {
    text = "(SEL_Y/G)\n";
  } else if (sel == 6) {
    text = "(SEL_Z/B)\n";
  } else if (sel == 7) {
    text = "(SEL_W/A)\n";
  }
  fputs(text, stdout);
}
/// @brief Print the symbolic name of an SRD resource type code, followed by a
///        newline.
///
/// Codes 8-15 have names; any other code prints "(UNKNOWN=<code>)".
///
/// @param type Raw type field value from SRD word 3.
void ImageManagerKv::printResourceType(uint32_t type) const {
  // Names for the contiguous code range [8, 15], in code order.
  static const char* const kNamesFrom8[] = {
      "(1D)\n",           "(2D)\n",       "(3D)\n",      "(CUBE)\n",
      "(1D_ARRAY/1DB)\n", "(2D_ARRAY)\n", "(2D_MSAA)\n", "(2D_MSAA_ARRAY)\n",
  };

  const bool has_name = (type >= 8) && (type <= 15);
  if (has_name) {
    fputs(kNamesFrom8[type - 8], stdout);
  } else {
    printf("(UNKNOWN=%u)\n", type);
  }
}
/// @brief Print the given mode value as "(TILING_MODE=<value>)" plus newline.
///
/// Per the original comment, KV uses tiling modes rather than swizzle modes
/// and this overload exists for completeness, so the raw value is echoed
/// without interpretation.
///
/// @param sw_mode Raw mode value to print.
void ImageManagerKv::printSwizzleMode(uint32_t sw_mode) const {
  const uint32_t tiling_mode = sw_mode;
  printf("(TILING_MODE=%u)\n", tiling_mode);
}
/// @brief Mip level view stub for the Kv image manager.
///
/// No argument is inspected: per the original comment, mip level views are not
/// supported on GFX8 hardware, so the call always fails. The status matches
/// the one returned by the Kv PopulateMipmapSrd overloads, so callers see one
/// consistent "unsupported" code instead of a misleading "runtime not
/// initialized" error.
///
/// @return HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED, unconditionally.
hsa_status_t ImageManagerKv::PopulateMipLevelSrd(
    MipmappedArray& level_view,
    const MipmappedArray& mipmap_array,
    uint32_t mip_level) const {
  // Mip level views not supported on GFX8 hardware
  return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
}
hsa_status_t ImageManagerKv::GetLocalMemoryRegion(hsa_region_t region,
void* data) {
if (data == NULL) {
@@ -845,7 +1002,7 @@ bool ImageManagerKv::GetAddrlibSurfaceInfo(
in.width = width;
in.height = height;
in.numSlices = num_slice;
in.pitchInElement = image_data_row_pitch / image_prop.element_size;
switch(desc.geometry) {
case HSA_EXT_IMAGE_GEOMETRY_1D:
case HSA_EXT_IMAGE_GEOMETRY_1DB:
Исполняемый файл → Обычный файл
+18
Просмотреть файл
@@ -79,6 +79,7 @@ class ImageManagerKv : public ImageManager {
virtual hsa_status_t CalculateImageSizeAndAlignment(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
uint32_t num_mipmap_levels,
size_t image_data_row_pitch, size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info) const;
@@ -116,6 +117,21 @@ class ImageManagerKv : public ImageManager {
virtual hsa_status_t FillImage(const Image& image, const void* pattern,
const hsa_ext_image_region_t& region);
/// @brief Fill mipmap structure with device specific mipmapped array object.
/// The Kv implementation always returns
/// HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED.
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const;
/// @brief Fill mipmap structure with pre-computed AMD metadata descriptor.
/// The Kv implementation always returns
/// HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED.
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const;
/// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields
/// The Kv implementation always fails without touching its arguments.
virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view,
const MipmappedArray& mipmap_array, uint32_t mip_level) const;
/// @brief Print an annotated dump of the first 12 SRD words to stdout.
/// A null @p srd prints an error banner only.
virtual void printSRDDetailed(const uint32_t* srd) const;
/// @brief Print the symbolic name of a channel selector value.
virtual void printChannelSelect(uint32_t sel) const;
/// @brief Print the symbolic name of an SRD resource type code.
virtual void printResourceType(uint32_t type) const;
/// @brief Print the given mode value; the Kv implementation labels it as a
/// tiling mode.
virtual void printSwizzleMode(uint32_t sw_mode) const;
protected:
static hsa_status_t GetLocalMemoryRegion(hsa_region_t region, void* data);
@@ -145,6 +161,8 @@ class ImageManagerKv : public ImageManager {
ADDR_HANDLE addr_lib_;
virtual ADDR_HANDLE GetAddrLib() const override { return addr_lib_; }
hsa_agent_t agent_;
uint32_t family_type_;
+429 -12
Просмотреть файл
@@ -190,7 +190,7 @@ static FORMAT GetCombinedFormat(uint8_t fmt, uint8_t type) {
return CFMT_INVALID;
};
//-----------------------------------------------------------------------------
// End workaround
// End workaround
//-----------------------------------------------------------------------------
/// @brief Default constructor; only default-constructs the ImageManagerKv base.
ImageManagerNv::ImageManagerNv() : ImageManagerKv() {}
@@ -201,6 +201,7 @@ ImageManagerNv::~ImageManagerNv() {}
hsa_status_t ImageManagerNv::CalculateImageSizeAndAlignment(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
uint32_t num_mipmap_levels,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info) const {
@@ -216,9 +217,8 @@ hsa_status_t ImageManagerNv::CalculateImageSizeAndAlignment(
desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)?
Image::TileMode::TILED : Image::TileMode::LINEAR;
}
if (GetAddrlibSurfaceInfoNv(component, desc, tileMode,
image_data_row_pitch, image_data_slice_pitch, out) ==
(uint32_t)(-1)) {
if (GetAddrlibSurfaceInfoNv(component, desc, num_mipmap_levels, tileMode,
image_data_row_pitch, image_data_slice_pitch, out) == (uint32_t)(-1)) {
return HSA_STATUS_ERROR;
}
@@ -319,7 +319,7 @@ hsa_status_t ImageManagerNv::PopulateImageSrd(Image& image,
reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3])->bits.TYPE =
ImageLut().MapGeometry(image.desc.geometry);
}
// Imported metadata holds the offset to metadata, add the image base address.
uintptr_t meta = uintptr_t(((SQ_IMG_RSRC_WORD7*)(&image.srd[7]))->bits.META_DATA_ADDRESS_HI) << 16;
meta |= uintptr_t(((SQ_IMG_RSRC_WORD6*)(&image.srd[6]))->bits.META_DATA_ADDRESS) << 8;
@@ -450,9 +450,8 @@ hsa_status_t ImageManagerNv::PopulateImageSrd(Image& image) const {
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(
image.component, image.desc, image.tile_mode,
image.row_pitch, image.slice_pitch, out);
uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(image.component, image.desc,
1, image.tile_mode, image.row_pitch, image.slice_pitch, out);
if (swizzleMode == (uint32_t)(-1)) {
return HSA_STATUS_ERROR;
}
@@ -612,6 +611,7 @@ hsa_status_t ImageManagerNv::PopulateSamplerSrd(Sampler& sampler) const {
uint32_t ImageManagerNv::GetAddrlibSurfaceInfoNv(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
uint32_t num_mipmap_levels,
Image::TileMode tileMode,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
@@ -627,7 +627,9 @@ uint32_t ImageManagerNv::GetAddrlibSurfaceInfoNv(
const uint32_t num_slice = static_cast<uint32_t>(
std::max(kMinNumSlice, std::max(desc.array_size, desc.depth)));
uint32_t minor_ver = MinorVerFromDevID(chip_id_);
// Minor version used for future GPU-specific optimizations (currently unused)
(void)MinorVerFromDevID(chip_id_);
ADDR2_COMPUTE_SURFACE_INFO_INPUT in = {0};
in.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
in.format = addrlib_format;
@@ -635,9 +637,8 @@ uint32_t ImageManagerNv::GetAddrlibSurfaceInfoNv(
in.width = width;
in.height = height;
in.numSlices = num_slice;
// Custom Pitch is supported in gfx1030 and beyond
if (minor_ver >= 3)
in.pitchInElement = image_data_row_pitch / image_prop.element_size;
in.numMipLevels = num_mipmap_levels;
switch (desc.geometry) {
case HSA_EXT_IMAGE_GEOMETRY_1D:
case HSA_EXT_IMAGE_GEOMETRY_1DB:
@@ -804,5 +805,421 @@ hsa_status_t ImageManagerNv::FillImage(const Image& image, const void* pattern,
return status;
}
/// @brief Build the NV-layout SRD for a mipmapped array.
///
/// For HSA_EXT_IMAGE_GEOMETRY_1DB a buffer descriptor (words 0-3) is written;
/// for every other geometry an image descriptor (words 0-7) is built from the
/// surface layout returned by GetAddrlibSurfaceInfoNv. Words 8-11 always
/// receive the channel type, channel order, base width and level count.
///
/// @param mipmap Mipmapped array to populate; desc, data, num_levels,
///               tile_mode, row_pitch and slice_pitch are read, and srd,
///               row_pitch, slice_pitch, addr_output and size are written.
/// @return HSA_STATUS_SUCCESS on success;
///         HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the format is
///         unsupported or has a zero element size;
///         HSA_STATUS_ERROR_INVALID_ARGUMENT when the array has zero levels;
///         HSA_STATUS_ERROR when the surface layout query fails.
hsa_status_t ImageManagerNv::PopulateMipmapSrd(MipmappedArray& mipmap) const {
  ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap.desc.format, mipmap.desc.geometry);

  // Validate at run time rather than with assert(): in release builds the
  // asserts vanish and a zero element size or zero level count would flow into
  // the descriptor (num_levels - 1 underflows LAST_LEVEL / MAX_MIP).
  if (mipmap_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED || mipmap_prop.element_size == 0) {
    return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  }
  if (mipmap.num_levels == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Addresses in local memory are rebased against the local memory base.
  const void* mipmap_data_addr = mipmap.data;
  if (IsLocalMemory(mipmap.data)) {
    mipmap_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(mipmap.data) - local_memory_base_address_);
  }

  if (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    SQ_BUF_RSRC_WORD0 word0;
    SQ_BUF_RSRC_WORD1 word1;
    SQ_BUF_RSRC_WORD2 word2;
    SQ_BUF_RSRC_WORD3 word3;

    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow32(mipmap_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh32(mipmap_data_addr);
    word1.f.STRIDE = mipmap_prop.element_size;
    word1.f.SWIZZLE_ENABLE = false;
    word1.f.CACHE_SWIZZLE = false;

    // Clear the whole word first so no bit is left indeterminate.
    word2.val = 0;
    word2.f.NUM_RECORDS = mipmap.desc.width * mipmap_prop.element_size;

    const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);
    word3.val = 0;
    word3.f.RESOURCE_LEVEL = 1;  // NV-specific resource level
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
    word3.f.INDEX_STRIDE = mipmap_prop.element_size;
    word3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry);

    mipmap.srd[0] = word0.val;
    mipmap.srd[1] = word1.val;
    mipmap.srd[2] = word2.val;
    mipmap.srd[3] = word3.val;

    mipmap.row_pitch = mipmap.desc.width * mipmap_prop.element_size;
    mipmap.slice_pitch = mipmap.row_pitch;
  } else {
    SQ_IMG_RSRC_WORD0 word0;
    SQ_IMG_RSRC_WORD1 word1;
    SQ_IMG_RSRC_WORD2 word2;
    SQ_IMG_RSRC_WORD3 word3;
    SQ_IMG_RSRC_WORD4 word4;
    SQ_IMG_RSRC_WORD5 word5;

    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
    // Per-level info is not requested from the layout query.
    out.pMipInfo = nullptr;

    uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(
        mipmap.component, mipmap.desc, mipmap.num_levels,
        mipmap.tile_mode, mipmap.row_pitch, mipmap.slice_pitch, out);
    if (swizzleMode == (uint32_t)(-1)) {
      return HSA_STATUS_ERROR;
    }

    mipmap.addr_output.addr2 = out;
    mipmap.size = out.surfSize;

    assert((out.bpp / 8) == mipmap_prop.element_size);
    const size_t row_pitch_size = out.pitch * mipmap_prop.element_size;

    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr);
    word1.f.MIN_LOD = 0;
    word1.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
    // Only take the lowest 2 bits of (mipmap.desc.width - 1)
    word1.f.WIDTH = BitSelect<0, 1>(mipmap.desc.width - 1);

    word2.val = 0;
    // Take the high 12 bits of (mipmap.desc.width - 1)
    word2.f.WIDTH_HI = BitSelect<2, 13>(mipmap.desc.width - 1);
    word2.f.HEIGHT = mipmap.desc.height ? mipmap.desc.height - 1 : 0;
    word2.f.RESOURCE_LEVEL = 1;

    const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);
    word3.val = 0;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.SW_MODE = swizzleMode;
    // Expose the full chain: levels [0, num_levels - 1].
    word3.f.BASE_LEVEL = 0;
    word3.f.LAST_LEVEL = mipmap.num_levels - 1;
    word3.f.BC_SWIZZLE = GetBcSwizzle(swizzle);
    word3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry);

    const bool mipmap_array =
        (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
         mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
         mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool mipmap_3d = (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    // DEPTH holds (array_size - 1) for arrays, (depth - 1) for 3D, else 0.
    word4.val = 0;
    word4.f.DEPTH =
        (mipmap_array)
            ? std::max(mipmap.desc.array_size, static_cast<size_t>(1)) - 1
            : (mipmap_3d) ? mipmap.desc.depth - 1 : 0;

    uint32_t minor_ver = MinorVerFromDevID(chip_id_);
    // For 1d, 2d and 2d-msaa in gfx1030 and beyond this is pitch-1
    if ((minor_ver >= 3) && !mipmap_array && !mipmap_3d)
      word4.f.PITCH = out.pitch - 1;

    word5.val = 0;
    word5.f.MAX_MIP = mipmap.num_levels - 1;

    mipmap.srd[0] = word0.val;
    mipmap.srd[1] = word1.val;
    mipmap.srd[2] = word2.val;
    mipmap.srd[3] = word3.val;
    mipmap.srd[4] = word4.val;
    mipmap.srd[5] = word5.val;
    // Words 6 and 7 carry no information here and are cleared.
    mipmap.srd[6] = 0;
    mipmap.srd[7] = 0;

    mipmap.row_pitch = row_pitch_size;
    mipmap.slice_pitch = out.sliceSize;
  }

  mipmap.srd[8] = mipmap.desc.format.channel_type;
  mipmap.srd[9] = mipmap.desc.format.channel_order;
  mipmap.srd[10] = static_cast<uint32_t>(mipmap.desc.width);
  // Mipmap-specific auxiliary fields
  mipmap.srd[11] = mipmap.num_levels;

  return HSA_STATUS_SUCCESS;
}
/// @brief Dump an NV-layout image SRD to stdout in annotated, human-readable
///        form.
///
/// Prints the twelve 32-bit words in hex and binary, decodes words 0-4 into
/// their bit fields, prints words 8-11 as the auxiliary channel type, channel
/// order, width and level-count values, and adds a mipmap summary when
/// LAST_LEVEL is non-zero.
///
/// @param srd Pointer to the SRD words; the first 12 words are read. When null
///            only an error banner is printed.
void ImageManagerNv::printSRDDetailed(const uint32_t* srd) const {
  printf("\n========== Image SRD (NV/GFX10) - Detailed ==========\n");
  if (!srd) {
    printf("ERROR: No SRD data provided.\n");
    printf("===============================================\n\n");
    return;
  }

  // Print all 12 words with bit field annotations
  for (int i = 0; i < 12; i++) {
    printf("WORD %d: 0x%08x ", i, srd[i]);
    // Binary representation, grouped in nibbles
    printf("(");
    for (int bit = 31; bit >= 0; bit--) {
      printf("%d", (srd[i] >> bit) & 1);
      if (bit % 4 == 0 && bit != 0) printf("_");
    }
    printf(")\n");
  }

  // WORD 0: BASE_ADDRESS (bits 39:8)
  SQ_IMG_RSRC_WORD0 word0;
  word0.val = srd[0];
  printf("\nWORD 0: BASE_ADDRESS (bits 39:8) = 0x%08x\n", word0.f.BASE_ADDRESS);

  // WORD 1: Contains BASE_ADDRESS_HI, MIN_LOD, FORMAT, WIDTH (bits 1:0)
  SQ_IMG_RSRC_WORD1 word1;
  word1.val = srd[1];
  printf("WORD 1: BASE_ADDRESS_HI = 0x%02x\n", word1.f.BASE_ADDRESS_HI);
  printf(" MIN_LOD = %u\n", word1.f.MIN_LOD);
  printf(" FORMAT = %u ◄──── Combined format/type\n", word1.f.FORMAT);
  printf(" WIDTH (bits 1:0) = %u\n", word1.f.WIDTH);

  // Calculate full address (NV uses 40-bit shifted by 8)
  uint64_t base_addr = ((uint64_t)word1.f.BASE_ADDRESS_HI << 40) | ((uint64_t)word0.f.BASE_ADDRESS << 8);
  // uint64_t is not 'unsigned long' on every platform; print through
  // 'unsigned long long' so the format specifier always matches.
  printf(" → Full Base Address = 0x%016llx\n", static_cast<unsigned long long>(base_addr));

  // WORD 2: WIDTH_HI, HEIGHT, RESOURCE_LEVEL
  SQ_IMG_RSRC_WORD2 word2;
  word2.val = srd[2];
  printf("WORD 2: WIDTH_HI (bits 13:2) = %u\n", word2.f.WIDTH_HI);
  printf(" HEIGHT = %u\n", word2.f.HEIGHT);
  printf(" RESOURCE_LEVEL = %u ◄──── NV-specific field\n", word2.f.RESOURCE_LEVEL);

  // Calculate full width (NV uses 14 bits split: 2 in WORD1 + 12 in WORD2)
  uint32_t full_width = word1.f.WIDTH | (word2.f.WIDTH_HI << 2);
  printf(" → Full Width = %u (actual: %u)\n", full_width, full_width + 1);
  printf(" → Full Height = %u (actual: %u)\n", word2.f.HEIGHT, word2.f.HEIGHT + 1);

  // WORD 3: Channel selectors, SW_MODE, BASE_LEVEL, LAST_LEVEL, BC_SWIZZLE, TYPE
  SQ_IMG_RSRC_WORD3 word3;
  word3.val = srd[3];
  printf("WORD 3: DST_SEL_X = %u ", word3.f.DST_SEL_X);
  printChannelSelect(word3.f.DST_SEL_X);
  printf(" DST_SEL_Y = %u ", word3.f.DST_SEL_Y);
  printChannelSelect(word3.f.DST_SEL_Y);
  printf(" DST_SEL_Z = %u ", word3.f.DST_SEL_Z);
  printChannelSelect(word3.f.DST_SEL_Z);
  printf(" DST_SEL_W = %u ", word3.f.DST_SEL_W);
  printChannelSelect(word3.f.DST_SEL_W);
  printf(" SW_MODE = %u ", word3.f.SW_MODE);
  printSwizzleMode(word3.f.SW_MODE);
  printf(" BASE_LEVEL = %u ◄──── Current base level\n", word3.f.BASE_LEVEL);
  printf(" LAST_LEVEL = %u ◄──── Current last level\n", word3.f.LAST_LEVEL);
  printf(" BC_SWIZZLE = %u ◄──── Border color swizzle\n", word3.f.BC_SWIZZLE);
  printf(" TYPE = %u ", word3.f.TYPE);
  printResourceType(word3.f.TYPE);

  // WORD 4: DEPTH, optionally PITCH
  SQ_IMG_RSRC_WORD4 word4;
  word4.val = srd[4];
  printf("WORD 4: DEPTH = %u\n", word4.f.DEPTH);

  // Interpret DEPTH/PITCH according to the resource type code and chip version
  uint32_t type = word3.f.TYPE;
  uint32_t minor_ver = MinorVerFromDevID(chip_id_);
  if (type == 10) {  // 3D
    printf(" → 3D Depth = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1);
  } else if (type == 13 || type == 12) {  // Arrays
    printf(" → Array Size = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1);
  } else if ((minor_ver >= 3) && (type == 8 || type == 9 || type == 14)) {  // 1D/2D/2D_MSAA in GFX1030+
    printf(" PITCH = %u (actual: %u) ◄──── GFX1030+ pitch\n", word4.f.PITCH, word4.f.PITCH + 1);
  }

  // WORD 5-7: Usually zero for basic images
  printf("WORD 5: Reserved = 0x%08x\n", srd[5]);
  printf("WORD 6: Reserved = 0x%08x\n", srd[6]);
  printf("WORD 7: Reserved = 0x%08x\n", srd[7]);

  // Additional information (HSA extension fields)
  printf("WORD 8: CHANNEL_TYPE = 0x%08x\n", srd[8]);
  printf("WORD 9: CHANNEL_ORDER = 0x%08x\n", srd[9]);
  printf("WORD 10: WIDTH_ORIGINAL = 0x%08x\n", srd[10]);
  printf("WORD 11: NUM_LEVELS = 0x%08x\n", srd[11]);

  // Mipmap analysis. "LAST_LEVEL > BASE_LEVEL" already implies
  // "LAST_LEVEL > 0", so the single test below covers the original condition.
  if (word3.f.LAST_LEVEL > 0) {
    printf("\nMIPMAP ANALYSIS:\n");
    printf(" Total Levels = %u\n", srd[11]);
    printf(" Min LOD = %u\n", word1.f.MIN_LOD);
    printf(" Active Range = [%u, %u]\n", word3.f.BASE_LEVEL, word3.f.LAST_LEVEL);
    printf(" Resource Level = %u\n", word2.f.RESOURCE_LEVEL);
    if (word3.f.BASE_LEVEL == word3.f.LAST_LEVEL) {
      printf(" Mode = SINGLE LEVEL VIEW ◄──── Mip level view\n");
      uint32_t level = word3.f.BASE_LEVEL;
      uint32_t level_width = std::max(1u, (full_width + 1) >> level);
      uint32_t level_height = std::max(1u, static_cast<uint32_t>((word2.f.HEIGHT + 1) >> level));
      printf(" Effective Dimensions = %ux%u (level %u)\n", level_width, level_height, level);
    } else {
      printf(" Mode = FULL MIPMAP CHAIN\n");
    }
  }
  printf("===============================================\n\n");
}
/// @brief Print the symbolic name of a DST_SEL_* channel selector.
///
/// Writes the name in parentheses followed by a newline to stdout. Selector
/// values that have no name in the table below are printed as "(UNKNOWN)".
///
/// @param sel Raw selector value taken from the SRD.
void ImageManagerNv::printChannelSelect(uint32_t sel) const {
  const char* label = "(UNKNOWN)\n";

  if (sel == 0) {
    label = "(SEL_0)\n";
  } else if (sel == 1) {
    label = "(SEL_1)\n";
  } else if (sel == 4) {
    label = "(SEL_X/R)\n";
  } else if (sel == 5) {
    label = "(SEL_Y/G)\n";
  } else if (sel == 6) {
    label = "(SEL_Z/B)\n";
  } else if (sel == 7) {
    label = "(SEL_W/A)\n";
  }

  printf("%s", label);
}
/// @brief Print the symbolic name of an SRD resource TYPE value.
///
/// Values 8 through 15 have names; anything else is printed as
/// "(UNKNOWN=<value>)". Output goes to stdout and ends with a newline.
///
/// @param type Raw TYPE field taken from the SRD.
void ImageManagerNv::printResourceType(uint32_t type) const {
  // Names indexed by (type - kFirstNamedType).
  static const char* const kTypeLabels[] = {
      "(1D)\n",
      "(2D)\n",
      "(3D)\n",
      "(CUBE)\n",
      "(1D_ARRAY/1DB)\n",
      "(2D_ARRAY)\n",
      "(2D_MSAA)\n",
      "(2D_MSAA_ARRAY)\n",
  };
  const uint32_t kFirstNamedType = 8;
  const uint32_t kLastNamedType = 15;

  if (type < kFirstNamedType || type > kLastNamedType) {
    printf("(UNKNOWN=%u)\n", type);
    return;
  }
  printf("%s", kTypeLabels[type - kFirstNamedType]);
}
/// @brief Print a symbolic name for an NV/GFX10 SW_MODE value.
///
/// Mode 0 is linear; modes 1-4, 5-8, 9-12 and 13-21 are printed as the
/// 256B, 4KB, 64KB and VAR families with a 1-based index inside the family.
/// Anything from 22 upwards is printed as "(UNKNOWN=<value>)".
///
/// @param sw_mode Raw SW_MODE field taken from the SRD.
void ImageManagerNv::printSwizzleMode(uint32_t sw_mode) const {
  if (sw_mode >= 22) {
    printf("(UNKNOWN=%u)\n", sw_mode);
    return;
  }
  if (sw_mode == 0) {
    printf("(LINEAR)\n");
    return;
  }
  if (sw_mode <= 4) {
    printf("(SW_256B_%u)\n", sw_mode);
    return;
  }
  if (sw_mode <= 8) {
    printf("(SW_4KB_%u)\n", sw_mode - 4);
    return;
  }
  if (sw_mode <= 12) {
    printf("(SW_64KB_%u)\n", sw_mode - 8);
    return;
  }
  printf("(SW_VAR_%u)\n", sw_mode - 12);
}
/// @brief Restrict an SRD to a single mip level.
///
/// The caller is expected to have copied the parent's SRD into @p level_view
/// already; this function only rewrites BASE_LEVEL and LAST_LEVEL in WORD3 so
/// that both name the requested level.
///
/// @param level_view   View whose SRD is modified in place.
/// @param mipmap_array Parent array; not read by this implementation.
/// @param mip_level    Level to select. Not range-checked here.
/// @return Always HSA_STATUS_SUCCESS.
hsa_status_t ImageManagerNv::PopulateMipLevelSrd(
    MipmappedArray& level_view,
    const MipmappedArray& mipmap_array,
    uint32_t mip_level) const {
  // BASE_LEVEL and LAST_LEVEL both live in the fourth SRD dword.
  uint32_t* const view_srd = reinterpret_cast<uint32_t*>(level_view.srd);
  SQ_IMG_RSRC_WORD3& level_word =
      *reinterpret_cast<SQ_IMG_RSRC_WORD3*>(view_srd + 3);

  // Equal base/last collapses the accessible range to this one level.
  level_word.f.BASE_LEVEL = mip_level;
  level_word.f.LAST_LEVEL = mip_level;

  debug_print("Set SRD mip selection: BASE_LEVEL=%u, LAST_LEVEL=%u", mip_level, mip_level);
  return HSA_STATUS_SUCCESS;
}
/// @brief Build a mipmapped array's SRD from a pre-computed AMD metadata descriptor.
///
/// SRD words 0-7 are copied from the metadata, then the base address, format,
/// channel swizzle and MAX_MIP fields are overridden from @p mipmap_array.
/// Words 8-11 receive the channel type, channel order, width and level count.
/// A per-level ADDR2_MIP_INFO table is allocated and cached in
/// mipmap_array.addr_output.addr2.pMipInfo, and mipmap_array.size is derived
/// from the last level's offset plus its linear footprint.
///
/// @param mipmap_array Array to populate; desc, data and num_levels must be set.
/// @param desc         Metadata descriptor, reinterpreted as metadata_amd_nv_t.
/// @return HSA_STATUS_SUCCESS on success,
///         HSA_STATUS_ERROR_INVALID_ARGUMENT if @p desc is null or
///         num_levels is zero,
///         HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED if the format cannot
///         be mapped or its element size disagrees with the hardware format.
hsa_status_t ImageManagerNv::PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const {
  // num_levels - 1 is used both as MAX_MIP and as an index below; a zero
  // count would wrap around, and a null descriptor would be dereferenced.
  if (desc == nullptr || mipmap_array.num_levels == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const metadata_amd_nv_t* desc_nv = reinterpret_cast<const metadata_amd_nv_t*>(desc);
  const void* mipmap_data_addr = mipmap_array.data;

  ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap_array.desc.format, mipmap_array.desc.geometry);
  if (mipmap_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED || mipmap_prop.element_size == 0) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  const Swizzle swizzle = ImageLut().MapSwizzle(mipmap_array.desc.format.channel_order);

  // Addresses in local memory are rebased against the local memory base.
  if (IsLocalMemory(mipmap_array.data)) {
    mipmap_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(mipmap_array.data) - local_memory_base_address_);
  }

  // Copy the pre-computed SRD words 0-7 from metadata.
  mipmap_array.srd[0] = desc_nv->word0.u32All;
  mipmap_array.srd[1] = desc_nv->word1.u32All;
  mipmap_array.srd[2] = desc_nv->word2.u32All;
  mipmap_array.srd[3] = desc_nv->word3.u32All;
  mipmap_array.srd[4] = desc_nv->word4.u32All;
  mipmap_array.srd[5] = desc_nv->word5.u32All;
  mipmap_array.srd[6] = desc_nv->word6.u32All;
  mipmap_array.srd[7] = desc_nv->word7.u32All;

  // The descriptor's element size must agree with the hardware pixel size.
  uint32_t hwPixelSize = ImageLut().GetPixelSize(mipmap_prop.data_format, mipmap_prop.data_type);
  if (mipmap_prop.element_size != hwPixelSize) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  // Override specific fields after copying.
  SQ_IMG_RSRC_WORD0* word0 = reinterpret_cast<SQ_IMG_RSRC_WORD0*>(&mipmap_array.srd[0]);
  SQ_IMG_RSRC_WORD1* word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&mipmap_array.srd[1]);
  SQ_IMG_RSRC_WORD3* word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3]);
  SQ_IMG_RSRC_WORD5* word5 = reinterpret_cast<SQ_IMG_RSRC_WORD5*>(&mipmap_array.srd[5]);

  word0->bits.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr);
  word1->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr);
  word1->bits.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
  word3->bits.DST_SEL_X = swizzle.x;
  word3->bits.DST_SEL_Y = swizzle.y;
  word3->bits.DST_SEL_Z = swizzle.z;
  word3->bits.DST_SEL_W = swizzle.w;
  word5->bits.MAX_MIP = mipmap_array.num_levels - 1;

  // 1D and 1DA take their TYPE from the geometry rather than from metadata.
  if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
      mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D) {
    word3->bits.TYPE = ImageLut().MapGeometry(mipmap_array.desc.geometry);
  }

  // Looks like this is only used for CPU copies.
  mipmap_array.row_pitch = 0;
  mipmap_array.slice_pitch = 0;

  // Store mipmap-specific metadata.
  mipmap_array.srd[8] = mipmap_array.desc.format.channel_type;
  mipmap_array.srd[9] = mipmap_array.desc.format.channel_order;
  mipmap_array.srd[10] = static_cast<uint32_t>(mipmap_array.desc.width);
  mipmap_array.srd[11] = mipmap_array.num_levels;

  // Extent of one dimension at a given level: halved per level, never below
  // 1. Shifting by the full type width or more is undefined, so such levels
  // are treated as fully reduced.
  const auto level_extent = [](size_t base, uint32_t level) -> uint32_t {
    const size_t reduced = (level < sizeof(size_t) * 8) ? (base >> level) : 0;
    return std::max(1u, static_cast<uint32_t>(reduced));
  };

  // Allocate and populate pMipInfo from metadata mip_offsets (ADDR2 for Nv).
  // NOTE(review): the table is heap-allocated here and only reachable through
  // addr_output; confirm that the destroy path releases it.
  // NOTE(review): num_levels is not checked against the capacity of
  // desc_nv->mip_offsets, which is declared outside this file section.
  ADDR2_MIP_INFO* mip_info_storage = new ADDR2_MIP_INFO[mipmap_array.num_levels];
  memset(mip_info_storage, 0, sizeof(ADDR2_MIP_INFO) * mipmap_array.num_levels);

  for (uint32_t level = 0; level < mipmap_array.num_levels; level++) {
    // mip_offsets contains offset bits [39:8], shift left by 8 to get the byte offset.
    mip_info_storage[level].offset = static_cast<uint64_t>(desc_nv->mip_offsets[level]) << 8;
    mip_info_storage[level].pitch = level_extent(mipmap_array.desc.width, level);
    mip_info_storage[level].height = level_extent(mipmap_array.desc.height, level);
    mip_info_storage[level].depth = level_extent(mipmap_array.desc.depth, level);
  }

  // Cache the per-level table on the array object.
  mipmap_array.addr_output.addr2.pMipInfo = mip_info_storage;

  // Total size = offset of the last level + its footprint. The first operand
  // is widened so the whole product is evaluated in 64 bits instead of
  // wrapping in 32-bit arithmetic before the assignment.
  const uint32_t last_level = mipmap_array.num_levels - 1;
  const uint64_t last_level_size =
      static_cast<uint64_t>(mip_info_storage[last_level].pitch) *
      mip_info_storage[last_level].height *
      mip_info_storage[last_level].depth *
      mipmap_prop.element_size;
  mipmap_array.size = mip_info_storage[last_level].offset + last_level_size;

  return HSA_STATUS_SUCCESS;
}
} // namespace image
} // namespace rocr
+26 -8
Просмотреть файл
@@ -40,8 +40,8 @@
//
////////////////////////////////////////////////////////////////////////////////
#ifndef EXT_IMAGE_IMAGE_MANAGER_NV_H_
#define EXT_IMAGE_IMAGE_MANAGER_NV_H_
#ifndef EXT_IMAGE_IMAGE_MANAGER_NV_H_
#define EXT_IMAGE_IMAGE_MANAGER_NV_H_
#include "addrlib/inc/addrinterface.h"
#include "image_manager_kv.h"
@@ -59,6 +59,7 @@ class ImageManagerNv : public ImageManagerKv {
virtual hsa_status_t CalculateImageSizeAndAlignment(
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
hsa_ext_image_data_layout_t image_data_layout,
uint32_t num_mipmap_levels,
size_t image_data_row_pitch, size_t image_data_slice_pitch,
hsa_ext_image_data_info_t& image_info) const;
@@ -79,13 +80,30 @@ class ImageManagerNv : public ImageManagerKv {
/// @brief Fill image backing storage using agent copy.
virtual hsa_status_t FillImage(const Image& image, const void* pattern,
const hsa_ext_image_region_t& region);
/// @brief Fill mipmap structure with device specific mipmapped array object.
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const;
/// @brief Fill mipmap structure with pre-computed AMD metadata descriptor.
/// @param mipmap_array Array whose SRD and cached level info are populated.
/// @param desc         Metadata descriptor supplying the pre-computed SRD words.
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const;
/// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields
/// @param level_view   View whose SRD (already copied from the parent) is modified.
/// @param mipmap_array Parent mipmapped array.
/// @param mip_level    Level written to both BASE_LEVEL and LAST_LEVEL.
virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view,
const MipmappedArray& mipmap_array, uint32_t mip_level) const;
/// @brief Print a field-by-field decode of the SRD words to stdout.
virtual void printSRDDetailed(const uint32_t* srd) const;
/// @brief Print the symbolic name of a DST_SEL_* channel selector value.
virtual void printChannelSelect(uint32_t sel) const;
/// @brief Print the symbolic name of an SRD resource TYPE value.
virtual void printResourceType(uint32_t type) const;
/// @brief Print the symbolic name of an SRD SW_MODE (swizzle mode) value.
virtual void printSwizzleMode(uint32_t sw_mode) const;
protected:
uint32_t GetAddrlibSurfaceInfoNv(hsa_agent_t component,
const hsa_ext_image_descriptor_t& desc,
Image::TileMode tileMode,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const;
const hsa_ext_image_descriptor_t& desc,
uint32_t num_mipmap_levels,
Image::TileMode tileMode,
size_t image_data_row_pitch,
size_t image_data_slice_pitch,
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const;
bool IsLocalMemory(const void* address) const;
@@ -95,4 +113,4 @@ class ImageManagerNv : public ImageManagerKv {
} // namespace image
} // namespace rocr
#endif // EXT_IMAGE_IMAGE_MANAGER_NV_H_
#endif // EXT_IMAGE_IMAGE_MANAGER_NV_H_
+305 -8
Просмотреть файл
@@ -44,11 +44,15 @@
#include <assert.h>
#include <climits>
#include <cstring>
#include <vector>
#include <mutex>
#include <algorithm>
#include "core/inc/runtime.h"
#include "core/inc/hsa_internal.h"
#include "core/inc/hsa_ext_amd_impl.h"
#include "core/inc/exceptions.h"
#include "resource.h"
#include "image_manager_kv.h"
#include "image_manager_ai.h"
@@ -57,9 +61,96 @@
#include "image_manager_gfx12.h"
#include "device_info.h"
// Mip level count passed to CalculateImageSizeAndAlignment for regular
// (non-mipmapped) images.
#define SINGLE_MIP_LEVEL 1
namespace rocr {
namespace image {
/// @brief Number of levels in a full mip chain for the given descriptor.
///
/// The chain length is the number of times the largest relevant extent can be
/// halved before reaching zero. Which extents count depends on the geometry:
/// width only for the 1D family (and for any unlisted geometry), width and
/// height for the 2D family, and all three extents for 3D. Zero extents are
/// treated as 1.
///
/// @param d Image descriptor to inspect.
/// @return Level count, always at least 1.
static inline uint32_t ComputeMaxMipLevels(const hsa_ext_image_descriptor_t& d) {
  const uint32_t width = d.width ? d.width : 1;
  const uint32_t height = d.height ? d.height : 1;
  const uint32_t depth = d.depth ? d.depth : 1;

  const bool is_2d_family = d.geometry == HSA_EXT_IMAGE_GEOMETRY_2D ||
                            d.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
                            d.geometry == HSA_EXT_IMAGE_GEOMETRY_2DDEPTH ||
                            d.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH;

  // 1D, 1DA, 1DB and unknown geometries fall back to the width alone.
  uint32_t largest_extent = width;
  if (d.geometry == HSA_EXT_IMAGE_GEOMETRY_3D) {
    largest_extent = std::max(std::max(width, height), depth);
  } else if (is_2d_family) {
    largest_extent = std::max(width, height);
  }

  // Count halvings until nothing is left.
  uint32_t level_count = 0;
  for (uint32_t remaining = largest_extent; remaining > 0; remaining >>= 1) {
    ++level_count;
  }
  return (level_count == 0) ? 1 : level_count;
}
/// @brief Compute the backing-store size and alignment of a mipmapped array.
///
/// Validates the level count against the full chain length for the
/// descriptor, the format/geometry capability, and the agent's maximum image
/// dimensions before asking the agent's image manager for the layout.
///
/// @param component         Agent that will access the array.
/// @param desc              Image descriptor of level 0.
/// @param num_mipmap_levels Requested level count; must be in [1, max levels].
/// @param layout            Requested data layout.
/// @param row_pitch         Row pitch forwarded to the image manager.
/// @param slice_pitch       Slice pitch forwarded to the image manager.
/// @param[out] size_out      Required size in bytes; 0 on failure.
/// @param[out] alignment_out Required alignment in bytes; 0 on failure.
/// @return HSA_STATUS_SUCCESS on success,
///         HSA_STATUS_ERROR_INVALID_ARGUMENT for a bad level count,
///         HSA_STATUS_ERROR_INVALID_AGENT if the agent has no image manager,
///         HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED or
///         HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED for unsupported
///         formats or dimensions, or the error reported by a callee.
hsa_status_t ImageRuntime::GetMipmapArraySizeAndAlignment(
    hsa_agent_t component,
    const hsa_ext_image_descriptor_t& desc,
    uint32_t num_mipmap_levels,
    hsa_ext_image_data_layout_t layout,
    size_t row_pitch,
    size_t slice_pitch,
    size_t& size_out,
    size_t& alignment_out) {
  size_out = 0;
  alignment_out = 0;

  if (num_mipmap_levels == 0 || num_mipmap_levels > ComputeMaxMipLevels(desc))
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // Validate the image format and geometry.
  uint32_t capability = 0;
  hsa_status_t status =
      GetImageCapability(component, desc.format, desc.geometry, capability);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }
  if (capability == 0) {
    return static_cast<hsa_status_t>(
        HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  }

  // image_manager() returns NULL for agents without a registered manager;
  // reject those instead of dereferencing a null pointer below.
  ImageManager* manager = image_manager(component);
  if (manager == NULL) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  // Validate the image dimension.
  const hsa_ext_image_geometry_t geometry = desc.geometry;
  uint32_t max_width = 0;
  uint32_t max_height = 0;
  uint32_t max_depth = 0;
  uint32_t max_array_size = 0;
  manager->GetImageInfoMaxDimension(component, geometry, max_width, max_height,
                                    max_depth, max_array_size);
  if (desc.width > max_width || desc.height > max_height ||
      desc.depth > max_depth || desc.array_size > max_array_size) {
    return static_cast<hsa_status_t>(
        HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED);
  }

  hsa_ext_image_data_info_t mipmap_info = {0};
  status = manager->CalculateImageSizeAndAlignment(component, desc, layout,
      num_mipmap_levels, row_pitch, slice_pitch, mipmap_info);
  if (HSA_STATUS_SUCCESS != status) {
    return status;
  }

  alignment_out = mipmap_info.alignment;
  size_out = mipmap_info.size;
  return HSA_STATUS_SUCCESS;
}
hsa_status_t FindKernelArgPool(hsa_amd_memory_pool_t pool, void* data) {
assert(data != nullptr);
@@ -162,9 +253,6 @@ ImageRuntime* ImageRuntime::instance() {
}
instance = CreateSingleton();
if (instance == NULL) {
return NULL;
}
// UnloadCallback = &ext_image::ImageRuntime::DestroySingleton;
}
@@ -178,13 +266,15 @@ ImageRuntime* ImageRuntime::CreateSingleton() {
if (HSA_STATUS_SUCCESS != instance->blit_kernel_.Initialize()) {
instance->Cleanup();
delete instance;
return NULL;
throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
"ImageRuntime: Failed to initialize blit kernel");
}
if (HSA_STATUS_SUCCESS != HSA::hsa_iterate_agents(CreateImageManager, instance)) {
instance->Cleanup();
delete instance;
return NULL;
throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
"ImageRuntime: Failed to create image managers");
}
assert(instance->kernarg_pool_.handle != 0);
@@ -350,8 +440,9 @@ hsa_status_t ImageRuntime::GetImageSizeAndAlignment(
HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED);
}
return manager->CalculateImageSizeAndAlignment(component, desc,
image_data_layout, image_data_row_pitch, image_data_slice_pitch, image_info);
return manager->CalculateImageSizeAndAlignment(
component, desc, image_data_layout, SINGLE_MIP_LEVEL,
image_data_row_pitch, image_data_slice_pitch, image_info);
}
hsa_status_t ImageRuntime::CreateImageHandle(
@@ -421,7 +512,7 @@ hsa_status_t ImageRuntime::CreateImageHandleWithLayout(
if(image_layout->version!=1)
return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
uint32_t id;
HSA::hsa_agent_get_info(component, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CHIP_ID, &id);
@@ -448,6 +539,64 @@ hsa_status_t ImageRuntime::CreateImageHandleWithLayout(
return HSA_STATUS_SUCCESS;
}
/// @brief Create a mipmapped array from a pre-computed AMD image layout.
///
/// @param component         Agent the array is created for.
/// @param mipmap_descriptor Descriptor of level 0.
/// @param image_layout      AMD layout descriptor; must be version 1 and carry
///                          a deviceID matching @p component.
/// @param image_data        Backing store; must be 256-byte aligned.
/// @param access_permission Access permission recorded on the array.
/// @param num_mipmap_levels Number of levels; must be non-zero.
/// @param[out] image_handle Receives the new handle; 0 on failure.
/// @return HSA_STATUS_SUCCESS on success,
///         HSA_STATUS_ERROR_INVALID_ALLOCATION for misaligned data,
///         HSA_STATUS_ERROR_INVALID_ARGUMENT for a null layout or zero levels,
///         HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED for a layout
///         version/device mismatch,
///         HSA_STATUS_ERROR_OUT_OF_RESOURCES if the manager or the object is
///         unavailable, or the error reported by a callee.
hsa_status_t ImageRuntime::CreateMipmapArrayHandleWithLayout(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& mipmap_descriptor,
    const hsa_amd_image_descriptor_t* image_layout,
    const void* image_data, const hsa_access_permission_t access_permission,
    uint32_t num_mipmap_levels,
    hsa_ext_image_t& image_handle) {
  image_handle.handle = 0;

  // The layout is dereferenced below; reject a missing one up front.
  if (image_layout == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  if (!IsMultipleOf(image_data, 256)) {
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;
  }

  if (image_layout->version != 1) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  // Only compare against the chip id if the query actually produced one;
  // otherwise `id` would be read uninitialized.
  uint32_t id = 0;
  hsa_status_t status =
      HSA::hsa_agent_get_info(component, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CHIP_ID, &id);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // deviceID packs the vendor id (0x1002) in the upper 16 bits and the chip
  // id in the lower bits.
  if (image_layout->deviceID != (0x1002 << 16 | id)) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  if (num_mipmap_levels == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Resolve the manager before allocating so that a missing manager does not
  // cost an allocate/destroy round trip.
  ImageManager* manager = image_manager(component);
  if (!manager) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  const metadata_amd_t* desc = reinterpret_cast<const metadata_amd_t*>(image_layout);

  MipmappedArray* mipmap_array = MipmappedArray::Create(component);
  if (!mipmap_array) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  mipmap_array->component = component;
  mipmap_array->desc = mipmap_descriptor;
  mipmap_array->permission = access_permission;
  mipmap_array->num_levels = num_mipmap_levels;
  mipmap_array->data = const_cast<void*>(image_data);
  mipmap_array->flags = 0;

  status = manager->PopulateMipmapSrd(*mipmap_array, desc);
  if (status != HSA_STATUS_SUCCESS) {
    MipmappedArray::Destroy(mipmap_array);
    return status;
  }

  image_handle.handle = mipmap_array->Convert();
  return HSA_STATUS_SUCCESS;
}
hsa_status_t ImageRuntime::DestroyImageHandle(
const hsa_ext_image_t& image_handle) {
const Image* image = Image::Convert(image_handle.handle);
@@ -574,6 +723,154 @@ hsa_status_t ImageRuntime::DestroySamplerHandle(
return HSA_STATUS_SUCCESS;
}
/// @brief Create a mipmapped array object and return its handle.
///
/// Validates the descriptor through GetMipmapArraySizeAndAlignment, selects a
/// tile mode from the geometry and requested layout, and has the agent's
/// image manager build the SRD.
///
/// @param component              Agent the array is created for.
/// @param mipmap_descriptor      Descriptor of level 0; width must be non-zero.
/// @param image_data             Backing store provided by the caller.
/// @param access_permission      Access permission recorded on the array.
/// @param num_mipmap_levels      Number of levels; must be non-zero.
/// @param mipmap_layout          Requested data layout.
/// @param image_data_row_pitch   Row pitch used for validation.
/// @param image_data_slice_pitch Slice pitch used for validation.
/// @param[out] image_handle      Receives the new handle; 0 on failure.
/// @return HSA_STATUS_SUCCESS on success,
///         HSA_STATUS_ERROR_INVALID_ARGUMENT for a zero width or level count,
///         HSA_STATUS_ERROR_INVALID_AGENT if the agent has no image manager,
///         HSA_STATUS_ERROR_OUT_OF_RESOURCES if the object cannot be created,
///         or the error reported by validation or SRD population.
hsa_status_t ImageRuntime::CreateMipmapArrayHandle(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& mipmap_descriptor,
    const void* image_data, const hsa_access_permission_t access_permission,
    uint32_t num_mipmap_levels,
    const hsa_ext_image_data_layout_t mipmap_layout,
    size_t image_data_row_pitch, size_t image_data_slice_pitch,
    hsa_ext_image_t& image_handle) {
  image_handle.handle = 0;

  if (mipmap_descriptor.width == 0 || num_mipmap_levels == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  ImageManager* manager = image_manager(component);
  if (!manager) return HSA_STATUS_ERROR_INVALID_AGENT;

  // Validate mipmap array size and alignment requirements.
  size_t required_size = 0;
  size_t required_alignment = 0;
  hsa_status_t status = GetMipmapArraySizeAndAlignment(
      component, mipmap_descriptor, num_mipmap_levels, mipmap_layout, image_data_row_pitch,
      image_data_slice_pitch, required_size, required_alignment);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // Verify image_data alignment (debug builds only).
  assert(image_data != NULL);
  assert(IsMultipleOf(image_data, required_alignment));

  // Create a new mipmapped array object.
  MipmappedArray* mipmap_array = MipmappedArray::Create(component);
  if (!mipmap_array) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  // Determine the tile mode:
  //  - 1DB (1D buffered) geometry always uses linear addressing.
  //  - An explicit LINEAR layout forces linear swizzle mode.
  //  - Otherwise (OPAQUE) AddrLib is left to choose the swizzle mode.
  const bool force_linear =
      mipmap_descriptor.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB ||
      mipmap_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR;
  mipmap_array->tile_mode = force_linear ? Image::TileMode::LINEAR : Image::TileMode::TILED;
  debug_print("Tile mode = %u (0: LINEAR, 1: TILED)", mipmap_array->tile_mode);

  // Initialize the mipmapped array object.
  mipmap_array->component = component;
  mipmap_array->data = const_cast<void*>(image_data);
  mipmap_array->desc = mipmap_descriptor;
  mipmap_array->permission = access_permission;
  mipmap_array->num_levels = num_mipmap_levels;
  mipmap_array->flags = 0;

  // A failed SRD population must not be reported as success: release the
  // object and propagate the error instead of handing out a bad handle.
  debug_print("Populating mipmapped array SRD...");
  status = manager->PopulateMipmapSrd(*mipmap_array);
  if (status != HSA_STATUS_SUCCESS) {
    MipmappedArray::Destroy(mipmap_array);
    return status;
  }

  if (core::Runtime::runtime_singleton_->flag().image_print_srd()) {
    mipmap_array->printSRD();
  }
  // NOTE(review): the detailed dump was outside the flag check in the
  // original (unbraced if) and is kept unconditional here; confirm whether it
  // was meant to be gated by image_print_srd() as well.
  manager->printSRDDetailed(mipmap_array->srd);

  // assert(mipmap_array->size == required_size);
  image_handle.handle = mipmap_array->Convert();
  debug_print("output handle = %lu", image_handle.handle);
  return HSA_STATUS_SUCCESS;
}
/// @brief Destroy the mipmapped array object referenced by a handle.
///
/// @param image_handle Handle previously returned for a mipmapped array.
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR_INVALID_ARGUMENT
///         if the handle is zero or does not map to an object.
hsa_status_t ImageRuntime::DestroyMipmapArrayHandle(
    const hsa_ext_image_t& image_handle) {
  // Convert() only subtracts the srd member offset from the handle, so a zero
  // handle would yield a bogus non-null pointer that the NULL check below
  // cannot catch. Reject it explicitly.
  if (image_handle.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  MipmappedArray* mipmap_array = MipmappedArray::Convert(image_handle.handle);
  if (mipmap_array == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  MipmappedArray::Destroy(mipmap_array);
  return HSA_STATUS_SUCCESS;
}
/// @brief Create a handle that views a single level of a mipmapped array.
///
/// The view is a new MipmappedArray object copied from the parent whose SRD
/// is then restricted to @p mip_level by the agent's image manager.
///
/// @param component           Agent the view is created for.
/// @param mipmapped_array     Handle of the parent mipmapped array.
/// @param mip_level           Level to expose; must be < the parent's level count.
/// @param[out] level_image_out Receives the view handle; 0 on failure.
/// @return HSA_STATUS_SUCCESS on success,
///         HSA_STATUS_ERROR_INVALID_ARGUMENT for pre-GFX9 agents, a zero or
///         invalid parent handle, or an out-of-range level,
///         HSA_STATUS_ERROR_OUT_OF_RESOURCES if the manager or the view object
///         is unavailable, or the error reported by a callee.
hsa_status_t ImageRuntime::GetMipmapArrayLevelHandle(
    hsa_agent_t component, const hsa_ext_image_t& mipmapped_array,
    uint32_t mip_level, hsa_ext_image_t& level_image_out) {
  level_image_out.handle = 0;

  // Get GPU architecture version.
  uint32_t chip_id = 0;
  hsa_status_t status = GetGPUAsicID(component, &chip_id);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  uint32_t major_ver = MajorVerFromDevID(chip_id);
  if (major_ver < 9) {
    debug_print("ERROR: Mip level views not supported on GFX%u hardware\n", major_ver);
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Convert() only subtracts a fixed offset from the handle, so a zero handle
  // would produce a bogus non-null pointer; reject it before converting.
  // (mip_level is unsigned, so no lower-bound check is needed.)
  if (mipmapped_array.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Convert handle to internal object and perform basic sanity.
  MipmappedArray* array = MipmappedArray::Convert(mipmapped_array.handle);
  if (!array || array->num_levels == 0 || mip_level >= array->num_levels) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Resolve the manager before allocating the view so a missing manager does
  // not cost an allocate/destroy round trip.
  ImageManager* manager = image_manager(component);
  if (!manager) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  debug_print("Creating mip level %u view for %u level mipmap\n",
              mip_level, array->num_levels);

  // Create a view that references the parent mipmap array.
  MipmappedArray* level_view = MipmappedArray::Create(component);
  if (!level_view) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  // Copy entire parent structure (srd is a fixed array, so it is copied by value).
  // NOTE(review): this also copies addr_output, including any cached pMipInfo
  // pointer, so parent and view share that pointer; confirm the destroy path
  // does not free it twice.
  *level_view = *array;

  // Modify SRD to select only the specific mip level.
  status = manager->PopulateMipLevelSrd(*level_view, *array, mip_level);
  if (status != HSA_STATUS_SUCCESS) {
    MipmappedArray::Destroy(level_view);
    return status;
  }
  debug_print("Created mip level view using SRD fields");

  if (core::Runtime::runtime_singleton_->flag().image_print_srd()) {
    level_view->printSRD();
  }
  // NOTE(review): kept unconditional as in the original (unbraced if);
  // confirm whether the detailed dump should be gated by image_print_srd().
  manager->printSRDDetailed(level_view->srd);

  // Return handle.
  level_image_out.handle = level_view->Convert();
  return HSA_STATUS_SUCCESS;
}
/// @brief Construct the image runtime with a zero CPU L2 cache size and a
/// kernarg pool initialized from {0}.
ImageRuntime::ImageRuntime()
: cpu_l2_cache_size_(0), kernarg_pool_({0}) {}
+36
Просмотреть файл
@@ -103,6 +103,14 @@ class ImageRuntime {
const void* image_data, const hsa_access_permission_t access_permission,
hsa_ext_image_t& image);
/// @brief Create mipmapped array object with AMD-specific layout and return its handle.
/// @param component         Agent the array is created for.
/// @param mipmap_descriptor Descriptor of the array.
/// @param image_layout      AMD layout descriptor; the implementation requires
///                          version 1 and a deviceID matching @p component.
/// @param image_data        Backing store; must be 256-byte aligned.
/// @param access_permission Access permission recorded on the array.
/// @param num_mipmap_levels Number of mip levels; must be non-zero.
/// @param[out] image_handle Receives the handle; set to 0 on failure.
hsa_status_t CreateMipmapArrayHandleWithLayout(
hsa_agent_t component, const hsa_ext_image_descriptor_t& mipmap_descriptor,
const hsa_amd_image_descriptor_t* image_layout,
const void* image_data, const hsa_access_permission_t access_permission,
uint32_t num_mipmap_levels,
hsa_ext_image_t& image_handle);
/// @brief Destroy the device image object referenced by the handle.
hsa_status_t DestroyImageHandle(const hsa_ext_image_t& image);
@@ -137,6 +145,34 @@ class ImageRuntime {
/// @brief Destroy the device sampler object referenced by the handle.
hsa_status_t DestroySamplerHandle(hsa_ext_sampler_t& sampler);
/// @brief Create device Mipmap array object and return its handle
/// @param component              Agent the array is created for.
/// @param mipmap_descriptor      Descriptor of the array; width must be non-zero.
/// @param image_data             Caller-provided backing store.
/// @param access_permission      Access permission recorded on the array.
/// @param num_mipmap_levels      Number of mip levels; must be non-zero.
/// @param mipmap_layout          Requested data layout (LINEAR forces linear tiling).
/// @param image_data_row_pitch   Row pitch used when validating size/alignment.
/// @param image_data_slice_pitch Slice pitch used when validating size/alignment.
/// @param[out] image_handle      Receives the handle; set to 0 on failure.
hsa_status_t CreateMipmapArrayHandle(
hsa_agent_t component, const hsa_ext_image_descriptor_t& mipmap_descriptor,
const void* image_data, const hsa_access_permission_t access_permission,
uint32_t num_mipmap_levels,
const hsa_ext_image_data_layout_t mipmap_layout,
size_t image_data_row_pitch, size_t image_data_slice_pitch,
hsa_ext_image_t& image_handle);
/// @brief Helper function to compute the size and alignment of a mipmapped
/// surface after validating the level count, format and dimensions.
/// @param[out] size_out      Required size in bytes; 0 on failure.
/// @param[out] alignment_out Required alignment in bytes; 0 on failure.
hsa_status_t GetMipmapArraySizeAndAlignment(
hsa_agent_t component,
const hsa_ext_image_descriptor_t& desc,
uint32_t num_mipmap_levels,
hsa_ext_image_data_layout_t layout,
size_t row_pitch,
size_t slice_pitch,
size_t& size_out,
size_t& alignment_out);
/// @brief Destroy the mipmapped array object referenced by the handle.
hsa_status_t DestroyMipmapArrayHandle(const hsa_ext_image_t& image_handle);
/// @brief Get the handle for a specific mipmap level in a mipmapped array.
/// The returned handle refers to a newly created view object whose SRD is
/// restricted to @p mip_level; @p mip_level must be less than the parent's
/// level count.
hsa_status_t GetMipmapArrayLevelHandle(
hsa_agent_t agent, const hsa_ext_image_t& mipmapped_array,
uint32_t mip_level, hsa_ext_image_t& level_image_out);
ImageManager* image_manager(hsa_agent_t agent) {
std::map<uint64_t, ImageManager*>::iterator it = image_managers_.find(agent.handle);
return (it != image_managers_.end()) ? it->second : NULL;
+68 -7
Просмотреть файл
@@ -49,6 +49,7 @@
#include "inc/hsa.h"
#include "inc/hsa_ext_image.h"
#include "addrlib/inc/addrinterface.h"
#include "util.h"
@@ -97,20 +98,21 @@ typedef struct ImageProperty {
/// @brief Structure to represent an HSA image object.
typedef struct Image {
private:
Image() {
protected:
Image()
: data(nullptr),
row_pitch(0),
slice_pitch(0) {
component.handle = 0;
permission = HSA_ACCESS_PERMISSION_RO;
data = NULL;
std::memset(srd, 0, sizeof(srd));
std::memset(&desc, 0, sizeof(desc));
row_pitch = slice_pitch = 0;
tile_mode = LINEAR;
}
~Image() {}
virtual ~Image() {}
public:
public:
typedef enum TileMode {
LINEAR,
TILED
@@ -127,7 +129,11 @@ public:
/// @brief Convert from HSA handle to vendor representation.
///
/// The object address is recovered by subtracting the offset of the srd
/// member from the handle (presumably the handle is the address of srd, as
/// produced by the matching Convert() const — confirm).
///
/// @param handle HSA image handle.
/// @return Pointer to the Image that owns the SRD the handle refers to.
static Image* Convert(uint64_t handle) {
  // Compute offset manually to avoid offsetof warning with virtual destructor.
  // The stale offsetof-based return that preceded this code made it
  // unreachable and has been removed; both forms yield the same offset.
  Image* dummy = nullptr;
  const ptrdiff_t srd_offset =
      reinterpret_cast<const char*>(&dummy->srd) - reinterpret_cast<const char*>(dummy);
  return reinterpret_cast<Image*>(handle - srd_offset);
}
// Vendor specific image object.
@@ -202,6 +208,61 @@ public:
hsa_ext_sampler_descriptor_v2_t desc;
} Sampler;
/// @brief Image specialization that describes a mipmapped image array.
///
/// Instances are created and destroyed only through Create()/Destroy(); the
/// constructor and destructor are private. The HSA handle of an instance is
/// the address of its inherited srd member.
typedef struct MipmappedArray : public Image {
 private:
  MipmappedArray() {
    size = 0;
    num_levels = 0;
    flags = 0;

    component.handle = 0;
    std::memset(srd, 0, sizeof(srd));
    std::memset(&desc, 0, sizeof(desc));
    permission = HSA_ACCESS_PERMISSION_RO;
    std::memset(&addr_output, 0, sizeof(addr_output));
    tile_mode = LINEAR;
  }

  ~MipmappedArray() {}

 public:
  /// @brief Create a MipmappedArray.
  /// Only internal metadata is allocated; image data must be provided by the user.
  static MipmappedArray* Create(hsa_agent_t agent);

  /// @brief Destroy a MipmappedArray.
  static void Destroy(const MipmappedArray* array);

  /// @brief Vendor representation -> HSA handle (the address of srd).
  uint64_t Convert() const { return reinterpret_cast<uint64_t>(srd); }

  /// @brief HSA handle -> vendor representation.
  ///
  /// Subtracts the byte offset of srd inside the object from the handle. The
  /// offset is measured by hand because offsetof warns on a type with a
  /// virtual destructor.
  static MipmappedArray* Convert(uint64_t handle) {
    MipmappedArray* probe = nullptr;
    const char* const object_start = reinterpret_cast<const char*>(probe);
    const char* const srd_start = reinterpret_cast<const char*>(&probe->srd);
    const ptrdiff_t offset_of_srd = srd_start - object_start;
    return reinterpret_cast<MipmappedArray*>(handle - offset_of_srd);
  }

  // Total size of the allocated memory.
  size_t size;

  // Number of mipmap levels.
  uint32_t num_levels;

  // Reserved
  uint32_t flags;

  // Cached AddrLib surface info; which member is valid depends on the AddrLib
  // interface used for the agent.
  union {
    ADDR_COMPUTE_SURFACE_INFO_OUTPUT addr1;   // Pre-GFX9 versions
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT addr2;  // GFX9 and later
    // NOTE(review): originally commented "GFX10 and later" — confirm which
    // generations actually use the ADDR3 interface.
    ADDR3_COMPUTE_SURFACE_INFO_OUTPUT addr3;
  } addr_output;
} MipmappedArray;
} // namespace image
} // namespace rocr
#endif // HSA_RUNTIME_EXT_IMAGE_RESOURCE_H
+191
Просмотреть файл
@@ -356,6 +356,56 @@ typedef struct hsa_ext_image_descriptor_s {
hsa_ext_image_format_t format;
} hsa_ext_image_descriptor_t;
/**
 * @brief Implementation independent image descriptor (Version 2).
 *
 * @details This version adds mipmap support, allowing both regular images
 * (mipmap_levels = 0 or 1) and mipmapped arrays (mipmap_levels > 1) to be
 * created with a single unified API.
 */
typedef struct hsa_ext_image_descriptor_v2_s {
/**
 * Image geometry.
 */
hsa_ext_image_geometry_t geometry;
/**
 * Width of the image, in components.
 */
size_t width;
/**
 * Height of the image, in components. Only used if the geometry is
 * ::HSA_EXT_IMAGE_GEOMETRY_2D, ::HSA_EXT_IMAGE_GEOMETRY_3D,
 * ::HSA_EXT_IMAGE_GEOMETRY_2DA, ::HSA_EXT_IMAGE_GEOMETRY_2DDEPTH, or
 * ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH, otherwise must be 0.
 */
size_t height;
/**
 * Depth of the image, in components. Only used if the geometry is
 * ::HSA_EXT_IMAGE_GEOMETRY_3D, otherwise must be 0.
 */
size_t depth;
/**
 * Number of image layers in the image array. Only used if the geometry is
 * ::HSA_EXT_IMAGE_GEOMETRY_1DA, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
 * ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH, otherwise must be 0.
 */
size_t array_size;
/**
 * Image format.
 */
hsa_ext_image_format_t format;
/**
 * Number of mipmap levels.
 * - 0 or 1: Regular single-level image (default behavior)
 * - >1: Mipmapped array with multiple levels
 *
 * When mipmap_levels > 1, the image is treated as a complete mipmap chain.
 * The maximum valid value is determined by the image dimensions; a larger
 * value is rejected by ::hsa_ext_image_data_get_info_v2 with
 * ::HSA_STATUS_ERROR_INVALID_ARGUMENT.
 */
size_t mipmap_levels;
} hsa_ext_image_descriptor_v2_t;
/**
* @brief Image capability.
*/
@@ -663,6 +713,48 @@ hsa_status_t HSA_API hsa_ext_image_data_get_info_with_layout(
size_t image_data_slice_pitch,
hsa_ext_image_data_info_t *image_data_info);
/**
 * @brief Retrieve image data requirements with unified mipmap support (V2 API).
 *
 * @details This is a unified API that handles both regular images (mipmap_levels = 0 or 1)
 * and mipmapped arrays (mipmap_levels > 1).
 *
 * For regular images:
 * - Set image_descriptor->mipmap_levels to 0 or 1
 * - Returns size/alignment for a single image level
 *
 * For mipmapped arrays:
 * - Set image_descriptor->mipmap_levels to desired level count (> 1)
 * - Returns total size/alignment for all mip levels combined
 * - The maximum valid mipmap_levels is computed from image dimensions
 *
 * @param[in] agent Agent that will access the image.
 *
 * @param[in] image_descriptor Pointer to a V2 image descriptor. Must not be NULL.
 *
 * @param[in] access_permission Access permission when the image is accessed by the agent.
 *
 * @param[out] image_data_info Memory location where the runtime stores the size and
 * alignment requirements. Must not be NULL.
 *
 * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
 *
 * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been initialized.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The image format is not
 * supported for the specified access permission.
 *
 * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The image dimensions are not
 * supported for the specified access permission.
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL,
 * the mipmap_levels field of @p image_descriptor exceeds the maximum for the
 * image dimensions, @p access_permission is invalid, or @p image_data_info
 * is NULL.
 */
hsa_status_t HSA_API hsa_ext_image_data_get_info_v2(
hsa_agent_t agent, const hsa_ext_image_descriptor_v2_t* image_descriptor,
hsa_access_permission_t access_permission, hsa_ext_image_data_info_t* image_data_info);
/**
* @brief Creates an agent specific image handle to an image with an
* opaque image data layout.
@@ -864,6 +956,105 @@ hsa_status_t HSA_API hsa_ext_image_destroy(
hsa_agent_t agent,
hsa_ext_image_t image);
/**
* @brief Creates an agent specific image handle with unified mipmap support (V2 API).
*
* @details This is a unified API that handles both regular images
* (image_descriptor->mipmap_levels = 0 or 1) and mipmapped arrays
* (image_descriptor->mipmap_levels > 1). This simplifies the API surface and aligns
* with modern graphics API conventions where all images are conceptually mipmapped.
* Handles created by this function are destroyed with ::hsa_ext_image_destroy_v2.
*
* For regular images:
* - Set image_descriptor->mipmap_levels to 0 or 1
* - Behavior is identical to ::hsa_ext_image_create
*
* For mipmapped arrays:
* - Set image_descriptor->mipmap_levels to the desired level count (> 1)
* - Behavior is identical to ::hsa_amd_mipmap_array_create
* - The image_data must contain all mip levels laid out sequentially
*
* @param[in] agent Agent to be associated with the image handle created.
*
* @param[in] image_descriptor Pointer to a V2 image descriptor. Must not be NULL.
*
* @param[in] image_data Image data buffer allocated according to size and alignment
* requirements from ::hsa_ext_image_data_get_info_v2. Must not be NULL.
*
* @param[in] access_permission Access permission of the image when accessed by agent.
*
* @param[out] image Pointer to memory location where the HSA runtime stores the
* newly created image handle. Must not be NULL.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been initialized.
*
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
*
* @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The agent does not support
* the image format for the specified access permission.
*
* @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The agent does not support
* the image dimensions for the specified access permission.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p image_data
* is NULL, @p image_data does not have valid alignment, @p access_permission is invalid,
* image_descriptor->mipmap_levels exceeds the maximum for the image dimensions, or
* @p image is NULL.
*
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
* required resources.
*/
hsa_status_t HSA_API hsa_ext_image_create_v2(hsa_agent_t agent,
const hsa_ext_image_descriptor_v2_t* image_descriptor,
const void* image_data,
hsa_access_permission_t access_permission,
hsa_ext_image_t* image);
/**
* @brief Destroys an image handle created with ::hsa_ext_image_create_v2.
*
* @details This function can destroy both regular images and mipmapped arrays
* created with ::hsa_ext_image_create_v2. It does not free the image data buffer
* that was passed to ::hsa_ext_image_create_v2; releasing that memory remains the
* responsibility of the caller.
*
* @param[in] agent Agent associated with the image handle.
*
* @param[in] image Image handle to destroy.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been initialized.
*
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image is invalid.
*/
hsa_status_t HSA_API hsa_ext_image_destroy_v2(hsa_agent_t agent, hsa_ext_image_t image);
/**
* @brief Create an image view for a specific mip level of a mipmapped array.
*
* @details
* - Dimensions are clamped to at least 1 when shifting (right shift per level).
* - Row/slice pitches follow the underlying layout; for tiled images the internal
* SRD setup derives the pitches; for linear layout the base pitches may
* be adjusted if required per level (future enhancement).
* - The view inherits access permissions from the parent array.
*
* @param[in] agent GPU agent.
*
* @param[in] mipmapped_array Pointer to the handle of a mipmapped array previously
* created by ::hsa_ext_image_create_v2 with image_descriptor->mipmap_levels > 1.
*
* @param[in] mip_level Level index (0 = base level). Must be less than the number
* of mip levels in the array.
*
* @param[out] level_image_out Memory location where the image handle for the
* level view is stored.
*
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
*
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT A pointer argument is NULL,
* @p mip_level is not a valid level of the array, or @p mipmapped_array does not
* refer to a valid handle.
*
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Allocation of the view metadata failed.
*/
hsa_status_t HSA_API hsa_ext_image_mipmap_array_get_level(hsa_agent_t agent,
const hsa_ext_image_t* mipmapped_array,
uint32_t mip_level,
hsa_ext_image_t* level_image_out);
/**
* @brief Copies a portion of one image (the source) to another image (the
* destination).