rocr: Add support for Mipmapped Array (#1847)
SWDEV-539526 - Add support for Mipmapped Array in Rocr. Add support for Mipmapped Array functionality in the Rocr Runtime, enabling GPU applications to work with multi-level texture mipmaps. The implementation introduces new public APIs for creating, querying, and managing mipmapped arrays across different GPU architectures. Signed-off-by: Apurv Mishra <Apurv.Mishra@amd.com> Co-authored-by: Shweta Khatri <shweta.khatri@amd.com> Co-authored-by: taosang2 <tao.sang@amd.com>
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
8b529e7b29
Коммит
be375c2dbf
@@ -55,6 +55,11 @@ namespace rocr {
|
||||
namespace core {
|
||||
// Internal image-extension dispatch table. Extends ImageExtTable with extra
// function-pointer slots; each slot's type is a pointer to the signature of
// the public API of the same name (minus the `_fn` suffix).
struct ImageExtTableInternal : public ImageExtTable {
|
||||
// Slot for ::hsa_amd_image_get_info_max_dim.
decltype(::hsa_amd_image_get_info_max_dim)* hsa_amd_image_get_info_max_dim_fn;
|
||||
// V2 unified APIs for images and mipmaps
|
||||
// Slot for ::hsa_ext_image_data_get_info_v2.
decltype(::hsa_ext_image_data_get_info_v2)* hsa_ext_image_data_get_info_v2_fn;
|
||||
// Slot for ::hsa_ext_image_create_v2.
decltype(::hsa_ext_image_create_v2)* hsa_ext_image_create_v2_fn;
|
||||
// Slot for ::hsa_ext_image_destroy_v2.
decltype(::hsa_ext_image_destroy_v2)* hsa_ext_image_destroy_v2_fn;
|
||||
// Slot for ::hsa_ext_image_mipmap_array_get_level.
decltype(::hsa_ext_image_mipmap_array_get_level)* hsa_ext_image_mipmap_array_get_level_fn;
|
||||
};
|
||||
|
||||
// Internal PC-sampling extension table; currently adds no members to PcSamplingExtTable.
struct PcSamplingExtTableInternal : public PcSamplingExtTable {};
|
||||
|
||||
@@ -476,6 +476,39 @@ hsa_status_t hsa_ext_image_create_with_layout(
|
||||
image);
|
||||
}
|
||||
|
||||
/// Public entry point for hsa_ext_image_data_get_info_v2.
///
/// Forwards every argument unchanged to the handler stored in the runtime
/// singleton's image extension table and returns that handler's status.
hsa_status_t hsa_ext_image_data_get_info_v2(
    hsa_agent_t agent, const hsa_ext_image_descriptor_v2_t* image_descriptor,
    hsa_access_permission_t access_permission,
    hsa_ext_image_data_info_t* image_data_info) {
  const auto& image_table = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return image_table.hsa_ext_image_data_get_info_v2_fn(agent, image_descriptor,
                                                       access_permission, image_data_info);
}
|
||||
|
||||
/// Public entry point for hsa_ext_image_create_v2.
///
/// Forwards every argument unchanged to the handler stored in the runtime
/// singleton's image extension table and returns that handler's status.
hsa_status_t hsa_ext_image_create_v2(hsa_agent_t agent,
                                     const hsa_ext_image_descriptor_v2_t* image_descriptor,
                                     const void* image_data,
                                     hsa_access_permission_t access_permission,
                                     hsa_ext_image_t* image) {
  const auto& image_table = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return image_table.hsa_ext_image_create_v2_fn(agent, image_descriptor, image_data,
                                                access_permission, image);
}
|
||||
|
||||
/// Public entry point for hsa_ext_image_destroy_v2.
///
/// Forwards both arguments unchanged to the handler stored in the runtime
/// singleton's image extension table and returns that handler's status.
hsa_status_t hsa_ext_image_destroy_v2(hsa_agent_t agent, hsa_ext_image_t image) {
  const auto& image_table = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return image_table.hsa_ext_image_destroy_v2_fn(agent, image);
}
|
||||
|
||||
/// Public entry point for hsa_ext_image_mipmap_array_get_level.
///
/// Forwards every argument unchanged to the handler stored in the runtime
/// singleton's image extension table and returns that handler's status.
hsa_status_t hsa_ext_image_mipmap_array_get_level(hsa_agent_t agent,
                                                  const hsa_ext_image_t* mipmap_array,
                                                  uint32_t mip_level,
                                                  hsa_ext_image_t* level_view) {
  const auto& image_table = rocr::core::Runtime::runtime_singleton_->extensions_.image_api;
  return image_table.hsa_ext_image_mipmap_array_get_level_fn(agent, mipmap_array, mip_level,
                                                             level_view);
}
|
||||
|
||||
hsa_status_t HSA_API hsa_ven_amd_pcs_iterate_configuration(
|
||||
hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback,
|
||||
void* callback_data) {
|
||||
|
||||
@@ -223,4 +223,8 @@ EXPORTS
|
||||
hsa_amd_queue_get_info
|
||||
hsa_amd_enable_logging
|
||||
hsa_amd_signal_wait_all
|
||||
hsa_amd_portable_export_dmabuf_v2
|
||||
hsa_amd_portable_export_dmabuf_v2
|
||||
hsa_ext_image_mipmap_array_get_level
|
||||
hsa_ext_image_create_v2
|
||||
hsa_ext_image_data_get_info_v2
|
||||
hsa_ext_image_destroy_v2
|
||||
@@ -262,6 +262,10 @@ global:
|
||||
hsa_amd_portable_export_dmabuf_v2;
|
||||
hsa_amd_ais_file_write;
|
||||
hsa_amd_ais_file_read;
|
||||
hsa_ext_image_mipmap_array_get_level;
|
||||
hsa_ext_image_create_v2;
|
||||
hsa_ext_image_data_get_info_v2;
|
||||
hsa_ext_image_destroy_v2;
|
||||
local:
|
||||
*;
|
||||
};
|
||||
|
||||
@@ -369,6 +369,48 @@ hsa_status_t hsa_ext_image_create_with_layout(
|
||||
CATCH;
|
||||
}
|
||||
|
||||
/// Query size/alignment requirements for an image described by a V2
/// descriptor with an explicit (LINEAR) data layout.
///
/// @param agent                  Agent the image is for; a zero handle is rejected.
/// @param image_descriptor       V2 descriptor; must be non-null and describe a
///                               single-level image (mipmap_levels of 0 or 1).
/// @param access_permission      Must lie in [HSA_ACCESS_PERMISSION_RO, HSA_ACCESS_PERMISSION_RW].
/// @param image_data_layout      Must be HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR.
/// @param image_data_row_pitch   Row pitch handed to enforceDefaultPitch and the image runtime.
/// @param image_data_slice_pitch Slice pitch handed to enforceDefaultPitch and the image runtime.
/// @param image_data_info        Output; must be non-null.
/// @return HSA_STATUS_ERROR_INVALID_AGENT, HSA_STATUS_ERROR_INVALID_ARGUMENT, or
///         the status returned by ImageRuntime::GetImageSizeAndAlignment.
hsa_status_t hsa_ext_image_data_get_info_with_layout_v2(
    hsa_agent_t agent, const hsa_ext_image_descriptor_v2_t* image_descriptor,
    hsa_access_permission_t access_permission, hsa_ext_image_data_layout_t image_data_layout,
    size_t image_data_row_pitch, size_t image_data_slice_pitch,
    hsa_ext_image_data_info_t* image_data_info) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool have_pointers = (image_descriptor != NULL) && (image_data_info != NULL);
  const bool permission_in_range = (access_permission >= HSA_ACCESS_PERMISSION_RO) &&
                                   (access_permission <= HSA_ACCESS_PERMISSION_RW);
  const bool layout_is_linear = (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR);
  if (!have_pointers || !permission_in_range || !layout_is_linear) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // LINEAR layout only supports single-level images: a level count of 0
  // means "one level", so anything above 1 is a mipmap request and is refused.
  if (image_descriptor->mipmap_levels > 1) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // The internal runtime still speaks the V1 descriptor; copy the shared fields.
  hsa_ext_image_descriptor_t v1_desc = {};
  v1_desc.format = image_descriptor->format;
  v1_desc.geometry = image_descriptor->geometry;
  v1_desc.width = image_descriptor->width;
  v1_desc.height = image_descriptor->height;
  v1_desc.depth = image_descriptor->depth;
  v1_desc.array_size = image_descriptor->array_size;

  enforceDefaultPitch(agent, &v1_desc, image_data_row_pitch, image_data_slice_pitch);

  ImageRuntime* const runtime = ImageRuntime::instance();
  return runtime->GetImageSizeAndAlignment(agent, v1_desc, image_data_layout,
                                           image_data_row_pitch, image_data_slice_pitch,
                                           *image_data_info);
  CATCH;
}
|
||||
|
||||
hsa_status_t hsa_amd_image_create(hsa_agent_t agent,
|
||||
const hsa_ext_image_descriptor_t* image_descriptor,
|
||||
const hsa_amd_image_descriptor_t* image_layout,
|
||||
@@ -388,6 +430,153 @@ hsa_status_t hsa_amd_image_create(hsa_agent_t agent,
|
||||
CATCH;
|
||||
}
|
||||
|
||||
/// Create an image or mipmapped array from a V2 descriptor plus an AMD
/// vendor layout descriptor.
///
/// @param agent             Agent the image is for; a zero handle is rejected.
/// @param image_descriptor  V2 descriptor; must be non-null. mipmap_levels of 0
///                          is treated as 1.
/// @param image_layout      AMD layout descriptor, passed through to the image runtime.
/// @param image_data        Backing storage; must be non-null.
/// @param access_permission Passed through to the image runtime.
/// @param image             Output handle; must be non-null.
/// @return HSA_STATUS_ERROR_INVALID_AGENT, HSA_STATUS_ERROR_INVALID_ARGUMENT, or
///         the status of the selected ImageRuntime create call.
hsa_status_t hsa_amd_image_create_v2(hsa_agent_t agent,
                                     const hsa_ext_image_descriptor_v2_t* image_descriptor,
                                     const hsa_amd_image_descriptor_t* image_layout,
                                     const void* image_data,
                                     hsa_access_permission_t access_permission,
                                     hsa_ext_image_t* image) {
  TRY;
  if (agent.handle == 0) return HSA_STATUS_ERROR_INVALID_AGENT;

  const bool missing_pointer =
      (image_descriptor == NULL) || (image_data == NULL) || (image == NULL);
  if (missing_pointer) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  // The internal runtime still speaks the V1 descriptor; copy the shared fields.
  hsa_ext_image_descriptor_t v1_desc = {};
  v1_desc.format = image_descriptor->format;
  v1_desc.geometry = image_descriptor->geometry;
  v1_desc.width = image_descriptor->width;
  v1_desc.height = image_descriptor->height;
  v1_desc.depth = image_descriptor->depth;
  v1_desc.array_size = image_descriptor->array_size;

  // A level count of 0 is shorthand for a single-level image.
  uint32_t level_count = image_descriptor->mipmap_levels;
  if (level_count == 0) level_count = 1;

  ImageRuntime* const runtime = ImageRuntime::instance();
  if (level_count <= 1) {
    // Regular single-level image with AMD layout.
    return runtime->CreateImageHandleWithLayout(agent, v1_desc, image_layout, image_data,
                                                access_permission, *image);
  }
  // Mipmapped array with AMD layout.
  return runtime->CreateMipmapArrayHandleWithLayout(agent, v1_desc, image_layout, image_data,
                                                    access_permission, level_count, *image);
  CATCH;
}
|
||||
|
||||
//---------------------------------------------------------------------------//
|
||||
// V2 API Implementations (Unified Mipmap Support)
|
||||
//---------------------------------------------------------------------------//
|
||||
|
||||
/// Query size/alignment requirements for an image or mipmapped array
/// described by a V2 descriptor, using the OPAQUE data layout.
///
/// @param agent             Agent the image is for; a zero handle is rejected.
/// @param image_descriptor  V2 descriptor; must be non-null. mipmap_levels of 0
///                          is treated as 1.
/// @param access_permission Must lie in [HSA_ACCESS_PERMISSION_RO,
///                          HSA_ACCESS_PERMISSION_RW], matching the check done
///                          by hsa_ext_image_data_get_info_with_layout_v2.
/// @param image_data_info   Output; must be non-null.
/// @return HSA_STATUS_ERROR_INVALID_AGENT, HSA_STATUS_ERROR_INVALID_ARGUMENT, or
///         the status of the selected ImageRuntime size query.
hsa_status_t hsa_ext_image_data_get_info_v2(hsa_agent_t agent,
                                            const hsa_ext_image_descriptor_v2_t* image_descriptor,
                                            hsa_access_permission_t access_permission,
                                            hsa_ext_image_data_info_t* image_data_info) {
  TRY;
  if (agent.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  // Reject out-of-range permissions up front, as the with-layout variant does,
  // instead of silently accepting a value that is never looked at again.
  if ((image_descriptor == NULL) || (image_data_info == NULL) ||
      (access_permission < HSA_ACCESS_PERMISSION_RO) ||
      (access_permission > HSA_ACCESS_PERMISSION_RW)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // A level count of 0 is shorthand for a single-level image.
  const uint32_t mipmap_levels =
      (image_descriptor->mipmap_levels == 0) ? 1 : image_descriptor->mipmap_levels;

  // Convert V2 descriptor to V1 for internal use
  hsa_ext_image_descriptor_t desc_v1 = {};
  desc_v1.geometry = image_descriptor->geometry;
  desc_v1.width = image_descriptor->width;
  desc_v1.height = image_descriptor->height;
  desc_v1.depth = image_descriptor->depth;
  desc_v1.array_size = image_descriptor->array_size;
  desc_v1.format = image_descriptor->format;

  if (mipmap_levels > 1) {
    // Mipmapped array path: the runtime fills size and alignment directly.
    return ImageRuntime::instance()->GetMipmapArraySizeAndAlignment(
        agent, desc_v1, mipmap_levels, HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0,
        image_data_info->size, image_data_info->alignment);
  } else {
    // Regular image path (single level)
    return ImageRuntime::instance()->GetImageSizeAndAlignment(
        agent, desc_v1, HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0, *image_data_info);
  }
  CATCH;
}
|
||||
|
||||
/// Create an image or mipmapped array from a V2 descriptor, using the OPAQUE
/// data layout.
///
/// @param agent             Agent the image is for; a zero handle is rejected.
/// @param image_descriptor  V2 descriptor; must be non-null. mipmap_levels of 0
///                          is treated as 1.
/// @param image_data        Backing storage; must be non-null.
/// @param access_permission Must lie in [HSA_ACCESS_PERMISSION_RO,
///                          HSA_ACCESS_PERMISSION_RW], matching the check done
///                          by hsa_ext_image_data_get_info_with_layout_v2.
/// @param image             Output handle; must be non-null.
/// @return HSA_STATUS_ERROR_INVALID_AGENT, HSA_STATUS_ERROR_INVALID_ARGUMENT, or
///         the status of the selected ImageRuntime create call.
hsa_status_t hsa_ext_image_create_v2(hsa_agent_t agent,
                                     const hsa_ext_image_descriptor_v2_t* image_descriptor,
                                     const void* image_data,
                                     hsa_access_permission_t access_permission,
                                     hsa_ext_image_t* image) {
  TRY;
  if (agent.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  // Reject out-of-range permissions here rather than handing an arbitrary
  // integer down to the image runtime.
  if ((image_descriptor == NULL) || (image_data == NULL) || (image == NULL) ||
      (access_permission < HSA_ACCESS_PERMISSION_RO) ||
      (access_permission > HSA_ACCESS_PERMISSION_RW)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // A level count of 0 is shorthand for a single-level image.
  const uint32_t mipmap_levels =
      (image_descriptor->mipmap_levels == 0) ? 1 : image_descriptor->mipmap_levels;

  // Convert V2 descriptor to V1 for internal use
  hsa_ext_image_descriptor_t desc_v1 = {};
  desc_v1.geometry = image_descriptor->geometry;
  desc_v1.width = image_descriptor->width;
  desc_v1.height = image_descriptor->height;
  desc_v1.depth = image_descriptor->depth;
  desc_v1.array_size = image_descriptor->array_size;
  desc_v1.format = image_descriptor->format;

  if (mipmap_levels > 1) {
    // Mipmapped array path
    return ImageRuntime::instance()->CreateMipmapArrayHandle(
        agent, desc_v1, image_data, access_permission, mipmap_levels,
        HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0, *image);
  } else {
    // Regular image path (single level)
    return ImageRuntime::instance()->CreateImageHandle(
        agent, desc_v1, image_data, access_permission,
        HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0, *image);
  }
  CATCH;
}
|
||||
|
||||
/// Destroy a handle created through the V2 image API.
///
/// @param agent Agent the image belongs to; a zero handle is rejected. It is
///              not otherwise used here.
/// @param image Handle to release.
/// @return HSA_STATUS_ERROR_INVALID_AGENT, or the status returned by
///         ImageRuntime::DestroyImageHandle.
hsa_status_t hsa_ext_image_destroy_v2(hsa_agent_t agent, hsa_ext_image_t image) {
  TRY;
  const bool agent_is_null = (agent.handle == 0);
  if (agent_is_null) return HSA_STATUS_ERROR_INVALID_AGENT;

  // Single-level images and mipmapped arrays are both released through the
  // same DestroyImageHandle call; no distinction is made at this layer.
  ImageRuntime* const runtime = ImageRuntime::instance();
  return runtime->DestroyImageHandle(image);
  CATCH;
}
|
||||
|
||||
// per-level view retrieval implementation
|
||||
/// Retrieve a handle for one mip level of a mipmapped array.
///
/// @param agent           Agent the array belongs to; a zero handle is rejected,
///                        as in the other V2 image entry points.
/// @param mipmapped_array Handle of the mipmapped array; must be non-null.
/// @param mip_level       Level index, forwarded to the image runtime unchanged.
/// @param level_image_out Output handle for the level; must be non-null.
/// @return HSA_STATUS_ERROR_INVALID_AGENT, HSA_STATUS_ERROR_INVALID_ARGUMENT, or
///         the status returned by ImageRuntime::GetMipmapArrayLevelHandle.
hsa_status_t HSA_API hsa_ext_image_mipmap_array_get_level(hsa_agent_t agent,
                                                          const hsa_ext_image_t* mipmapped_array,
                                                          uint32_t mip_level,
                                                          hsa_ext_image_t* level_image_out) {
  TRY;
  if (agent.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  if (mipmapped_array == NULL || level_image_out == NULL) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  return ImageRuntime::instance()->GetMipmapArrayLevelHandle(agent, *mipmapped_array, mip_level,
                                                             *level_image_out);
  CATCH;
}
|
||||
|
||||
void LoadImage(core::ImageExtTableInternal* image_api,
|
||||
decltype(::hsa_amd_image_create)** interface_api) {
|
||||
image_api->hsa_ext_image_get_capability_fn = hsa_ext_image_get_capability;
|
||||
@@ -420,6 +609,12 @@ void LoadImage(core::ImageExtTableInternal* image_api,
|
||||
|
||||
image_api->hsa_ext_sampler_create_v2_fn = hsa_ext_sampler_create_v2;
|
||||
|
||||
// V2 unified APIs for images and mipmaps
|
||||
image_api->hsa_ext_image_data_get_info_v2_fn = hsa_ext_image_data_get_info_v2;
|
||||
image_api->hsa_ext_image_create_v2_fn = hsa_ext_image_create_v2;
|
||||
image_api->hsa_ext_image_destroy_v2_fn = hsa_ext_image_destroy_v2;
|
||||
image_api->hsa_ext_image_mipmap_array_get_level_fn = hsa_ext_image_mipmap_array_get_level;
|
||||
|
||||
*interface_api = hsa_amd_image_create;
|
||||
}
|
||||
|
||||
|
||||
@@ -118,6 +118,38 @@ void Sampler::Destroy(const Sampler* sampler) {
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
/// Allocate and construct a MipmappedArray object in the image runtime's
/// kernarg pool and grant @p agent access to it.
///
/// @param agent Agent that is given access to the new object's memory.
/// @return The constructed object, or nullptr when the pool allocation or the
///         access grant fails (the object is destroyed again in the latter case).
MipmappedArray* MipmappedArray::Create(hsa_agent_t agent) {
  void* storage = nullptr;
  hsa_status_t status = AMD::hsa_amd_memory_pool_allocate(
      ImageRuntime::instance()->kernarg_pool(), sizeof(MipmappedArray), 0, &storage);
  assert(status == HSA_STATUS_SUCCESS);
  if (status != HSA_STATUS_SUCCESS) return nullptr;

  // Construct in place inside the pool allocation.
  MipmappedArray* const created = new (storage) MipmappedArray();

  // Let the agent access the memory that holds the MipmappedArray object.
  status = AMD::hsa_amd_agents_allow_access(1, &agent, nullptr, created);
  if (status == HSA_STATUS_SUCCESS) return created;

  MipmappedArray::Destroy(created);
  return nullptr;
}
|
||||
|
||||
void MipmappedArray::Destroy(const MipmappedArray* mipmapped_array) {
|
||||
assert(mipmapped_array != NULL);
|
||||
mipmapped_array->~MipmappedArray();
|
||||
|
||||
hsa_status_t status = AMD::hsa_amd_memory_pool_free(
|
||||
const_cast<MipmappedArray*>(mipmapped_array));
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
ImageManager::ImageManager() {}
|
||||
|
||||
ImageManager::~ImageManager() {}
|
||||
|
||||
@@ -48,6 +48,7 @@
|
||||
#include "inc/hsa_ext_image.h"
|
||||
#include "resource.h"
|
||||
#include "util.h"
|
||||
#include "image/addrlib/inc/addrinterface.h"
|
||||
|
||||
namespace rocr {
|
||||
namespace image {
|
||||
@@ -82,6 +83,7 @@ class ImageManager {
|
||||
virtual hsa_status_t CalculateImageSizeAndAlignment(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
hsa_ext_image_data_layout_t image_data_layout,
|
||||
uint32_t num_mipmap_levels,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
hsa_ext_image_data_info_t& image_info) const = 0;
|
||||
@@ -120,6 +122,24 @@ class ImageManager {
|
||||
virtual hsa_status_t FillImage(const Image& image, const void* pattern,
|
||||
const hsa_ext_image_region_t& region);
|
||||
|
||||
/// @brief Get the address library handle
|
||||
virtual ADDR_HANDLE GetAddrLib() const = 0;
|
||||
|
||||
/// @brief Fill mipmap structure with device specific mipmapped array object.
|
||||
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const = 0;
|
||||
|
||||
/// @brief Fill mipmap structure with pre-computed AMD metadata descriptor.
|
||||
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const = 0;
|
||||
|
||||
/// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields
|
||||
virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view,
|
||||
const MipmappedArray& mipmap_array, uint32_t mip_level) const = 0;
|
||||
|
||||
/// @brief Debug helper: print the contents of an image SRD.
/// @param srd Pointer to the SRD words; the GFX9/AI implementation prints an
///            error banner and returns when this is null.
virtual void printSRDDetailed(const uint32_t* srd) const = 0;
|
||||
/// @brief Debug helper invoked by printSRDDetailed after it prints each
///        DST_SEL_* value. NOTE(review): presumably prints a readable name
///        for the selector - confirm against the implementations.
virtual void printChannelSelect(uint32_t sel) const = 0;
|
||||
/// @brief Debug helper invoked by printSRDDetailed after it prints the SRD
///        TYPE field. NOTE(review): presumably prints a readable name for the
///        resource type - confirm against the implementations.
virtual void printResourceType(uint32_t type) const = 0;
|
||||
/// @brief Debug helper invoked by printSRDDetailed after it prints the SRD
///        SW_MODE field. NOTE(review): presumably prints a readable name for
///        the swizzle mode - confirm against the implementations.
virtual void printSwizzleMode(uint32_t sw_mode) const = 0;
|
||||
|
||||
protected:
|
||||
static uint16_t FloatToHalf(float in);
|
||||
|
||||
|
||||
@@ -87,6 +87,7 @@ ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD3)
|
||||
hsa_status_t ImageManagerAi::CalculateImageSizeAndAlignment(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
hsa_ext_image_data_layout_t image_data_layout,
|
||||
uint32_t num_mipmap_levels,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
hsa_ext_image_data_info_t& image_info) const {
|
||||
@@ -102,8 +103,8 @@ hsa_status_t ImageManagerAi::CalculateImageSizeAndAlignment(
|
||||
desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)?
|
||||
Image::TileMode::TILED : Image::TileMode::LINEAR;
|
||||
}
|
||||
if (GetAddrlibSurfaceInfoAi(component, desc, tileMode,
|
||||
image_data_row_pitch, image_data_slice_pitch, out) == (uint32_t)(-1)) {
|
||||
if (GetAddrlibSurfaceInfoAi(component, desc, num_mipmap_levels, tileMode,
|
||||
image_data_row_pitch, image_data_slice_pitch, out) == (uint32_t)(-1)) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -196,7 +197,7 @@ hsa_status_t ImageManagerAi::PopulateImageSrd(Image& image, const metadata_amd_t
|
||||
((SQ_IMG_RSRC_WORD3*)(&image.srd[3]))->bits.TYPE =
|
||||
ImageLut().MapGeometry(image.desc.geometry);
|
||||
}
|
||||
|
||||
|
||||
// Imported metadata holds the offset to metadata, add the image base address.
|
||||
uintptr_t meta = uintptr_t(((SQ_IMG_RSRC_WORD5*)(&image.srd[5]))->bits.META_DATA_ADDRESS_HI) << 40;
|
||||
meta |= uintptr_t(((SQ_IMG_RSRC_WORD7*)(&image.srd[7]))->bits.META_DATA_ADDRESS) << 8;
|
||||
@@ -341,8 +342,8 @@ hsa_status_t ImageManagerAi::PopulateImageSrd(Image& image) const {
|
||||
|
||||
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
|
||||
|
||||
uint32_t swizzleMode = GetAddrlibSurfaceInfoAi(image.component, image.desc, image.tile_mode,
|
||||
image.row_pitch, image.slice_pitch, out);
|
||||
uint32_t swizzleMode = GetAddrlibSurfaceInfoAi(image.component, image.desc,
|
||||
1, image.tile_mode, image.row_pitch, image.slice_pitch, out);
|
||||
if (swizzleMode == (uint32_t)(-1)) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
@@ -499,6 +500,7 @@ hsa_status_t ImageManagerAi::PopulateSamplerSrd(Sampler& sampler) const {
|
||||
|
||||
uint32_t ImageManagerAi::GetAddrlibSurfaceInfoAi(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
uint32_t num_mipmap_levels,
|
||||
Image::TileMode tileMode,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
@@ -521,7 +523,8 @@ uint32_t ImageManagerAi::GetAddrlibSurfaceInfoAi(
|
||||
in.width = width;
|
||||
in.height = height;
|
||||
in.numSlices = num_slice;
|
||||
in.pitchInElement = image_data_row_pitch / image_prop.element_size;
|
||||
in.numMipLevels = num_mipmap_levels;
|
||||
|
||||
switch(desc.geometry) {
|
||||
case HSA_EXT_IMAGE_GEOMETRY_1D:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_1DB:
|
||||
@@ -583,7 +586,7 @@ uint32_t ImageManagerAi::GetAddrlibSurfaceInfoAi(
|
||||
prefSettingsInput.resourceType = in.resourceType;
|
||||
|
||||
// Disallow all swizzles but linear.
|
||||
if (tileMode == Image::TileMode::LINEAR)
|
||||
if (tileMode == Image::TileMode::LINEAR)
|
||||
{
|
||||
prefSettingsInput.forbiddenBlock.macroThin4KB = 1;
|
||||
prefSettingsInput.forbiddenBlock.macroThick4KB = 1;
|
||||
@@ -611,5 +614,406 @@ uint32_t ImageManagerAi::GetAddrlibSurfaceInfoAi(
|
||||
return in.swizzleMode;
|
||||
}
|
||||
|
||||
/// @brief Build the shader resource descriptor (SRD) for a mipmapped array.
///
/// For HSA_EXT_IMAGE_GEOMETRY_1DB a 4-word buffer resource is written; for
/// every other geometry an 8-word image resource is written, using the swizzle
/// mode, pitch and sizes reported by GetAddrlibSurfaceInfoAi for
/// `mipmap.num_levels` levels. In both cases srd[8..11] receive the channel
/// type, channel order, width and level count, and row_pitch/slice_pitch are
/// updated.
///
/// @param mipmap In/out. Reads desc, data, num_levels, tile_mode, component,
///               row_pitch and slice_pitch; writes srd, row_pitch, slice_pitch
///               and (image path only) addr_output.addr2 and size.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR when the surface-info
///         helper reports failure.
hsa_status_t ImageManagerAi::PopulateMipmapSrd(MipmappedArray& mipmap) const {
  ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap.desc.format, mipmap.desc.geometry);
  assert(mipmap_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(mipmap_prop.element_size != 0);
  assert(mipmap.num_levels >= 1);

  const void* mipmap_data_addr = mipmap.data;

  // Addresses in local memory are rebased against the local memory base.
  if (IsLocalMemory(mipmap.data))
    mipmap_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(mipmap.data) - local_memory_base_address_);

  if (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    // 1DB is described with buffer resource words instead of image words.
    sq_buf_rsrc_word0_u word0;
    sq_buf_rsrc_word1_u word1;
    sq_buf_rsrc_word2_u word2;
    sq_buf_rsrc_word3_u word3;

    word0.val = 0;
    word0.f.base_address = PtrLow32(mipmap_data_addr);

    word1.val = 0;
    word1.f.base_address_hi = PtrHigh32(mipmap_data_addr);
    word1.f.stride = mipmap_prop.element_size;
    word1.f.swizzle_enable = false;
    word1.f.cache_swizzle = false;

    word2.val = 0;
    word2.f.num_records = mipmap.desc.width * mipmap_prop.element_size;

    const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);
    word3.val = 0;
    word3.f.dst_sel_x = swizzle.x;
    word3.f.dst_sel_y = swizzle.y;
    word3.f.dst_sel_z = swizzle.z;
    word3.f.dst_sel_w = swizzle.w;
    word3.f.num_format = mipmap_prop.data_type;
    word3.f.data_format = mipmap_prop.data_format;
    word3.f.index_stride = mipmap_prop.element_size;
    word3.f.type = ImageLut().MapGeometry(mipmap.desc.geometry);

    mipmap.srd[0] = word0.val;
    mipmap.srd[1] = word1.val;
    mipmap.srd[2] = word2.val;
    mipmap.srd[3] = word3.val;

    mipmap.row_pitch = mipmap.desc.width * mipmap_prop.element_size;
    mipmap.slice_pitch = mipmap.row_pitch;
  } else {
    sq_img_rsrc_word0_u word0;
    sq_img_rsrc_word1_u word1;
    sq_img_rsrc_word2_u word2;
    sq_img_rsrc_word3_u word3;
    sq_img_rsrc_word4_u word4;
    sq_img_rsrc_word5_u word5;
    sq_img_rsrc_word6_u word6;
    sq_img_rsrc_word7_u word7;

    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};

    // pMipInfo not needed - set to nullptr and AddrLib will ignore it
    out.pMipInfo = nullptr;

    uint32_t swizzleMode = GetAddrlibSurfaceInfoAi(
        mipmap.component, mipmap.desc, mipmap.num_levels,
        mipmap.tile_mode, mipmap.row_pitch, mipmap.slice_pitch, out);
    if (swizzleMode == (uint32_t)(-1)) {
      return HSA_STATUS_ERROR;
    }
    mipmap.addr_output.addr2 = out;
    mipmap.size = out.surfSize;

    assert((out.bpp / 8) == mipmap_prop.element_size);

    const size_t row_pitch_size = out.pitch * mipmap_prop.element_size;

    // Clear word0 like every other word so that no indeterminate bits can
    // reach the SRD through word0.val.
    word0.val = 0;
    word0.f.base_address = PtrLow40Shift8(mipmap_data_addr);

    word1.val = 0;
    word1.f.base_address_hi = PtrHigh64Shift40(mipmap_data_addr);
    word1.f.min_lod = 0;
    word1.f.data_format = mipmap_prop.data_format;
    word1.f.num_format = mipmap_prop.data_type;

    word2.val = 0;
    word2.f.width = mipmap.desc.width - 1;
    word2.f.height = mipmap.desc.height - 1;
    word2.f.perf_mod = 0;

    const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);
    word3.val = 0;
    word3.f.dst_sel_x = swizzle.x;
    word3.f.dst_sel_y = swizzle.y;
    word3.f.dst_sel_z = swizzle.z;
    word3.f.dst_sel_w = swizzle.w;
    word3.f.sw_mode = swizzleMode;
    // Expose the whole chain: levels 0 .. num_levels - 1.
    word3.f.base_level = 0;
    word3.f.last_level = mipmap.num_levels - 1;
    word3.f.type = ImageLut().MapGeometry(mipmap.desc.geometry);

    const bool mipmap_array =
        (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
         mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
         mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool mipmap_3d = (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    // The depth field holds (array_size - 1) for array geometries,
    // (depth - 1) for 3D, and 0 otherwise.
    word4.val = 0;
    word4.f.depth =
        (mipmap_array)
            ? std::max(mipmap.desc.array_size, static_cast<size_t>(1)) - 1
            : (mipmap_3d) ? mipmap.desc.depth - 1 : 0;
    word4.f.pitch = out.pitch - 1;
    word4.f.bc_swizzle = GetBcSwizzle(swizzle);

    word5.val = 0;
    word5.f.max_mip = mipmap.num_levels - 1;
    word6.val = 0;
    word7.val = 0;

    mipmap.srd[0] = word0.val;
    mipmap.srd[1] = word1.val;
    mipmap.srd[2] = word2.val;
    mipmap.srd[3] = word3.val;
    mipmap.srd[4] = word4.val;
    mipmap.srd[5] = word5.val;
    mipmap.srd[6] = word6.val;
    mipmap.srd[7] = word7.val;

    mipmap.row_pitch = row_pitch_size;
    mipmap.slice_pitch = out.sliceSize;
  }

  // Extra words appended after the hardware descriptor.
  mipmap.srd[8] = mipmap.desc.format.channel_type;
  mipmap.srd[9] = mipmap.desc.format.channel_order;
  mipmap.srd[10] = static_cast<uint32_t>(mipmap.desc.width);

  // Mipmap-specific
  mipmap.srd[11] = mipmap.num_levels;

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
/// @brief Build the SRD for a mipmapped array from pre-computed AMD metadata.
///
/// Copies SRD words 0-7 from the metadata, then overrides the base address,
/// data/num format, channel selects and MAX_MIP (and TYPE for 1D/1DA) from the
/// array's own descriptor. srd[8..11] receive channel type, channel order,
/// width and level count. A per-level ADDR2_MIP_INFO table is allocated and
/// filled from the metadata `mip_offsets`, and stored in
/// `addr_output.addr2.pMipInfo`.
///
/// @param mipmap_array In/out. Reads desc, data and num_levels; writes srd,
///                     row_pitch, slice_pitch, addr_output.addr2.pMipInfo, size.
/// @param desc         AMD metadata, reinterpreted as metadata_amd_ai_t.
/// @return HSA_STATUS_SUCCESS; HSA_STATUS_ERROR_INVALID_ARGUMENT when @p desc is
///         null or the array has no levels; HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED
///         when the format is unsupported or its element size disagrees with
///         the hardware pixel size.
hsa_status_t ImageManagerAi::PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const {
  // Both conditions would otherwise lead straight to a null dereference or to
  // indexing the mip table with (0 - 1).
  if (desc == nullptr || mipmap_array.num_levels == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const metadata_amd_ai_t* desc_ai = reinterpret_cast<const metadata_amd_ai_t*>(desc);
  const void* mipmap_data_addr = mipmap_array.data;

  ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap_array.desc.format, mipmap_array.desc.geometry);
  if (mipmap_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED || mipmap_prop.element_size == 0) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  // Validate the pixel size before touching the SRD so that a rejected
  // request leaves the caller's descriptor words untouched.
  uint32_t hwPixelSize = ImageLut().GetPixelSize(mipmap_prop.data_format, mipmap_prop.data_type);
  if (mipmap_prop.element_size != hwPixelSize) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  const Swizzle swizzle = ImageLut().MapSwizzle(mipmap_array.desc.format.channel_order);

  // Addresses in local memory are rebased against the local memory base.
  if (IsLocalMemory(mipmap_array.data)) {
    mipmap_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(mipmap_array.data) - local_memory_base_address_);
  }

  // Copy the pre-computed SRD words 0-7 from metadata
  mipmap_array.srd[0] = desc_ai->word0.u32All;
  mipmap_array.srd[1] = desc_ai->word1.u32All;
  mipmap_array.srd[2] = desc_ai->word2.u32All;
  mipmap_array.srd[3] = desc_ai->word3.u32All;
  mipmap_array.srd[4] = desc_ai->word4.u32All;
  mipmap_array.srd[5] = desc_ai->word5.u32All;
  mipmap_array.srd[6] = desc_ai->word6.u32All;
  mipmap_array.srd[7] = desc_ai->word7.u32All;

  // Override specific fields after copying
  reinterpret_cast<SQ_IMG_RSRC_WORD0*>(&mipmap_array.srd[0])->bits.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr);
  reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&mipmap_array.srd[1])->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr);
  reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&mipmap_array.srd[1])->bits.DATA_FORMAT = mipmap_prop.data_format;
  reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&mipmap_array.srd[1])->bits.NUM_FORMAT = mipmap_prop.data_type;
  reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3])->bits.DST_SEL_X = swizzle.x;
  reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3])->bits.DST_SEL_Y = swizzle.y;
  reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3])->bits.DST_SEL_Z = swizzle.z;
  reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3])->bits.DST_SEL_W = swizzle.w;
  reinterpret_cast<SQ_IMG_RSRC_WORD5*>(&mipmap_array.srd[5])->bits.MAX_MIP = mipmap_array.num_levels - 1;

  if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
      mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D) {
    reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3])->bits.TYPE =
        ImageLut().MapGeometry(mipmap_array.desc.geometry);
  }

  // Looks like this is only used for CPU copies.
  mipmap_array.row_pitch = 0;
  mipmap_array.slice_pitch = 0;

  // Store mipmap-specific metadata
  mipmap_array.srd[8] = mipmap_array.desc.format.channel_type;
  mipmap_array.srd[9] = mipmap_array.desc.format.channel_order;
  mipmap_array.srd[10] = static_cast<uint32_t>(mipmap_array.desc.width);
  mipmap_array.srd[11] = mipmap_array.num_levels;

  // Allocate a zero-initialised per-level table (ADDR2 for Ai/GFX9).
  // NOTE(review): this block does not free a previous pMipInfo and nothing
  // visible here releases the new one - confirm who owns this allocation.
  ADDR2_MIP_INFO* mip_info_storage = new ADDR2_MIP_INFO[mipmap_array.num_levels]();

  // Extract per-level information from mip_offsets array
  for (uint32_t level = 0; level < mipmap_array.num_levels; level++) {
    // mip_offsets contains offset bits [39:8], shift left by 8 to get actual byte offset
    mip_info_storage[level].offset = static_cast<uint64_t>(desc_ai->mip_offsets[level]) << 8;

    // Calculate dimensions for this level (halve at each level, never below 1)
    mip_info_storage[level].pitch = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.width >> level));
    mip_info_storage[level].height = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.height >> level));
    mip_info_storage[level].depth = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.depth >> level));
  }

  // Store pMipInfo in addr_output for later use by PopulateMipLevelSrd
  mipmap_array.addr_output.addr2.pMipInfo = mip_info_storage;

  // Total size = offset of the last level + size of the last level. Widen
  // the first factor so the whole product is evaluated in 64-bit; the
  // 32-bit product of pitch * height * depth can wrap for large surfaces
  // with few levels.
  const uint32_t last_level = mipmap_array.num_levels - 1;
  const uint64_t last_level_size = static_cast<uint64_t>(mip_info_storage[last_level].pitch) *
                                   mip_info_storage[last_level].height *
                                   mip_info_storage[last_level].depth *
                                   mipmap_prop.element_size;
  mipmap_array.size = mip_info_storage[last_level].offset + last_level_size;

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
void ImageManagerAi::printSRDDetailed(const uint32_t* srd) const {
|
||||
if (!srd) {
|
||||
printf("\n========== Image SRD (GFX9/AI) - Detailed ==========\n");
|
||||
printf("ERROR: No SRD data provided.\n");
|
||||
printf("===============================================\n\n");
|
||||
return;
|
||||
}
|
||||
|
||||
printf("\n========== Image SRD (GFX9/AI) - Detailed ==========\n");
|
||||
|
||||
// Print all 12 words with bit field annotations
|
||||
for (int i = 0; i < 12; i++) {
|
||||
printf("WORD %d: 0x%08x ", i, srd[i]);
|
||||
|
||||
// Binary representation
|
||||
printf("(");
|
||||
for (int bit = 31; bit >= 0; bit--) {
|
||||
printf("%d", (srd[i] >> bit) & 1);
|
||||
if (bit % 4 == 0 && bit != 0) printf("_");
|
||||
}
|
||||
printf(")\n");
|
||||
}
|
||||
|
||||
// WORD 0: BASE_ADDRESS (bits 39:8)
|
||||
sq_img_rsrc_word0_u word0;
|
||||
word0.val = srd[0];
|
||||
printf("\nWORD 0: BASE_ADDRESS (bits 39:8) = 0x%08x\n", word0.f.base_address);
|
||||
|
||||
// WORD 1: Contains BASE_ADDRESS_HI, MIN_LOD, DATA_FORMAT, NUM_FORMAT
|
||||
sq_img_rsrc_word1_u word1;
|
||||
word1.val = srd[1];
|
||||
printf("WORD 1: BASE_ADDRESS_HI = 0x%02x\n", word1.f.base_address_hi);
|
||||
printf(" MIN_LOD = %u\n", word1.f.min_lod);
|
||||
printf(" DATA_FORMAT = %u\n", word1.f.data_format);
|
||||
printf(" NUM_FORMAT = %u\n", word1.f.num_format);
|
||||
|
||||
// Calculate full address (GFX9 uses 40-bit shifted by 8)
|
||||
uint64_t base_addr = ((uint64_t)word1.f.base_address_hi << 32) | ((uint64_t)word0.f.base_address << 8);
|
||||
printf(" → Full Base Address = 0x%016lx\n", base_addr);
|
||||
|
||||
// WORD 2: WIDTH, HEIGHT, PERF_MOD
|
||||
sq_img_rsrc_word2_u word2;
|
||||
word2.val = srd[2];
|
||||
printf("WORD 2: WIDTH = %u (actual: %u)\n", word2.f.width, word2.f.width + 1);
|
||||
printf(" HEIGHT = %u (actual: %u)\n", word2.f.height, word2.f.height + 1);
|
||||
printf(" PERF_MOD = %u\n", word2.f.perf_mod);
|
||||
|
||||
// WORD 3: Channel selectors, SW_MODE, BASE_LEVEL, LAST_LEVEL, TYPE
|
||||
sq_img_rsrc_word3_u word3;
|
||||
word3.val = srd[3];
|
||||
printf("WORD 3: DST_SEL_X = %u ", word3.f.dst_sel_x);
|
||||
printChannelSelect(word3.f.dst_sel_x);
|
||||
printf(" DST_SEL_Y = %u ", word3.f.dst_sel_y);
|
||||
printChannelSelect(word3.f.dst_sel_y);
|
||||
printf(" DST_SEL_Z = %u ", word3.f.dst_sel_z);
|
||||
printChannelSelect(word3.f.dst_sel_z);
|
||||
printf(" DST_SEL_W = %u ", word3.f.dst_sel_w);
|
||||
printChannelSelect(word3.f.dst_sel_w);
|
||||
printf(" BASE_LEVEL = %u ◄──── Current base level\n", word3.f.base_level);
|
||||
printf(" LAST_LEVEL = %u ◄──── Current last level\n", word3.f.last_level);
|
||||
printf(" SW_MODE = %u ", word3.f.sw_mode);
|
||||
printSwizzleMode(word3.f.sw_mode);
|
||||
printf(" TYPE = %u ", word3.f.type);
|
||||
printResourceType(word3.f.type);
|
||||
|
||||
// WORD 4: DEPTH, PITCH, BC_SWIZZLE
|
||||
sq_img_rsrc_word4_u word4;
|
||||
word4.val = srd[4];
|
||||
printf("WORD 4: DEPTH = %u\n", word4.f.depth);
|
||||
printf(" PITCH = %u (actual: %u)\n", word4.f.pitch, word4.f.pitch + 1);
|
||||
printf(" BC_SWIZZLE = %u\n", word4.f.bc_swizzle);
|
||||
|
||||
// Calculate effective depth based on geometry
|
||||
uint32_t type = word3.f.type;
|
||||
if (type == 10) { // 3D
|
||||
printf(" → 3D Depth = %u (actual: %u)\n", word4.f.depth, word4.f.depth + 1);
|
||||
} else if (type == 13 || type == 12) { // Arrays
|
||||
printf(" → Array Size = %u (actual: %u)\n", word4.f.depth, word4.f.depth + 1);
|
||||
}
|
||||
|
||||
// WORD 5-7: Usually zero for basic images, but may contain metadata addresses
|
||||
printf("WORD 5: META_DATA_ADDRESS_HI = 0x%08x\n", srd[5]);
|
||||
printf("WORD 6: Reserved = 0x%08x\n", srd[6]);
|
||||
printf("WORD 7: META_DATA_ADDRESS = 0x%08x\n", srd[7]);
|
||||
|
||||
// Additional mipmap information
|
||||
printf("WORD 8: CHANNEL_TYPE = 0x%08x\n", srd[8]);
|
||||
printf("WORD 9: CHANNEL_ORDER = 0x%08x\n", srd[9]);
|
||||
printf("WORD 10: WIDTH_ORIGINAL = 0x%08x\n", srd[10]);
|
||||
printf("WORD 11: NUM_LEVELS = 0x%08x\n", srd[11]);
|
||||
|
||||
// Mipmap analysis
|
||||
if (word3.f.last_level > word3.f.base_level || word3.f.last_level > 0) {
|
||||
printf("\nMIPMAP ANALYSIS:\n");
|
||||
printf(" Total Levels = %u\n", srd[11]);
|
||||
printf(" Active Range = [%u, %u]\n", word3.f.base_level, word3.f.last_level);
|
||||
if (word3.f.base_level == word3.f.last_level) {
|
||||
printf(" Mode = SINGLE LEVEL VIEW ◄──── Mip level view\n");
|
||||
uint32_t level = word3.f.base_level;
|
||||
uint32_t level_width = std::max(1u, static_cast<uint32_t>((word2.f.width + 1) >> level));
|
||||
uint32_t level_height = std::max(1u, static_cast<uint32_t>((word2.f.height + 1) >> level));
|
||||
printf(" Effective Dimensions = %ux%u (level %u)\n", level_width, level_height, level);
|
||||
} else {
|
||||
printf(" Mode = FULL MIPMAP CHAIN\n");
|
||||
}
|
||||
}
|
||||
printf("===============================================\n\n");
|
||||
}
|
||||
|
||||
/// @brief Print the symbolic name of a DST_SEL_* channel selector, followed by a newline.
/// @param sel Raw selector value; values without a known name print "(UNKNOWN)".
void ImageManagerAi::printChannelSelect(uint32_t sel) const {
  // Indexed by selector value; gaps (2 and 3) have no name and fall back to UNKNOWN.
  static const char* const kSelectorNames[] = {
      "(SEL_0)\n",    // 0
      "(SEL_1)\n",    // 1
      nullptr,        // 2
      nullptr,        // 3
      "(SEL_X/R)\n",  // 4
      "(SEL_Y/G)\n",  // 5
      "(SEL_Z/B)\n",  // 6
      "(SEL_W/A)\n",  // 7
  };
  constexpr uint32_t kNameCount = sizeof(kSelectorNames) / sizeof(kSelectorNames[0]);

  const char* name = (sel < kNameCount) ? kSelectorNames[sel] : nullptr;
  printf("%s", name != nullptr ? name : "(UNKNOWN)\n");
}
|
||||
|
||||
/// @brief Print the symbolic name of an SRD TYPE field value, followed by a newline.
/// @param type Raw TYPE value; values outside 8..15 print "(UNKNOWN=<type>)".
void ImageManagerAi::printResourceType(uint32_t type) const {
  constexpr uint32_t kFirstKnownType = 8;
  // Names for TYPE values 8..15, in order.
  static const char* const kTypeNames[] = {
      "(1D)\n",           "(2D)\n",       "(3D)\n",      "(CUBE)\n",
      "(1D_ARRAY/1DB)\n", "(2D_ARRAY)\n", "(2D_MSAA)\n", "(2D_MSAA_ARRAY)\n",
  };
  constexpr uint32_t kKnownTypeCount = sizeof(kTypeNames) / sizeof(kTypeNames[0]);

  if (type >= kFirstKnownType && type - kFirstKnownType < kKnownTypeCount) {
    printf("%s", kTypeNames[type - kFirstKnownType]);
    return;
  }
  printf("(UNKNOWN=%u)\n", type);
}
|
||||
|
||||
/// @brief Print a coarse, bucketed label for an SRD SW_MODE value, followed by a newline.
///
/// Buckets: 0 is LINEAR, 1-4 SW_256B, 5-8 SW_4KB, 9-12 SW_64KB, 13-21 SW_VAR, and
/// anything else UNKNOWN. The numeric suffix is the offset inside the bucket.
///
/// NOTE(review): these bucket boundaries may not line up with AddrLib's swizzle-mode
/// numbering; confirm against the AddrLib enum before relying on the labels.
///
/// @param sw_mode Raw SW_MODE field value.
void ImageManagerAi::printSwizzleMode(uint32_t sw_mode) const {
  if (sw_mode == 0) {
    printf("(LINEAR)\n");
    return;
  }
  if (sw_mode <= 4) {
    printf("(SW_256B_%u)\n", sw_mode);
    return;
  }
  if (sw_mode <= 8) {
    printf("(SW_4KB_%u)\n", sw_mode - 4);
    return;
  }
  if (sw_mode <= 12) {
    printf("(SW_64KB_%u)\n", sw_mode - 8);
    return;
  }
  if (sw_mode <= 21) {
    printf("(SW_VAR_%u)\n", sw_mode - 12);
    return;
  }
  printf("(UNKNOWN=%u)\n", sw_mode);
}
|
||||
|
||||
/// @brief Restrict a mipmap SRD to a single level by rewriting BASE_LEVEL/LAST_LEVEL.
///
/// Only WORD3 of @p level_view is modified. The rest of the descriptor is expected to
/// have been copied from the parent beforehand (per the original in-code note; the copy
/// itself is not performed here).
///
/// @param level_view   Descriptor to modify in place.
/// @param mipmap_array Parent mipmapped array; used to validate @p mip_level.
/// @param mip_level    Level to expose; must be below mipmap_array.num_levels.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT when @p mip_level is
///         not a level of @p mipmap_array.
hsa_status_t ImageManagerAi::PopulateMipLevelSrd(
    MipmappedArray& level_view,
    const MipmappedArray& mipmap_array,
    uint32_t mip_level) const {
  // Reject levels the parent does not have instead of silently storing them in the
  // BASE_LEVEL/LAST_LEVEL bit-fields, which would make the view address the wrong level.
  if (mip_level >= mipmap_array.num_levels) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  uint32_t* srd_words = reinterpret_cast<uint32_t*>(level_view.srd);

  // SRD WORD3 has BASE_LEVEL and LAST_LEVEL fields
  sq_img_rsrc_word3_u* word3 = reinterpret_cast<sq_img_rsrc_word3_u*>(&srd_words[3]);

  // Set both to same value - hardware samples only this level
  word3->f.base_level = mip_level;
  word3->f.last_level = mip_level;

  debug_print("Set SRD mip selection: BASE_LEVEL=%u, LAST_LEVEL=%u", mip_level, mip_level);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
} // namespace image
|
||||
} // namespace rocr
|
||||
|
||||
@@ -59,6 +59,7 @@ class ImageManagerAi : public ImageManagerKv {
|
||||
virtual hsa_status_t CalculateImageSizeAndAlignment(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
hsa_ext_image_data_layout_t image_data_layout,
|
||||
uint32_t num_mipmap_levels,
|
||||
size_t image_data_row_pitch, size_t image_data_slice_pitch,
|
||||
hsa_ext_image_data_info_t& image_info) const;
|
||||
|
||||
@@ -76,13 +77,29 @@ class ImageManagerAi : public ImageManagerKv {
|
||||
/// @brief Fill sampler structure with device specific sampler object.
|
||||
virtual hsa_status_t PopulateSamplerSrd(Sampler& sampler) const;
|
||||
|
||||
/// @brief Fill mipmap structure with device specific mipmapped array object.
|
||||
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const;
|
||||
|
||||
/// @brief Fill mipmap structure with pre-computed AMD metadata descriptor.
|
||||
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const;
|
||||
|
||||
/// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields
|
||||
virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view,
|
||||
const MipmappedArray& mipmap_array, uint32_t mip_level) const;
|
||||
|
||||
virtual void printSRDDetailed(const uint32_t* srd) const;
|
||||
virtual void printChannelSelect(uint32_t sel) const;
|
||||
virtual void printResourceType(uint32_t type) const;
|
||||
virtual void printSwizzleMode(uint32_t sw_mode) const;
|
||||
|
||||
protected:
|
||||
uint32_t GetAddrlibSurfaceInfoAi(hsa_agent_t component,
|
||||
const hsa_ext_image_descriptor_t& desc,
|
||||
Image::TileMode tileMode,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const;
|
||||
const hsa_ext_image_descriptor_t& desc,
|
||||
uint32_t num_mipmap_levels,
|
||||
Image::TileMode tileMode,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const;
|
||||
|
||||
bool IsLocalMemory(const void* address) const;
|
||||
|
||||
|
||||
@@ -215,6 +215,7 @@ ImageManagerGfx11::~ImageManagerGfx11() {}
|
||||
hsa_status_t ImageManagerGfx11::CalculateImageSizeAndAlignment(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
hsa_ext_image_data_layout_t image_data_layout,
|
||||
uint32_t num_mipmap_levels,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
hsa_ext_image_data_info_t& image_info) const {
|
||||
@@ -230,9 +231,8 @@ hsa_status_t ImageManagerGfx11::CalculateImageSizeAndAlignment(
|
||||
desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)?
|
||||
Image::TileMode::TILED : Image::TileMode::LINEAR;
|
||||
}
|
||||
if (GetAddrlibSurfaceInfoNv(component, desc, tileMode,
|
||||
image_data_row_pitch, image_data_slice_pitch, out) ==
|
||||
(uint32_t)(-1)) {
|
||||
if (GetAddrlibSurfaceInfoNv(component, desc, num_mipmap_levels, tileMode,
|
||||
image_data_row_pitch, image_data_slice_pitch, out) == (uint32_t)(-1)) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -332,7 +332,7 @@ hsa_status_t ImageManagerGfx11::PopulateImageSrd(Image& image,
|
||||
reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3])->bits.TYPE =
|
||||
ImageLut().MapGeometry(image.desc.geometry);
|
||||
}
|
||||
|
||||
|
||||
// Imported metadata holds the offset to metadata, add the image base address.
|
||||
uintptr_t meta = uintptr_t(((SQ_IMG_RSRC_WORD7*)(&image.srd[7]))->bits.META_DATA_ADDRESS_HI) << 16;
|
||||
meta |= uintptr_t(((SQ_IMG_RSRC_WORD6*)(&image.srd[6]))->bits.META_DATA_ADDRESS) << 8;
|
||||
@@ -460,9 +460,8 @@ hsa_status_t ImageManagerGfx11::PopulateImageSrd(Image& image) const {
|
||||
|
||||
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
|
||||
|
||||
uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(
|
||||
image.component, image.desc, image.tile_mode,
|
||||
image.row_pitch, image.slice_pitch, out);
|
||||
uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(image.component, image.desc,
|
||||
1, image.tile_mode, image.row_pitch, image.slice_pitch, out);
|
||||
if (swizzleMode == (uint32_t)(-1)) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
@@ -619,6 +618,7 @@ hsa_status_t ImageManagerGfx11::PopulateSamplerSrd(Sampler& sampler) const {
|
||||
|
||||
uint32_t ImageManagerGfx11::GetAddrlibSurfaceInfoNv(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
uint32_t num_mipmap_levels,
|
||||
Image::TileMode tileMode,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
@@ -641,7 +641,7 @@ uint32_t ImageManagerGfx11::GetAddrlibSurfaceInfoNv(
|
||||
in.width = width;
|
||||
in.height = height;
|
||||
in.numSlices = num_slice;
|
||||
in.pitchInElement = image_data_row_pitch / image_prop.element_size;
|
||||
in.numMipLevels = num_mipmap_levels;
|
||||
|
||||
switch (desc.geometry) {
|
||||
case HSA_EXT_IMAGE_GEOMETRY_1D:
|
||||
@@ -810,5 +810,410 @@ hsa_status_t ImageManagerGfx11::FillImage(const Image& image, const void* patter
|
||||
return status;
|
||||
}
|
||||
|
||||
/// @brief Build the GFX11 resource descriptor (SRD) for a mipmapped array.
///
/// For 1DB geometry a four-word buffer descriptor is written; every other geometry gets
/// an eight-word image descriptor whose layout comes from GetAddrlibSurfaceInfoNv. In
/// both cases srd[8..11] receive channel type, channel order, width and level count,
/// and row_pitch / slice_pitch are updated. The image path also fills
/// addr_output.addr2 and size.
///
/// @param mipmap Array to populate; desc, data, num_levels, tile_mode, component and
///               (image path) the incoming row_pitch / slice_pitch are read.
/// @return HSA_STATUS_SUCCESS on success;
///         HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the format/geometry pair
///         is not supported; HSA_STATUS_ERROR_INVALID_ARGUMENT when num_levels is 0;
///         HSA_STATUS_ERROR when the surface-info query fails.
hsa_status_t ImageManagerGfx11::PopulateMipmapSrd(MipmappedArray& mipmap) const {
  ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap.desc.format, mipmap.desc.geometry);
  assert(mipmap_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(mipmap_prop.element_size != 0);
  assert(mipmap.num_levels >= 1);

  // The asserts vanish in release builds, so repeat the checks as runtime errors (the
  // metadata overload reports unsupported formats the same way). A zero level count
  // would otherwise wrap MAX_MIP / LAST_LEVEL below.
  if (mipmap_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED ||
      mipmap_prop.element_size == 0) {
    return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  }
  if (mipmap.num_levels < 1) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Addresses inside local memory are rebased against local_memory_base_address_.
  const void* mipmap_data_addr = mipmap.data;
  if (IsLocalMemory(mipmap.data)) {
    mipmap_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(mipmap.data) - local_memory_base_address_);
  }

  const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);

  if (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    // Buffer descriptor: four words.
    SQ_BUF_RSRC_WORD0 word0;
    SQ_BUF_RSRC_WORD1 word1;
    SQ_BUF_RSRC_WORD2 word2;
    SQ_BUF_RSRC_WORD3 word3;

    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow32(mipmap_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh32(mipmap_data_addr);
    word1.f.STRIDE = mipmap_prop.element_size;
    word1.f.SWIZZLE_ENABLE = 0;

    // Zero first so no indeterminate bits can reach the descriptor.
    word2.val = 0;
    word2.f.NUM_RECORDS = mipmap.desc.width * mipmap_prop.element_size;

    word3.val = 0;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
    word3.f.INDEX_STRIDE = mipmap_prop.element_size;
    word3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry);

    mipmap.srd[0] = word0.val;
    mipmap.srd[1] = word1.val;
    mipmap.srd[2] = word2.val;
    mipmap.srd[3] = word3.val;

    mipmap.row_pitch = mipmap.desc.width * mipmap_prop.element_size;
    mipmap.slice_pitch = mipmap.row_pitch;
  } else {
    // Image descriptor: eight words.
    SQ_IMG_RSRC_WORD0 word0;
    SQ_IMG_RSRC_WORD1 word1;
    SQ_IMG_RSRC_WORD2 word2;
    SQ_IMG_RSRC_WORD3 word3;
    SQ_IMG_RSRC_WORD4 word4;

    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};

    // pMipInfo not needed - set to nullptr and AddrLib will ignore it
    out.pMipInfo = nullptr;

    const uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(
        mipmap.component, mipmap.desc, mipmap.num_levels,
        mipmap.tile_mode, mipmap.row_pitch, mipmap.slice_pitch, out);
    if (swizzleMode == (uint32_t)(-1)) {
      return HSA_STATUS_ERROR;
    }
    mipmap.addr_output.addr2 = out;
    mipmap.size = out.surfSize;

    assert((out.bpp / 8) == mipmap_prop.element_size);

    const size_t row_pitch_size = out.pitch * mipmap_prop.element_size;

    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr);
    word1.f.MAX_MIP = mipmap.num_levels - 1;
    word1.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
    // (width - 1) is split across two words: the lowest 2 bits live in WORD1 ...
    word1.f.WIDTH = BitSelect<0, 1>(mipmap.desc.width - 1);

    word2.val = 0;
    // ... and bits 13:2 live in WORD2.
    word2.f.WIDTH_HI = BitSelect<2, 13>(mipmap.desc.width - 1);
    word2.f.HEIGHT = mipmap.desc.height ? mipmap.desc.height - 1 : 0;

    word3.val = 0;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.SW_MODE = swizzleMode;
    // Expose the full chain: levels [0, num_levels - 1].
    word3.f.BASE_LEVEL = 0;
    word3.f.LAST_LEVEL = mipmap.num_levels - 1;
    word3.f.BC_SWIZZLE = GetBcSwizzle(swizzle);
    word3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry);

    const bool mipmap_array =
        (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
         mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
         mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool mipmap_3d = (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    word4.val = 0;
    // DEPTH holds (array_size - 1) for arrays, (depth - 1) for 3D, otherwise 0.
    word4.f.DEPTH =
        (mipmap_array)  // Doesn't hurt but isn't array_size already >0?
            ? std::max(mipmap.desc.array_size, static_cast<size_t>(1)) - 1
            : (mipmap_3d) ? mipmap.desc.depth - 1 : 0;

    // For 1d, 2d and 2d-msaa in gfx11 this is pitch-1
    if (!mipmap_array && !mipmap_3d) {
      word4.f.PITCH = out.pitch - 1;
    }

    mipmap.srd[0] = word0.val;
    mipmap.srd[1] = word1.val;
    mipmap.srd[2] = word2.val;
    mipmap.srd[3] = word3.val;
    mipmap.srd[4] = word4.val;
    // Words 5-7 carry no information for this descriptor and are cleared.
    mipmap.srd[5] = 0;
    mipmap.srd[6] = 0;
    mipmap.srd[7] = 0;

    mipmap.row_pitch = row_pitch_size;
    mipmap.slice_pitch = out.sliceSize;
  }

  // Auxiliary words stored after the hardware descriptor.
  mipmap.srd[8] = mipmap.desc.format.channel_type;
  mipmap.srd[9] = mipmap.desc.format.channel_order;
  mipmap.srd[10] = static_cast<uint32_t>(mipmap.desc.width);

  // Mipmap-specific auxiliary fields
  mipmap.srd[11] = mipmap.num_levels;

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
/// @brief Build the GFX11 SRD for a mipmapped array from a pre-computed metadata blob.
///
/// Copies SRD words 0-7 from @p desc (reinterpreted as metadata_amd_gfx11_t), then
/// overrides the base address, FORMAT, MAX_MIP and channel selectors (and TYPE for
/// 1D / 1DA). srd[8..11] receive channel type, channel order, width and level count.
/// A per-level ADDR2_MIP_INFO table is allocated, filled from desc->mip_offsets and
/// stored in addr_output.addr2.pMipInfo; size is derived from the last level.
///
/// NOTE(review): the table is allocated with new[] and handed over as a raw pointer.
/// No release is visible in this function; confirm who frees it and what happens to a
/// pMipInfo that was already set.
/// NOTE(review): mip_offsets is indexed up to num_levels - 1; its capacity is not
/// visible here, so confirm num_levels cannot exceed it.
///
/// @param mipmap_array Array to populate.
/// @param desc         Metadata descriptor; must be non-null.
/// @return HSA_STATUS_SUCCESS on success; HSA_STATUS_ERROR_INVALID_ARGUMENT when
///         @p desc is null or num_levels is 0;
///         HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the format is unsupported
///         or its element size disagrees with the hardware pixel size.
hsa_status_t ImageManagerGfx11::PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const {
  // Validate everything up front so a failure leaves mipmap_array untouched.
  if (desc == nullptr || mipmap_array.num_levels < 1) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  const metadata_amd_gfx11_t* desc_gfx11 = reinterpret_cast<const metadata_amd_gfx11_t*>(desc);

  ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap_array.desc.format, mipmap_array.desc.geometry);
  if (mipmap_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED || mipmap_prop.element_size == 0) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  const uint32_t hwPixelSize = ImageLut().GetPixelSize(mipmap_prop.data_format, mipmap_prop.data_type);
  if (mipmap_prop.element_size != hwPixelSize) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  const Swizzle swizzle = ImageLut().MapSwizzle(mipmap_array.desc.format.channel_order);

  // Addresses inside local memory are rebased against local_memory_base_address_.
  const void* mipmap_data_addr = mipmap_array.data;
  if (IsLocalMemory(mipmap_array.data)) {
    mipmap_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(mipmap_array.data) - local_memory_base_address_);
  }

  // Copy the pre-computed SRD words 0-7 from metadata
  mipmap_array.srd[0] = desc_gfx11->word0.u32All;
  mipmap_array.srd[1] = desc_gfx11->word1.u32All;
  mipmap_array.srd[2] = desc_gfx11->word2.u32All;
  mipmap_array.srd[3] = desc_gfx11->word3.u32All;
  mipmap_array.srd[4] = desc_gfx11->word4.u32All;
  mipmap_array.srd[5] = desc_gfx11->word5.u32All;
  mipmap_array.srd[6] = desc_gfx11->word6.u32All;
  mipmap_array.srd[7] = desc_gfx11->word7.u32All;

  // Override the fields that depend on this allocation rather than on the metadata.
  SQ_IMG_RSRC_WORD0* word0 = reinterpret_cast<SQ_IMG_RSRC_WORD0*>(&mipmap_array.srd[0]);
  SQ_IMG_RSRC_WORD1* word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&mipmap_array.srd[1]);
  SQ_IMG_RSRC_WORD3* word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3]);

  word0->bits.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr);
  word1->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr);
  word1->bits.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
  word1->bits.MAX_MIP = mipmap_array.num_levels - 1;
  word3->bits.DST_SEL_X = swizzle.x;
  word3->bits.DST_SEL_Y = swizzle.y;
  word3->bits.DST_SEL_Z = swizzle.z;
  word3->bits.DST_SEL_W = swizzle.w;

  if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
      mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D) {
    word3->bits.TYPE = ImageLut().MapGeometry(mipmap_array.desc.geometry);
  }

  // Looks like this is only used for CPU copies.
  mipmap_array.row_pitch = 0;
  mipmap_array.slice_pitch = 0;

  // Store mipmap-specific metadata
  mipmap_array.srd[8] = mipmap_array.desc.format.channel_type;
  mipmap_array.srd[9] = mipmap_array.desc.format.channel_order;
  mipmap_array.srd[10] = static_cast<uint32_t>(mipmap_array.desc.width);
  mipmap_array.srd[11] = mipmap_array.num_levels;

  // Allocate a zero-initialised per-level table (ADDR2 for GFX11).
  ADDR2_MIP_INFO* mip_info_storage = new ADDR2_MIP_INFO[mipmap_array.num_levels]();

  for (uint32_t level = 0; level < mipmap_array.num_levels; level++) {
    // mip_offsets contains offset bits [39:8], shift left by 8 to get actual byte offset
    mip_info_storage[level].offset = static_cast<uint64_t>(desc_gfx11->mip_offsets[level]) << 8;

    // Each level halves the base dimensions, clamped to one texel.
    mip_info_storage[level].pitch = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.width >> level));
    mip_info_storage[level].height = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.height >> level));
    mip_info_storage[level].depth = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.depth >> level));
  }

  // Store pMipInfo in addr_output for later use
  mipmap_array.addr_output.addr2.pMipInfo = mip_info_storage;

  // Total size = offset of the last level plus that level's footprint. Widen before
  // multiplying so the product cannot wrap in 32-bit arithmetic.
  const ADDR2_MIP_INFO& last = mip_info_storage[mipmap_array.num_levels - 1];
  const uint64_t last_level_size = static_cast<uint64_t>(last.pitch) * last.height *
                                   last.depth * mipmap_prop.element_size;
  mipmap_array.size = last.offset + last_level_size;

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
void ImageManagerGfx11::printSRDDetailed(const uint32_t* srd) const {
|
||||
if (!srd) {
|
||||
printf("\n========== Image SRD (GFX11) - Detailed ==========\n");
|
||||
printf("ERROR: No SRD data provided.\n");
|
||||
printf("===============================================\n\n");
|
||||
return;
|
||||
}
|
||||
|
||||
printf("\n========== Image SRD (GFX11) - Detailed ==========\n");
|
||||
|
||||
// Print all 12 words with bit field annotations
|
||||
for (int i = 0; i < 12; i++) {
|
||||
printf("WORD %d: 0x%08x ", i, srd[i]);
|
||||
|
||||
// Binary representation
|
||||
printf("(");
|
||||
for (int bit = 31; bit >= 0; bit--) {
|
||||
printf("%d", (srd[i] >> bit) & 1);
|
||||
if (bit % 4 == 0 && bit != 0) printf("_");
|
||||
}
|
||||
printf(")\n");
|
||||
}
|
||||
|
||||
// WORD 0: BASE_ADDRESS (bits 39:8)
|
||||
SQ_IMG_RSRC_WORD0 word0;
|
||||
word0.val = srd[0];
|
||||
printf("\nWORD 0: BASE_ADDRESS (bits 39:8) = 0x%08x\n", word0.f.BASE_ADDRESS);
|
||||
|
||||
// WORD 1: Contains BASE_ADDRESS_HI, FORMAT, WIDTH (bits 1:0)
|
||||
SQ_IMG_RSRC_WORD1 word1;
|
||||
word1.val = srd[1];
|
||||
printf("WORD 1: BASE_ADDRESS_HI = 0x%02x\n", word1.f.BASE_ADDRESS_HI);
|
||||
printf(" FORMAT = %u\n", word1.f.FORMAT);
|
||||
printf(" WIDTH (bits 1:0) = %u\n", word1.f.WIDTH);
|
||||
|
||||
// Calculate full address (GFX11 uses 40-bit shifted by 8)
|
||||
uint64_t base_addr = ((uint64_t)word1.f.BASE_ADDRESS_HI << 32) | ((uint64_t)word0.f.BASE_ADDRESS << 8);
|
||||
printf(" → Full Base Address = 0x%016lx\n", base_addr);
|
||||
|
||||
// WORD 2: WIDTH_HI, HEIGHT
|
||||
SQ_IMG_RSRC_WORD2 word2;
|
||||
word2.val = srd[2];
|
||||
printf("WORD 2: WIDTH_HI (bits 13:2) = %u\n", word2.f.WIDTH_HI);
|
||||
printf(" HEIGHT = %u\n", word2.f.HEIGHT);
|
||||
|
||||
// Calculate full width (GFX11 uses 14 bits split: 2 in WORD1 + 12 in WORD2)
|
||||
uint32_t full_width = word1.f.WIDTH | (word2.f.WIDTH_HI << 2);
|
||||
printf(" → Full Width = %u (actual: %u)\n", full_width, full_width + 1);
|
||||
printf(" → Full Height = %u (actual: %u)\n", word2.f.HEIGHT, word2.f.HEIGHT + 1);
|
||||
|
||||
// WORD 3: Channel selectors, SW_MODE, BASE_LEVEL, LAST_LEVEL, BC_SWIZZLE, TYPE
|
||||
SQ_IMG_RSRC_WORD3 word3;
|
||||
word3.val = srd[3];
|
||||
printf("WORD 3: DST_SEL_X = %u ", word3.f.DST_SEL_X);
|
||||
printChannelSelect(word3.f.DST_SEL_X);
|
||||
printf(" DST_SEL_Y = %u ", word3.f.DST_SEL_Y);
|
||||
printChannelSelect(word3.f.DST_SEL_Y);
|
||||
printf(" DST_SEL_Z = %u ", word3.f.DST_SEL_Z);
|
||||
printChannelSelect(word3.f.DST_SEL_Z);
|
||||
printf(" DST_SEL_W = %u ", word3.f.DST_SEL_W);
|
||||
printChannelSelect(word3.f.DST_SEL_W);
|
||||
printf(" BASE_LEVEL = %u ◄──── Current base level\n", word3.f.BASE_LEVEL);
|
||||
printf(" LAST_LEVEL = %u ◄──── Current last level\n", word3.f.LAST_LEVEL);
|
||||
printf(" SW_MODE = %u ", word3.f.SW_MODE);
|
||||
printSwizzleMode(word3.f.SW_MODE);
|
||||
printf(" BC_SWIZZLE = %u\n", word3.f.BC_SWIZZLE);
|
||||
printf(" TYPE = %u ", word3.f.TYPE);
|
||||
printResourceType(word3.f.TYPE);
|
||||
|
||||
// WORD 4: DEPTH, PITCH
|
||||
SQ_IMG_RSRC_WORD4 word4;
|
||||
word4.val = srd[4];
|
||||
printf("WORD 4: DEPTH = %u\n", word4.f.DEPTH);
|
||||
printf(" PITCH = %u (actual: %u)\n", word4.f.PITCH, word4.f.PITCH + 1);
|
||||
|
||||
// Calculate effective depth based on geometry
|
||||
uint32_t type = word3.f.TYPE;
|
||||
if (type == 10) { // 3D
|
||||
printf(" → 3D Depth = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1);
|
||||
} else if (type == 13 || type == 12) { // Arrays
|
||||
printf(" → Array Size = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1);
|
||||
}
|
||||
|
||||
// WORD 5-7: Usually zero for basic images, but may contain metadata addresses
|
||||
printf("WORD 5: Reserved = 0x%08x\n", srd[5]);
|
||||
printf("WORD 6: META_DATA_ADDRESS = 0x%08x\n", srd[6]);
|
||||
printf("WORD 7: META_DATA_ADDRESS_HI = 0x%08x\n", srd[7]);
|
||||
|
||||
// Additional mipmap information
|
||||
printf("WORD 8: CHANNEL_TYPE = 0x%08x\n", srd[8]);
|
||||
printf("WORD 9: CHANNEL_ORDER = 0x%08x\n", srd[9]);
|
||||
printf("WORD 10: WIDTH_ORIGINAL = 0x%08x\n", srd[10]);
|
||||
printf("WORD 11: NUM_LEVELS = 0x%08x\n", srd[11]);
|
||||
|
||||
// Mipmap analysis
|
||||
if (word3.f.LAST_LEVEL > word3.f.BASE_LEVEL || word3.f.LAST_LEVEL > 0) {
|
||||
printf("\nMIPMAP ANALYSIS:\n");
|
||||
printf(" Total Levels = %u\n", srd[11]);
|
||||
printf(" Active Range = [%u, %u]\n", word3.f.BASE_LEVEL, word3.f.LAST_LEVEL);
|
||||
if (word3.f.BASE_LEVEL == word3.f.LAST_LEVEL) {
|
||||
printf(" Mode = SINGLE LEVEL VIEW ◄──── Mip level view\n");
|
||||
uint32_t level = word3.f.BASE_LEVEL;
|
||||
uint32_t level_width = std::max(1u, (full_width + 1) >> level);
|
||||
uint32_t level_height = std::max(1u, static_cast<uint32_t>((word2.f.HEIGHT + 1) >> level));
|
||||
printf(" Effective Dimensions = %ux%u (level %u)\n", level_width, level_height, level);
|
||||
} else {
|
||||
printf(" Mode = FULL MIPMAP CHAIN\n");
|
||||
}
|
||||
}
|
||||
printf("===============================================\n\n");
|
||||
}
|
||||
|
||||
/// @brief Print the symbolic name of a DST_SEL_* channel selector, followed by a newline.
/// @param sel Raw selector value; values without a known name print "(UNKNOWN)".
void ImageManagerGfx11::printChannelSelect(uint32_t sel) const {
  // Indexed by selector value; gaps (2 and 3) have no name and fall back to UNKNOWN.
  static const char* const kSelectorNames[] = {
      "(SEL_0)\n",    // 0
      "(SEL_1)\n",    // 1
      nullptr,        // 2
      nullptr,        // 3
      "(SEL_X/R)\n",  // 4
      "(SEL_Y/G)\n",  // 5
      "(SEL_Z/B)\n",  // 6
      "(SEL_W/A)\n",  // 7
  };
  constexpr uint32_t kNameCount = sizeof(kSelectorNames) / sizeof(kSelectorNames[0]);

  const char* name = (sel < kNameCount) ? kSelectorNames[sel] : nullptr;
  printf("%s", name != nullptr ? name : "(UNKNOWN)\n");
}
|
||||
|
||||
/// @brief Print the symbolic name of an SRD TYPE field value, followed by a newline.
/// @param type Raw TYPE value; values outside 8..15 print "(UNKNOWN=<type>)".
void ImageManagerGfx11::printResourceType(uint32_t type) const {
  constexpr uint32_t kFirstKnownType = 8;
  // Names for TYPE values 8..15, in order.
  static const char* const kTypeNames[] = {
      "(1D)\n",           "(2D)\n",       "(3D)\n",      "(CUBE)\n",
      "(1D_ARRAY/1DB)\n", "(2D_ARRAY)\n", "(2D_MSAA)\n", "(2D_MSAA_ARRAY)\n",
  };
  constexpr uint32_t kKnownTypeCount = sizeof(kTypeNames) / sizeof(kTypeNames[0]);

  if (type >= kFirstKnownType && type - kFirstKnownType < kKnownTypeCount) {
    printf("%s", kTypeNames[type - kFirstKnownType]);
    return;
  }
  printf("(UNKNOWN=%u)\n", type);
}
|
||||
|
||||
/// @brief Print a coarse, bucketed label for an SRD SW_MODE value, followed by a newline.
///
/// Buckets: 0 is LINEAR, 1-4 SW_256B, 5-8 SW_4KB, 9-12 SW_64KB, 13-21 SW_VAR, and
/// anything else UNKNOWN. The numeric suffix is the offset inside the bucket.
///
/// NOTE(review): these bucket boundaries may not line up with AddrLib's swizzle-mode
/// numbering; confirm against the AddrLib enum before relying on the labels.
///
/// @param sw_mode Raw SW_MODE field value.
void ImageManagerGfx11::printSwizzleMode(uint32_t sw_mode) const {
  if (sw_mode == 0) {
    printf("(LINEAR)\n");
    return;
  }
  if (sw_mode <= 4) {
    printf("(SW_256B_%u)\n", sw_mode);
    return;
  }
  if (sw_mode <= 8) {
    printf("(SW_4KB_%u)\n", sw_mode - 4);
    return;
  }
  if (sw_mode <= 12) {
    printf("(SW_64KB_%u)\n", sw_mode - 8);
    return;
  }
  if (sw_mode <= 21) {
    printf("(SW_VAR_%u)\n", sw_mode - 12);
    return;
  }
  printf("(UNKNOWN=%u)\n", sw_mode);
}
|
||||
|
||||
/// @brief Restrict a mipmap SRD to a single level by rewriting BASE_LEVEL/LAST_LEVEL.
///
/// Only WORD3 of @p level_view is modified. The rest of the descriptor is expected to
/// have been copied from the parent beforehand (per the original in-code note; the copy
/// itself is not performed here).
///
/// @param level_view   Descriptor to modify in place.
/// @param mipmap_array Parent mipmapped array; used to validate @p mip_level.
/// @param mip_level    Level to expose; must be below mipmap_array.num_levels.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT when @p mip_level is
///         not a level of @p mipmap_array.
hsa_status_t ImageManagerGfx11::PopulateMipLevelSrd(
    MipmappedArray& level_view,
    const MipmappedArray& mipmap_array,
    uint32_t mip_level) const {
  // Reject levels the parent does not have instead of silently storing them in the
  // BASE_LEVEL/LAST_LEVEL bit-fields, which would make the view address the wrong level.
  if (mip_level >= mipmap_array.num_levels) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  uint32_t* srd_words = reinterpret_cast<uint32_t*>(level_view.srd);

  // GFX11 SRD WORD3 has BASE_LEVEL and LAST_LEVEL fields
  SQ_IMG_RSRC_WORD3* word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&srd_words[3]);

  // Set both to same value - hardware samples only this level
  word3->f.BASE_LEVEL = mip_level;
  word3->f.LAST_LEVEL = mip_level;

  debug_print("Set SRD mip selection: BASE_LEVEL=%u, LAST_LEVEL=%u", mip_level, mip_level);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
} // namespace image
|
||||
} // namespace rocr
|
||||
|
||||
@@ -60,6 +60,7 @@ class ImageManagerGfx11 : public ImageManagerKv {
|
||||
virtual hsa_status_t CalculateImageSizeAndAlignment(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
hsa_ext_image_data_layout_t image_data_layout,
|
||||
uint32_t num_mipmap_levels,
|
||||
size_t image_data_row_pitch, size_t image_data_slice_pitch,
|
||||
hsa_ext_image_data_info_t& image_info) const;
|
||||
|
||||
@@ -80,9 +81,26 @@ class ImageManagerGfx11 : public ImageManagerKv {
|
||||
/// @brief Fill image backing storage using agent copy.
|
||||
virtual hsa_status_t FillImage(const Image& image, const void* pattern,
|
||||
const hsa_ext_image_region_t& region);
|
||||
|
||||
/// @brief Fill mipmap structure with device specific mipmapped array object.
|
||||
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const;
|
||||
|
||||
/// @brief Fill mipmap structure with pre-computed AMD metadata descriptor.
|
||||
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const;
|
||||
|
||||
/// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields
|
||||
virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view,
|
||||
const MipmappedArray& mipmap_array, uint32_t mip_level) const;
|
||||
|
||||
virtual void printSRDDetailed(const uint32_t* srd) const;
|
||||
virtual void printChannelSelect(uint32_t sel) const;
|
||||
virtual void printResourceType(uint32_t type) const;
|
||||
virtual void printSwizzleMode(uint32_t sw_mode) const;
|
||||
|
||||
protected:
|
||||
uint32_t GetAddrlibSurfaceInfoNv(hsa_agent_t component,
|
||||
const hsa_ext_image_descriptor_t& desc,
|
||||
uint32_t num_mipmap_levels,
|
||||
Image::TileMode tileMode,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
|
||||
@@ -216,14 +216,24 @@ ImageManagerGfx12::~ImageManagerGfx12() {}
|
||||
hsa_status_t ImageManagerGfx12::CalculateImageSizeAndAlignment(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
hsa_ext_image_data_layout_t image_data_layout,
|
||||
uint32_t num_mipmap_levels,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
hsa_ext_image_data_info_t& image_info) const {
|
||||
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
|
||||
hsa_profile_t profile;
|
||||
|
||||
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
|
||||
|
||||
// Allocate persistent memory for mip info on the heap
|
||||
ADDR3_MIP_INFO* mip_info_storage = new ADDR3_MIP_INFO[num_mipmap_levels];
|
||||
memset(mip_info_storage, 0, sizeof(ADDR3_MIP_INFO) * num_mipmap_levels);
|
||||
out.pMipInfo = mip_info_storage;
|
||||
|
||||
hsa_profile_t profile;
|
||||
hsa_status_t status = HSA::hsa_agent_get_info(component, HSA_AGENT_INFO_PROFILE, &profile);
|
||||
if (status != HSA_STATUS_SUCCESS) return status;
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
delete[] mip_info_storage;
|
||||
return status;
|
||||
}
|
||||
|
||||
Image::TileMode tileMode = Image::TileMode::LINEAR;
|
||||
if (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE) {
|
||||
@@ -231,9 +241,9 @@ hsa_status_t ImageManagerGfx12::CalculateImageSizeAndAlignment(
|
||||
desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)?
|
||||
Image::TileMode::TILED : Image::TileMode::LINEAR;
|
||||
}
|
||||
if (GetAddrlibSurfaceInfoNv(component, desc, tileMode,
|
||||
image_data_row_pitch, image_data_slice_pitch, out) ==
|
||||
(uint32_t)(-1)) {
|
||||
if (GetAddrlibSurfaceInfoNv(component, desc, num_mipmap_levels, tileMode,
|
||||
image_data_row_pitch, image_data_slice_pitch, out) == (uint32_t)(-1)) {
|
||||
delete[] mip_info_storage;
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -243,6 +253,7 @@ hsa_status_t ImageManagerGfx12::CalculateImageSizeAndAlignment(
|
||||
image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR &&
|
||||
((image_data_row_pitch && (rowPitch != image_data_row_pitch)) ||
|
||||
(image_data_slice_pitch && (slicePitch != image_data_slice_pitch)))) {
|
||||
delete[] mip_info_storage;
|
||||
return static_cast<hsa_status_t>(
|
||||
HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED);
|
||||
}
|
||||
@@ -252,6 +263,9 @@ hsa_status_t ImageManagerGfx12::CalculateImageSizeAndAlignment(
|
||||
image_info.alignment = out.baseAlign;
|
||||
assert(image_info.alignment != 0);
|
||||
|
||||
// Clean up temporary mip info storage
|
||||
delete[] mip_info_storage;
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -430,7 +444,6 @@ hsa_status_t ImageManagerGfx12::PopulateImageSrd(Image& image) const {
|
||||
word1.val = 0;
|
||||
word1.f.BASE_ADDRESS_HI = PtrHigh32(image_data_addr);
|
||||
word1.f.STRIDE = image_prop.element_size;
|
||||
|
||||
word1.f.SWIZZLE_ENABLE = 0;
|
||||
|
||||
word2.f.NUM_RECORDS = image.desc.width * image_prop.element_size;
|
||||
@@ -471,9 +484,8 @@ hsa_status_t ImageManagerGfx12::PopulateImageSrd(Image& image) const {
|
||||
|
||||
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
|
||||
|
||||
uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(
|
||||
image.component, image.desc, image.tile_mode,
|
||||
image.row_pitch, image.slice_pitch, out);
|
||||
uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(image.component, image.desc,
|
||||
1, image.tile_mode, image.row_pitch, image.slice_pitch, out);
|
||||
if (swizzleMode == (uint32_t)(-1)) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
@@ -642,6 +654,7 @@ hsa_status_t ImageManagerGfx12::PopulateSamplerSrd(Sampler& sampler) const {
|
||||
|
||||
uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
uint32_t num_mipmap_levels,
|
||||
Image::TileMode tileMode,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
@@ -664,7 +677,7 @@ uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv(
|
||||
in.width = width;
|
||||
in.height = height;
|
||||
in.numSlices = num_slice;
|
||||
in.pitchInElement = image_data_row_pitch / image_prop.element_size;
|
||||
in.numMipLevels = num_mipmap_levels;
|
||||
|
||||
switch (desc.geometry) {
|
||||
case HSA_EXT_IMAGE_GEOMETRY_1D:
|
||||
@@ -672,46 +685,44 @@ uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv(
|
||||
case HSA_EXT_IMAGE_GEOMETRY_1DA:
|
||||
in.resourceType = ADDR_RSRC_TEX_1D;
|
||||
break;
|
||||
|
||||
case HSA_EXT_IMAGE_GEOMETRY_2D:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_2DA:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
|
||||
in.resourceType = ADDR_RSRC_TEX_2D;
|
||||
break;
|
||||
|
||||
case HSA_EXT_IMAGE_GEOMETRY_3D:
|
||||
{
|
||||
in.resourceType = ADDR_RSRC_TEX_3D;
|
||||
/*
|
||||
* 3D swizzle modes on GFX12 enforces alignment
|
||||
* of the number of slices to the block depth.
|
||||
* If numSlices = 3 then the 3 slices are
|
||||
* interleaved for 3D locality among the 8 slices
|
||||
* that make up each block. This causes the memory
|
||||
* footprint to jump from an ideal size of ~12 GB
|
||||
* to ~32 GB.
|
||||
* 'enable3DSwizzleMode' flag tests for env variable
|
||||
* HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable
|
||||
* 3D swizzle:
|
||||
* true: Keep view3dAs2dArray = 0 for real 3D interleaving.
|
||||
* false: Use view3dAs2dArray = 1 to avoid the alignment
|
||||
* expansion.
|
||||
* 2D swizzle modes can lower size overhead but may yield
|
||||
* suboptimal cache behavior for fully 3D volumetric
|
||||
* operations.
|
||||
*/
|
||||
bool enable3DSwizzleMode = core::Runtime::runtime_singleton_->flag().enable_3d_swizzle();
|
||||
if (enable3DSwizzleMode)
|
||||
{
|
||||
in.resourceType = ADDR_RSRC_TEX_3D;
|
||||
/*
|
||||
* 3D swizzle modes on GFX12 enforces alignment
|
||||
* of the number of slices to the block depth.
|
||||
* If numSlices = 3 then the 3 slices are
|
||||
* interleaved for 3D locality among the 8 slices
|
||||
* that make up each block. This causes the memory
|
||||
* footprint to jump from an ideal size of ~12 GB
|
||||
* to ~32 GB.
|
||||
* 'enable3DSwizzleMode' flag tests for env variable
|
||||
* HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable
|
||||
* 3D swizzle:
|
||||
* true: Keep view3dAs2dArray = 0 for real 3D interleaving.
|
||||
* false: Use view3dAs2dArray = 1 to avoid the alignment
|
||||
* expansion.
|
||||
* 2D swizzle modes can lower size overhead but may yield
|
||||
* suboptimal cache behavior for fully 3D volumetric
|
||||
* operations.
|
||||
*/
|
||||
bool enable3DSwizzleMode = core::Runtime::runtime_singleton_->flag().enable_3d_swizzle();
|
||||
if (enable3DSwizzleMode)
|
||||
{
|
||||
in.flags.view3dAs2dArray = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
in.flags.view3dAs2dArray = 1;
|
||||
}
|
||||
break;
|
||||
in.flags.view3dAs2dArray = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
in.flags.view3dAs2dArray = 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
in.flags.texture = 1;
|
||||
@@ -781,8 +792,9 @@ uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv(
|
||||
const UINT_32 ratioLow = 2;
|
||||
const UINT_32 ratioHigh = 1;
|
||||
|
||||
// Same behaviour as GFX11, remove linear if height is 1.
|
||||
if (in.height > 1) {
|
||||
// Remove linear swizzle mode for multi-dimensional or mipmapped textures.
|
||||
// Linear mode is only appropriate for simple 1D single-level textures.
|
||||
if (in.height > 1 || in.numMipLevels > 1) {
|
||||
swOut.validModes.swLinear = 0;
|
||||
}
|
||||
|
||||
@@ -793,6 +805,10 @@ uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv(
|
||||
|
||||
if (swOut.validModes.value & (1 << i)) {
|
||||
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0};
|
||||
|
||||
// pMipInfo not needed - set to nullptr and AddrLib will ignore it
|
||||
localOut.pMipInfo = nullptr;
|
||||
|
||||
localOut.size = sizeof(ADDR3_COMPUTE_SURFACE_INFO_OUTPUT);
|
||||
|
||||
in.swizzleMode = (Addr3SwizzleMode) i;
|
||||
@@ -908,5 +924,456 @@ hsa_status_t ImageManagerGfx12::FillImage(const Image& image, const void* patter
|
||||
return status;
|
||||
}
|
||||
|
||||
/// @brief Build the GFX12 hardware descriptor (SRD) for a mipmapped array.
///
/// 1DB geometry is encoded as a buffer resource (SQ_BUF_RSRC words 0-3, words
/// 4-7 cleared). Every other geometry is encoded as an image resource whose
/// swizzle mode, pitch and sizes are taken from GetAddrlibSurfaceInfoNv().
/// Besides mipmap.srd[0..11] this also writes mipmap.row_pitch and
/// mipmap.slice_pitch and, on the image path, mipmap.size and
/// mipmap.addr_output.addr3.
///
/// @param mipmap Mipmapped array to describe; its desc, data, num_levels,
///        component, tile_mode and incoming pitches are read.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR when
///         GetAddrlibSurfaceInfoNv() reports failure.
hsa_status_t ImageManagerGfx12::PopulateMipmapSrd(MipmappedArray& mipmap) const {
  // Translate the HSA format/geometry pair into the hardware encoding.
  ImageProperty prop = ImageLut().MapFormat(mipmap.desc.format, mipmap.desc.geometry);
  assert(prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(prop.element_size != 0);
  assert(mipmap.num_levels >= 1);

  // Local-memory pointers are rebased against local_memory_base_address_.
  const void* base_addr = IsLocalMemory(mipmap.data)
      ? reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(mipmap.data) -
                                      local_memory_base_address_)
      : mipmap.data;

  const bool is_buffer = (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB);

  if (is_buffer) {
    SQ_BUF_RSRC_WORD0 buf0;
    SQ_BUF_RSRC_WORD1 buf1;
    SQ_BUF_RSRC_WORD2 buf2;
    SQ_BUF_RSRC_WORD3 buf3;

    buf0.val = 0;
    buf0.f.BASE_ADDRESS = PtrLow32(base_addr);

    buf1.val = 0;
    buf1.f.BASE_ADDRESS_HI = PtrHigh32(base_addr);
    buf1.f.STRIDE = prop.element_size;
    buf1.f.SWIZZLE_ENABLE = 0;

    buf2.val = 0;
    buf2.f.NUM_RECORDS = mipmap.desc.width * prop.element_size;

    const Swizzle channel_map = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);
    buf3.val = 0;
    buf3.f.DST_SEL_X = channel_map.x;
    buf3.f.DST_SEL_Y = channel_map.y;
    buf3.f.DST_SEL_Z = channel_map.z;
    buf3.f.DST_SEL_W = channel_map.w;
    buf3.f.FORMAT = GetCombinedFormat(prop.data_format, prop.data_type);
    buf3.f.INDEX_STRIDE = prop.element_size;
    // The GFX12 compression fields (WRITE_COMPRESS_ENABLE, COMPRESSION_EN,
    // COMPRESSION_ACCESS_MODE) are left at zero.
    buf3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry);

    mipmap.srd[0] = buf0.val;
    mipmap.srd[1] = buf1.val;
    mipmap.srd[2] = buf2.val;
    mipmap.srd[3] = buf3.val;

    // A buffer descriptor only occupies four words; clear the rest.
    mipmap.srd[4] = 0;
    mipmap.srd[5] = 0;
    mipmap.srd[6] = 0;
    mipmap.srd[7] = 0;

    mipmap.row_pitch = mipmap.desc.width * prop.element_size;
    mipmap.slice_pitch = mipmap.row_pitch;
  } else {
    SQ_IMG_RSRC_WORD0 img0;
    SQ_IMG_RSRC_WORD1 img1;
    SQ_IMG_RSRC_WORD2 img2;
    SQ_IMG_RSRC_WORD3 img3;
    SQ_IMG_RSRC_WORD4 img4;
    SQ_IMG_RSRC_WORD5 img5;
    SQ_IMG_RSRC_WORD6 img6;
    SQ_IMG_RSRC_WORD7 img7;

    // Query the surface layout. No per-level info buffer is supplied.
    ADDR3_COMPUTE_SURFACE_INFO_OUTPUT surf = {0};
    surf.pMipInfo = nullptr;

    unsigned int sw_mode = GetAddrlibSurfaceInfoNv(
        mipmap.component, mipmap.desc, mipmap.num_levels, mipmap.tile_mode,
        mipmap.row_pitch, mipmap.slice_pitch, surf);
    if (sw_mode == (uint32_t)(-1)) {
      return HSA_STATUS_ERROR;
    }
    mipmap.addr_output.addr3 = surf;
    mipmap.size = surf.surfSize;

    assert((surf.bpp / 8) == prop.element_size);

    const size_t pitch_in_bytes = surf.pitch * prop.element_size;
    const uint32_t last_mip = mipmap.num_levels - 1;

    img0.val = 0;
    img0.f.BASE_ADDRESS = PtrLow40Shift8(base_addr);

    img1.val = 0;
    img1.f.BASE_ADDRESS_HI = PtrHigh64Shift40(base_addr);
    img1.f.MAX_MIP = last_mip;
    img1.f.BASE_LEVEL = 0;  // New to GFX12
    img1.f.FORMAT = GetCombinedFormat(prop.data_format, prop.data_type);
    // (width - 1) is split: bits [1:0] live in word 1 ...
    img1.f.WIDTH = BitSelect<0, 1>(mipmap.desc.width - 1);

    img2.val = 0;
    // ... and bits [15:2] live in word 2.
    img2.f.WIDTH_HI = BitSelect<2, 15>(mipmap.desc.width - 1);
    img2.f.HEIGHT = mipmap.desc.height ? mipmap.desc.height - 1 : 0;

    const Swizzle channel_map = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);
    img3.val = 0;
    img3.f.DST_SEL_X = channel_map.x;
    img3.f.DST_SEL_Y = channel_map.y;
    img3.f.DST_SEL_Z = channel_map.z;
    img3.f.DST_SEL_W = channel_map.w;
    // NO_EDGE_CLAMP (new to GFX12) is left at zero.
    img3.f.LAST_LEVEL = last_mip;
    img3.f.SW_MODE = sw_mode;
    img3.f.BC_SWIZZLE = GetBcSwizzle(channel_map);
    img3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry);

    const bool is_array = (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
                           mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
                           mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool is_3d = (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    img4.val = 0;
    if (is_array) {
      // Array geometries: DEPTH holds (array_size - 1), clamped so 0 acts as 1.
      img4.f.DEPTH = std::max(mipmap.desc.array_size, static_cast<size_t>(1)) - 1;
    } else if (is_3d) {
      img4.f.DEPTH = mipmap.desc.depth - 1;
    } else {
      // Remaining geometries: DEPTH + PITCH_MSB together encode (pitch - 1).
      uint32_t pitch_minus_one = surf.pitch - 1;
      img4.f.DEPTH = pitch_minus_one & 0x3fff;            // low 14 bits
      img4.f.PITCH_MSB = (pitch_minus_one >> 14) & 0x3;   // next 2 bits
    }

    img5.val = 0;
    img6.val = 0;
    img7.val = 0;

    mipmap.srd[0] = img0.val;
    mipmap.srd[1] = img1.val;
    mipmap.srd[2] = img2.val;
    mipmap.srd[3] = img3.val;
    mipmap.srd[4] = img4.val;
    mipmap.srd[5] = img5.val;
    mipmap.srd[6] = img6.val;
    mipmap.srd[7] = img7.val;

    mipmap.row_pitch = pitch_in_bytes;
    mipmap.slice_pitch = surf.sliceSize;
  }

  // Trailing software words: channel type/order, width and level count.
  mipmap.srd[8] = mipmap.desc.format.channel_type;
  mipmap.srd[9] = mipmap.desc.format.channel_order;
  mipmap.srd[10] = static_cast<uint32_t>(mipmap.desc.width);
  mipmap.srd[11] = mipmap.num_levels;

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
/// @brief Build the GFX12 SRD for a mipmapped array from pre-computed AMD
///        metadata.
///
/// Words 0-7 are copied from the metadata descriptor and then patched with the
/// base address, format and channel swizzle of this array. Per-level offsets
/// are read from the metadata's mip_offsets table into a newly allocated
/// ADDR3_MIP_INFO array that is stored in
/// mipmap_array.addr_output.addr3.pMipInfo.
///
/// @param mipmap_array Mipmapped array to describe (srd, pitches, size and
///        addr_output.addr3.pMipInfo are written).
/// @param desc Metadata descriptor; reinterpreted as metadata_amd_gfx12_t.
/// @return HSA_STATUS_SUCCESS on success;
///         HSA_STATUS_ERROR_INVALID_ARGUMENT if @p desc is null or the array
///         has zero levels; HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED if
///         the format cannot be mapped or its element size does not match the
///         hardware pixel size.
hsa_status_t ImageManagerGfx12::PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const {
  // Without these guards a null descriptor is dereferenced below, and a zero
  // level count makes `num_levels - 1` wrap and index far outside the
  // per-level table.
  if (desc == nullptr || mipmap_array.num_levels == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const metadata_amd_gfx12_t* desc_gfx12 = reinterpret_cast<const metadata_amd_gfx12_t*>(desc);
  const void* mipmap_data_addr = mipmap_array.data;

  ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap_array.desc.format, mipmap_array.desc.geometry);
  if (mipmap_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED || mipmap_prop.element_size == 0) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  const Swizzle swizzle = ImageLut().MapSwizzle(mipmap_array.desc.format.channel_order);

  // Local-memory pointers are rebased against local_memory_base_address_.
  if (IsLocalMemory(mipmap_array.data)) {
    mipmap_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(mipmap_array.data) - local_memory_base_address_);
  }

  // Start from the pre-computed SRD words 0-7 in the metadata.
  mipmap_array.srd[0] = desc_gfx12->word0.u32All;
  mipmap_array.srd[1] = desc_gfx12->word1.u32All;
  mipmap_array.srd[2] = desc_gfx12->word2.u32All;
  mipmap_array.srd[3] = desc_gfx12->word3.u32All;
  mipmap_array.srd[4] = desc_gfx12->word4.u32All;
  mipmap_array.srd[5] = desc_gfx12->word5.u32All;
  mipmap_array.srd[6] = desc_gfx12->word6.u32All;
  mipmap_array.srd[7] = desc_gfx12->word7.u32All;

  if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    // 1DB uses buffer descriptors.
    SQ_BUF_RSRC_WORD0 word0;
    SQ_BUF_RSRC_WORD1 word1;
    SQ_BUF_RSRC_WORD3 word3;

    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow32(mipmap_data_addr);

    word1.val = mipmap_array.srd[1];
    word1.f.BASE_ADDRESS_HI = PtrHigh32(mipmap_data_addr);
    word1.f.STRIDE = mipmap_prop.element_size;

    word3.val = mipmap_array.srd[3];
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
    word3.f.INDEX_STRIDE = mipmap_prop.element_size;

    mipmap_array.srd[0] = word0.val;
    mipmap_array.srd[1] = word1.val;
    mipmap_array.srd[3] = word3.val;

    mipmap_array.row_pitch = mipmap_array.desc.width * mipmap_prop.element_size;
    mipmap_array.slice_pitch = mipmap_array.row_pitch;
  } else {
    // Non-1DB uses image descriptors. The metadata is only usable when the
    // HSA element size agrees with the hardware pixel size.
    uint32_t hwPixelSize = ImageLut().GetPixelSize(mipmap_prop.data_format, mipmap_prop.data_type);
    if (mipmap_prop.element_size != hwPixelSize) {
      return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
    }

    reinterpret_cast<SQ_IMG_RSRC_WORD0*>(&mipmap_array.srd[0])->bits.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr);
    reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&mipmap_array.srd[1])->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr);
    reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&mipmap_array.srd[1])->bits.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
    reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3])->bits.DST_SEL_X = swizzle.x;
    reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3])->bits.DST_SEL_Y = swizzle.y;
    reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3])->bits.DST_SEL_Z = swizzle.z;
    reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3])->bits.DST_SEL_W = swizzle.w;

    // Only the 1D geometries get their TYPE rewritten; others keep the
    // metadata's value.
    if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
        mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D) {
      reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3])->bits.TYPE =
          ImageLut().MapGeometry(mipmap_array.desc.geometry);
    }
  }

  // Looks like this is only used for CPU copies.
  // NOTE(review): this also discards the 1DB pitches computed above; kept
  // as-is to preserve existing behavior - confirm whether that is intended.
  mipmap_array.row_pitch = 0;
  mipmap_array.slice_pitch = 0;

  // Trailing software words: channel type/order, width and level count.
  mipmap_array.srd[8] = mipmap_array.desc.format.channel_type;
  mipmap_array.srd[9] = mipmap_array.desc.format.channel_order;
  mipmap_array.srd[10] = static_cast<uint32_t>(mipmap_array.desc.width);
  mipmap_array.srd[11] = mipmap_array.num_levels;

  // Allocate a zero-initialised per-level table. It is handed to
  // mipmap_array below and is not freed in this function.
  // NOTE(review): any pMipInfo already stored in addr_output is overwritten
  // without being released - confirm who owns and frees this allocation.
  ADDR3_MIP_INFO* mip_info_storage = new ADDR3_MIP_INFO[mipmap_array.num_levels]();

  for (uint32_t level = 0; level < mipmap_array.num_levels; level++) {
    // mip_offsets holds offset bits [39:8]; shift left by 8 for the byte offset.
    // NOTE(review): assumes mip_offsets has at least num_levels entries - verify
    // against the metadata_amd_gfx12_t definition.
    mip_info_storage[level].offset = static_cast<uint64_t>(desc_gfx12->mip_offsets[level]) << 8;

    // Each level halves every dimension, never dropping below 1.
    mip_info_storage[level].pixelPitch = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.width >> level));
    mip_info_storage[level].pixelHeight = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.height >> level));
    mip_info_storage[level].depth = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.depth >> level));
  }

  // Keep the per-level table reachable from the array's addr_output.
  mipmap_array.addr_output.addr3.pMipInfo = mip_info_storage;

  // Estimate the total size as "offset of the last level + its unpadded size".
  // The product is widened to 64 bits before multiplying so it cannot wrap.
  const uint32_t last_level = mipmap_array.num_levels - 1;
  const uint64_t last_level_size = static_cast<uint64_t>(mip_info_storage[last_level].pixelPitch) *
                                   mip_info_storage[last_level].pixelHeight *
                                   mip_info_storage[last_level].depth *
                                   mipmap_prop.element_size;
  mipmap_array.size = mip_info_storage[last_level].offset + last_level_size;

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
void ImageManagerGfx12::printSRDDetailed(const uint32_t* srd) const {
|
||||
if (!srd) {
|
||||
printf("\n========== Image SRD (GFX12) - Detailed ==========\n");
|
||||
printf("ERROR: No SRD data provided.\n");
|
||||
printf("===============================================\n\n");
|
||||
return;
|
||||
}
|
||||
|
||||
printf("\n========== Image SRD (GFX12) - Detailed ==========\n");
|
||||
|
||||
// Print all 12 words with bit field annotations
|
||||
for (int i = 0; i < 12; i++) {
|
||||
printf("WORD %d: 0x%08x ", i, srd[i]);
|
||||
|
||||
// Binary representation
|
||||
printf("(");
|
||||
for (int bit = 31; bit >= 0; bit--) {
|
||||
printf("%d", (srd[i] >> bit) & 1);
|
||||
if (bit % 4 == 0 && bit != 0) printf("_");
|
||||
}
|
||||
printf(")\n");
|
||||
}
|
||||
|
||||
// WORD 0: SQ_IMG_RSRC_WORD0
|
||||
SQ_IMG_RSRC_WORD0 word0;
|
||||
word0.val = srd[0];
|
||||
printf("\nWORD 0: BASE_ADDRESS (bits 39:8) = 0x%08x\n", word0.f.BASE_ADDRESS);
|
||||
|
||||
// WORD 1: SQ_IMG_RSRC_WORD1
|
||||
SQ_IMG_RSRC_WORD1 word1;
|
||||
word1.val = srd[1];
|
||||
printf("WORD 1: BASE_ADDRESS_HI = 0x%08x\n", word1.f.BASE_ADDRESS_HI);
|
||||
printf(" MAX_MIP = %u ◄──── Total mip levels - 1\n", word1.f.MAX_MIP);
|
||||
printf(" BASE_LEVEL = %u ◄──── Current base level\n", word1.f.BASE_LEVEL);
|
||||
printf(" FORMAT = %u\n", word1.f.FORMAT);
|
||||
printf(" WIDTH (bits 1:0) = %u\n", word1.f.WIDTH);
|
||||
|
||||
// Calculate full address (GFX12 uses 40-bit shifted by 8)
|
||||
uint64_t base_addr = ((uint64_t)word1.f.BASE_ADDRESS_HI << 40) | ((uint64_t)word0.f.BASE_ADDRESS << 8);
|
||||
printf(" → Full Base Address = 0x%016lx\n", base_addr);
|
||||
|
||||
// WORD 2: SQ_IMG_RSRC_WORD2
|
||||
SQ_IMG_RSRC_WORD2 word2;
|
||||
word2.val = srd[2];
|
||||
printf("WORD 2: WIDTH_HI (bits 15:2) = %u\n", word2.f.WIDTH_HI);
|
||||
printf(" HEIGHT = %u\n", word2.f.HEIGHT);
|
||||
|
||||
// Calculate full width
|
||||
uint32_t full_width = word1.f.WIDTH | (word2.f.WIDTH_HI << 2);
|
||||
printf(" → Full Width = %u (actual: %u)\n", full_width, full_width + 1);
|
||||
printf(" → Full Height = %u (actual: %u)\n", word2.f.HEIGHT, word2.f.HEIGHT + 1);
|
||||
|
||||
// WORD 3: SQ_IMG_RSRC_WORD3
|
||||
SQ_IMG_RSRC_WORD3 word3;
|
||||
word3.val = srd[3];
|
||||
printf("WORD 3: DST_SEL_X = %u ", word3.f.DST_SEL_X);
|
||||
printChannelSelect(word3.f.DST_SEL_X);
|
||||
printf(" DST_SEL_Y = %u ", word3.f.DST_SEL_Y);
|
||||
printChannelSelect(word3.f.DST_SEL_Y);
|
||||
printf(" DST_SEL_Z = %u ", word3.f.DST_SEL_Z);
|
||||
printChannelSelect(word3.f.DST_SEL_Z);
|
||||
printf(" DST_SEL_W = %u ", word3.f.DST_SEL_W);
|
||||
printChannelSelect(word3.f.DST_SEL_W);
|
||||
printf(" LAST_LEVEL = %u ◄──── Current last level (GFX12 NEW)\n", word3.f.LAST_LEVEL);
|
||||
printf(" SW_MODE = %u ", word3.f.SW_MODE);
|
||||
printSwizzleMode(word3.f.SW_MODE);
|
||||
printf(" BC_SWIZZLE = %u\n", word3.f.BC_SWIZZLE);
|
||||
printf(" TYPE = %u ", word3.f.TYPE);
|
||||
printResourceType(word3.f.TYPE);
|
||||
|
||||
// WORD 4: SQ_IMG_RSRC_WORD4
|
||||
SQ_IMG_RSRC_WORD4 word4;
|
||||
word4.val = srd[4];
|
||||
printf("WORD 4: DEPTH = %u\n", word4.f.DEPTH);
|
||||
printf(" PITCH_MSB = %u\n", word4.f.PITCH_MSB);
|
||||
|
||||
// Calculate effective depth/pitch based on geometry
|
||||
uint32_t type = word3.f.TYPE;
|
||||
if (type == 10) { // 3D
|
||||
printf(" → 3D Depth = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1);
|
||||
} else if (type == 13 || type == 12) { // Arrays
|
||||
printf(" → Array Size = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1);
|
||||
} else { // 1D/2D - encodes pitch
|
||||
uint32_t encoded_pitch = word4.f.DEPTH | (word4.f.PITCH_MSB << 14);
|
||||
printf(" → Encoded Pitch = %u (actual: %u)\n", encoded_pitch, encoded_pitch + 1);
|
||||
}
|
||||
|
||||
// WORD 5-7: Usually zero for basic images
|
||||
printf("WORD 5: Reserved = 0x%08x\n", srd[5]);
|
||||
printf("WORD 6: Reserved = 0x%08x\n", srd[6]);
|
||||
printf("WORD 7: Reserved = 0x%08x\n", srd[7]);
|
||||
|
||||
// Mipmap analysis
|
||||
if (word1.f.MAX_MIP > 0) {
|
||||
printf("\nMIPMAP ANALYSIS:\n");
|
||||
printf(" Total Levels = %u (MAX_MIP + 1)\n", word1.f.MAX_MIP + 1);
|
||||
printf(" Active Range = [%u, %u]\n", word1.f.BASE_LEVEL, word3.f.LAST_LEVEL);
|
||||
if (word1.f.BASE_LEVEL == word3.f.LAST_LEVEL) {
|
||||
printf(" Mode = SINGLE LEVEL VIEW ◄──── Mip level view\n");
|
||||
uint32_t level = word1.f.BASE_LEVEL;
|
||||
uint32_t level_width = std::max(1u, (full_width + 1) >> level);
|
||||
uint32_t level_height = std::max(1u, static_cast<uint32_t>((word2.f.HEIGHT + 1) >> level));
|
||||
printf(" Effective Dimensions = %ux%u (level %u)\n", level_width, level_height, level);
|
||||
} else {
|
||||
printf(" Mode = FULL MIPMAP CHAIN\n");
|
||||
}
|
||||
}
|
||||
printf("===============================================\n\n");
|
||||
}
|
||||
|
||||
/// @brief Debug helper: print a parenthesised label for a DST_SEL_* value,
///        followed by a newline.
/// @param sel Channel-select value; 0, 1 and 4-7 have labels, anything else
///        prints "(UNKNOWN)".
void ImageManagerGfx12::printChannelSelect(uint32_t sel) const {
  // Indexed by selector value; null entries have no label.
  static const char* const kLabels[] = {
      "(SEL_0)\n", "(SEL_1)\n", nullptr,       nullptr,
      "(SEL_X/R)\n", "(SEL_Y/G)\n", "(SEL_Z/B)\n", "(SEL_W/A)\n"};
  const uint32_t label_count = sizeof(kLabels) / sizeof(kLabels[0]);

  const char* text = (sel < label_count) ? kLabels[sel] : nullptr;
  printf("%s", text ? text : "(UNKNOWN)\n");
}
|
||||
|
||||
/// @brief Debug helper: print a parenthesised label for an SRD TYPE value,
///        followed by a newline.
/// @param type Resource type value; 8-15 have labels, anything else prints
///        "(UNKNOWN=<value>)".
void ImageManagerGfx12::printResourceType(uint32_t type) const {
  // Labels for the contiguous range 8..15, indexed by (type - 8).
  static const char* const kLabels[] = {
      "(1D)\n",           "(2D)\n",       "(3D)\n",      "(CUBE)\n",
      "(1D_ARRAY/1DB)\n", "(2D_ARRAY)\n", "(2D_MSAA)\n", "(2D_MSAA_ARRAY)\n"};
  const uint32_t first_type = 8;
  const uint32_t label_count = sizeof(kLabels) / sizeof(kLabels[0]);

  if (type >= first_type && type < first_type + label_count) {
    printf("%s", kLabels[type - first_type]);
    return;
  }
  printf("(UNKNOWN=%u)\n", type);
}
|
||||
|
||||
/// @brief Debug helper: print a parenthesised name for a GFX12 SW_MODE value,
///        followed by a newline.
///
/// The SW_MODE field is filled with the swizzle mode returned by
/// GetAddrlibSurfaceInfoNv(), i.e. an Addr3SwizzleMode value, so the names
/// follow that enum (ADDR3_LINEAR = 0 ... ADDR3_256KB_3D = 7) rather than the
/// older SW_256B/SW_4KB/SW_VAR families.
///
/// @param sw_mode Swizzle mode value; values outside 0-7 print
///        "(UNKNOWN=<value>)".
void ImageManagerGfx12::printSwizzleMode(uint32_t sw_mode) const {
  // Indexed by Addr3SwizzleMode value.
  static const char* const kAddr3ModeNames[] = {
      "LINEAR",   "256B_2D", "4KB_2D",  "64KB_2D",
      "256KB_2D", "4KB_3D",  "64KB_3D", "256KB_3D"};
  const uint32_t mode_count = sizeof(kAddr3ModeNames) / sizeof(kAddr3ModeNames[0]);

  if (sw_mode < mode_count) {
    printf("(%s)\n", kAddr3ModeNames[sw_mode]);
  } else {
    printf("(UNKNOWN=%u)\n", sw_mode);
  }
}
|
||||
|
||||
/// @brief Turn an SRD copied from a parent mipmapped array into a view of a
///        single mip level.
///
/// The descriptor in @p level_view is expected to already hold a copy of the
/// parent's SRD; only BASE_LEVEL (word 1) and LAST_LEVEL (word 3) are
/// rewritten, both to @p mip_level.
///
/// @param level_view   View whose srd words 1 and 3 are patched in place.
/// @param mipmap_array Parent array; used to validate @p mip_level.
/// @param mip_level    Zero-based level to select.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT when
///         @p mip_level is not below the parent's num_levels.
hsa_status_t ImageManagerGfx12::PopulateMipLevelSrd(
    MipmappedArray& level_view,
    const MipmappedArray& mipmap_array,
    uint32_t mip_level) const {
  // An out-of-range level would otherwise be silently truncated into the
  // bit-fields and select a level the parent does not have.
  if (mip_level >= mipmap_array.num_levels) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // SRD already copied from parent, just modify BASE_LEVEL/LAST_LEVEL fields.
  uint32_t* srd_words = reinterpret_cast<uint32_t*>(level_view.srd);

  // GFX12 SRD words 1 and 3 hold the BASE_LEVEL and LAST_LEVEL fields.
  SQ_IMG_RSRC_WORD1* word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&srd_words[1]);
  SQ_IMG_RSRC_WORD3* word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&srd_words[3]);

  // Same value in both fields restricts the view to this one level.
  word1->f.BASE_LEVEL = mip_level;
  word3->f.LAST_LEVEL = mip_level;

  debug_print("Set SRD mip selection: BASE_LEVEL=%u, LAST_LEVEL=%u", mip_level, mip_level);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
} // namespace image
|
||||
} // namespace rocr
|
||||
|
||||
@@ -60,6 +60,7 @@ class ImageManagerGfx12 : public ImageManagerKv {
|
||||
virtual hsa_status_t CalculateImageSizeAndAlignment(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
hsa_ext_image_data_layout_t image_data_layout,
|
||||
uint32_t num_mipmap_levels,
|
||||
size_t image_data_row_pitch, size_t image_data_slice_pitch,
|
||||
hsa_ext_image_data_info_t& image_info) const;
|
||||
|
||||
@@ -80,9 +81,26 @@ class ImageManagerGfx12 : public ImageManagerKv {
|
||||
/// @brief Fill image backing storage using agent copy.
|
||||
virtual hsa_status_t FillImage(const Image& image, const void* pattern,
|
||||
const hsa_ext_image_region_t& region);
|
||||
|
||||
/// @brief Fill mipmap structure with device specific mipmapped array object.
|
||||
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const;
|
||||
|
||||
/// @brief Fill mipmap structure with pre-computed AMD metadata descriptor.
|
||||
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const;
|
||||
|
||||
/// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields
|
||||
virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view,
|
||||
const MipmappedArray& mipmap_array, uint32_t mip_level) const;
|
||||
|
||||
virtual void printSRDDetailed(const uint32_t* srd) const;
|
||||
virtual void printChannelSelect(uint32_t sel) const;
|
||||
virtual void printResourceType(uint32_t type) const;
|
||||
virtual void printSwizzleMode(uint32_t sw_mode) const;
|
||||
|
||||
protected:
|
||||
uint32_t GetAddrlibSurfaceInfoNv(hsa_agent_t component,
|
||||
const hsa_ext_image_descriptor_t& desc,
|
||||
uint32_t num_mipmap_levels,
|
||||
Image::TileMode tileMode,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
|
||||
@@ -220,6 +220,7 @@ void ImageManagerKv::GetImageInfoMaxDimension(hsa_agent_t component,
|
||||
hsa_status_t ImageManagerKv::CalculateImageSizeAndAlignment(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
hsa_ext_image_data_layout_t image_data_layout,
|
||||
uint32_t num_mipmap_levels,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
hsa_ext_image_data_info_t& image_info) const {
|
||||
@@ -719,6 +720,162 @@ hsa_status_t ImageManagerKv::FillImage(const Image& image, const void* pattern,
|
||||
return status;
|
||||
}
|
||||
|
||||
/// @brief Mipmapped arrays are not implemented by this image manager.
/// @param mipmap Unused.
/// @return Always HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED.
hsa_status_t ImageManagerKv::PopulateMipmapSrd(MipmappedArray& mipmap) const {
  const hsa_status_t unsupported =
      static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  return unsupported;
}
|
||||
|
||||
/// @brief Metadata-based mipmapped arrays are not implemented by this image
///        manager.
/// @param mipmap_array Unused.
/// @param desc Unused.
/// @return Always HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED.
hsa_status_t ImageManagerKv::PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const {
  const hsa_status_t unsupported =
      static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  return unsupported;
}
|
||||
|
||||
void ImageManagerKv::printSRDDetailed(const uint32_t* srd) const {
|
||||
if (!srd) {
|
||||
printf("\n========== Image SRD (KV) - Detailed ==========\n");
|
||||
printf("ERROR: No SRD data provided.\n");
|
||||
printf("===============================================\n\n");
|
||||
return;
|
||||
}
|
||||
|
||||
printf("\n========== Image SRD (KV) - Detailed ==========\n");
|
||||
|
||||
// Print all 12 words with bit field annotations
|
||||
for (int i = 0; i < 12; i++) {
|
||||
printf("WORD %d: 0x%08x ", i, srd[i]);
|
||||
|
||||
// Binary representation
|
||||
printf("(");
|
||||
for (int bit = 31; bit >= 0; bit--) {
|
||||
printf("%d", (srd[i] >> bit) & 1);
|
||||
if (bit % 4 == 0 && bit != 0) printf("_");
|
||||
}
|
||||
printf(")\n");
|
||||
}
|
||||
|
||||
// WORD 0: BASE_ADDRESS (bits 39:8)
|
||||
SQ_IMG_RSRC_WORD0 word0;
|
||||
word0.u32_all = srd[0];
|
||||
printf("\nWORD 0: BASE_ADDRESS (bits 39:8) = 0x%08x\n", word0.bits.base_address);
|
||||
|
||||
// WORD 1: Contains BASE_ADDRESS_HI, MIN_LOD, DATA_FORMAT, NUM_FORMAT, MTYPE
|
||||
SQ_IMG_RSRC_WORD1 word1;
|
||||
word1.u32_all = srd[1];
|
||||
printf("WORD 1: BASE_ADDRESS_HI = 0x%02x\n", word1.bits.base_address_hi);
|
||||
printf(" MIN_LOD = %u\n", word1.bits.min_lod);
|
||||
printf(" DATA_FORMAT = %u\n", word1.bits.data_format);
|
||||
printf(" NUM_FORMAT = %u\n", word1.bits.num_format);
|
||||
printf(" MTYPE = %u\n", word1.bits.mtype);
|
||||
|
||||
// Calculate full address (KV uses 40-bit shifted by 8)
|
||||
uint64_t base_addr = ((uint64_t)word1.bits.base_address_hi << 40) | ((uint64_t)word0.bits.base_address << 8);
|
||||
printf(" → Full Base Address = 0x%016lx\n", base_addr);
|
||||
|
||||
// WORD 2: WIDTH, HEIGHT, PERF_MOD, INTERLACED
|
||||
SQ_IMG_RSRC_WORD2 word2;
|
||||
word2.u32_all = srd[2];
|
||||
printf("WORD 2: WIDTH = %u (actual: %u)\n", word2.bits.width, word2.bits.width + 1);
|
||||
printf(" HEIGHT = %u (actual: %u)\n", word2.bits.height, word2.bits.height + 1);
|
||||
printf(" PERF_MOD = %u\n", word2.bits.perf_mod);
|
||||
printf(" INTERLACED = %u\n", word2.bits.interlaced);
|
||||
|
||||
// WORD 3: Channel selectors, TILING_INDEX, POW2_PAD, TYPE, ATC
|
||||
SQ_IMG_RSRC_WORD3 word3;
|
||||
word3.u32_all = srd[3];
|
||||
printf("WORD 3: DST_SEL_X = %u ", word3.bits.dst_sel_x);
|
||||
printChannelSelect(word3.bits.dst_sel_x);
|
||||
printf(" DST_SEL_Y = %u ", word3.bits.dst_sel_y);
|
||||
printChannelSelect(word3.bits.dst_sel_y);
|
||||
printf(" DST_SEL_Z = %u ", word3.bits.dst_sel_z);
|
||||
printChannelSelect(word3.bits.dst_sel_z);
|
||||
printf(" DST_SEL_W = %u ", word3.bits.dst_sel_w);
|
||||
printChannelSelect(word3.bits.dst_sel_w);
|
||||
printf(" TILING_INDEX = %u ◄──── Tile configuration index\n", word3.bits.tiling_index);
|
||||
printf(" POW2_PAD = %u ◄──── Power-of-2 padding\n", word3.bits.pow2_pad);
|
||||
printf(" TYPE = %u ", word3.bits.type);
|
||||
printResourceType(word3.bits.type);
|
||||
printf(" ATC = %u ◄──── Address translation cache\n", word3.bits.atc);
|
||||
|
||||
// WORD 4: DEPTH, PITCH
|
||||
SQ_IMG_RSRC_WORD4 word4;
|
||||
word4.u32_all = srd[4];
|
||||
printf("WORD 4: DEPTH = %u\n", word4.bits.depth);
|
||||
printf(" PITCH = %u (actual: %u)\n", word4.bits.pitch, word4.bits.pitch + 1);
|
||||
|
||||
// Calculate effective depth/pitch based on geometry
|
||||
uint32_t type = word3.bits.type;
|
||||
if (type == 10) { // 3D
|
||||
printf(" → 3D Depth = %u (actual: %u)\n", word4.bits.depth, word4.bits.depth + 1);
|
||||
} else if (type == 13 || type == 12) { // Arrays
|
||||
printf(" → Array Size = %u (actual: %u)\n", word4.bits.depth, word4.bits.depth + 1);
|
||||
}
|
||||
|
||||
// WORD 5: LAST_ARRAY
|
||||
SQ_IMG_RSRC_WORD5 word5;
|
||||
word5.u32_all = srd[5];
|
||||
printf("WORD 5: LAST_ARRAY = %u ◄──── Last array slice\n", word5.bits.last_array);
|
||||
|
||||
// WORD 6-7: Usually zero for basic images
|
||||
printf("WORD 6: Reserved = 0x%08x\n", srd[6]);
|
||||
printf("WORD 7: Reserved = 0x%08x\n", srd[7]);
|
||||
|
||||
// Additional information (HSA extension fields)
|
||||
printf("WORD 8: CHANNEL_TYPE = 0x%08x\n", srd[8]);
|
||||
printf("WORD 9: CHANNEL_ORDER = 0x%08x\n", srd[9]);
|
||||
printf("WORD 10: WIDTH_ORIGINAL = 0x%08x\n", srd[10]);
|
||||
printf("WORD 11: NUM_LEVELS = 0x%08x\n", srd[11]);
|
||||
|
||||
// Mipmap analysis (KV architecture limitations)
|
||||
printf("\nMIPMAP ANALYSIS:\n");
|
||||
printf(" Total Levels = %u\n", srd[11]);
|
||||
printf(" Min LOD = %u ◄──── Minimum detail level\n", word1.bits.min_lod);
|
||||
printf(" KV Architecture = LEGACY MIPMAP SUPPORT\n");
|
||||
printf(" Note = KV lacks BASE_LEVEL/LAST_LEVEL fields\n");
|
||||
printf(" Note = Mip level selection via shader only\n");
|
||||
printf("===============================================\n\n");
|
||||
}
|
||||
|
||||
/// @brief Print the symbolic name of an SRD destination-channel selector,
///        terminated by a newline.
/// @param sel Raw DST_SEL_* value taken from the resource descriptor.
void ImageManagerKv::printChannelSelect(uint32_t sel) const {
  // Indexed by selector value; slots 2 and 3 have no symbolic name here and
  // fall through to the "(UNKNOWN)" text, exactly like any value above 7.
  static const char* const kSelectorText[] = {
      "(SEL_0)\n",   "(SEL_1)\n",   nullptr,       nullptr,
      "(SEL_X/R)\n", "(SEL_Y/G)\n", "(SEL_Z/B)\n", "(SEL_W/A)\n"};
  const size_t entry_count = sizeof(kSelectorText) / sizeof(kSelectorText[0]);

  const char* text = (sel < entry_count) ? kSelectorText[sel] : nullptr;
  if (text == nullptr) {
    text = "(UNKNOWN)\n";
  }
  printf("%s", text);
}
|
||||
|
||||
/// @brief Print the symbolic name of an SRD resource TYPE value, terminated
///        by a newline.
/// @param type Raw TYPE field value taken from the resource descriptor.
void ImageManagerKv::printResourceType(uint32_t type) const {
  // Named values form the contiguous range [8, 15]; the table is indexed by
  // (type - 8).
  static const char* const kTypeText[] = {
      "(1D)\n",           "(2D)\n",       "(3D)\n",      "(CUBE)\n",
      "(1D_ARRAY/1DB)\n", "(2D_ARRAY)\n", "(2D_MSAA)\n", "(2D_MSAA_ARRAY)\n"};
  const uint32_t first_named = 8;
  const uint32_t last_named = 15;

  if (type < first_named || type > last_named) {
    printf("(UNKNOWN=%u)\n", type);
    return;
  }
  printf("%s", kTypeText[type - first_named]);
}
|
||||
|
||||
/// @brief Print the given mode value as a tiling mode, terminated by a
///        newline.
///
/// KV describes surface layout with tiling modes rather than swizzle modes,
/// so the value is reported verbatim instead of being decoded. This override
/// is provided for completeness and is not typically called for KV.
///
/// @param sw_mode Mode value to print.
void ImageManagerKv::printSwizzleMode(uint32_t sw_mode) const {
  const uint32_t tiling_mode = sw_mode;
  printf("(TILING_MODE=%u)\n", tiling_mode);
}
|
||||
|
||||
/// @brief Mip level views are not supported on GFX8 hardware; this override
///        always fails.
///
/// @param level_view   Unused.
/// @param mipmap_array Unused.
/// @param mip_level    Unused.
/// @return Always HSA_STATUS_ERROR_NOT_INITIALIZED.
///
/// NOTE(review): the status signals "not initialized" although the condition
/// is "not supported" - confirm which code callers expect before changing it.
hsa_status_t ImageManagerKv::PopulateMipLevelSrd(
    MipmappedArray& level_view,
    const MipmappedArray& mipmap_array,
    uint32_t mip_level) const {
  // No argument is consulted on this path.
  (void)level_view;
  (void)mipmap_array;
  (void)mip_level;

  const hsa_status_t unsupported = HSA_STATUS_ERROR_NOT_INITIALIZED;
  return unsupported;
}
|
||||
|
||||
hsa_status_t ImageManagerKv::GetLocalMemoryRegion(hsa_region_t region,
|
||||
void* data) {
|
||||
if (data == NULL) {
|
||||
@@ -845,7 +1002,7 @@ bool ImageManagerKv::GetAddrlibSurfaceInfo(
|
||||
in.width = width;
|
||||
in.height = height;
|
||||
in.numSlices = num_slice;
|
||||
in.pitchInElement = image_data_row_pitch / image_prop.element_size;
|
||||
|
||||
switch(desc.geometry) {
|
||||
case HSA_EXT_IMAGE_GEOMETRY_1D:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_1DB:
|
||||
|
||||
Исполняемый файл → Обычный файл
+18
@@ -79,6 +79,7 @@ class ImageManagerKv : public ImageManager {
|
||||
virtual hsa_status_t CalculateImageSizeAndAlignment(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
hsa_ext_image_data_layout_t image_data_layout,
|
||||
uint32_t num_mipmap_levels,
|
||||
size_t image_data_row_pitch, size_t image_data_slice_pitch,
|
||||
hsa_ext_image_data_info_t& image_info) const;
|
||||
|
||||
@@ -116,6 +117,21 @@ class ImageManagerKv : public ImageManager {
|
||||
virtual hsa_status_t FillImage(const Image& image, const void* pattern,
|
||||
const hsa_ext_image_region_t& region);
|
||||
|
||||
/// @brief Fill mipmap structure with device specific mipmapped array object.
|
||||
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const;
|
||||
|
||||
/// @brief Fill mipmap structure with pre-computed AMD metadata descriptor.
|
||||
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const;
|
||||
|
||||
/// @brief Create a view of a single mip level. Not supported by the Kv implementation, which always returns an error.
|
||||
virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view,
|
||||
const MipmappedArray& mipmap_array, uint32_t mip_level) const;
|
||||
|
||||
virtual void printSRDDetailed(const uint32_t* srd) const;
|
||||
virtual void printChannelSelect(uint32_t sel) const;
|
||||
virtual void printResourceType(uint32_t type) const;
|
||||
virtual void printSwizzleMode(uint32_t sw_mode) const;
|
||||
|
||||
protected:
|
||||
static hsa_status_t GetLocalMemoryRegion(hsa_region_t region, void* data);
|
||||
|
||||
@@ -145,6 +161,8 @@ class ImageManagerKv : public ImageManager {
|
||||
|
||||
ADDR_HANDLE addr_lib_;
|
||||
|
||||
virtual ADDR_HANDLE GetAddrLib() const override { return addr_lib_; }
|
||||
|
||||
hsa_agent_t agent_;
|
||||
|
||||
uint32_t family_type_;
|
||||
|
||||
@@ -190,7 +190,7 @@ static FORMAT GetCombinedFormat(uint8_t fmt, uint8_t type) {
|
||||
return CFMT_INVALID;
|
||||
};
|
||||
//-----------------------------------------------------------------------------
|
||||
// End workaround
|
||||
// End workaround
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
ImageManagerNv::ImageManagerNv() : ImageManagerKv() {}
|
||||
@@ -201,6 +201,7 @@ ImageManagerNv::~ImageManagerNv() {}
|
||||
hsa_status_t ImageManagerNv::CalculateImageSizeAndAlignment(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
hsa_ext_image_data_layout_t image_data_layout,
|
||||
uint32_t num_mipmap_levels,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
hsa_ext_image_data_info_t& image_info) const {
|
||||
@@ -216,9 +217,8 @@ hsa_status_t ImageManagerNv::CalculateImageSizeAndAlignment(
|
||||
desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)?
|
||||
Image::TileMode::TILED : Image::TileMode::LINEAR;
|
||||
}
|
||||
if (GetAddrlibSurfaceInfoNv(component, desc, tileMode,
|
||||
image_data_row_pitch, image_data_slice_pitch, out) ==
|
||||
(uint32_t)(-1)) {
|
||||
if (GetAddrlibSurfaceInfoNv(component, desc, num_mipmap_levels, tileMode,
|
||||
image_data_row_pitch, image_data_slice_pitch, out) == (uint32_t)(-1)) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -319,7 +319,7 @@ hsa_status_t ImageManagerNv::PopulateImageSrd(Image& image,
|
||||
reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&image.srd[3])->bits.TYPE =
|
||||
ImageLut().MapGeometry(image.desc.geometry);
|
||||
}
|
||||
|
||||
|
||||
// Imported metadata holds the offset to metadata, add the image base address.
|
||||
uintptr_t meta = uintptr_t(((SQ_IMG_RSRC_WORD7*)(&image.srd[7]))->bits.META_DATA_ADDRESS_HI) << 16;
|
||||
meta |= uintptr_t(((SQ_IMG_RSRC_WORD6*)(&image.srd[6]))->bits.META_DATA_ADDRESS) << 8;
|
||||
@@ -450,9 +450,8 @@ hsa_status_t ImageManagerNv::PopulateImageSrd(Image& image) const {
|
||||
|
||||
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
|
||||
|
||||
uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(
|
||||
image.component, image.desc, image.tile_mode,
|
||||
image.row_pitch, image.slice_pitch, out);
|
||||
uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(image.component, image.desc,
|
||||
1, image.tile_mode, image.row_pitch, image.slice_pitch, out);
|
||||
if (swizzleMode == (uint32_t)(-1)) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
@@ -612,6 +611,7 @@ hsa_status_t ImageManagerNv::PopulateSamplerSrd(Sampler& sampler) const {
|
||||
|
||||
uint32_t ImageManagerNv::GetAddrlibSurfaceInfoNv(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
uint32_t num_mipmap_levels,
|
||||
Image::TileMode tileMode,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
@@ -627,7 +627,9 @@ uint32_t ImageManagerNv::GetAddrlibSurfaceInfoNv(
|
||||
const uint32_t num_slice = static_cast<uint32_t>(
|
||||
std::max(kMinNumSlice, std::max(desc.array_size, desc.depth)));
|
||||
|
||||
uint32_t minor_ver = MinorVerFromDevID(chip_id_);
|
||||
// Minor version used for future GPU-specific optimizations (currently unused)
|
||||
(void)MinorVerFromDevID(chip_id_);
|
||||
|
||||
ADDR2_COMPUTE_SURFACE_INFO_INPUT in = {0};
|
||||
in.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
|
||||
in.format = addrlib_format;
|
||||
@@ -635,9 +637,8 @@ uint32_t ImageManagerNv::GetAddrlibSurfaceInfoNv(
|
||||
in.width = width;
|
||||
in.height = height;
|
||||
in.numSlices = num_slice;
|
||||
// Custom Pitch is supported in gfx1030 and beyond
|
||||
if (minor_ver >= 3)
|
||||
in.pitchInElement = image_data_row_pitch / image_prop.element_size;
|
||||
in.numMipLevels = num_mipmap_levels;
|
||||
|
||||
switch (desc.geometry) {
|
||||
case HSA_EXT_IMAGE_GEOMETRY_1D:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_1DB:
|
||||
@@ -804,5 +805,421 @@ hsa_status_t ImageManagerNv::FillImage(const Image& image, const void* pattern,
|
||||
return status;
|
||||
}
|
||||
|
||||
/// @brief Build the hardware resource descriptor (SRD) of a mipmapped array
///        from its HSA image descriptor.
///
/// 1DB geometry is described with a buffer resource (SRD words 0-3). Every
/// other geometry is described with an image resource (SRD words 0-7) whose
/// swizzle mode, pitch and sizes come from GetAddrlibSurfaceInfoNv(). In both
/// cases words 8-11 receive the channel type, channel order, width and number
/// of mip levels.
///
/// @param mipmap In/out. desc, data, num_levels, component, tile_mode and the
///               incoming row/slice pitch are read; srd, row_pitch and
///               slice_pitch are written, plus size and addr_output.addr2 on
///               the image path.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR when the surface-info
///         query fails.
hsa_status_t ImageManagerNv::PopulateMipmapSrd(MipmappedArray& mipmap) const {
  const ImageProperty mipmap_prop =
      ImageLut().MapFormat(mipmap.desc.format, mipmap.desc.geometry);
  assert(mipmap_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(mipmap_prop.element_size != 0);
  assert(mipmap.num_levels >= 1);

  // Addresses inside local memory are rebased against the local memory base.
  const void* mipmap_data_addr = mipmap.data;
  if (IsLocalMemory(mipmap.data)) {
    mipmap_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(mipmap.data) - local_memory_base_address_);
  }

  if (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    SQ_BUF_RSRC_WORD0 word0;
    SQ_BUF_RSRC_WORD1 word1;
    SQ_BUF_RSRC_WORD2 word2;
    SQ_BUF_RSRC_WORD3 word3;

    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow32(mipmap_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh32(mipmap_data_addr);
    word1.f.STRIDE = mipmap_prop.element_size;
    word1.f.SWIZZLE_ENABLE = false;
    word1.f.CACHE_SWIZZLE = false;

    // Zero the whole word before setting the field so that no indeterminate
    // bits can reach the descriptor through word2.val below.
    word2.val = 0;
    word2.f.NUM_RECORDS = mipmap.desc.width * mipmap_prop.element_size;

    const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);
    word3.val = 0;
    word3.f.RESOURCE_LEVEL = 1;  // NV-specific resource level
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
    word3.f.INDEX_STRIDE = mipmap_prop.element_size;
    word3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry);

    mipmap.srd[0] = word0.val;
    mipmap.srd[1] = word1.val;
    mipmap.srd[2] = word2.val;
    mipmap.srd[3] = word3.val;

    mipmap.row_pitch = mipmap.desc.width * mipmap_prop.element_size;
    mipmap.slice_pitch = mipmap.row_pitch;
  } else {
    SQ_IMG_RSRC_WORD0 word0;
    SQ_IMG_RSRC_WORD1 word1;
    SQ_IMG_RSRC_WORD2 word2;
    SQ_IMG_RSRC_WORD3 word3;
    SQ_IMG_RSRC_WORD4 word4;
    SQ_IMG_RSRC_WORD5 word5;

    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};

    // pMipInfo not needed - set to nullptr and AddrLib will ignore it
    out.pMipInfo = nullptr;

    const uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(
        mipmap.component, mipmap.desc, mipmap.num_levels,
        mipmap.tile_mode, mipmap.row_pitch, mipmap.slice_pitch, out);
    if (swizzleMode == (uint32_t)(-1)) {
      return HSA_STATUS_ERROR;
    }
    mipmap.addr_output.addr2 = out;
    mipmap.size = out.surfSize;

    assert((out.bpp / 8) == mipmap_prop.element_size);

    const size_t row_pitch_size = out.pitch * mipmap_prop.element_size;

    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr);

    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr);
    word1.f.MIN_LOD = 0;
    word1.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
    // Only take the lowest 2 bits of (mipmap.desc.width - 1)
    word1.f.WIDTH = BitSelect<0, 1>(mipmap.desc.width - 1);

    word2.val = 0;
    // Take the high 12 bits of (mipmap.desc.width - 1)
    word2.f.WIDTH_HI = BitSelect<2, 13>(mipmap.desc.width - 1);
    word2.f.HEIGHT = mipmap.desc.height ? mipmap.desc.height - 1 : 0;
    word2.f.RESOURCE_LEVEL = 1;

    const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);
    word3.val = 0;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.SW_MODE = swizzleMode;
    // The full chain is exposed: levels [0, num_levels - 1].
    word3.f.BASE_LEVEL = 0;
    word3.f.LAST_LEVEL = mipmap.num_levels - 1;
    word3.f.BC_SWIZZLE = GetBcSwizzle(swizzle);
    word3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry);

    const bool is_array =
        (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
         mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA ||
         mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool is_3d = (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);

    // DEPTH holds (slices - 1) for arrays and (depth - 1) for 3D. Both are
    // clamped to at least one so a zero in the descriptor cannot wrap around.
    word4.val = 0;
    if (is_array) {
      word4.f.DEPTH = std::max(mipmap.desc.array_size, static_cast<size_t>(1)) - 1;
    } else if (is_3d) {
      word4.f.DEPTH = std::max(mipmap.desc.depth, static_cast<size_t>(1)) - 1;
    } else {
      word4.f.DEPTH = 0;
    }

    const uint32_t minor_ver = MinorVerFromDevID(chip_id_);
    // For 1d, 2d and 2d-msaa in gfx1030 and beyond this is pitch-1
    if ((minor_ver >= 3) && !is_array && !is_3d) {
      word4.f.PITCH = out.pitch - 1;
    }

    word5.val = 0;
    word5.f.MAX_MIP = mipmap.num_levels - 1;

    mipmap.srd[0] = word0.val;
    mipmap.srd[1] = word1.val;
    mipmap.srd[2] = word2.val;
    mipmap.srd[3] = word3.val;
    mipmap.srd[4] = word4.val;
    mipmap.srd[5] = word5.val;
    // Words 6 and 7 are left cleared.
    mipmap.srd[6] = 0;
    mipmap.srd[7] = 0;

    mipmap.row_pitch = row_pitch_size;
    mipmap.slice_pitch = out.sliceSize;
  }

  mipmap.srd[8] = mipmap.desc.format.channel_type;
  mipmap.srd[9] = mipmap.desc.format.channel_order;
  mipmap.srd[10] = static_cast<uint32_t>(mipmap.desc.width);

  // Mipmap-specific auxiliary fields
  mipmap.srd[11] = mipmap.num_levels;

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
void ImageManagerNv::printSRDDetailed(const uint32_t* srd) const {
|
||||
if (!srd) {
|
||||
printf("\n========== Image SRD (NV/GFX10) - Detailed ==========\n");
|
||||
printf("ERROR: No SRD data provided.\n");
|
||||
printf("===============================================\n\n");
|
||||
return;
|
||||
}
|
||||
|
||||
printf("\n========== Image SRD (NV/GFX10) - Detailed ==========\n");
|
||||
|
||||
// Print all 12 words with bit field annotations
|
||||
for (int i = 0; i < 12; i++) {
|
||||
printf("WORD %d: 0x%08x ", i, srd[i]);
|
||||
|
||||
// Binary representation
|
||||
printf("(");
|
||||
for (int bit = 31; bit >= 0; bit--) {
|
||||
printf("%d", (srd[i] >> bit) & 1);
|
||||
if (bit % 4 == 0 && bit != 0) printf("_");
|
||||
}
|
||||
printf(")\n");
|
||||
}
|
||||
|
||||
// WORD 0: BASE_ADDRESS (bits 39:8)
|
||||
SQ_IMG_RSRC_WORD0 word0;
|
||||
word0.val = srd[0];
|
||||
printf("\nWORD 0: BASE_ADDRESS (bits 39:8) = 0x%08x\n", word0.f.BASE_ADDRESS);
|
||||
|
||||
// WORD 1: Contains BASE_ADDRESS_HI, MIN_LOD, FORMAT, WIDTH (bits 1:0)
|
||||
SQ_IMG_RSRC_WORD1 word1;
|
||||
word1.val = srd[1];
|
||||
printf("WORD 1: BASE_ADDRESS_HI = 0x%02x\n", word1.f.BASE_ADDRESS_HI);
|
||||
printf(" MIN_LOD = %u\n", word1.f.MIN_LOD);
|
||||
printf(" FORMAT = %u ◄──── Combined format/type\n", word1.f.FORMAT);
|
||||
printf(" WIDTH (bits 1:0) = %u\n", word1.f.WIDTH);
|
||||
|
||||
// Calculate full address (NV uses 40-bit shifted by 8)
|
||||
uint64_t base_addr = ((uint64_t)word1.f.BASE_ADDRESS_HI << 40) | ((uint64_t)word0.f.BASE_ADDRESS << 8);
|
||||
printf(" → Full Base Address = 0x%016lx\n", base_addr);
|
||||
|
||||
// WORD 2: WIDTH_HI, HEIGHT, RESOURCE_LEVEL
|
||||
SQ_IMG_RSRC_WORD2 word2;
|
||||
word2.val = srd[2];
|
||||
printf("WORD 2: WIDTH_HI (bits 13:2) = %u\n", word2.f.WIDTH_HI);
|
||||
printf(" HEIGHT = %u\n", word2.f.HEIGHT);
|
||||
printf(" RESOURCE_LEVEL = %u ◄──── NV-specific field\n", word2.f.RESOURCE_LEVEL);
|
||||
|
||||
// Calculate full width (NV uses 14 bits split: 2 in WORD1 + 12 in WORD2)
|
||||
uint32_t full_width = word1.f.WIDTH | (word2.f.WIDTH_HI << 2);
|
||||
printf(" → Full Width = %u (actual: %u)\n", full_width, full_width + 1);
|
||||
printf(" → Full Height = %u (actual: %u)\n", word2.f.HEIGHT, word2.f.HEIGHT + 1);
|
||||
|
||||
// WORD 3: Channel selectors, SW_MODE, BASE_LEVEL, LAST_LEVEL, BC_SWIZZLE, TYPE
|
||||
SQ_IMG_RSRC_WORD3 word3;
|
||||
word3.val = srd[3];
|
||||
printf("WORD 3: DST_SEL_X = %u ", word3.f.DST_SEL_X);
|
||||
printChannelSelect(word3.f.DST_SEL_X);
|
||||
printf(" DST_SEL_Y = %u ", word3.f.DST_SEL_Y);
|
||||
printChannelSelect(word3.f.DST_SEL_Y);
|
||||
printf(" DST_SEL_Z = %u ", word3.f.DST_SEL_Z);
|
||||
printChannelSelect(word3.f.DST_SEL_Z);
|
||||
printf(" DST_SEL_W = %u ", word3.f.DST_SEL_W);
|
||||
printChannelSelect(word3.f.DST_SEL_W);
|
||||
printf(" SW_MODE = %u ", word3.f.SW_MODE);
|
||||
printSwizzleMode(word3.f.SW_MODE);
|
||||
printf(" BASE_LEVEL = %u ◄──── Current base level\n", word3.f.BASE_LEVEL);
|
||||
printf(" LAST_LEVEL = %u ◄──── Current last level\n", word3.f.LAST_LEVEL);
|
||||
printf(" BC_SWIZZLE = %u ◄──── Border color swizzle\n", word3.f.BC_SWIZZLE);
|
||||
printf(" TYPE = %u ", word3.f.TYPE);
|
||||
printResourceType(word3.f.TYPE);
|
||||
|
||||
// WORD 4: DEPTH, optionally PITCH
|
||||
SQ_IMG_RSRC_WORD4 word4;
|
||||
word4.val = srd[4];
|
||||
printf("WORD 4: DEPTH = %u\n", word4.f.DEPTH);
|
||||
|
||||
// Calculate effective depth based on geometry and chip version
|
||||
uint32_t type = word3.f.TYPE;
|
||||
uint32_t minor_ver = MinorVerFromDevID(chip_id_);
|
||||
|
||||
if (type == 10) { // 3D
|
||||
printf(" → 3D Depth = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1);
|
||||
} else if (type == 13 || type == 12) { // Arrays
|
||||
printf(" → Array Size = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1);
|
||||
} else if ((minor_ver >= 3) && (type == 8 || type == 9 || type == 14)) { // 1D/2D/2D_MSAA in GFX1030+
|
||||
printf(" PITCH = %u (actual: %u) ◄──── GFX1030+ pitch\n", word4.f.PITCH, word4.f.PITCH + 1);
|
||||
}
|
||||
|
||||
// WORD 5-7: Usually zero for basic images
|
||||
printf("WORD 5: Reserved = 0x%08x\n", srd[5]);
|
||||
printf("WORD 6: Reserved = 0x%08x\n", srd[6]);
|
||||
printf("WORD 7: Reserved = 0x%08x\n", srd[7]);
|
||||
|
||||
// Additional information (HSA extension fields)
|
||||
printf("WORD 8: CHANNEL_TYPE = 0x%08x\n", srd[8]);
|
||||
printf("WORD 9: CHANNEL_ORDER = 0x%08x\n", srd[9]);
|
||||
printf("WORD 10: WIDTH_ORIGINAL = 0x%08x\n", srd[10]);
|
||||
printf("WORD 11: NUM_LEVELS = 0x%08x\n", srd[11]);
|
||||
|
||||
// Mipmap analysis
|
||||
if (word3.f.LAST_LEVEL > word3.f.BASE_LEVEL || word3.f.LAST_LEVEL > 0) {
|
||||
printf("\nMIPMAP ANALYSIS:\n");
|
||||
printf(" Total Levels = %u\n", srd[11]);
|
||||
printf(" Min LOD = %u\n", word1.f.MIN_LOD);
|
||||
printf(" Active Range = [%u, %u]\n", word3.f.BASE_LEVEL, word3.f.LAST_LEVEL);
|
||||
printf(" Resource Level = %u\n", word2.f.RESOURCE_LEVEL);
|
||||
if (word3.f.BASE_LEVEL == word3.f.LAST_LEVEL) {
|
||||
printf(" Mode = SINGLE LEVEL VIEW ◄──── Mip level view\n");
|
||||
uint32_t level = word3.f.BASE_LEVEL;
|
||||
uint32_t level_width = std::max(1u, (full_width + 1) >> level);
|
||||
uint32_t level_height = std::max(1u, static_cast<uint32_t>((word2.f.HEIGHT + 1) >> level));
|
||||
printf(" Effective Dimensions = %ux%u (level %u)\n", level_width, level_height, level);
|
||||
} else {
|
||||
printf(" Mode = FULL MIPMAP CHAIN\n");
|
||||
}
|
||||
}
|
||||
printf("===============================================\n\n");
|
||||
}
|
||||
|
||||
/// @brief Print the symbolic name of an SRD destination-channel selector,
///        terminated by a newline.
/// @param sel Raw DST_SEL_* value taken from the resource descriptor.
void ImageManagerNv::printChannelSelect(uint32_t sel) const {
  // Selector values without a symbolic name (2, 3 and anything above 7)
  // keep the "(UNKNOWN)" default.
  const char* text = "(UNKNOWN)\n";
  if (sel == 0) {
    text = "(SEL_0)\n";
  } else if (sel == 1) {
    text = "(SEL_1)\n";
  } else if (sel == 4) {
    text = "(SEL_X/R)\n";
  } else if (sel == 5) {
    text = "(SEL_Y/G)\n";
  } else if (sel == 6) {
    text = "(SEL_Z/B)\n";
  } else if (sel == 7) {
    text = "(SEL_W/A)\n";
  }
  printf("%s", text);
}
|
||||
|
||||
/// @brief Print the symbolic name of an SRD resource TYPE value, terminated
///        by a newline.
/// @param type Raw TYPE field value taken from the resource descriptor.
void ImageManagerNv::printResourceType(uint32_t type) const {
  // Named values occupy [8, 15]; everything else is echoed numerically.
  static const char* const kNamedTypes[] = {
      "(1D)\n",           "(2D)\n",       "(3D)\n",      "(CUBE)\n",
      "(1D_ARRAY/1DB)\n", "(2D_ARRAY)\n", "(2D_MSAA)\n", "(2D_MSAA_ARRAY)\n"};
  const bool is_named = (type >= 8) && (type <= 15);

  if (is_named) {
    printf("%s", kNamedTypes[type - 8]);
  } else {
    printf("(UNKNOWN=%u)\n", type);
  }
}
|
||||
|
||||
/// @brief Print a symbolic name for an NV/GFX10 SW_MODE value, terminated by
///        a newline.
///
/// Mode 0 prints as LINEAR. Modes 1-4, 5-8, 9-12 and 13-21 print as the
/// SW_256B, SW_4KB, SW_64KB and SW_VAR families with a 1-based index inside
/// the family. Any larger value is echoed as UNKNOWN.
///
/// @param sw_mode Raw SW_MODE field value taken from the resource descriptor.
void ImageManagerNv::printSwizzleMode(uint32_t sw_mode) const {
  if (sw_mode == 0) {
    printf("(LINEAR)\n");
    return;
  }
  if (sw_mode >= 22) {
    printf("(UNKNOWN=%u)\n", sw_mode);
    return;
  }

  // Pick the family from the highest range downwards; family_base is the
  // last mode value belonging to the previous family.
  const char* family_name = "SW_256B";
  uint32_t family_base = 0;
  if (sw_mode >= 13) {
    family_name = "SW_VAR";
    family_base = 12;
  } else if (sw_mode >= 9) {
    family_name = "SW_64KB";
    family_base = 8;
  } else if (sw_mode >= 5) {
    family_name = "SW_4KB";
    family_base = 4;
  }
  printf("(%s_%u)\n", family_name, sw_mode - family_base);
}
|
||||
|
||||
/// @brief Restrict an SRD to a single mip level by setting BASE_LEVEL and
///        LAST_LEVEL in word 3 to the same value.
///
/// The SRD in @p level_view is expected to have been copied from the parent
/// already; only the two level fields are modified here.
///
/// @param level_view   In/out. View whose srd[3] is patched in place.
/// @param mipmap_array Parent mipmapped array; its num_levels bounds
///                     @p mip_level.
/// @param mip_level    Zero-based level to expose.
/// @return HSA_STATUS_SUCCESS, or HSA_STATUS_ERROR_INVALID_ARGUMENT when
///         @p mip_level is not a level of @p mipmap_array.
hsa_status_t ImageManagerNv::PopulateMipLevelSrd(
    MipmappedArray& level_view,
    const MipmappedArray& mipmap_array,
    uint32_t mip_level) const {
  // An out-of-range level would otherwise be truncated silently when stored
  // into the narrow SRD bitfields and select an unrelated level.
  if (mip_level >= mipmap_array.num_levels) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // SRD already copied from parent, just modify BASE_LEVEL/LAST_LEVEL fields
  uint32_t* srd_words = reinterpret_cast<uint32_t*>(level_view.srd);

  // WORD3 has BASE_LEVEL and LAST_LEVEL fields
  SQ_IMG_RSRC_WORD3* word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&srd_words[3]);

  // Set both to same value - hardware samples only this level
  word3->f.BASE_LEVEL = mip_level;
  word3->f.LAST_LEVEL = mip_level;

  debug_print("Set SRD mip selection: BASE_LEVEL=%u, LAST_LEVEL=%u", mip_level, mip_level);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
/// @brief Build the SRD of a mipmapped array from a pre-computed AMD metadata
///        descriptor.
///
/// SRD words 0-7 are copied from the metadata, then the base address, format,
/// channel selectors and MAX_MIP are overridden from @p mipmap_array. Words
/// 8-11 receive the channel type, channel order, width and level count. A
/// per-level ADDR2_MIP_INFO table is allocated and filled from the metadata
/// mip offsets, and the total size is derived from the last level.
///
/// @param mipmap_array In/out. desc, data and num_levels are read; srd,
///                     row_pitch, slice_pitch, size and
///                     addr_output.addr2.pMipInfo are written.
/// @param desc         Metadata descriptor, reinterpreted as
///                     metadata_amd_nv_t.
/// @return HSA_STATUS_SUCCESS; HSA_STATUS_ERROR_INVALID_ARGUMENT when
///         @p desc is null or the array has no levels;
///         HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the format is
///         not supported or its element size differs from the hardware pixel
///         size.
///
/// NOTE(review): the ADDR2_MIP_INFO table is allocated with new[] and stored
/// as a raw pointer; the code releasing it is not visible here - confirm the
/// owner uses delete[] and that a previous table is not overwritten.
/// NOTE(review): desc_nv->mip_offsets is indexed up to num_levels - 1; its
/// capacity is not visible here - confirm it is large enough.
hsa_status_t ImageManagerNv::PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const {
  // num_levels == 0 would make (num_levels - 1) wrap around below and index
  // the mip table out of bounds.
  if (desc == nullptr || mipmap_array.num_levels == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  const metadata_amd_nv_t* desc_nv = reinterpret_cast<const metadata_amd_nv_t*>(desc);

  const ImageProperty mipmap_prop =
      ImageLut().MapFormat(mipmap_array.desc.format, mipmap_array.desc.geometry);
  if (mipmap_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED || mipmap_prop.element_size == 0) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  // Reject mismatching formats before touching the SRD so a failed call does
  // not leave a half-written descriptor behind.
  const uint32_t hwPixelSize =
      ImageLut().GetPixelSize(mipmap_prop.data_format, mipmap_prop.data_type);
  if (mipmap_prop.element_size != hwPixelSize) {
    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  }

  const Swizzle swizzle = ImageLut().MapSwizzle(mipmap_array.desc.format.channel_order);

  // Addresses inside local memory are rebased against the local memory base.
  const void* mipmap_data_addr = mipmap_array.data;
  if (IsLocalMemory(mipmap_array.data)) {
    mipmap_data_addr = reinterpret_cast<const void*>(
        reinterpret_cast<uintptr_t>(mipmap_array.data) - local_memory_base_address_);
  }

  // Copy the pre-computed SRD words 0-7 from metadata
  mipmap_array.srd[0] = desc_nv->word0.u32All;
  mipmap_array.srd[1] = desc_nv->word1.u32All;
  mipmap_array.srd[2] = desc_nv->word2.u32All;
  mipmap_array.srd[3] = desc_nv->word3.u32All;
  mipmap_array.srd[4] = desc_nv->word4.u32All;
  mipmap_array.srd[5] = desc_nv->word5.u32All;
  mipmap_array.srd[6] = desc_nv->word6.u32All;
  mipmap_array.srd[7] = desc_nv->word7.u32All;

  // Override specific fields after copying
  SQ_IMG_RSRC_WORD0* word0 = reinterpret_cast<SQ_IMG_RSRC_WORD0*>(&mipmap_array.srd[0]);
  SQ_IMG_RSRC_WORD1* word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&mipmap_array.srd[1]);
  SQ_IMG_RSRC_WORD3* word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&mipmap_array.srd[3]);
  SQ_IMG_RSRC_WORD5* word5 = reinterpret_cast<SQ_IMG_RSRC_WORD5*>(&mipmap_array.srd[5]);

  word0->bits.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr);
  word1->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr);
  word1->bits.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type);
  word3->bits.DST_SEL_X = swizzle.x;
  word3->bits.DST_SEL_Y = swizzle.y;
  word3->bits.DST_SEL_Z = swizzle.z;
  word3->bits.DST_SEL_W = swizzle.w;
  word5->bits.MAX_MIP = mipmap_array.num_levels - 1;

  // Only the 1D geometries have TYPE re-derived from the HSA geometry; other
  // geometries keep the TYPE stored in the metadata.
  if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ||
      mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D) {
    word3->bits.TYPE = ImageLut().MapGeometry(mipmap_array.desc.geometry);
  }

  // Looks like this is only used for CPU copies.
  mipmap_array.row_pitch = 0;
  mipmap_array.slice_pitch = 0;

  // Store mipmap-specific metadata
  mipmap_array.srd[8] = mipmap_array.desc.format.channel_type;
  mipmap_array.srd[9] = mipmap_array.desc.format.channel_order;
  mipmap_array.srd[10] = static_cast<uint32_t>(mipmap_array.desc.width);
  mipmap_array.srd[11] = mipmap_array.num_levels;

  // Allocate and populate pMipInfo from metadata mip_offsets (ADDR2 for Nv)
  ADDR2_MIP_INFO* mip_info_storage = new ADDR2_MIP_INFO[mipmap_array.num_levels];
  memset(mip_info_storage, 0, sizeof(ADDR2_MIP_INFO) * mipmap_array.num_levels);

  // Extract per-level information from mip_offsets array
  for (uint32_t level = 0; level < mipmap_array.num_levels; level++) {
    ADDR2_MIP_INFO& info = mip_info_storage[level];

    // mip_offsets contains offset bits [39:8], shift left by 8 to get actual byte offset
    info.offset = static_cast<uint64_t>(desc_nv->mip_offsets[level]) << 8;

    // Calculate dimensions for this level (halve at each level, minimum 1)
    info.pitch = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.width >> level));
    info.height = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.height >> level));
    info.depth = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.depth >> level));
  }

  // Store pMipInfo in addr_output for later use by PopulateMipLevelSrd
  mipmap_array.addr_output.addr2.pMipInfo = mip_info_storage;

  // Total size: offset of the last level plus that level's own footprint.
  // Widen before multiplying so the product is computed in 64 bits.
  const ADDR2_MIP_INFO& last = mip_info_storage[mipmap_array.num_levels - 1];
  const uint64_t last_level_size = static_cast<uint64_t>(last.pitch) *
                                   last.height *
                                   last.depth *
                                   mipmap_prop.element_size;
  mipmap_array.size = last.offset + last_level_size;

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
} // namespace image
|
||||
} // namespace rocr
|
||||
|
||||
@@ -40,8 +40,8 @@
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef EXT_IMAGE_IMAGE_MANAGER_NV_H_
|
||||
#define EXT_IMAGE_IMAGE_MANAGER_NV_H_
|
||||
#ifndef EXT_IMAGE_IMAGE_MANAGER_NV_H_
|
||||
#define EXT_IMAGE_IMAGE_MANAGER_NV_H_
|
||||
|
||||
#include "addrlib/inc/addrinterface.h"
|
||||
#include "image_manager_kv.h"
|
||||
@@ -59,6 +59,7 @@ class ImageManagerNv : public ImageManagerKv {
|
||||
virtual hsa_status_t CalculateImageSizeAndAlignment(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& desc,
|
||||
hsa_ext_image_data_layout_t image_data_layout,
|
||||
uint32_t num_mipmap_levels,
|
||||
size_t image_data_row_pitch, size_t image_data_slice_pitch,
|
||||
hsa_ext_image_data_info_t& image_info) const;
|
||||
|
||||
@@ -79,13 +80,30 @@ class ImageManagerNv : public ImageManagerKv {
|
||||
/// @brief Fill image backing storage using agent copy.
|
||||
virtual hsa_status_t FillImage(const Image& image, const void* pattern,
|
||||
const hsa_ext_image_region_t& region);
|
||||
|
||||
/// @brief Fill mipmap structure with device specific mipmapped array object.
|
||||
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const;
|
||||
|
||||
/// @brief Fill mipmap structure with pre-computed AMD metadata descriptor.
|
||||
virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const;
|
||||
|
||||
/// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields
|
||||
virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view,
|
||||
const MipmappedArray& mipmap_array, uint32_t mip_level) const;
|
||||
|
||||
virtual void printSRDDetailed(const uint32_t* srd) const;
|
||||
virtual void printChannelSelect(uint32_t sel) const;
|
||||
virtual void printResourceType(uint32_t type) const;
|
||||
virtual void printSwizzleMode(uint32_t sw_mode) const;
|
||||
|
||||
protected:
|
||||
uint32_t GetAddrlibSurfaceInfoNv(hsa_agent_t component,
|
||||
const hsa_ext_image_descriptor_t& desc,
|
||||
Image::TileMode tileMode,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const;
|
||||
const hsa_ext_image_descriptor_t& desc,
|
||||
uint32_t num_mipmap_levels,
|
||||
Image::TileMode tileMode,
|
||||
size_t image_data_row_pitch,
|
||||
size_t image_data_slice_pitch,
|
||||
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const;
|
||||
|
||||
bool IsLocalMemory(const void* address) const;
|
||||
|
||||
@@ -95,4 +113,4 @@ class ImageManagerNv : public ImageManagerKv {
|
||||
|
||||
} // namespace image
|
||||
} // namespace rocr
|
||||
#endif // EXT_IMAGE_IMAGE_MANAGER_NV_H_
|
||||
#endif // EXT_IMAGE_IMAGE_MANAGER_NV_H_
|
||||
|
||||
@@ -44,11 +44,15 @@
|
||||
|
||||
#include <assert.h>
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <algorithm>
|
||||
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/inc/hsa_internal.h"
|
||||
#include "core/inc/hsa_ext_amd_impl.h"
|
||||
#include "core/inc/exceptions.h"
|
||||
#include "resource.h"
|
||||
#include "image_manager_kv.h"
|
||||
#include "image_manager_ai.h"
|
||||
@@ -57,9 +61,96 @@
|
||||
#include "image_manager_gfx12.h"
|
||||
#include "device_info.h"
|
||||
|
||||
|
||||
#define SINGLE_MIP_LEVEL 1
|
||||
|
||||
namespace rocr {
|
||||
namespace image {
|
||||
|
||||
static inline uint32_t ComputeMaxMipLevels(const hsa_ext_image_descriptor_t& d) {
|
||||
uint32_t w = d.width ? d.width : 1;
|
||||
uint32_t h = d.height ? d.height : 1;
|
||||
uint32_t depth = d.depth ? d.depth : 1;
|
||||
uint32_t dim_max = w;
|
||||
switch (d.geometry) {
|
||||
case HSA_EXT_IMAGE_GEOMETRY_1D:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_1DA:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_1DB:
|
||||
dim_max = w; break;
|
||||
case HSA_EXT_IMAGE_GEOMETRY_2D:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_2DA:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
|
||||
case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
|
||||
dim_max = std::max(w, h); break;
|
||||
case HSA_EXT_IMAGE_GEOMETRY_3D:
|
||||
dim_max = std::max(std::max(w, h), depth); break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
uint32_t levels = 0;
|
||||
while (dim_max > 0) { ++levels; dim_max >>= 1; }
|
||||
return (levels == 0) ? 1 : levels;
|
||||
}
|
||||
|
||||
/// @brief Compute the total data size and alignment of a mipmapped array.
///
/// Validates the requested level count against the full mip chain length for
/// the descriptor, then the format/geometry capability and the per-geometry
/// dimension limits of the agent, before asking the agent's image manager for
/// the size and alignment of all levels combined.
///
/// @param[in]  component          Agent that will access the array.
/// @param[in]  desc               Base-level image descriptor.
/// @param[in]  num_mipmap_levels  Requested number of levels; must be in
///                                [1, ComputeMaxMipLevels(desc)].
/// @param[in]  layout             Requested data layout.
/// @param[in]  row_pitch          Row pitch forwarded to the image manager.
/// @param[in]  slice_pitch        Slice pitch forwarded to the image manager.
/// @param[out] size_out           Total size; 0 on failure.
/// @param[out] alignment_out      Required alignment; 0 on failure.
///
/// @retval HSA_STATUS_ERROR_INVALID_ARGUMENT  Level count is out of range.
/// @retval HSA_STATUS_ERROR_INVALID_AGENT     No image manager is registered
///                                            for @p component.
/// @retval HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED  No capability for
///                                            the format/geometry pair.
/// @retval HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED    A dimension exceeds
///                                            the agent's limits.
hsa_status_t ImageRuntime::GetMipmapArraySizeAndAlignment(
    hsa_agent_t component,
    const hsa_ext_image_descriptor_t& desc,
    uint32_t num_mipmap_levels,
    hsa_ext_image_data_layout_t layout,
    size_t row_pitch,
    size_t slice_pitch,
    size_t& size_out,
    size_t& alignment_out) {
  // Outputs are well defined on every early return.
  size_out = 0;
  alignment_out = 0;

  if (num_mipmap_levels == 0 || num_mipmap_levels > ComputeMaxMipLevels(desc)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Validate the image format and geometry.
  uint32_t capability = 0;
  hsa_status_t status =
      GetImageCapability(component, desc.format, desc.geometry, capability);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }
  if (capability == 0) {
    return static_cast<hsa_status_t>(
        HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  }

  // image_manager() returns NULL for an agent without a registered manager;
  // reject that here instead of dereferencing it below.
  ImageManager* manager = image_manager(component);
  if (manager == nullptr) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  // Validate the image dimensions against the agent's limits.
  uint32_t max_width = 0;
  uint32_t max_height = 0;
  uint32_t max_depth = 0;
  uint32_t max_array_size = 0;
  manager->GetImageInfoMaxDimension(component, desc.geometry, max_width,
                                    max_height, max_depth, max_array_size);

  if (desc.width > max_width || desc.height > max_height ||
      desc.depth > max_depth || desc.array_size > max_array_size) {
    return static_cast<hsa_status_t>(
        HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED);
  }

  hsa_ext_image_data_info_t mipmap_info = {0};
  status = manager->CalculateImageSizeAndAlignment(
      component, desc, layout, num_mipmap_levels, row_pitch, slice_pitch,
      mipmap_info);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  alignment_out = mipmap_info.alignment;
  size_out = mipmap_info.size;
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
hsa_status_t FindKernelArgPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
assert(data != nullptr);
|
||||
|
||||
@@ -162,9 +253,6 @@ ImageRuntime* ImageRuntime::instance() {
|
||||
}
|
||||
|
||||
instance = CreateSingleton();
|
||||
if (instance == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// UnloadCallback = &ext_image::ImageRuntime::DestroySingleton;
|
||||
}
|
||||
@@ -178,13 +266,15 @@ ImageRuntime* ImageRuntime::CreateSingleton() {
|
||||
if (HSA_STATUS_SUCCESS != instance->blit_kernel_.Initialize()) {
|
||||
instance->Cleanup();
|
||||
delete instance;
|
||||
return NULL;
|
||||
throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
|
||||
"ImageRuntime: Failed to initialize blit kernel");
|
||||
}
|
||||
|
||||
if (HSA_STATUS_SUCCESS != HSA::hsa_iterate_agents(CreateImageManager, instance)) {
|
||||
instance->Cleanup();
|
||||
delete instance;
|
||||
return NULL;
|
||||
throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES,
|
||||
"ImageRuntime: Failed to create image managers");
|
||||
}
|
||||
|
||||
assert(instance->kernarg_pool_.handle != 0);
|
||||
@@ -350,8 +440,9 @@ hsa_status_t ImageRuntime::GetImageSizeAndAlignment(
|
||||
HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED);
|
||||
}
|
||||
|
||||
return manager->CalculateImageSizeAndAlignment(component, desc,
|
||||
image_data_layout, image_data_row_pitch, image_data_slice_pitch, image_info);
|
||||
return manager->CalculateImageSizeAndAlignment(
|
||||
component, desc, image_data_layout, SINGLE_MIP_LEVEL,
|
||||
image_data_row_pitch, image_data_slice_pitch, image_info);
|
||||
}
|
||||
|
||||
hsa_status_t ImageRuntime::CreateImageHandle(
|
||||
@@ -421,7 +512,7 @@ hsa_status_t ImageRuntime::CreateImageHandleWithLayout(
|
||||
|
||||
if(image_layout->version!=1)
|
||||
return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
|
||||
|
||||
|
||||
uint32_t id;
|
||||
HSA::hsa_agent_get_info(component, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CHIP_ID, &id);
|
||||
|
||||
@@ -448,6 +539,64 @@ hsa_status_t ImageRuntime::CreateImageHandleWithLayout(
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/// @brief Create a mipmapped array from an AMD-specific image layout
///        descriptor and return its handle.
///
/// @param[in]  component          Agent the array is created for.
/// @param[in]  mipmap_descriptor  Base-level image descriptor, stored as-is.
/// @param[in]  image_layout       AMD layout descriptor; must be version 1 and
///                                carry the device ID of @p component.
/// @param[in]  image_data         User-owned backing memory; must be a
///                                multiple of 256 bytes aligned.
/// @param[in]  access_permission  Access permission stored in the array.
/// @param[in]  num_mipmap_levels  Number of levels; must be non-zero.
/// @param[out] image_handle       Receives the handle; set to 0 on failure.
///
/// @retval HSA_STATUS_ERROR_INVALID_ALLOCATION  @p image_data is misaligned.
/// @retval HSA_STATUS_ERROR_INVALID_ARGUMENT    @p image_layout is NULL or
///                                              @p num_mipmap_levels is 0.
/// @retval HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED  Layout version or
///                                              device ID does not match.
/// @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES    No image manager for the agent
///                                              or allocation failed.
hsa_status_t ImageRuntime::CreateMipmapArrayHandleWithLayout(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& mipmap_descriptor,
    const hsa_amd_image_descriptor_t* image_layout,
    const void* image_data, const hsa_access_permission_t access_permission,
    uint32_t num_mipmap_levels,
    hsa_ext_image_t& image_handle) {
  image_handle.handle = 0;

  if (!IsMultipleOf(image_data, 256)) {
    return HSA_STATUS_ERROR_INVALID_ALLOCATION;
  }

  // The layout descriptor is dereferenced below; reject NULL explicitly.
  if (image_layout == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  if (image_layout->version != 1) {
    return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  }

  // Do not compare against an indeterminate chip ID if the query fails.
  uint32_t id = 0;
  const hsa_status_t query_status = HSA::hsa_agent_get_info(
      component, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_CHIP_ID), &id);
  if (query_status != HSA_STATUS_SUCCESS) {
    return query_status;
  }

  // The layout must have been produced for this exact device
  // (0x1002 in the upper 16 bits, chip ID in the lower bits).
  if (image_layout->deviceID != (0x1002 << 16 | id)) {
    return static_cast<hsa_status_t>(HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED);
  }

  if (num_mipmap_levels == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Resolve the manager before allocating so the failure path has nothing to
  // release.
  ImageManager* manager = image_manager(component);
  if (!manager) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  MipmappedArray* mipmap_array = MipmappedArray::Create(component);
  if (!mipmap_array) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  mipmap_array->component = component;
  mipmap_array->desc = mipmap_descriptor;
  mipmap_array->permission = access_permission;
  mipmap_array->num_levels = num_mipmap_levels;
  mipmap_array->data = const_cast<void*>(image_data);
  mipmap_array->flags = 0;

  // The AMD layout descriptor is reinterpreted as the metadata blob consumed
  // by the SRD population code.
  const metadata_amd_t* desc = reinterpret_cast<const metadata_amd_t*>(image_layout);

  const hsa_status_t status = manager->PopulateMipmapSrd(*mipmap_array, desc);
  if (status != HSA_STATUS_SUCCESS) {
    MipmappedArray::Destroy(mipmap_array);
    return status;
  }

  image_handle.handle = mipmap_array->Convert();
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
hsa_status_t ImageRuntime::DestroyImageHandle(
|
||||
const hsa_ext_image_t& image_handle) {
|
||||
const Image* image = Image::Convert(image_handle.handle);
|
||||
@@ -574,6 +723,154 @@ hsa_status_t ImageRuntime::DestroySamplerHandle(
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/// @brief Create a mipmapped array object over user-provided memory and
///        return its handle.
///
/// @param[in]  component               Agent the array is created for.
/// @param[in]  mipmap_descriptor       Base-level descriptor; width must be
///                                     non-zero.
/// @param[in]  image_data              User-owned backing memory; must be
///                                     non-NULL and satisfy the alignment
///                                     reported by
///                                     GetMipmapArraySizeAndAlignment().
/// @param[in]  access_permission       Access permission stored in the array.
/// @param[in]  num_mipmap_levels       Number of levels; must be non-zero and
///                                     within the full chain length.
/// @param[in]  mipmap_layout           Requested data layout.
/// @param[in]  image_data_row_pitch    Row pitch used for size validation.
/// @param[in]  image_data_slice_pitch  Slice pitch used for size validation.
/// @param[out] image_handle            Receives the handle; 0 on failure.
///
/// @retval HSA_STATUS_ERROR_INVALID_ARGUMENT  Zero width/level count, or
///                                            @p image_data is NULL or
///                                            misaligned.
/// @retval HSA_STATUS_ERROR_INVALID_AGENT     No image manager for the agent.
/// @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES  Allocation of the array failed.
hsa_status_t ImageRuntime::CreateMipmapArrayHandle(
    hsa_agent_t component, const hsa_ext_image_descriptor_t& mipmap_descriptor,
    const void* image_data, const hsa_access_permission_t access_permission,
    uint32_t num_mipmap_levels,
    const hsa_ext_image_data_layout_t mipmap_layout,
    size_t image_data_row_pitch, size_t image_data_slice_pitch,
    hsa_ext_image_t& image_handle) {
  image_handle.handle = 0;
  if (mipmap_descriptor.width == 0 || num_mipmap_levels == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  ImageManager* manager = image_manager(component);
  if (!manager) return HSA_STATUS_ERROR_INVALID_AGENT;

  // Validate mipmap array size and alignment requirements.
  size_t required_size = 0;
  size_t required_alignment = 0;
  hsa_status_t status = GetMipmapArraySizeAndAlignment(
      component, mipmap_descriptor, num_mipmap_levels, mipmap_layout,
      image_data_row_pitch, image_data_slice_pitch, required_size,
      required_alignment);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // Reject NULL or misaligned backing memory at run time. An assert alone
  // vanishes in release builds and would let a bad pointer reach the SRD.
  // The zero-alignment guard avoids a modulo by zero inside IsMultipleOf.
  if (image_data == nullptr ||
      (required_alignment != 0 && !IsMultipleOf(image_data, required_alignment))) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Create a new mipmapped array object.
  MipmappedArray* mipmap_array = MipmappedArray::Create(component);
  if (!mipmap_array) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  // Determine the tile mode:
  //  - 1DB (1D buffered) geometry is always LINEAR,
  //  - an explicit LINEAR layout forces linear addressing,
  //  - otherwise (OPAQUE) the address library chooses the swizzle mode.
  const bool force_linear =
      mipmap_descriptor.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB ||
      mipmap_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR;
  mipmap_array->tile_mode =
      force_linear ? Image::TileMode::LINEAR : Image::TileMode::TILED;

  debug_print("Tile mode = %u (0: LINEAR, 1: TILED)", mipmap_array->tile_mode);

  // Initialize the mipmapped array object.
  mipmap_array->component = component;
  mipmap_array->data = const_cast<void*>(image_data);
  mipmap_array->desc = mipmap_descriptor;
  mipmap_array->permission = access_permission;
  mipmap_array->num_levels = num_mipmap_levels;
  mipmap_array->flags = 0;

  // NOTE(review): the result of PopulateMipmapSrd() is not inspected here,
  // unlike in CreateMipmapArrayHandleWithLayout(); confirm whether a failure
  // should destroy the array and be propagated.
  manager->PopulateMipmapSrd(*mipmap_array);
  debug_print("Populating mipmapped array SRD...");
  if (core::Runtime::runtime_singleton_->flag().image_print_srd())
    mipmap_array->printSRD();

  manager->printSRDDetailed(mipmap_array->srd);

  image_handle.handle = mipmap_array->Convert();
  debug_print("output handle = %lu", image_handle.handle);
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
/// @brief Destroy the mipmapped array object referenced by @p image_handle.
///
/// Only the runtime-side object is released; the image data it points to
/// stays owned by the caller.
///
/// @param[in] image_handle Handle previously returned for a mipmapped array.
/// @retval HSA_STATUS_ERROR_INVALID_ARGUMENT The handle is zero.
/// @retval HSA_STATUS_SUCCESS                The object was destroyed.
hsa_status_t ImageRuntime::DestroyMipmapArrayHandle(
    const hsa_ext_image_t& image_handle) {
  // A handle is the address of the object's SRD, so Convert() subtracts a
  // fixed offset and maps a zero handle to a non-null garbage pointer.
  // Test the handle itself rather than the converted pointer.
  if (image_handle.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const MipmappedArray* mipmap_array = MipmappedArray::Convert(image_handle.handle);
  if (mipmap_array == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Destroy() accepts a pointer-to-const, so no const_cast is required.
  MipmappedArray::Destroy(mipmap_array);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
/// @brief Create a view onto a single level of a mipmapped array.
///
/// The view is a copy of the parent object whose SRD is then adjusted by the
/// agent's image manager to select @p mip_level. It refers to the parent's
/// image data; no pixel data is copied.
///
/// @param[in]  component        Agent that owns the mipmapped array.
/// @param[in]  mipmapped_array  Handle of the parent mipmapped array.
/// @param[in]  mip_level        Level to select (0 is the base level); must be
///                              less than the parent's level count.
/// @param[out] level_image_out  Receives the view handle; 0 on failure.
///
/// @retval HSA_STATUS_ERROR_INVALID_ARGUMENT  Hardware older than GFX9, zero or
///                                            invalid handle, or level out of
///                                            range.
/// @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES  No image manager for the agent or
///                                            allocation of the view failed.
hsa_status_t ImageRuntime::GetMipmapArrayLevelHandle(
    hsa_agent_t component, const hsa_ext_image_t& mipmapped_array,
    uint32_t mip_level, hsa_ext_image_t& level_image_out) {
  level_image_out.handle = 0;

  // Get GPU architecture version.
  uint32_t chip_id;
  hsa_status_t status = GetGPUAsicID(component, &chip_id);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }
  const uint32_t major_ver = MajorVerFromDevID(chip_id);
  if (major_ver < 9) {
    debug_print("ERROR: Mip level views not supported on GFX%u hardware\n", major_ver);
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // mip_level is unsigned, so only the upper bound needs checking (below).

  // A handle is the address of the object's SRD; Convert() subtracts a fixed
  // offset, so a zero handle must be rejected before conversion.
  if (mipmapped_array.handle == 0) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // Convert handle to internal object and perform basic sanity checks.
  MipmappedArray* array = MipmappedArray::Convert(mipmapped_array.handle);
  if (!array || array->num_levels == 0 || mip_level >= array->num_levels) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  debug_print("Creating mip level %u view for %u level mipmap\n",
              mip_level, array->num_levels);

  // Resolve the manager before allocating so this failure path has nothing
  // to release.
  ImageManager* manager = image_manager(component);
  if (!manager) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  // Create a view that references the parent mipmap array.
  MipmappedArray* level_view = MipmappedArray::Create(component);
  if (!level_view) return HSA_STATUS_ERROR_OUT_OF_RESOURCES;

  // Copy the entire parent structure; srd is a fixed-size array member and
  // is therefore copied by value.
  *level_view = *array;

  // Modify the SRD so that it selects only the requested mip level.
  status = manager->PopulateMipLevelSrd(*level_view, *array, mip_level);
  if (status != HSA_STATUS_SUCCESS) {
    MipmappedArray::Destroy(level_view);
    return status;
  }

  debug_print("Created mip level view using SRD fields");
  if (core::Runtime::runtime_singleton_->flag().image_print_srd())
    level_view->printSRD();

  manager->printSRDDetailed(level_view->srd);

  // Return handle.
  level_image_out.handle = level_view->Convert();
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
ImageRuntime::ImageRuntime()
|
||||
: cpu_l2_cache_size_(0), kernarg_pool_({0}) {}
|
||||
|
||||
|
||||
@@ -103,6 +103,14 @@ class ImageRuntime {
|
||||
const void* image_data, const hsa_access_permission_t access_permission,
|
||||
hsa_ext_image_t& image);
|
||||
|
||||
/// @brief Create mipmapped array object with AMD-specific layout and return its handle.
|
||||
hsa_status_t CreateMipmapArrayHandleWithLayout(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& mipmap_descriptor,
|
||||
const hsa_amd_image_descriptor_t* image_layout,
|
||||
const void* image_data, const hsa_access_permission_t access_permission,
|
||||
uint32_t num_mipmap_levels,
|
||||
hsa_ext_image_t& image_handle);
|
||||
|
||||
/// @brief Destroy the device image object referenced by the handle.
|
||||
hsa_status_t DestroyImageHandle(const hsa_ext_image_t& image);
|
||||
|
||||
@@ -137,6 +145,34 @@ class ImageRuntime {
|
||||
/// @brief Destroy the device sampler object referenced by the handle.
|
||||
hsa_status_t DestroySamplerHandle(hsa_ext_sampler_t& sampler);
|
||||
|
||||
/// @brief Create device Mipmap array object and return its handle
|
||||
hsa_status_t CreateMipmapArrayHandle(
|
||||
hsa_agent_t component, const hsa_ext_image_descriptor_t& mipmap_descriptor,
|
||||
const void* image_data, const hsa_access_permission_t access_permission,
|
||||
uint32_t num_mipmap_levels,
|
||||
const hsa_ext_image_data_layout_t mipmap_layout,
|
||||
size_t image_data_row_pitch, size_t image_data_slice_pitch,
|
||||
hsa_ext_image_t& image_handle);
|
||||
|
||||
/// @brief Compute the total data size and alignment of a mipmapped array after validating the level count, format and dimensions.
|
||||
hsa_status_t GetMipmapArraySizeAndAlignment(
|
||||
hsa_agent_t component,
|
||||
const hsa_ext_image_descriptor_t& desc,
|
||||
uint32_t num_mipmap_levels,
|
||||
hsa_ext_image_data_layout_t layout,
|
||||
size_t row_pitch,
|
||||
size_t slice_pitch,
|
||||
size_t& size_out,
|
||||
size_t& alignment_out);
|
||||
|
||||
/// @brief Destroy the mipmapped array object referenced by the handle.
|
||||
hsa_status_t DestroyMipmapArrayHandle(const hsa_ext_image_t& image_handle);
|
||||
|
||||
/// @brief Get the handle for a specific mipmap level in a mipmapped array.
|
||||
hsa_status_t GetMipmapArrayLevelHandle(
|
||||
hsa_agent_t agent, const hsa_ext_image_t& mipmapped_array,
|
||||
uint32_t mip_level, hsa_ext_image_t& level_image_out);
|
||||
|
||||
ImageManager* image_manager(hsa_agent_t agent) {
|
||||
std::map<uint64_t, ImageManager*>::iterator it = image_managers_.find(agent.handle);
|
||||
return (it != image_managers_.end()) ? it->second : NULL;
|
||||
|
||||
@@ -49,6 +49,7 @@
|
||||
|
||||
#include "inc/hsa.h"
|
||||
#include "inc/hsa_ext_image.h"
|
||||
#include "addrlib/inc/addrinterface.h"
|
||||
|
||||
#include "util.h"
|
||||
|
||||
@@ -97,20 +98,21 @@ typedef struct ImageProperty {
|
||||
|
||||
/// @brief Structure to represent an HSA image object.
|
||||
typedef struct Image {
|
||||
private:
|
||||
Image() {
|
||||
protected:
|
||||
Image()
|
||||
: data(nullptr),
|
||||
row_pitch(0),
|
||||
slice_pitch(0) {
|
||||
component.handle = 0;
|
||||
permission = HSA_ACCESS_PERMISSION_RO;
|
||||
data = NULL;
|
||||
std::memset(srd, 0, sizeof(srd));
|
||||
std::memset(&desc, 0, sizeof(desc));
|
||||
row_pitch = slice_pitch = 0;
|
||||
tile_mode = LINEAR;
|
||||
}
|
||||
|
||||
~Image() {}
|
||||
virtual ~Image() {}
|
||||
|
||||
public:
|
||||
public:
|
||||
typedef enum TileMode {
|
||||
LINEAR,
|
||||
TILED
|
||||
@@ -127,7 +129,11 @@ public:
|
||||
|
||||
/// @brief Convert from HSA handle to vendor representation.
|
||||
static Image* Convert(uint64_t handle) {
|
||||
return reinterpret_cast<Image*>(handle - offsetof(Image, srd));
|
||||
// Compute offset manually to avoid offsetof warning with virtual destructor
|
||||
Image* dummy = nullptr;
|
||||
const ptrdiff_t srd_offset =
|
||||
reinterpret_cast<const char*>(&dummy->srd) - reinterpret_cast<const char*>(dummy);
|
||||
return reinterpret_cast<Image*>(handle - srd_offset);
|
||||
}
|
||||
|
||||
// Vendor specific image object.
|
||||
@@ -202,6 +208,61 @@ public:
|
||||
hsa_ext_sampler_descriptor_v2_t desc;
|
||||
} Sampler;
|
||||
|
||||
/// @brief Structure representing a mipmapped image array.
|
||||
typedef struct MipmappedArray : public Image {
|
||||
private:
|
||||
MipmappedArray()
|
||||
: size(0),
|
||||
num_levels(0),
|
||||
flags(0) {
|
||||
component.handle = 0;
|
||||
std::memset(srd, 0, sizeof(srd));
|
||||
std::memset(&desc, 0, sizeof(desc));
|
||||
permission = HSA_ACCESS_PERMISSION_RO;
|
||||
std::memset(&addr_output, 0, sizeof(addr_output));
|
||||
tile_mode = LINEAR;
|
||||
}
|
||||
|
||||
~MipmappedArray() {}
|
||||
|
||||
public:
|
||||
/// @brief Create a MipmappedArray.
|
||||
/// Only internal metadata is allocated; image data must be provided by the user.
|
||||
static MipmappedArray* Create(hsa_agent_t agent);
|
||||
|
||||
/// @brief Destroy a MipmappedArray.
|
||||
static void Destroy(const MipmappedArray* array);
|
||||
|
||||
/// @brief Convert from vendor representation to HSA handle.
|
||||
uint64_t Convert() const { return reinterpret_cast<uint64_t>(srd); }
|
||||
|
||||
/// @brief Convert from HSA handle to vendor representation.
|
||||
static MipmappedArray* Convert(uint64_t handle) {
|
||||
// Compute offset manually to avoid offsetof warning with virtual destructor
|
||||
MipmappedArray* dummy = nullptr;
|
||||
const ptrdiff_t srd_offset =
|
||||
reinterpret_cast<const char*>(&dummy->srd) - reinterpret_cast<const char*>(dummy);
|
||||
return reinterpret_cast<MipmappedArray*>(handle - srd_offset);
|
||||
}
|
||||
|
||||
// Total size of the allocated memory.
|
||||
size_t size;
|
||||
|
||||
// Number of mipmap levels.
|
||||
uint32_t num_levels;
|
||||
|
||||
// Reserved
|
||||
uint32_t flags;
|
||||
|
||||
// Cached surface info.
|
||||
union {
|
||||
ADDR_COMPUTE_SURFACE_INFO_OUTPUT addr1; // Pre-GFX9 versions
|
||||
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT addr2; // GFX9 and later
|
||||
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT addr3; // GFX10 and later
|
||||
} addr_output;
|
||||
|
||||
} MipmappedArray;
|
||||
|
||||
} // namespace image
|
||||
} // namespace rocr
|
||||
#endif // HSA_RUNTIME_EXT_IMAGE_RESOURCE_H
|
||||
|
||||
@@ -356,6 +356,56 @@ typedef struct hsa_ext_image_descriptor_s {
|
||||
hsa_ext_image_format_t format;
|
||||
} hsa_ext_image_descriptor_t;
|
||||
|
||||
/**
|
||||
* @brief Implementation independent image descriptor (Version 2).
|
||||
*
|
||||
* @details This version adds mipmap support, allowing both regular images
|
||||
* (mipmap_levels = 0 or 1) and mipmapped arrays (mipmap_levels > 1) to be
|
||||
* created with a single unified API.
|
||||
*/
|
||||
typedef struct hsa_ext_image_descriptor_v2_s {
|
||||
/**
|
||||
* Image geometry.
|
||||
*/
|
||||
hsa_ext_image_geometry_t geometry;
|
||||
/**
|
||||
* Width of the image, in components.
|
||||
*/
|
||||
size_t width;
|
||||
/**
|
||||
* Height of the image, in components. Only used if the geometry is
|
||||
* ::HSA_EXT_IMAGE_GEOMETRY_2D, ::HSA_EXT_IMAGE_GEOMETRY_3D,
|
||||
* ::HSA_EXT_IMAGE_GEOMETRY_2DA, ::HSA_EXT_IMAGE_GEOMETRY_2DDEPTH, or
|
||||
* ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH, otherwise must be 0.
|
||||
*/
|
||||
size_t height;
|
||||
/**
|
||||
* Depth of the image, in components. Only used if the geometry is
|
||||
* ::HSA_EXT_IMAGE_GEOMETRY_3D, otherwise must be 0.
|
||||
*/
|
||||
size_t depth;
|
||||
/**
|
||||
* Number of image layers in the image array. Only used if the geometry is
|
||||
* ::HSA_EXT_IMAGE_GEOMETRY_1DA, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or
|
||||
* ::HSA_EXT_IMAGE_GEOMETRY_2DADEPTH, otherwise must be 0.
|
||||
*/
|
||||
size_t array_size;
|
||||
/**
|
||||
* Image format.
|
||||
*/
|
||||
hsa_ext_image_format_t format;
|
||||
/**
|
||||
* Number of mipmap levels.
|
||||
* - 0 or 1: Regular single-level image (default behavior)
|
||||
* - >1: Mipmapped array with multiple levels
|
||||
*
|
||||
* When mipmap_levels > 1, the image is treated as a complete mipmap chain.
|
||||
* The maximum valid value is determined by the image dimensions and can be
|
||||
* queried using ::hsa_ext_image_data_get_info_v2.
|
||||
*/
|
||||
size_t mipmap_levels;
|
||||
} hsa_ext_image_descriptor_v2_t;
|
||||
|
||||
/**
|
||||
* @brief Image capability.
|
||||
*/
|
||||
@@ -663,6 +713,48 @@ hsa_status_t HSA_API hsa_ext_image_data_get_info_with_layout(
|
||||
size_t image_data_slice_pitch,
|
||||
hsa_ext_image_data_info_t *image_data_info);
|
||||
|
||||
/**
|
||||
* @brief Retrieve image data requirements with unified mipmap support (V2 API).
|
||||
*
|
||||
* @details This is a unified API that handles both regular images (mipmap_levels = 0 or 1)
|
||||
* and mipmapped arrays (mipmap_levels > 1).
|
||||
*
|
||||
* For regular images:
|
||||
* - Set image_descriptor->mipmap_levels to 0 or 1
|
||||
* - Returns size/alignment for a single image level
|
||||
*
|
||||
* For mipmapped arrays:
|
||||
* - Set image_descriptor->mipmap_levels to desired level count (> 1)
|
||||
* - Returns total size/alignment for all mip levels combined
|
||||
* - The maximum valid mipmap_levels is computed from image dimensions
|
||||
*
|
||||
* @param[in] agent Agent that will access the image.
|
||||
*
|
||||
* @param[in] image_descriptor Pointer to a V2 image descriptor. Must not be NULL.
|
||||
*
|
||||
* @param[in] access_permission Access permission when the image is accessed by the agent.
|
||||
*
|
||||
* @param[out] image_data_info Memory location where the runtime stores the size and
|
||||
* alignment requirements. Must not be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been initialized.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The image format is not
|
||||
* supported for the specified access permission.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The image dimensions are not
|
||||
* supported for the specified access permission.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL,
|
||||
* @p mipmap_levels exceeds maximum for image dimensions, @p access_permission is invalid,
|
||||
* or @p image_data_info is NULL.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_image_data_get_info_v2(
|
||||
hsa_agent_t agent, const hsa_ext_image_descriptor_v2_t* image_descriptor,
|
||||
hsa_access_permission_t access_permission, hsa_ext_image_data_info_t* image_data_info);
|
||||
|
||||
/**
|
||||
* @brief Creates an agent specific image handle to an image with an
|
||||
* opaque image data layout.
|
||||
@@ -864,6 +956,105 @@ hsa_status_t HSA_API hsa_ext_image_destroy(
|
||||
hsa_agent_t agent,
|
||||
hsa_ext_image_t image);
|
||||
|
||||
/**
|
||||
* @brief Creates an agent specific image handle with unified mipmap support (V2 API).
|
||||
*
|
||||
* @details This is a unified API that handles both regular images (mipmap_levels = 0 or 1)
|
||||
* and mipmapped arrays (mipmap_levels > 1). This simplifies the API surface and aligns
|
||||
* with modern graphics API conventions where all images are conceptually mipmapped.
|
||||
*
|
||||
* For regular images:
|
||||
* - Set image_descriptor->mipmap_levels to 0 or 1
|
||||
* - Behavior is identical to ::hsa_ext_image_create
|
||||
*
|
||||
* For mipmapped arrays:
|
||||
* - Set image_descriptor->mipmap_levels to the desired level count (> 1)
|
||||
* - Behavior is identical to ::hsa_amd_mipmap_array_create
|
||||
* - The image_data must contain all mip levels laid out sequentially
|
||||
*
|
||||
* @param[in] agent Agent to be associated with the image handle created.
|
||||
*
|
||||
* @param[in] image_descriptor Pointer to a V2 image descriptor. Must not be NULL.
|
||||
*
|
||||
* @param[in] image_data Image data buffer allocated according to size and alignment
|
||||
* requirements from ::hsa_ext_image_data_get_info_v2. Must not be NULL.
|
||||
*
|
||||
* @param[in] access_permission Access permission of the image when accessed by agent.
|
||||
*
|
||||
* @param[out] image Pointer to memory location where the HSA runtime stores the
|
||||
* newly created image handle. Must not be NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The agent does not support
|
||||
* the image format for the specified access permission.
|
||||
*
|
||||
* @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The agent does not support
|
||||
* the image dimensions for the specified access permission.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p image_data
|
||||
* is NULL, @p image_data does not have valid alignment, @p access_permission is invalid,
|
||||
* @p mipmap_levels exceeds maximum for image dimensions, or @p image is NULL.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate
|
||||
* required resources.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_image_create_v2(hsa_agent_t agent,
|
||||
const hsa_ext_image_descriptor_v2_t* image_descriptor,
|
||||
const void* image_data,
|
||||
hsa_access_permission_t access_permission,
|
||||
hsa_ext_image_t* image);
|
||||
|
||||
/**
|
||||
* @brief Destroys an image handle created with ::hsa_ext_image_create_v2.
|
||||
*
|
||||
* @details This function can destroy both regular images and mipmapped arrays
|
||||
* created with ::hsa_ext_image_create_v2. It does not free the image_data memory,
|
||||
* which remains the responsibility of the caller.
|
||||
*
|
||||
* @param[in] agent Agent associated with the image handle.
|
||||
*
|
||||
* @param[in] image Image handle to destroy.
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS The function has been executed successfully.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been initialized.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image is invalid.
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_image_destroy_v2(hsa_agent_t agent, hsa_ext_image_t image);
|
||||
|
||||
/**
|
||||
* @brief Create an image view for a specific mip level of a mipmapped array.
|
||||
*
|
||||
* @param[in] agent : GPU agent
|
||||
* @param[in] mipmapped_array : Pointer to the mipmapped array handle previously
|
||||
* created by hsa_amd_mipmap_array_create
|
||||
* @param[in] mip_level : Level index (0 = base). Must be < array's num levels.
|
||||
* @param[out] level_image_out : Output image handle for the level view
|
||||
*
|
||||
* @details
|
||||
* - Dimensions are clamped to at least 1 when shifting (right shift per level).
|
||||
* - Row/slice pitches follow underlying layout; for tiled images internal
|
||||
* SRD setup derives pitches; for linear layout the base pitches may
|
||||
* be adjusted if required per level (future enhancement).
|
||||
* - The view inherits access permissions from the parent array.
|
||||
*
|
||||
* @retval HSA_STATUS_SUCCESS
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT (null pointers, bad level, bad handle)
|
||||
* @retval HSA_STATUS_ERROR_OUT_OF_RESOURCES (allocation of view metadata failed)
|
||||
*/
|
||||
hsa_status_t HSA_API hsa_ext_image_mipmap_array_get_level(hsa_agent_t agent,
|
||||
const hsa_ext_image_t* mipmapped_array,
|
||||
uint32_t mip_level,
|
||||
hsa_ext_image_t* level_image_out);
|
||||
|
||||
/**
|
||||
* @brief Copies a portion of one image (the source) to another image (the
|
||||
* destination).
|
||||
|
||||
Ссылка в новой задаче
Block a user