From be375c2dbf78fb7c306481f4e444b8e9ff1ef343 Mon Sep 17 00:00:00 2001 From: Apurv Mishra Date: Thu, 8 Jan 2026 18:14:39 -0500 Subject: [PATCH] rocr: Add support for Mipmapped Array (#1847) SWDEV-539526 - Add support for Mipmapped Array in Rocr Add support for Mipmapped Array functionality in Rocr Runtime, enabling GPU applications to work with multi-level texture mipmaps. The implementation introduces new public APIs for creating, querying, and managing mipmapped arrays across different GPU architectures. Signed-off-by: Apurv Mishra Co-authored-by: Shweta Khatri Co-authored-by: taosang2 --- .../hsa-runtime/core/inc/hsa_ext_interface.h | 5 + .../core/runtime/hsa_ext_interface.cpp | 33 ++ .../runtime/hsa-runtime/hsacore.dll.def | 6 +- .../runtime/hsa-runtime/hsacore.so.def | 4 + .../hsa-runtime/image/hsa_ext_image.cpp | 195 ++++++ .../hsa-runtime/image/image_manager.cpp | 32 + .../runtime/hsa-runtime/image/image_manager.h | 20 + .../hsa-runtime/image/image_manager_ai.cpp | 418 ++++++++++++- .../hsa-runtime/image/image_manager_ai.h | 27 +- .../hsa-runtime/image/image_manager_gfx11.cpp | 421 ++++++++++++- .../hsa-runtime/image/image_manager_gfx11.h | 18 + .../hsa-runtime/image/image_manager_gfx12.cpp | 555 ++++++++++++++++-- .../hsa-runtime/image/image_manager_gfx12.h | 18 + .../hsa-runtime/image/image_manager_kv.cpp | 159 ++++- .../hsa-runtime/image/image_manager_kv.h | 18 + .../hsa-runtime/image/image_manager_nv.cpp | 441 +++++++++++++- .../hsa-runtime/image/image_manager_nv.h | 34 +- .../hsa-runtime/image/image_runtime.cpp | 313 +++++++++- .../runtime/hsa-runtime/image/image_runtime.h | 36 ++ .../runtime/hsa-runtime/image/resource.h | 75 ++- .../runtime/hsa-runtime/inc/hsa_ext_image.h | 191 ++++++ 21 files changed, 2918 insertions(+), 101 deletions(-) mode change 100755 => 100644 projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_kv.h diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_interface.h 
b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_interface.h index 44ff83142c..12f1275766 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_interface.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_ext_interface.h @@ -55,6 +55,11 @@ namespace rocr { namespace core { struct ImageExtTableInternal : public ImageExtTable { decltype(::hsa_amd_image_get_info_max_dim)* hsa_amd_image_get_info_max_dim_fn; + // V2 unified APIs for images and mipmaps + decltype(::hsa_ext_image_data_get_info_v2)* hsa_ext_image_data_get_info_v2_fn; + decltype(::hsa_ext_image_create_v2)* hsa_ext_image_create_v2_fn; + decltype(::hsa_ext_image_destroy_v2)* hsa_ext_image_destroy_v2_fn; + decltype(::hsa_ext_image_mipmap_array_get_level)* hsa_ext_image_mipmap_array_get_level_fn; }; struct PcSamplingExtTableInternal : public PcSamplingExtTable {}; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp index f8c6eafc1f..494a6307d5 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp @@ -476,6 +476,39 @@ hsa_status_t hsa_ext_image_create_with_layout( image); } +hsa_status_t hsa_ext_image_data_get_info_v2( + hsa_agent_t agent, const hsa_ext_image_descriptor_v2_t* image_descriptor, + hsa_access_permission_t access_permission, + hsa_ext_image_data_info_t* image_data_info) { + return rocr::core::Runtime::runtime_singleton_->extensions_.image_api + .hsa_ext_image_data_get_info_v2_fn(agent, image_descriptor, + access_permission, image_data_info); +} + +hsa_status_t hsa_ext_image_create_v2(hsa_agent_t agent, + const hsa_ext_image_descriptor_v2_t* image_descriptor, + const void* image_data, + hsa_access_permission_t access_permission, + hsa_ext_image_t* image) { + return rocr::core::Runtime::runtime_singleton_->extensions_.image_api 
+ .hsa_ext_image_create_v2_fn(agent, image_descriptor, image_data, + access_permission, image); +} + +hsa_status_t hsa_ext_image_destroy_v2(hsa_agent_t agent, hsa_ext_image_t image) { + return rocr::core::Runtime::runtime_singleton_->extensions_.image_api + .hsa_ext_image_destroy_v2_fn(agent, image); +} + +hsa_status_t hsa_ext_image_mipmap_array_get_level(hsa_agent_t agent, + const hsa_ext_image_t* mipmap_array, + uint32_t mip_level, + hsa_ext_image_t* level_view) { + return rocr::core::Runtime::runtime_singleton_->extensions_.image_api + .hsa_ext_image_mipmap_array_get_level_fn(agent, mipmap_array, mip_level, + level_view); +} + hsa_status_t HSA_API hsa_ven_amd_pcs_iterate_configuration( hsa_agent_t agent, hsa_ven_amd_pcs_iterate_configuration_callback_t configuration_callback, void* callback_data) { diff --git a/projects/rocr-runtime/runtime/hsa-runtime/hsacore.dll.def b/projects/rocr-runtime/runtime/hsa-runtime/hsacore.dll.def index c9509ad98f..f7114fdcc7 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/hsacore.dll.def +++ b/projects/rocr-runtime/runtime/hsa-runtime/hsacore.dll.def @@ -223,4 +223,8 @@ EXPORTS hsa_amd_queue_get_info hsa_amd_enable_logging hsa_amd_signal_wait_all - hsa_amd_portable_export_dmabuf_v2 \ No newline at end of file + hsa_amd_portable_export_dmabuf_v2 + hsa_ext_image_mipmap_array_get_level + hsa_ext_image_create_v2 + hsa_ext_image_data_get_info_v2 + hsa_ext_image_destroy_v2 \ No newline at end of file diff --git a/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def b/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def index 7be1b87e35..0c06531b7b 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def +++ b/projects/rocr-runtime/runtime/hsa-runtime/hsacore.so.def @@ -262,6 +262,10 @@ global: hsa_amd_portable_export_dmabuf_v2; hsa_amd_ais_file_write; hsa_amd_ais_file_read; + hsa_ext_image_mipmap_array_get_level; + hsa_ext_image_create_v2; + hsa_ext_image_data_get_info_v2; + hsa_ext_image_destroy_v2; local: *; 
}; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/hsa_ext_image.cpp b/projects/rocr-runtime/runtime/hsa-runtime/image/hsa_ext_image.cpp index 1e9dd8696c..71e06bc626 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/hsa_ext_image.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/hsa_ext_image.cpp @@ -369,6 +369,48 @@ hsa_status_t hsa_ext_image_create_with_layout( CATCH; } +hsa_status_t hsa_ext_image_data_get_info_with_layout_v2( + hsa_agent_t agent, const hsa_ext_image_descriptor_v2_t* image_descriptor, + hsa_access_permission_t access_permission, hsa_ext_image_data_layout_t image_data_layout, + size_t image_data_row_pitch, size_t image_data_slice_pitch, + hsa_ext_image_data_info_t* image_data_info) { + TRY; + if (agent.handle == 0) { + return HSA_STATUS_ERROR_INVALID_AGENT; + } + + if ((image_descriptor == NULL) || (image_data_info == NULL) || + (access_permission < HSA_ACCESS_PERMISSION_RO) || + (access_permission > HSA_ACCESS_PERMISSION_RW) || + (image_data_layout != HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR)) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + // V2 descriptor: only single-level images supported with LINEAR layout + // Mipmap levels must be 0 or 1 for LINEAR layout + uint32_t mipmap_levels = + (image_descriptor->mipmap_levels == 0) ? 
1 : image_descriptor->mipmap_levels; + if (mipmap_levels > 1) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + // Convert V2 descriptor to V1 for internal use + hsa_ext_image_descriptor_t desc_v1 = {}; + desc_v1.geometry = image_descriptor->geometry; + desc_v1.width = image_descriptor->width; + desc_v1.height = image_descriptor->height; + desc_v1.depth = image_descriptor->depth; + desc_v1.array_size = image_descriptor->array_size; + desc_v1.format = image_descriptor->format; + + enforceDefaultPitch(agent, &desc_v1, image_data_row_pitch, image_data_slice_pitch); + + return ImageRuntime::instance()->GetImageSizeAndAlignment( + agent, desc_v1, image_data_layout, image_data_row_pitch, image_data_slice_pitch, + *image_data_info); + CATCH; +} + hsa_status_t hsa_amd_image_create(hsa_agent_t agent, const hsa_ext_image_descriptor_t* image_descriptor, const hsa_amd_image_descriptor_t* image_layout, @@ -388,6 +430,153 @@ hsa_status_t hsa_amd_image_create(hsa_agent_t agent, CATCH; } +hsa_status_t hsa_amd_image_create_v2(hsa_agent_t agent, + const hsa_ext_image_descriptor_v2_t* image_descriptor, + const hsa_amd_image_descriptor_t* image_layout, + const void* image_data, + hsa_access_permission_t access_permission, + hsa_ext_image_t* image) { + TRY; + if (agent.handle == 0) { + return HSA_STATUS_ERROR_INVALID_AGENT; + } + + if (image_descriptor == NULL || image_data == NULL || image == NULL) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + // Convert V2 descriptor to V1 for internal use + hsa_ext_image_descriptor_t desc_v1 = {}; + desc_v1.geometry = image_descriptor->geometry; + desc_v1.width = image_descriptor->width; + desc_v1.height = image_descriptor->height; + desc_v1.depth = image_descriptor->depth; + desc_v1.array_size = image_descriptor->array_size; + desc_v1.format = image_descriptor->format; + + uint32_t mipmap_levels = + (image_descriptor->mipmap_levels == 0) ? 
1 : image_descriptor->mipmap_levels; + + if (mipmap_levels > 1) { + // Mipmapped array path with AMD layout + return ImageRuntime::instance()->CreateMipmapArrayHandleWithLayout( + agent, desc_v1, image_layout, image_data, access_permission, mipmap_levels, *image); + } else { + // Regular single-level image path with AMD layout + return ImageRuntime::instance()->CreateImageHandleWithLayout( + agent, desc_v1, image_layout, image_data, access_permission, *image); + } + CATCH; +} + +//---------------------------------------------------------------------------// +// V2 API Implementations (Unified Mipmap Support) +//---------------------------------------------------------------------------// + +hsa_status_t hsa_ext_image_data_get_info_v2(hsa_agent_t agent, + const hsa_ext_image_descriptor_v2_t* image_descriptor, + hsa_access_permission_t access_permission, + hsa_ext_image_data_info_t* image_data_info) { + TRY; + if (agent.handle == 0) { + return HSA_STATUS_ERROR_INVALID_AGENT; + } + + if (image_descriptor == NULL || image_data_info == NULL) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + // Check if this is a mipmap request + uint32_t mipmap_levels = + (image_descriptor->mipmap_levels == 0) ? 
1 : image_descriptor->mipmap_levels; + + // Convert V2 descriptor to V1 for internal use + hsa_ext_image_descriptor_t desc_v1 = {}; + desc_v1.geometry = image_descriptor->geometry; + desc_v1.width = image_descriptor->width; + desc_v1.height = image_descriptor->height; + desc_v1.depth = image_descriptor->depth; + desc_v1.array_size = image_descriptor->array_size; + desc_v1.format = image_descriptor->format; + + if (mipmap_levels > 1) { + return ImageRuntime::instance()->GetMipmapArraySizeAndAlignment( + agent, desc_v1, mipmap_levels, HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0, + image_data_info->size, image_data_info->alignment); + } else { + // Regular image path (single level) + return ImageRuntime::instance()->GetImageSizeAndAlignment( + agent, desc_v1, HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0, *image_data_info); + } + CATCH; +} + +hsa_status_t hsa_ext_image_create_v2(hsa_agent_t agent, + const hsa_ext_image_descriptor_v2_t* image_descriptor, + const void* image_data, + hsa_access_permission_t access_permission, + hsa_ext_image_t* image) { + TRY; + if (agent.handle == 0) { + return HSA_STATUS_ERROR_INVALID_AGENT; + } + + if (image_descriptor == NULL || image_data == NULL || image == NULL) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + // Check if this is a mipmap request + uint32_t mipmap_levels = + (image_descriptor->mipmap_levels == 0) ? 
1 : image_descriptor->mipmap_levels; + + // Convert V2 descriptor to V1 for internal use + hsa_ext_image_descriptor_t desc_v1 = {}; + desc_v1.geometry = image_descriptor->geometry; + desc_v1.width = image_descriptor->width; + desc_v1.height = image_descriptor->height; + desc_v1.depth = image_descriptor->depth; + desc_v1.array_size = image_descriptor->array_size; + desc_v1.format = image_descriptor->format; + + if (mipmap_levels > 1) { + // Mipmapped array path + return ImageRuntime::instance()->CreateMipmapArrayHandle( + agent, desc_v1, image_data, access_permission, mipmap_levels, + HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0, *image); + } else { + // Regular image path (single level) + return ImageRuntime::instance()->CreateImageHandle( + agent, desc_v1, image_data, access_permission, + HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE, 0, 0, *image); + } + CATCH; +} + +hsa_status_t hsa_ext_image_destroy_v2(hsa_agent_t agent, hsa_ext_image_t image) { + TRY; + if (agent.handle == 0) { + return HSA_STATUS_ERROR_INVALID_AGENT; + } + + // The destroy operation is the same for both regular images and mipmaps + // The runtime internally determines the correct cleanup path + return ImageRuntime::instance()->DestroyImageHandle(image); + CATCH; +} + +// per-level view retrieval implementation +hsa_status_t HSA_API hsa_ext_image_mipmap_array_get_level(hsa_agent_t agent, + const hsa_ext_image_t* mipmapped_array, + uint32_t mip_level, + hsa_ext_image_t* level_image_out) { + TRY; + if (!mipmapped_array || !level_image_out) { return HSA_STATUS_ERROR_INVALID_ARGUMENT; } + + return ImageRuntime::instance()->GetMipmapArrayLevelHandle(agent, *mipmapped_array, mip_level, *level_image_out); + + CATCH; +} + void LoadImage(core::ImageExtTableInternal* image_api, decltype(::hsa_amd_image_create)** interface_api) { image_api->hsa_ext_image_get_capability_fn = hsa_ext_image_get_capability; @@ -420,6 +609,12 @@ void LoadImage(core::ImageExtTableInternal* image_api, image_api->hsa_ext_sampler_create_v2_fn = 
hsa_ext_sampler_create_v2; + // V2 unified APIs for images and mipmaps + image_api->hsa_ext_image_data_get_info_v2_fn = hsa_ext_image_data_get_info_v2; + image_api->hsa_ext_image_create_v2_fn = hsa_ext_image_create_v2; + image_api->hsa_ext_image_destroy_v2_fn = hsa_ext_image_destroy_v2; + image_api->hsa_ext_image_mipmap_array_get_level_fn = hsa_ext_image_mipmap_array_get_level; + *interface_api = hsa_amd_image_create; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager.cpp b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager.cpp index 3750a64532..60d2670ff6 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager.cpp @@ -118,6 +118,38 @@ void Sampler::Destroy(const Sampler* sampler) { assert(status == HSA_STATUS_SUCCESS); } +MipmappedArray* MipmappedArray::Create(hsa_agent_t agent) { + hsa_amd_memory_pool_t pool = ImageRuntime::instance()->kernarg_pool(); + + MipmappedArray* mipmapped_array = NULL; + + hsa_status_t status = AMD::hsa_amd_memory_pool_allocate( + pool, sizeof(MipmappedArray), 0, reinterpret_cast(&mipmapped_array)); + assert(status == HSA_STATUS_SUCCESS); + + if (status != HSA_STATUS_SUCCESS) return nullptr; + + new (mipmapped_array) MipmappedArray(); + + // Allow agent access to the image data + status = AMD::hsa_amd_agents_allow_access(1, &agent, nullptr, mipmapped_array); + if (status != HSA_STATUS_SUCCESS) { + MipmappedArray::Destroy(mipmapped_array); + return nullptr; + } + + return mipmapped_array; +} + +void MipmappedArray::Destroy(const MipmappedArray* mipmapped_array) { + assert(mipmapped_array != NULL); + mipmapped_array->~MipmappedArray(); + + hsa_status_t status = AMD::hsa_amd_memory_pool_free( + const_cast(mipmapped_array)); + assert(status == HSA_STATUS_SUCCESS); +} + ImageManager::ImageManager() {} ImageManager::~ImageManager() {} diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager.h 
b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager.h index d5b876e7bf..e99e5deebb 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager.h @@ -48,6 +48,7 @@ #include "inc/hsa_ext_image.h" #include "resource.h" #include "util.h" +#include "image/addrlib/inc/addrinterface.h" namespace rocr { namespace image { @@ -82,6 +83,7 @@ class ImageManager { virtual hsa_status_t CalculateImageSizeAndAlignment( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, hsa_ext_image_data_layout_t image_data_layout, + uint32_t num_mipmap_levels, size_t image_data_row_pitch, size_t image_data_slice_pitch, hsa_ext_image_data_info_t& image_info) const = 0; @@ -120,6 +122,24 @@ class ImageManager { virtual hsa_status_t FillImage(const Image& image, const void* pattern, const hsa_ext_image_region_t& region); + /// @brief Get the address library handle + virtual ADDR_HANDLE GetAddrLib() const = 0; + + /// @brief Fill mipmap structure with device specific mipmapped array object. + virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const = 0; + + /// @brief Fill mipmap structure with pre-computed AMD metadata descriptor. 
+ virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const = 0; + + /// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields + virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view, + const MipmappedArray& mipmap_array, uint32_t mip_level) const = 0; + + virtual void printSRDDetailed(const uint32_t* srd) const = 0; + virtual void printChannelSelect(uint32_t sel) const = 0; + virtual void printResourceType(uint32_t type) const = 0; + virtual void printSwizzleMode(uint32_t sw_mode) const = 0; + protected: static uint16_t FloatToHalf(float in); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_ai.cpp b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_ai.cpp index 5dba6ff1f0..4a7a35e9cf 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_ai.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_ai.cpp @@ -87,6 +87,7 @@ ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD3) hsa_status_t ImageManagerAi::CalculateImageSizeAndAlignment( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, hsa_ext_image_data_layout_t image_data_layout, + uint32_t num_mipmap_levels, size_t image_data_row_pitch, size_t image_data_slice_pitch, hsa_ext_image_data_info_t& image_info) const { @@ -102,8 +103,8 @@ hsa_status_t ImageManagerAi::CalculateImageSizeAndAlignment( desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)? 
Image::TileMode::TILED : Image::TileMode::LINEAR; } - if (GetAddrlibSurfaceInfoAi(component, desc, tileMode, - image_data_row_pitch, image_data_slice_pitch, out) == (uint32_t)(-1)) { + if (GetAddrlibSurfaceInfoAi(component, desc, num_mipmap_levels, tileMode, + image_data_row_pitch, image_data_slice_pitch, out) == (uint32_t)(-1)) { return HSA_STATUS_ERROR; } @@ -196,7 +197,7 @@ hsa_status_t ImageManagerAi::PopulateImageSrd(Image& image, const metadata_amd_t ((SQ_IMG_RSRC_WORD3*)(&image.srd[3]))->bits.TYPE = ImageLut().MapGeometry(image.desc.geometry); } - + // Imported metadata holds the offset to metadata, add the image base address. uintptr_t meta = uintptr_t(((SQ_IMG_RSRC_WORD5*)(&image.srd[5]))->bits.META_DATA_ADDRESS_HI) << 40; meta |= uintptr_t(((SQ_IMG_RSRC_WORD7*)(&image.srd[7]))->bits.META_DATA_ADDRESS) << 8; @@ -341,8 +342,8 @@ hsa_status_t ImageManagerAi::PopulateImageSrd(Image& image) const { ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; - uint32_t swizzleMode = GetAddrlibSurfaceInfoAi(image.component, image.desc, image.tile_mode, - image.row_pitch, image.slice_pitch, out); + uint32_t swizzleMode = GetAddrlibSurfaceInfoAi(image.component, image.desc, + 1, image.tile_mode, image.row_pitch, image.slice_pitch, out); if (swizzleMode == (uint32_t)(-1)) { return HSA_STATUS_ERROR; } @@ -499,6 +500,7 @@ hsa_status_t ImageManagerAi::PopulateSamplerSrd(Sampler& sampler) const { uint32_t ImageManagerAi::GetAddrlibSurfaceInfoAi( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, + uint32_t num_mipmap_levels, Image::TileMode tileMode, size_t image_data_row_pitch, size_t image_data_slice_pitch, @@ -521,7 +523,8 @@ uint32_t ImageManagerAi::GetAddrlibSurfaceInfoAi( in.width = width; in.height = height; in.numSlices = num_slice; - in.pitchInElement = image_data_row_pitch / image_prop.element_size; + in.numMipLevels = num_mipmap_levels; + switch(desc.geometry) { case HSA_EXT_IMAGE_GEOMETRY_1D: case HSA_EXT_IMAGE_GEOMETRY_1DB: @@ -583,7 +586,7 @@ uint32_t 
ImageManagerAi::GetAddrlibSurfaceInfoAi( prefSettingsInput.resourceType = in.resourceType; // Disallow all swizzles but linear. - if (tileMode == Image::TileMode::LINEAR) + if (tileMode == Image::TileMode::LINEAR) { prefSettingsInput.forbiddenBlock.macroThin4KB = 1; prefSettingsInput.forbiddenBlock.macroThick4KB = 1; @@ -611,5 +614,406 @@ uint32_t ImageManagerAi::GetAddrlibSurfaceInfoAi( return in.swizzleMode; } +hsa_status_t ImageManagerAi::PopulateMipmapSrd(MipmappedArray& mipmap) const { + ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap.desc.format, mipmap.desc.geometry); + assert(mipmap_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED); + assert(mipmap_prop.element_size != 0); + assert(mipmap.num_levels >= 1); + + const void* mipmap_data_addr = mipmap.data; + + if (IsLocalMemory(mipmap.data)) + mipmap_data_addr = reinterpret_cast( + reinterpret_cast(mipmap.data) - local_memory_base_address_); + + if (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) { + sq_buf_rsrc_word0_u word0; + sq_buf_rsrc_word1_u word1; + sq_buf_rsrc_word2_u word2; + sq_buf_rsrc_word3_u word3; + + word0.val = 0; + word0.f.base_address = PtrLow32(mipmap_data_addr); + + word1.val = 0; + word1.f.base_address_hi = PtrHigh32(mipmap_data_addr); + word1.f.stride = mipmap_prop.element_size; + word1.f.swizzle_enable = false; + word1.f.cache_swizzle = false; + + word2.val = 0; + word2.f.num_records = mipmap.desc.width * mipmap_prop.element_size; + + const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order); + word3.val = 0; + word3.f.dst_sel_x = swizzle.x; + word3.f.dst_sel_y = swizzle.y; + word3.f.dst_sel_z = swizzle.z; + word3.f.dst_sel_w = swizzle.w; + word3.f.num_format = mipmap_prop.data_type; + word3.f.data_format = mipmap_prop.data_format; + word3.f.index_stride = mipmap_prop.element_size; + word3.f.type = ImageLut().MapGeometry(mipmap.desc.geometry); + + mipmap.srd[0] = word0.val; + mipmap.srd[1] = word1.val; + mipmap.srd[2] = word2.val; + mipmap.srd[3] = 
word3.val; + + mipmap.row_pitch = mipmap.desc.width * mipmap_prop.element_size; + mipmap.slice_pitch = mipmap.row_pitch; + } else { + sq_img_rsrc_word0_u word0; + sq_img_rsrc_word1_u word1; + sq_img_rsrc_word2_u word2; + sq_img_rsrc_word3_u word3; + sq_img_rsrc_word4_u word4; + sq_img_rsrc_word5_u word5; + sq_img_rsrc_word6_u word6; + sq_img_rsrc_word7_u word7; + + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; + + // pMipInfo not needed - set to nullptr and AddrLib will ignore it + out.pMipInfo = nullptr; + + uint32_t swizzleMode = GetAddrlibSurfaceInfoAi( + mipmap.component, mipmap.desc, mipmap.num_levels, + mipmap.tile_mode, mipmap.row_pitch, mipmap.slice_pitch, out); + if (swizzleMode == (uint32_t)(-1)) { + return HSA_STATUS_ERROR; + } + mipmap.addr_output.addr2 = out; + mipmap.size = out.surfSize; + + assert((out.bpp / 8) == mipmap_prop.element_size); + + const size_t row_pitch_size = out.pitch * mipmap_prop.element_size; + + word0.f.base_address = PtrLow40Shift8(mipmap_data_addr); + + word1.val = 0; + word1.f.base_address_hi = PtrHigh64Shift40(mipmap_data_addr); + word1.f.min_lod = 0; + word1.f.data_format = mipmap_prop.data_format; + word1.f.num_format = mipmap_prop.data_type; + + word2.val = 0; + word2.f.width = mipmap.desc.width - 1; + word2.f.height = mipmap.desc.height - 1; + word2.f.perf_mod = 0; + + const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order); + word3.val = 0; + word3.f.dst_sel_x = swizzle.x; + word3.f.dst_sel_y = swizzle.y; + word3.f.dst_sel_z = swizzle.z; + word3.f.dst_sel_w = swizzle.w; + word3.f.sw_mode = swizzleMode; + word3.f.base_level = 0; + word3.f.last_level = mipmap.num_levels - 1; + word3.f.type = ImageLut().MapGeometry(mipmap.desc.geometry); + + const bool mipmap_array = + (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA || + mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA || + mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH); + const bool mipmap_3d = (mipmap.desc.geometry == 
HSA_EXT_IMAGE_GEOMETRY_3D); + + word4.val = 0; + word4.f.depth = + (mipmap_array) + ? std::max(mipmap.desc.array_size, static_cast(1)) - 1 + : (mipmap_3d) ? mipmap.desc.depth - 1 : 0; + word4.f.pitch = out.pitch - 1; + word4.f.bc_swizzle = GetBcSwizzle(swizzle); + + word5.val = 0; + word5.f.max_mip = mipmap.num_levels - 1; + word6.val = 0; + word7.val = 0; + + mipmap.srd[0] = word0.val; + mipmap.srd[1] = word1.val; + mipmap.srd[2] = word2.val; + mipmap.srd[3] = word3.val; + mipmap.srd[4] = word4.val; + mipmap.srd[5] = word5.val; + mipmap.srd[6] = word6.val; + mipmap.srd[7] = word7.val; + + mipmap.row_pitch = row_pitch_size; + mipmap.slice_pitch = out.sliceSize; + } + + mipmap.srd[8] = mipmap.desc.format.channel_type; + mipmap.srd[9] = mipmap.desc.format.channel_order; + mipmap.srd[10] = static_cast(mipmap.desc.width); + + // Mipmap-specific + mipmap.srd[11] = mipmap.num_levels; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t ImageManagerAi::PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const { + const metadata_amd_ai_t* desc_ai = reinterpret_cast(desc); + const void* mipmap_data_addr = mipmap_array.data; + + ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap_array.desc.format, mipmap_array.desc.geometry); + if (mipmap_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED || mipmap_prop.element_size == 0) { + return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED; + } + + const Swizzle swizzle = ImageLut().MapSwizzle(mipmap_array.desc.format.channel_order); + + if (IsLocalMemory(mipmap_array.data)) { + mipmap_data_addr = reinterpret_cast( + reinterpret_cast(mipmap_array.data) - local_memory_base_address_); + } + + // Copy the pre-computed SRD words 0-7 from metadata + mipmap_array.srd[0] = desc_ai->word0.u32All; + mipmap_array.srd[1] = desc_ai->word1.u32All; + mipmap_array.srd[2] = desc_ai->word2.u32All; + mipmap_array.srd[3] = desc_ai->word3.u32All; + mipmap_array.srd[4] = desc_ai->word4.u32All; + mipmap_array.srd[5] = 
desc_ai->word5.u32All; + mipmap_array.srd[6] = desc_ai->word6.u32All; + mipmap_array.srd[7] = desc_ai->word7.u32All; + + // Override specific fields after copying + uint32_t hwPixelSize = ImageLut().GetPixelSize(mipmap_prop.data_format, mipmap_prop.data_type); + if (mipmap_prop.element_size != hwPixelSize) { + return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED; + } + + reinterpret_cast(&mipmap_array.srd[0])->bits.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr); + reinterpret_cast(&mipmap_array.srd[1])->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr); + reinterpret_cast(&mipmap_array.srd[1])->bits.DATA_FORMAT = mipmap_prop.data_format; + reinterpret_cast(&mipmap_array.srd[1])->bits.NUM_FORMAT = mipmap_prop.data_type; + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_X = swizzle.x; + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_Y = swizzle.y; + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_Z = swizzle.z; + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_W = swizzle.w; + reinterpret_cast(&mipmap_array.srd[5])->bits.MAX_MIP = mipmap_array.num_levels - 1; + + if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA || + mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D) { + reinterpret_cast(&mipmap_array.srd[3])->bits.TYPE = + ImageLut().MapGeometry(mipmap_array.desc.geometry); + } + + // Looks like this is only used for CPU copies. 
+ mipmap_array.row_pitch = 0; + mipmap_array.slice_pitch = 0; + + // Store mipmap-specific metadata + mipmap_array.srd[8] = mipmap_array.desc.format.channel_type; + mipmap_array.srd[9] = mipmap_array.desc.format.channel_order; + mipmap_array.srd[10] = static_cast(mipmap_array.desc.width); + mipmap_array.srd[11] = mipmap_array.num_levels; + + // Allocate and populate pMipInfo from metadata mip_offsets (ADDR2 for Ai/GFX9) + ADDR2_MIP_INFO* mip_info_storage = new ADDR2_MIP_INFO[mipmap_array.num_levels]; + memset(mip_info_storage, 0, sizeof(ADDR2_MIP_INFO) * mipmap_array.num_levels); + + // Extract per-level information from mip_offsets array + for (uint32_t level = 0; level < mipmap_array.num_levels; level++) { + // mip_offsets contains offset bits [39:8], shift left by 8 to get actual byte offset + mip_info_storage[level].offset = static_cast(desc_ai->mip_offsets[level]) << 8; + + // Calculate dimensions for this level (halve at each level) + mip_info_storage[level].pitch = std::max(1u, static_cast(mipmap_array.desc.width >> level)); + mip_info_storage[level].height = std::max(1u, static_cast(mipmap_array.desc.height >> level)); + mip_info_storage[level].depth = std::max(1u, static_cast(mipmap_array.desc.depth >> level)); + } + + // Store pMipInfo in addr_output for later use by PopulateMipLevelSrd + mipmap_array.addr_output.addr2.pMipInfo = mip_info_storage; + + // Total size calculation from metadata + uint32_t last_level = mipmap_array.num_levels - 1; + uint64_t last_level_size = mip_info_storage[last_level].pitch * + mip_info_storage[last_level].height * + mip_info_storage[last_level].depth * + mipmap_prop.element_size; + mipmap_array.size = mip_info_storage[last_level].offset + last_level_size; + + return HSA_STATUS_SUCCESS; +} + +void ImageManagerAi::printSRDDetailed(const uint32_t* srd) const { + if (!srd) { + printf("\n========== Image SRD (GFX9/AI) - Detailed ==========\n"); + printf("ERROR: No SRD data provided.\n"); + 
printf("===============================================\n\n"); + return; + } + + printf("\n========== Image SRD (GFX9/AI) - Detailed ==========\n"); + + // Print all 12 words with bit field annotations + for (int i = 0; i < 12; i++) { + printf("WORD %d: 0x%08x ", i, srd[i]); + + // Binary representation + printf("("); + for (int bit = 31; bit >= 0; bit--) { + printf("%d", (srd[i] >> bit) & 1); + if (bit % 4 == 0 && bit != 0) printf("_"); + } + printf(")\n"); + } + + // WORD 0: BASE_ADDRESS (bits 39:8) + sq_img_rsrc_word0_u word0; + word0.val = srd[0]; + printf("\nWORD 0: BASE_ADDRESS (bits 39:8) = 0x%08x\n", word0.f.base_address); + + // WORD 1: Contains BASE_ADDRESS_HI, MIN_LOD, DATA_FORMAT, NUM_FORMAT + sq_img_rsrc_word1_u word1; + word1.val = srd[1]; + printf("WORD 1: BASE_ADDRESS_HI = 0x%02x\n", word1.f.base_address_hi); + printf(" MIN_LOD = %u\n", word1.f.min_lod); + printf(" DATA_FORMAT = %u\n", word1.f.data_format); + printf(" NUM_FORMAT = %u\n", word1.f.num_format); + + // Calculate full address (GFX9 uses 40-bit shifted by 8) + uint64_t base_addr = ((uint64_t)word1.f.base_address_hi << 32) | ((uint64_t)word0.f.base_address << 8); + printf(" → Full Base Address = 0x%016lx\n", base_addr); + + // WORD 2: WIDTH, HEIGHT, PERF_MOD + sq_img_rsrc_word2_u word2; + word2.val = srd[2]; + printf("WORD 2: WIDTH = %u (actual: %u)\n", word2.f.width, word2.f.width + 1); + printf(" HEIGHT = %u (actual: %u)\n", word2.f.height, word2.f.height + 1); + printf(" PERF_MOD = %u\n", word2.f.perf_mod); + + // WORD 3: Channel selectors, SW_MODE, BASE_LEVEL, LAST_LEVEL, TYPE + sq_img_rsrc_word3_u word3; + word3.val = srd[3]; + printf("WORD 3: DST_SEL_X = %u ", word3.f.dst_sel_x); + printChannelSelect(word3.f.dst_sel_x); + printf(" DST_SEL_Y = %u ", word3.f.dst_sel_y); + printChannelSelect(word3.f.dst_sel_y); + printf(" DST_SEL_Z = %u ", word3.f.dst_sel_z); + printChannelSelect(word3.f.dst_sel_z); + printf(" DST_SEL_W = %u ", word3.f.dst_sel_w); + 
printChannelSelect(word3.f.dst_sel_w); + printf(" BASE_LEVEL = %u ◄──── Current base level\n", word3.f.base_level); + printf(" LAST_LEVEL = %u ◄──── Current last level\n", word3.f.last_level); + printf(" SW_MODE = %u ", word3.f.sw_mode); + printSwizzleMode(word3.f.sw_mode); + printf(" TYPE = %u ", word3.f.type); + printResourceType(word3.f.type); + + // WORD 4: DEPTH, PITCH, BC_SWIZZLE + sq_img_rsrc_word4_u word4; + word4.val = srd[4]; + printf("WORD 4: DEPTH = %u\n", word4.f.depth); + printf(" PITCH = %u (actual: %u)\n", word4.f.pitch, word4.f.pitch + 1); + printf(" BC_SWIZZLE = %u\n", word4.f.bc_swizzle); + + // Calculate effective depth based on geometry + uint32_t type = word3.f.type; + if (type == 10) { // 3D + printf(" → 3D Depth = %u (actual: %u)\n", word4.f.depth, word4.f.depth + 1); + } else if (type == 13 || type == 12) { // Arrays + printf(" → Array Size = %u (actual: %u)\n", word4.f.depth, word4.f.depth + 1); + } + + // WORD 5-7: Usually zero for basic images, but may contain metadata addresses + printf("WORD 5: META_DATA_ADDRESS_HI = 0x%08x\n", srd[5]); + printf("WORD 6: Reserved = 0x%08x\n", srd[6]); + printf("WORD 7: META_DATA_ADDRESS = 0x%08x\n", srd[7]); + + // Additional mipmap information + printf("WORD 8: CHANNEL_TYPE = 0x%08x\n", srd[8]); + printf("WORD 9: CHANNEL_ORDER = 0x%08x\n", srd[9]); + printf("WORD 10: WIDTH_ORIGINAL = 0x%08x\n", srd[10]); + printf("WORD 11: NUM_LEVELS = 0x%08x\n", srd[11]); + + // Mipmap analysis + if (word3.f.last_level > word3.f.base_level || word3.f.last_level > 0) { + printf("\nMIPMAP ANALYSIS:\n"); + printf(" Total Levels = %u\n", srd[11]); + printf(" Active Range = [%u, %u]\n", word3.f.base_level, word3.f.last_level); + if (word3.f.base_level == word3.f.last_level) { + printf(" Mode = SINGLE LEVEL VIEW ◄──── Mip level view\n"); + uint32_t level = word3.f.base_level; + uint32_t level_width = std::max(1u, static_cast((word2.f.width + 1) >> level)); + uint32_t level_height = std::max(1u, static_cast((word2.f.height 
+ 1) >> level)); + printf(" Effective Dimensions = %ux%u (level %u)\n", level_width, level_height, level); + } else { + printf(" Mode = FULL MIPMAP CHAIN\n"); + } + } + printf("===============================================\n\n"); +} + +void ImageManagerAi::printChannelSelect(uint32_t sel) const { + switch(sel) { + case 0: printf("(SEL_0)\n"); break; + case 1: printf("(SEL_1)\n"); break; + case 4: printf("(SEL_X/R)\n"); break; + case 5: printf("(SEL_Y/G)\n"); break; + case 6: printf("(SEL_Z/B)\n"); break; + case 7: printf("(SEL_W/A)\n"); break; + default: printf("(UNKNOWN)\n"); break; + } +} + +void ImageManagerAi::printResourceType(uint32_t type) const { + switch(type) { + case 8: printf("(1D)\n"); break; + case 9: printf("(2D)\n"); break; + case 10: printf("(3D)\n"); break; + case 11: printf("(CUBE)\n"); break; + case 12: printf("(1D_ARRAY/1DB)\n"); break; + case 13: printf("(2D_ARRAY)\n"); break; + case 14: printf("(2D_MSAA)\n"); break; + case 15: printf("(2D_MSAA_ARRAY)\n"); break; + default: printf("(UNKNOWN=%u)\n", type); break; + } +} + +void ImageManagerAi::printSwizzleMode(uint32_t sw_mode) const { + // GFX9 swizzle modes + if (sw_mode == 0) { + printf("(LINEAR)\n"); + } else if (sw_mode < 5) { + printf("(SW_256B_%u)\n", sw_mode); + } else if (sw_mode < 9) { + printf("(SW_4KB_%u)\n", sw_mode - 4); + } else if (sw_mode < 13) { + printf("(SW_64KB_%u)\n", sw_mode - 8); + } else if (sw_mode < 22) { + printf("(SW_VAR_%u)\n", sw_mode - 12); + } else { + printf("(UNKNOWN=%u)\n", sw_mode); + } +} + +hsa_status_t ImageManagerAi::PopulateMipLevelSrd( + MipmappedArray& level_view, + const MipmappedArray& mipmap_array, + uint32_t mip_level) const { + + // SRD already copied from parent, just modify BASE_LEVEL/LAST_LEVEL fields + uint32_t* srd_words = reinterpret_cast(level_view.srd); + + // SRD WORD3 has BASE_LEVEL and LAST_LEVEL fields + sq_img_rsrc_word3_u* word3 = reinterpret_cast(&srd_words[3]); + + // Set both to same value - hardware samples only this level + 
word3->f.base_level = mip_level; + word3->f.last_level = mip_level; + + debug_print("Set SRD mip selection: BASE_LEVEL=%u, LAST_LEVEL=%u", mip_level, mip_level); + + return HSA_STATUS_SUCCESS; +} + } // namespace image } // namespace rocr diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_ai.h b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_ai.h index 4c709cd085..101ee86c57 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_ai.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_ai.h @@ -59,6 +59,7 @@ class ImageManagerAi : public ImageManagerKv { virtual hsa_status_t CalculateImageSizeAndAlignment( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, hsa_ext_image_data_layout_t image_data_layout, + uint32_t num_mipmap_levels, size_t image_data_row_pitch, size_t image_data_slice_pitch, hsa_ext_image_data_info_t& image_info) const; @@ -76,13 +77,29 @@ class ImageManagerAi : public ImageManagerKv { /// @brief Fill sampler structure with device specific sampler object. virtual hsa_status_t PopulateSamplerSrd(Sampler& sampler) const; + /// @brief Fill mipmap structure with device specific mipmapped array object. + virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const; + + /// @brief Fill mipmap structure with pre-computed AMD metadata descriptor. 
+ virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const; + + /// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields + virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view, + const MipmappedArray& mipmap_array, uint32_t mip_level) const; + + virtual void printSRDDetailed(const uint32_t* srd) const; + virtual void printChannelSelect(uint32_t sel) const; + virtual void printResourceType(uint32_t type) const; + virtual void printSwizzleMode(uint32_t sw_mode) const; + protected: uint32_t GetAddrlibSurfaceInfoAi(hsa_agent_t component, - const hsa_ext_image_descriptor_t& desc, - Image::TileMode tileMode, - size_t image_data_row_pitch, - size_t image_data_slice_pitch, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const; + const hsa_ext_image_descriptor_t& desc, + uint32_t num_mipmap_levels, + Image::TileMode tileMode, + size_t image_data_row_pitch, + size_t image_data_slice_pitch, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const; bool IsLocalMemory(const void* address) const; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx11.cpp b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx11.cpp index ea1ce24bca..bf346b40b8 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx11.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx11.cpp @@ -215,6 +215,7 @@ ImageManagerGfx11::~ImageManagerGfx11() {} hsa_status_t ImageManagerGfx11::CalculateImageSizeAndAlignment( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, hsa_ext_image_data_layout_t image_data_layout, + uint32_t num_mipmap_levels, size_t image_data_row_pitch, size_t image_data_slice_pitch, hsa_ext_image_data_info_t& image_info) const { @@ -230,9 +231,8 @@ hsa_status_t ImageManagerGfx11::CalculateImageSizeAndAlignment( desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)? 
Image::TileMode::TILED : Image::TileMode::LINEAR; } - if (GetAddrlibSurfaceInfoNv(component, desc, tileMode, - image_data_row_pitch, image_data_slice_pitch, out) == - (uint32_t)(-1)) { + if (GetAddrlibSurfaceInfoNv(component, desc, num_mipmap_levels, tileMode, + image_data_row_pitch, image_data_slice_pitch, out) == (uint32_t)(-1)) { return HSA_STATUS_ERROR; } @@ -332,7 +332,7 @@ hsa_status_t ImageManagerGfx11::PopulateImageSrd(Image& image, reinterpret_cast(&image.srd[3])->bits.TYPE = ImageLut().MapGeometry(image.desc.geometry); } - + // Imported metadata holds the offset to metadata, add the image base address. uintptr_t meta = uintptr_t(((SQ_IMG_RSRC_WORD7*)(&image.srd[7]))->bits.META_DATA_ADDRESS_HI) << 16; meta |= uintptr_t(((SQ_IMG_RSRC_WORD6*)(&image.srd[6]))->bits.META_DATA_ADDRESS) << 8; @@ -460,9 +460,8 @@ hsa_status_t ImageManagerGfx11::PopulateImageSrd(Image& image) const { ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; - uint32_t swizzleMode = GetAddrlibSurfaceInfoNv( - image.component, image.desc, image.tile_mode, - image.row_pitch, image.slice_pitch, out); + uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(image.component, image.desc, + 1, image.tile_mode, image.row_pitch, image.slice_pitch, out); if (swizzleMode == (uint32_t)(-1)) { return HSA_STATUS_ERROR; } @@ -619,6 +618,7 @@ hsa_status_t ImageManagerGfx11::PopulateSamplerSrd(Sampler& sampler) const { uint32_t ImageManagerGfx11::GetAddrlibSurfaceInfoNv( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, + uint32_t num_mipmap_levels, Image::TileMode tileMode, size_t image_data_row_pitch, size_t image_data_slice_pitch, @@ -641,7 +641,7 @@ uint32_t ImageManagerGfx11::GetAddrlibSurfaceInfoNv( in.width = width; in.height = height; in.numSlices = num_slice; - in.pitchInElement = image_data_row_pitch / image_prop.element_size; + in.numMipLevels = num_mipmap_levels; switch (desc.geometry) { case HSA_EXT_IMAGE_GEOMETRY_1D: @@ -810,5 +810,410 @@ hsa_status_t ImageManagerGfx11::FillImage(const Image& 
image, const void* patter return status; } +hsa_status_t ImageManagerGfx11::PopulateMipmapSrd(MipmappedArray& mipmap) const { + ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap.desc.format, mipmap.desc.geometry); + assert(mipmap_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED); + assert(mipmap_prop.element_size != 0); + assert(mipmap.num_levels >= 1); + + const void* mipmap_data_addr = mipmap.data; + + if (IsLocalMemory(mipmap.data)) + mipmap_data_addr = reinterpret_cast( + reinterpret_cast(mipmap.data) - local_memory_base_address_); + + if (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) { + SQ_BUF_RSRC_WORD0 word0; + SQ_BUF_RSRC_WORD1 word1; + SQ_BUF_RSRC_WORD2 word2; + SQ_BUF_RSRC_WORD3 word3; + + word0.val = 0; + word0.f.BASE_ADDRESS = PtrLow32(mipmap_data_addr); + + word1.val = 0; + word1.f.BASE_ADDRESS_HI = PtrHigh32(mipmap_data_addr); + word1.f.STRIDE = mipmap_prop.element_size; + word1.f.SWIZZLE_ENABLE = 0; + + word2.f.NUM_RECORDS = mipmap.desc.width * mipmap_prop.element_size; + + const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order); + word3.val = 0; + word3.f.DST_SEL_X = swizzle.x; + word3.f.DST_SEL_Y = swizzle.y; + word3.f.DST_SEL_Z = swizzle.z; + word3.f.DST_SEL_W = swizzle.w; + word3.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type); + word3.f.INDEX_STRIDE = mipmap_prop.element_size; + word3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry); + + mipmap.srd[0] = word0.val; + mipmap.srd[1] = word1.val; + mipmap.srd[2] = word2.val; + mipmap.srd[3] = word3.val; + + mipmap.row_pitch = mipmap.desc.width * mipmap_prop.element_size; + mipmap.slice_pitch = mipmap.row_pitch; + } else { + SQ_IMG_RSRC_WORD0 word0; + SQ_IMG_RSRC_WORD1 word1; + SQ_IMG_RSRC_WORD2 word2; + SQ_IMG_RSRC_WORD3 word3; + SQ_IMG_RSRC_WORD4 word4; + SQ_IMG_RSRC_WORD5 word5; + SQ_IMG_RSRC_WORD5 word6; + SQ_IMG_RSRC_WORD5 word7; + + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; + + // pMipInfo not needed - set to nullptr 
and AddrLib will ignore it + out.pMipInfo = nullptr; + + uint32_t swizzleMode = GetAddrlibSurfaceInfoNv( + mipmap.component, mipmap.desc, mipmap.num_levels, + mipmap.tile_mode, mipmap.row_pitch, mipmap.slice_pitch, out); + if (swizzleMode == (uint32_t)(-1)) { + return HSA_STATUS_ERROR; + } + mipmap.addr_output.addr2 = out; + mipmap.size = out.surfSize; + + assert((out.bpp / 8) == mipmap_prop.element_size); + + const size_t row_pitch_size = out.pitch * mipmap_prop.element_size; + + word0.val = 0; + word0.f.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr); + + word1.val = 0; + word1.f.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr); + word1.f.MAX_MIP = mipmap.num_levels - 1; + word1.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type); + // Only take the lowest 2 bits of (mipmap.desc.width - 1) + word1.f.WIDTH = BitSelect<0, 1>(mipmap.desc.width - 1); + + word2.val = 0; + // Take the high 12 bits of (mipmap.desc.width - 1) + word2.f.WIDTH_HI = BitSelect<2, 13>(mipmap.desc.width - 1); + word2.f.HEIGHT = mipmap.desc.height ? mipmap.desc.height - 1 : 0; + + const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order); + word3.val = 0; + word3.f.DST_SEL_X = swizzle.x; + word3.f.DST_SEL_Y = swizzle.y; + word3.f.DST_SEL_Z = swizzle.z; + word3.f.DST_SEL_W = swizzle.w; + word3.f.SW_MODE = swizzleMode; + word3.f.BASE_LEVEL = 0; + word3.f.LAST_LEVEL = mipmap.num_levels - 1; + word3.f.BC_SWIZZLE = GetBcSwizzle(swizzle); + word3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry); + + const bool mipmap_array = + (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA || + mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA || + mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH); + const bool mipmap_3d = (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D); + + word4.val = 0; + word4.f.DEPTH = + (mipmap_array) // Doesn't hurt but isn't array_size already >0? + ? std::max(mipmap.desc.array_size, static_cast(1)) - 1 + : (mipmap_3d) ? 
mipmap.desc.depth - 1 : 0; + + // For 1d, 2d and 2d-msaa in gfx11 this is pitch-1 + if (!mipmap_array && !mipmap_3d) { + word4.f.PITCH = out.pitch - 1; + } + + word5.val = 0; + word6.val = 0; + word7.val = 0; + + mipmap.srd[0] = word0.val; + mipmap.srd[1] = word1.val; + mipmap.srd[2] = word2.val; + mipmap.srd[3] = word3.val; + mipmap.srd[4] = word4.val; + mipmap.srd[5] = word5.val; + mipmap.srd[6] = word6.val; + mipmap.srd[7] = word7.val; + + mipmap.row_pitch = row_pitch_size; + mipmap.slice_pitch = out.sliceSize; + } + + mipmap.srd[8] = mipmap.desc.format.channel_type; + mipmap.srd[9] = mipmap.desc.format.channel_order; + mipmap.srd[10] = static_cast(mipmap.desc.width); + + // Mipmap-specific auxiliary fields + mipmap.srd[11] = mipmap.num_levels; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t ImageManagerGfx11::PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const { + const metadata_amd_gfx11_t* desc_gfx11 = reinterpret_cast(desc); + const void* mipmap_data_addr = mipmap_array.data; + + ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap_array.desc.format, mipmap_array.desc.geometry); + if (mipmap_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED || mipmap_prop.element_size == 0) { + return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED; + } + + const Swizzle swizzle = ImageLut().MapSwizzle(mipmap_array.desc.format.channel_order); + + if (IsLocalMemory(mipmap_array.data)) { + mipmap_data_addr = reinterpret_cast( + reinterpret_cast(mipmap_array.data) - local_memory_base_address_); + } + + // Copy the pre-computed SRD words 0-7 from metadata + mipmap_array.srd[0] = desc_gfx11->word0.u32All; + mipmap_array.srd[1] = desc_gfx11->word1.u32All; + mipmap_array.srd[2] = desc_gfx11->word2.u32All; + mipmap_array.srd[3] = desc_gfx11->word3.u32All; + mipmap_array.srd[4] = desc_gfx11->word4.u32All; + mipmap_array.srd[5] = desc_gfx11->word5.u32All; + mipmap_array.srd[6] = desc_gfx11->word6.u32All; + mipmap_array.srd[7] = 
desc_gfx11->word7.u32All; + + // Override specific fields after copying + uint32_t hwPixelSize = ImageLut().GetPixelSize(mipmap_prop.data_format, mipmap_prop.data_type); + if (mipmap_prop.element_size != hwPixelSize) { + return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED; + } + + reinterpret_cast(&mipmap_array.srd[0])->bits.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr); + reinterpret_cast(&mipmap_array.srd[1])->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr); + reinterpret_cast(&mipmap_array.srd[1])->bits.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type); + reinterpret_cast(&mipmap_array.srd[1])->bits.MAX_MIP = mipmap_array.num_levels - 1; + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_X = swizzle.x; + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_Y = swizzle.y; + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_Z = swizzle.z; + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_W = swizzle.w; + + if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA || + mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D) { + reinterpret_cast(&mipmap_array.srd[3])->bits.TYPE = + ImageLut().MapGeometry(mipmap_array.desc.geometry); + } + + // Looks like this is only used for CPU copies. 
+ mipmap_array.row_pitch = 0; + mipmap_array.slice_pitch = 0; + + // Auxiliary (non-hardware) words 8-11: channel type, channel order, original width, level count + mipmap_array.srd[8] = mipmap_array.desc.format.channel_type; + mipmap_array.srd[9] = mipmap_array.desc.format.channel_order; + mipmap_array.srd[10] = static_cast<uint32_t>(mipmap_array.desc.width); + mipmap_array.srd[11] = mipmap_array.num_levels; + + // Allocate and populate pMipInfo from metadata mip_offsets (ADDR2 for GFX11) + ADDR2_MIP_INFO* mip_info_storage = new ADDR2_MIP_INFO[mipmap_array.num_levels]; + memset(mip_info_storage, 0, sizeof(ADDR2_MIP_INFO) * mipmap_array.num_levels); + + // Extract per-level information from mip_offsets array + for (uint32_t level = 0; level < mipmap_array.num_levels; level++) { + // mip_offsets contains offset bits [39:8]; widen to 64 bits first so the shift by 8 keeps the upper bits + mip_info_storage[level].offset = static_cast<uint64_t>(desc_gfx11->mip_offsets[level]) << 8; + + // Calculate dimensions for this level (halve at each level, never below 1) + mip_info_storage[level].pitch = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.width >> level)); + mip_info_storage[level].height = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.height >> level)); + mip_info_storage[level].depth = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.depth >> level)); + } + + // Publish the per-level table through addr_output. NOTE(review): this is a new[] array; confirm the owner of mipmap_array releases it with delete[] + mipmap_array.addr_output.addr2.pMipInfo = mip_info_storage; + + // Total size is the last level's offset plus its unpadded byte size; widen before multiplying so the product cannot wrap at 32 bits + uint32_t last_level = mipmap_array.num_levels - 1; + uint64_t last_level_size = static_cast<uint64_t>(mip_info_storage[last_level].pitch) * + mip_info_storage[last_level].height * + mip_info_storage[last_level].depth * + mipmap_prop.element_size; + mipmap_array.size = mip_info_storage[last_level].offset + last_level_size; + + return HSA_STATUS_SUCCESS; +} + +void ImageManagerGfx11::printSRDDetailed(const uint32_t* srd) const { + if (!srd) { + printf("\n========== Image SRD (GFX11) - Detailed ==========\n"); + printf("ERROR: No SRD data provided.\n"); +
printf("===============================================\n\n"); + return; + } + + printf("\n========== Image SRD (GFX11) - Detailed ==========\n"); + + // Dump all 12 words (8 hardware words followed by 4 auxiliary words) in hex and binary + for (int i = 0; i < 12; i++) { + printf("WORD %d: 0x%08x ", i, srd[i]); + + // Binary representation, most significant bit first, with '_' between nibbles + printf("("); + for (int bit = 31; bit >= 0; bit--) { + printf("%d", (srd[i] >> bit) & 1); + if (bit % 4 == 0 && bit != 0) printf("_"); + } + printf(")\n"); + } + + // WORD 0: BASE_ADDRESS (bits 39:8) + SQ_IMG_RSRC_WORD0 word0; + word0.val = srd[0]; + printf("\nWORD 0: BASE_ADDRESS (bits 39:8) = 0x%08x\n", word0.f.BASE_ADDRESS); + + // WORD 1: Contains BASE_ADDRESS_HI, FORMAT, WIDTH (bits 1:0) + SQ_IMG_RSRC_WORD1 word1; + word1.val = srd[1]; + printf("WORD 1: BASE_ADDRESS_HI = 0x%02x\n", word1.f.BASE_ADDRESS_HI); + printf(" FORMAT = %u\n", word1.f.FORMAT); + printf(" WIDTH (bits 1:0) = %u\n", word1.f.WIDTH); + + // Rebuild the byte address: BASE_ADDRESS carries bits 39:8, BASE_ADDRESS_HI the bits from 40 upward (mirrors PtrLow40Shift8 / PtrHigh64Shift40) + uint64_t base_addr = ((uint64_t)word1.f.BASE_ADDRESS_HI << 40) | ((uint64_t)word0.f.BASE_ADDRESS << 8); + printf(" → Full Base Address = 0x%016llx\n", static_cast<unsigned long long>(base_addr)); + + // WORD 2: WIDTH_HI, HEIGHT + SQ_IMG_RSRC_WORD2 word2; + word2.val = srd[2]; + printf("WORD 2: WIDTH_HI (bits 13:2) = %u\n", word2.f.WIDTH_HI); + printf(" HEIGHT = %u\n", word2.f.HEIGHT); + + // Calculate full width (GFX11 uses 14 bits split: 2 in WORD1 + 12 in WORD2); fields store size minus one + uint32_t full_width = word1.f.WIDTH | (word2.f.WIDTH_HI << 2); + printf(" → Full Width = %u (actual: %u)\n", full_width, full_width + 1); + printf(" → Full Height = %u (actual: %u)\n", word2.f.HEIGHT, word2.f.HEIGHT + 1); + + // WORD 3: Channel selectors, SW_MODE, BASE_LEVEL, LAST_LEVEL, BC_SWIZZLE, TYPE + SQ_IMG_RSRC_WORD3 word3; + word3.val = srd[3]; + printf("WORD 3: DST_SEL_X = %u ", word3.f.DST_SEL_X); + printChannelSelect(word3.f.DST_SEL_X); + printf(" DST_SEL_Y = %u ", word3.f.DST_SEL_Y); + printChannelSelect(word3.f.DST_SEL_Y); + printf(" DST_SEL_Z = %u ",
word3.f.DST_SEL_Z); + printChannelSelect(word3.f.DST_SEL_Z); + printf(" DST_SEL_W = %u ", word3.f.DST_SEL_W); + printChannelSelect(word3.f.DST_SEL_W); + printf(" BASE_LEVEL = %u ◄──── Current base level\n", word3.f.BASE_LEVEL); + printf(" LAST_LEVEL = %u ◄──── Current last level\n", word3.f.LAST_LEVEL); + printf(" SW_MODE = %u ", word3.f.SW_MODE); + printSwizzleMode(word3.f.SW_MODE); + printf(" BC_SWIZZLE = %u\n", word3.f.BC_SWIZZLE); + printf(" TYPE = %u ", word3.f.TYPE); + printResourceType(word3.f.TYPE); + + // WORD 4: DEPTH, PITCH + SQ_IMG_RSRC_WORD4 word4; + word4.val = srd[4]; + printf("WORD 4: DEPTH = %u\n", word4.f.DEPTH); + printf(" PITCH = %u (actual: %u)\n", word4.f.PITCH, word4.f.PITCH + 1); + + // Calculate effective depth based on geometry + uint32_t type = word3.f.TYPE; + if (type == 10) { // 3D + printf(" → 3D Depth = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1); + } else if (type == 13 || type == 12) { // Arrays + printf(" → Array Size = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1); + } + + // WORD 5-7: Usually zero for basic images, but may contain metadata addresses + printf("WORD 5: Reserved = 0x%08x\n", srd[5]); + printf("WORD 6: META_DATA_ADDRESS = 0x%08x\n", srd[6]); + printf("WORD 7: META_DATA_ADDRESS_HI = 0x%08x\n", srd[7]); + + // Additional mipmap information + printf("WORD 8: CHANNEL_TYPE = 0x%08x\n", srd[8]); + printf("WORD 9: CHANNEL_ORDER = 0x%08x\n", srd[9]); + printf("WORD 10: WIDTH_ORIGINAL = 0x%08x\n", srd[10]); + printf("WORD 11: NUM_LEVELS = 0x%08x\n", srd[11]); + + // Mipmap analysis + if (word3.f.LAST_LEVEL > word3.f.BASE_LEVEL || word3.f.LAST_LEVEL > 0) { + printf("\nMIPMAP ANALYSIS:\n"); + printf(" Total Levels = %u\n", srd[11]); + printf(" Active Range = [%u, %u]\n", word3.f.BASE_LEVEL, word3.f.LAST_LEVEL); + if (word3.f.BASE_LEVEL == word3.f.LAST_LEVEL) { + printf(" Mode = SINGLE LEVEL VIEW ◄──── Mip level view\n"); + uint32_t level = word3.f.BASE_LEVEL; + uint32_t level_width = std::max(1u, (full_width + 
1) >> level); + uint32_t level_height = std::max(1u, static_cast((word2.f.HEIGHT + 1) >> level)); + printf(" Effective Dimensions = %ux%u (level %u)\n", level_width, level_height, level); + } else { + printf(" Mode = FULL MIPMAP CHAIN\n"); + } + } + printf("===============================================\n\n"); +} + +void ImageManagerGfx11::printChannelSelect(uint32_t sel) const { + switch(sel) { + case 0: printf("(SEL_0)\n"); break; + case 1: printf("(SEL_1)\n"); break; + case 4: printf("(SEL_X/R)\n"); break; + case 5: printf("(SEL_Y/G)\n"); break; + case 6: printf("(SEL_Z/B)\n"); break; + case 7: printf("(SEL_W/A)\n"); break; + default: printf("(UNKNOWN)\n"); break; + } +} + +void ImageManagerGfx11::printResourceType(uint32_t type) const { + switch(type) { + case 8: printf("(1D)\n"); break; + case 9: printf("(2D)\n"); break; + case 10: printf("(3D)\n"); break; + case 11: printf("(CUBE)\n"); break; + case 12: printf("(1D_ARRAY/1DB)\n"); break; + case 13: printf("(2D_ARRAY)\n"); break; + case 14: printf("(2D_MSAA)\n"); break; + case 15: printf("(2D_MSAA_ARRAY)\n"); break; + default: printf("(UNKNOWN=%u)\n", type); break; + } +} + +void ImageManagerGfx11::printSwizzleMode(uint32_t sw_mode) const { + // GFX11 swizzle modes (similar to GFX9/10) + if (sw_mode == 0) { + printf("(LINEAR)\n"); + } else if (sw_mode < 5) { + printf("(SW_256B_%u)\n", sw_mode); + } else if (sw_mode < 9) { + printf("(SW_4KB_%u)\n", sw_mode - 4); + } else if (sw_mode < 13) { + printf("(SW_64KB_%u)\n", sw_mode - 8); + } else if (sw_mode < 22) { + printf("(SW_VAR_%u)\n", sw_mode - 12); + } else { + printf("(UNKNOWN=%u)\n", sw_mode); + } +} + +hsa_status_t ImageManagerGfx11::PopulateMipLevelSrd( + MipmappedArray& level_view, + const MipmappedArray& mipmap_array, + uint32_t mip_level) const { + + // SRD already copied from parent, just modify BASE_LEVEL/LAST_LEVEL fields + uint32_t* srd_words = reinterpret_cast(level_view.srd); + + // GFX11 SRD WORD3 has BASE_LEVEL and LAST_LEVEL fields + 
SQ_IMG_RSRC_WORD3* word3 = reinterpret_cast(&srd_words[3]); + + // Set both to same value - hardware samples only this level + word3->f.BASE_LEVEL = mip_level; + word3->f.LAST_LEVEL = mip_level; + + debug_print("Set SRD mip selection: BASE_LEVEL=%u, LAST_LEVEL=%u", mip_level, mip_level); + + return HSA_STATUS_SUCCESS; +} + } // namespace image } // namespace rocr diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx11.h b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx11.h index fdea20974b..374e7cbfc8 100755 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx11.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx11.h @@ -60,6 +60,7 @@ class ImageManagerGfx11 : public ImageManagerKv { virtual hsa_status_t CalculateImageSizeAndAlignment( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, hsa_ext_image_data_layout_t image_data_layout, + uint32_t num_mipmap_levels, size_t image_data_row_pitch, size_t image_data_slice_pitch, hsa_ext_image_data_info_t& image_info) const; @@ -80,9 +81,26 @@ class ImageManagerGfx11 : public ImageManagerKv { /// @brief Fill image backing storage using agent copy. virtual hsa_status_t FillImage(const Image& image, const void* pattern, const hsa_ext_image_region_t& region); + + /// @brief Fill mipmap structure with device specific mipmapped array object. + virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const; + + /// @brief Fill mipmap structure with pre-computed AMD metadata descriptor. 
+ virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const; + + /// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields + virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view, + const MipmappedArray& mipmap_array, uint32_t mip_level) const; + + virtual void printSRDDetailed(const uint32_t* srd) const; + virtual void printChannelSelect(uint32_t sel) const; + virtual void printResourceType(uint32_t type) const; + virtual void printSwizzleMode(uint32_t sw_mode) const; + protected: uint32_t GetAddrlibSurfaceInfoNv(hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, + uint32_t num_mipmap_levels, Image::TileMode tileMode, size_t image_data_row_pitch, size_t image_data_slice_pitch, diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx12.cpp b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx12.cpp index 0f0211d05d..7f8480137c 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx12.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx12.cpp @@ -216,14 +216,24 @@ ImageManagerGfx12::~ImageManagerGfx12() {} hsa_status_t ImageManagerGfx12::CalculateImageSizeAndAlignment( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, hsa_ext_image_data_layout_t image_data_layout, + uint32_t num_mipmap_levels, size_t image_data_row_pitch, size_t image_data_slice_pitch, hsa_ext_image_data_info_t& image_info) const { - ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; - hsa_profile_t profile; + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; + + // Allocate persistent memory for mip info on the heap + ADDR3_MIP_INFO* mip_info_storage = new ADDR3_MIP_INFO[num_mipmap_levels]; + memset(mip_info_storage, 0, sizeof(ADDR3_MIP_INFO) * num_mipmap_levels); + out.pMipInfo = mip_info_storage; + + hsa_profile_t profile; hsa_status_t status = HSA::hsa_agent_get_info(component, HSA_AGENT_INFO_PROFILE, &profile); - if (status != 
HSA_STATUS_SUCCESS) return status; + if (status != HSA_STATUS_SUCCESS) { + delete[] mip_info_storage; + return status; + } Image::TileMode tileMode = Image::TileMode::LINEAR; if (image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_OPAQUE) { @@ -231,9 +241,9 @@ hsa_status_t ImageManagerGfx12::CalculateImageSizeAndAlignment( desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB)? Image::TileMode::TILED : Image::TileMode::LINEAR; } - if (GetAddrlibSurfaceInfoNv(component, desc, tileMode, - image_data_row_pitch, image_data_slice_pitch, out) == - (uint32_t)(-1)) { + if (GetAddrlibSurfaceInfoNv(component, desc, num_mipmap_levels, tileMode, + image_data_row_pitch, image_data_slice_pitch, out) == (uint32_t)(-1)) { + delete[] mip_info_storage; return HSA_STATUS_ERROR; } @@ -243,6 +253,7 @@ hsa_status_t ImageManagerGfx12::CalculateImageSizeAndAlignment( image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR && ((image_data_row_pitch && (rowPitch != image_data_row_pitch)) || (image_data_slice_pitch && (slicePitch != image_data_slice_pitch)))) { + delete[] mip_info_storage; return static_cast( HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED); } @@ -252,6 +263,9 @@ hsa_status_t ImageManagerGfx12::CalculateImageSizeAndAlignment( image_info.alignment = out.baseAlign; assert(image_info.alignment != 0); + // Clean up temporary mip info storage + delete[] mip_info_storage; + return HSA_STATUS_SUCCESS; } @@ -430,7 +444,6 @@ hsa_status_t ImageManagerGfx12::PopulateImageSrd(Image& image) const { word1.val = 0; word1.f.BASE_ADDRESS_HI = PtrHigh32(image_data_addr); word1.f.STRIDE = image_prop.element_size; - word1.f.SWIZZLE_ENABLE = 0; word2.f.NUM_RECORDS = image.desc.width * image_prop.element_size; @@ -471,9 +484,8 @@ hsa_status_t ImageManagerGfx12::PopulateImageSrd(Image& image) const { ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; - uint32_t swizzleMode = GetAddrlibSurfaceInfoNv( - image.component, image.desc, image.tile_mode, - image.row_pitch, image.slice_pitch, out); + uint32_t swizzleMode = 
GetAddrlibSurfaceInfoNv(image.component, image.desc, + 1, image.tile_mode, image.row_pitch, image.slice_pitch, out); if (swizzleMode == (uint32_t)(-1)) { return HSA_STATUS_ERROR; } @@ -642,6 +654,7 @@ hsa_status_t ImageManagerGfx12::PopulateSamplerSrd(Sampler& sampler) const { uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, + uint32_t num_mipmap_levels, Image::TileMode tileMode, size_t image_data_row_pitch, size_t image_data_slice_pitch, @@ -664,7 +677,7 @@ uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv( in.width = width; in.height = height; in.numSlices = num_slice; - in.pitchInElement = image_data_row_pitch / image_prop.element_size; + in.numMipLevels = num_mipmap_levels; switch (desc.geometry) { case HSA_EXT_IMAGE_GEOMETRY_1D: @@ -672,46 +685,44 @@ uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv( case HSA_EXT_IMAGE_GEOMETRY_1DA: in.resourceType = ADDR_RSRC_TEX_1D; break; - case HSA_EXT_IMAGE_GEOMETRY_2D: case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH: case HSA_EXT_IMAGE_GEOMETRY_2DA: case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH: in.resourceType = ADDR_RSRC_TEX_2D; break; - case HSA_EXT_IMAGE_GEOMETRY_3D: + { + in.resourceType = ADDR_RSRC_TEX_3D; + /* + * 3D swizzle modes on GFX12 enforces alignment + * of the number of slices to the block depth. + * If numSlices = 3 then the 3 slices are + * interleaved for 3D locality among the 8 slices + * that make up each block. This causes the memory + * footprint to jump from an ideal size of ~12 GB + * to ~32 GB. + * 'enable3DSwizzleMode' flag tests for env variable + * HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable + * 3D swizzle: + * true: Keep view3dAs2dArray = 0 for real 3D interleaving. + * false: Use view3dAs2dArray = 1 to avoid the alignment + * expansion. + * 2D swizzle modes can lower size overhead but may yield + * suboptimal cache behavior for fully 3D volumetric + * operations. 
+ */ + bool enable3DSwizzleMode = core::Runtime::runtime_singleton_->flag().enable_3d_swizzle(); + if (enable3DSwizzleMode) { - in.resourceType = ADDR_RSRC_TEX_3D; - /* - * 3D swizzle modes on GFX12 enforces alignment - * of the number of slices to the block depth. - * If numSlices = 3 then the 3 slices are - * interleaved for 3D locality among the 8 slices - * that make up each block. This causes the memory - * footprint to jump from an ideal size of ~12 GB - * to ~32 GB. - * 'enable3DSwizzleMode' flag tests for env variable - * HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG to enable or disable - * 3D swizzle: - * true: Keep view3dAs2dArray = 0 for real 3D interleaving. - * false: Use view3dAs2dArray = 1 to avoid the alignment - * expansion. - * 2D swizzle modes can lower size overhead but may yield - * suboptimal cache behavior for fully 3D volumetric - * operations. - */ - bool enable3DSwizzleMode = core::Runtime::runtime_singleton_->flag().enable_3d_swizzle(); - if (enable3DSwizzleMode) - { - in.flags.view3dAs2dArray = 0; - } - else - { - in.flags.view3dAs2dArray = 1; - } - break; + in.flags.view3dAs2dArray = 0; } + else + { + in.flags.view3dAs2dArray = 1; + } + break; + } } in.flags.texture = 1; @@ -781,8 +792,9 @@ uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv( const UINT_32 ratioLow = 2; const UINT_32 ratioHigh = 1; - // Same behaviour as GFX11, remove linear if height is 1. - if (in.height > 1) { + // Remove linear swizzle mode for multi-dimensional or mipmapped textures. + // Linear mode is only appropriate for simple 1D single-level textures. 
+ if (in.height > 1 || in.numMipLevels > 1) { swOut.validModes.swLinear = 0; } @@ -793,6 +805,10 @@ uint32_t ImageManagerGfx12::GetAddrlibSurfaceInfoNv( if (swOut.validModes.value & (1 << i)) { ADDR3_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; + + // pMipInfo not needed - set to nullptr and AddrLib will ignore it + localOut.pMipInfo = nullptr; + localOut.size = sizeof(ADDR3_COMPUTE_SURFACE_INFO_OUTPUT); in.swizzleMode = (Addr3SwizzleMode) i; @@ -908,5 +924,456 @@ hsa_status_t ImageManagerGfx12::FillImage(const Image& image, const void* patter return status; } +hsa_status_t ImageManagerGfx12::PopulateMipmapSrd(MipmappedArray& mipmap) const { + // Map format/geometry to hardware encoding + ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap.desc.format, mipmap.desc.geometry); + assert(mipmap_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED); + assert(mipmap_prop.element_size != 0); + assert(mipmap.num_levels >= 1); + + const void* mipmap_data_addr = mipmap.data; + + if (IsLocalMemory(mipmap.data)) { + mipmap_data_addr = reinterpret_cast( + reinterpret_cast(mipmap.data) - local_memory_base_address_); + } + + if (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) { + SQ_BUF_RSRC_WORD0 word0; + SQ_BUF_RSRC_WORD1 word1; + SQ_BUF_RSRC_WORD2 word2; + SQ_BUF_RSRC_WORD3 word3; + + word0.val = 0; + word0.f.BASE_ADDRESS = PtrLow32(mipmap_data_addr); + + word1.val = 0; + word1.f.BASE_ADDRESS_HI = PtrHigh32(mipmap_data_addr); + word1.f.STRIDE = mipmap_prop.element_size; + word1.f.SWIZZLE_ENABLE = 0; + + word2.val = 0; + word2.f.NUM_RECORDS = mipmap.desc.width * mipmap_prop.element_size; + + const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order); + word3.val = 0; + word3.f.DST_SEL_X = swizzle.x; + word3.f.DST_SEL_Y = swizzle.y; + word3.f.DST_SEL_Z = swizzle.z; + word3.f.DST_SEL_W = swizzle.w; + word3.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type); + word3.f.INDEX_STRIDE = mipmap_prop.element_size; + + // GFX12 
compression features (disabled for now) + // word3.f.WRITE_COMPRESS_ENABLE = 0; + // word3.f.COMPRESSION_EN = 0; + // word3.f.COMPRESSION_ACCESS_MODE = 0; + + word3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry); + + mipmap.srd[0] = word0.val; + mipmap.srd[1] = word1.val; + mipmap.srd[2] = word2.val; + mipmap.srd[3] = word3.val; + + // 1DB mipmaps don't use words 4-7 + mipmap.srd[4] = 0; + mipmap.srd[5] = 0; + mipmap.srd[6] = 0; + mipmap.srd[7] = 0; + + mipmap.row_pitch = mipmap.desc.width * mipmap_prop.element_size; + mipmap.slice_pitch = mipmap.row_pitch; + } else { + SQ_IMG_RSRC_WORD0 word0; + SQ_IMG_RSRC_WORD1 word1; + SQ_IMG_RSRC_WORD2 word2; + SQ_IMG_RSRC_WORD3 word3; + SQ_IMG_RSRC_WORD4 word4; + SQ_IMG_RSRC_WORD5 word5; + SQ_IMG_RSRC_WORD6 word6; + SQ_IMG_RSRC_WORD7 word7; + + // Get ADDR3 surface information + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; + + // pMipInfo not needed - set to nullptr and AddrLib will ignore it + out.pMipInfo = nullptr; + + unsigned int swizzleMode = GetAddrlibSurfaceInfoNv(mipmap.component, + mipmap.desc, mipmap.num_levels, mipmap.tile_mode, + mipmap.row_pitch, mipmap.slice_pitch, out); + if (swizzleMode == (uint32_t)(-1)) { + return HSA_STATUS_ERROR; + } + mipmap.addr_output.addr3 = out; + mipmap.size = out.surfSize; + + assert((out.bpp / 8) == mipmap_prop.element_size); + + const size_t row_pitch_size = out.pitch * mipmap_prop.element_size; + + word0.val = 0; + word0.f.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr); + + word1.val = 0; + word1.f.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr); + word1.f.MAX_MIP = mipmap.num_levels - 1; + word1.f.BASE_LEVEL = 0; // New to GFX12 + word1.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type); + // Only take the lowest 2 bits of (image.desc.width - 1) + word1.f.WIDTH = BitSelect<0, 1>(mipmap.desc.width - 1); + + word2.val = 0; + // Take the high 14 bits of (mipmap.desc.width - 1) + word2.f.WIDTH_HI = BitSelect<2, 15>(mipmap.desc.width - 1); + 
word2.f.HEIGHT = mipmap.desc.height ? mipmap.desc.height - 1 : 0; + + const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order); + word3.val = 0; + word3.f.DST_SEL_X = swizzle.x; + word3.f.DST_SEL_Y = swizzle.y; + word3.f.DST_SEL_Z = swizzle.z; + word3.f.DST_SEL_W = swizzle.w; + // word3.f.NO_EDGE_CLAMP = 0; // New to GFX12 + word3.f.LAST_LEVEL = mipmap.num_levels - 1; + word3.f.SW_MODE = swizzleMode; + word3.f.BC_SWIZZLE = GetBcSwizzle(swizzle); + word3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry); + + const bool mipmap_array = + (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA || + mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA || + mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH); + const bool mipmap_3d = (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D); + + word4.val = 0; + + // For 1d, 2d and 2d-msaa, fields DEPTH+PITCH_MSB encode pitch-1 + if (!mipmap_array && !mipmap_3d) { + uint32_t encPitch = out.pitch - 1; + word4.f.DEPTH = encPitch & 0x3fff; // first 14 bits + word4.f.PITCH_MSB = (encPitch >> 14) & 0x3; // last 2 bits + } else { + word4.f.DEPTH = + (mipmap_array) // Doesn't hurt but isn't array_size already >0? + ? std::max(mipmap.desc.array_size, static_cast(1)) - 1 + : (mipmap_3d) ? 
mipmap.desc.depth - 1 : 0; + } + + word5.val = 0; + word6.val = 0; + word7.val = 0; + + mipmap.srd[0] = word0.val; + mipmap.srd[1] = word1.val; + mipmap.srd[2] = word2.val; + mipmap.srd[3] = word3.val; + mipmap.srd[4] = word4.val; + mipmap.srd[5] = word5.val; + mipmap.srd[6] = word6.val; + mipmap.srd[7] = word7.val; + + mipmap.row_pitch = row_pitch_size; + mipmap.slice_pitch = out.sliceSize; + } + + mipmap.srd[8] = mipmap.desc.format.channel_type; + mipmap.srd[9] = mipmap.desc.format.channel_order; + mipmap.srd[10] = static_cast(mipmap.desc.width); + + // Mipmap-specific + mipmap.srd[11] = mipmap.num_levels; + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t ImageManagerGfx12::PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const { + const metadata_amd_gfx12_t* desc_gfx12 = reinterpret_cast(desc); + const void* mipmap_data_addr = mipmap_array.data; + + ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap_array.desc.format, mipmap_array.desc.geometry); + if (mipmap_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED || mipmap_prop.element_size == 0) { + return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED; + } + + const Swizzle swizzle = ImageLut().MapSwizzle(mipmap_array.desc.format.channel_order); + + if (IsLocalMemory(mipmap_array.data)) { + mipmap_data_addr = reinterpret_cast( + reinterpret_cast(mipmap_array.data) - local_memory_base_address_); + } + + // Copy the pre-computed SRD words 0-7 from metadata + mipmap_array.srd[0] = desc_gfx12->word0.u32All; + mipmap_array.srd[1] = desc_gfx12->word1.u32All; + mipmap_array.srd[2] = desc_gfx12->word2.u32All; + mipmap_array.srd[3] = desc_gfx12->word3.u32All; + mipmap_array.srd[4] = desc_gfx12->word4.u32All; + mipmap_array.srd[5] = desc_gfx12->word5.u32All; + mipmap_array.srd[6] = desc_gfx12->word6.u32All; + mipmap_array.srd[7] = desc_gfx12->word7.u32All; + + if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) { + // 1DB uses buffer descriptors + SQ_BUF_RSRC_WORD0 
word0; + SQ_BUF_RSRC_WORD1 word1; + SQ_BUF_RSRC_WORD3 word3; + + word0.val = 0; + word0.f.BASE_ADDRESS = PtrLow32(mipmap_data_addr); + + word1.val = mipmap_array.srd[1]; + word1.f.BASE_ADDRESS_HI = PtrHigh32(mipmap_data_addr); + word1.f.STRIDE = mipmap_prop.element_size; + + word3.val = mipmap_array.srd[3]; + word3.f.DST_SEL_X = swizzle.x; + word3.f.DST_SEL_Y = swizzle.y; + word3.f.DST_SEL_Z = swizzle.z; + word3.f.DST_SEL_W = swizzle.w; + word3.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type); + word3.f.INDEX_STRIDE = mipmap_prop.element_size; + + mipmap_array.srd[0] = word0.val; + mipmap_array.srd[1] = word1.val; + mipmap_array.srd[3] = word3.val; + + mipmap_array.row_pitch = mipmap_array.desc.width * mipmap_prop.element_size; + mipmap_array.slice_pitch = mipmap_array.row_pitch; + } else { + // Non-1DB uses image descriptors + uint32_t hwPixelSize = ImageLut().GetPixelSize(mipmap_prop.data_format, mipmap_prop.data_type); + if (mipmap_prop.element_size != hwPixelSize) { + return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED; + } + + reinterpret_cast(&mipmap_array.srd[0])->bits.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr); + reinterpret_cast(&mipmap_array.srd[1])->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr); + reinterpret_cast(&mipmap_array.srd[1])->bits.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type); + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_X = swizzle.x; + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_Y = swizzle.y; + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_Z = swizzle.z; + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_W = swizzle.w; + + if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA || + mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D) { + reinterpret_cast(&mipmap_array.srd[3])->bits.TYPE = + ImageLut().MapGeometry(mipmap_array.desc.geometry); + } + } + + // Looks like this is only used for CPU copies. 
+  mipmap_array.row_pitch = 0;
+  mipmap_array.slice_pitch = 0;
+
+  // Store mipmap-specific metadata
+  mipmap_array.srd[8] = mipmap_array.desc.format.channel_type;
+  mipmap_array.srd[9] = mipmap_array.desc.format.channel_order;
+  mipmap_array.srd[10] = static_cast<uint32_t>(mipmap_array.desc.width);
+  mipmap_array.srd[11] = mipmap_array.num_levels;
+
+  // A chain needs at least one level: with zero levels, "num_levels - 1" below
+  // wraps to UINT32_MAX and the per-level table would be indexed out of bounds.
+  if (mipmap_array.num_levels == 0) {
+    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  // Allocate a zero-initialized per-level table and fill it from the metadata mip_offsets.
+  // NOTE(review): raw owning allocation; the matching delete[] is not visible here -
+  // confirm who releases addr_output.addr3.pMipInfo.
+  // NOTE(review): assumes mip_offsets holds at least num_levels entries - confirm
+  // against the metadata_amd_gfx12_t definition.
+  ADDR3_MIP_INFO* mip_info_storage = new ADDR3_MIP_INFO[mipmap_array.num_levels]();
+
+  // Extract per-level information from mip_offsets array
+  for (uint32_t level = 0; level < mipmap_array.num_levels; level++) {
+    // mip_offsets contains offset bits [39:8], shift left by 8 to get actual byte offset
+    mip_info_storage[level].offset = static_cast<uint64_t>(desc_gfx12->mip_offsets[level]) << 8;
+
+    // Calculate dimensions for this level (halve at each level, never below 1)
+    mip_info_storage[level].pixelPitch = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.width >> level));
+    mip_info_storage[level].pixelHeight = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.height >> level));
+    mip_info_storage[level].depth = std::max(1u, static_cast<uint32_t>(mipmap_array.desc.depth >> level));
+  }
+
+  // Store pMipInfo in addr_output for later use by PopulateMipLevelSrd
+  mipmap_array.addr_output.addr3.pMipInfo = mip_info_storage;
+
+  // Total size calculation from metadata (estimate: end of the last level).
+  // The first factor is widened so the whole product is evaluated in 64 bits
+  // instead of wrapping in 32-bit arithmetic before the assignment.
+  // NOTE(review): this assumes the last level ends at the highest address -
+  // confirm against the mip ordering used by the metadata producer.
+  const uint32_t last_level = mipmap_array.num_levels - 1;
+  const ADDR3_MIP_INFO& last_info = mip_info_storage[last_level];
+  const uint64_t last_level_size = static_cast<uint64_t>(last_info.pixelPitch) *
+                                   last_info.pixelHeight *
+                                   last_info.depth *
+                                   mipmap_prop.element_size;
+  mipmap_array.size = last_info.offset + last_level_size;
+
+  return HSA_STATUS_SUCCESS;
+}
+
+void ImageManagerGfx12::printSRDDetailed(const uint32_t* srd) const {
+  if (!srd) {
+    printf("\n========== Image SRD (GFX12) - Detailed ==========\n");
+    printf("ERROR: No SRD data provided.\n");
+
printf("===============================================\n\n"); + return; + } + + printf("\n========== Image SRD (GFX12) - Detailed ==========\n"); + + // Print all 12 words with bit field annotations + for (int i = 0; i < 12; i++) { + printf("WORD %d: 0x%08x ", i, srd[i]); + + // Binary representation + printf("("); + for (int bit = 31; bit >= 0; bit--) { + printf("%d", (srd[i] >> bit) & 1); + if (bit % 4 == 0 && bit != 0) printf("_"); + } + printf(")\n"); + } + + // WORD 0: SQ_IMG_RSRC_WORD0 + SQ_IMG_RSRC_WORD0 word0; + word0.val = srd[0]; + printf("\nWORD 0: BASE_ADDRESS (bits 39:8) = 0x%08x\n", word0.f.BASE_ADDRESS); + + // WORD 1: SQ_IMG_RSRC_WORD1 + SQ_IMG_RSRC_WORD1 word1; + word1.val = srd[1]; + printf("WORD 1: BASE_ADDRESS_HI = 0x%08x\n", word1.f.BASE_ADDRESS_HI); + printf(" MAX_MIP = %u ◄──── Total mip levels - 1\n", word1.f.MAX_MIP); + printf(" BASE_LEVEL = %u ◄──── Current base level\n", word1.f.BASE_LEVEL); + printf(" FORMAT = %u\n", word1.f.FORMAT); + printf(" WIDTH (bits 1:0) = %u\n", word1.f.WIDTH); + + // Calculate full address (GFX12 uses 40-bit shifted by 8) + uint64_t base_addr = ((uint64_t)word1.f.BASE_ADDRESS_HI << 40) | ((uint64_t)word0.f.BASE_ADDRESS << 8); + printf(" → Full Base Address = 0x%016lx\n", base_addr); + + // WORD 2: SQ_IMG_RSRC_WORD2 + SQ_IMG_RSRC_WORD2 word2; + word2.val = srd[2]; + printf("WORD 2: WIDTH_HI (bits 15:2) = %u\n", word2.f.WIDTH_HI); + printf(" HEIGHT = %u\n", word2.f.HEIGHT); + + // Calculate full width + uint32_t full_width = word1.f.WIDTH | (word2.f.WIDTH_HI << 2); + printf(" → Full Width = %u (actual: %u)\n", full_width, full_width + 1); + printf(" → Full Height = %u (actual: %u)\n", word2.f.HEIGHT, word2.f.HEIGHT + 1); + + // WORD 3: SQ_IMG_RSRC_WORD3 + SQ_IMG_RSRC_WORD3 word3; + word3.val = srd[3]; + printf("WORD 3: DST_SEL_X = %u ", word3.f.DST_SEL_X); + printChannelSelect(word3.f.DST_SEL_X); + printf(" DST_SEL_Y = %u ", word3.f.DST_SEL_Y); + printChannelSelect(word3.f.DST_SEL_Y); + printf(" DST_SEL_Z = %u ", 
word3.f.DST_SEL_Z); + printChannelSelect(word3.f.DST_SEL_Z); + printf(" DST_SEL_W = %u ", word3.f.DST_SEL_W); + printChannelSelect(word3.f.DST_SEL_W); + printf(" LAST_LEVEL = %u ◄──── Current last level (GFX12 NEW)\n", word3.f.LAST_LEVEL); + printf(" SW_MODE = %u ", word3.f.SW_MODE); + printSwizzleMode(word3.f.SW_MODE); + printf(" BC_SWIZZLE = %u\n", word3.f.BC_SWIZZLE); + printf(" TYPE = %u ", word3.f.TYPE); + printResourceType(word3.f.TYPE); + + // WORD 4: SQ_IMG_RSRC_WORD4 + SQ_IMG_RSRC_WORD4 word4; + word4.val = srd[4]; + printf("WORD 4: DEPTH = %u\n", word4.f.DEPTH); + printf(" PITCH_MSB = %u\n", word4.f.PITCH_MSB); + + // Calculate effective depth/pitch based on geometry + uint32_t type = word3.f.TYPE; + if (type == 10) { // 3D + printf(" → 3D Depth = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1); + } else if (type == 13 || type == 12) { // Arrays + printf(" → Array Size = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1); + } else { // 1D/2D - encodes pitch + uint32_t encoded_pitch = word4.f.DEPTH | (word4.f.PITCH_MSB << 14); + printf(" → Encoded Pitch = %u (actual: %u)\n", encoded_pitch, encoded_pitch + 1); + } + + // WORD 5-7: Usually zero for basic images + printf("WORD 5: Reserved = 0x%08x\n", srd[5]); + printf("WORD 6: Reserved = 0x%08x\n", srd[6]); + printf("WORD 7: Reserved = 0x%08x\n", srd[7]); + + // Mipmap analysis + if (word1.f.MAX_MIP > 0) { + printf("\nMIPMAP ANALYSIS:\n"); + printf(" Total Levels = %u (MAX_MIP + 1)\n", word1.f.MAX_MIP + 1); + printf(" Active Range = [%u, %u]\n", word1.f.BASE_LEVEL, word3.f.LAST_LEVEL); + if (word1.f.BASE_LEVEL == word3.f.LAST_LEVEL) { + printf(" Mode = SINGLE LEVEL VIEW ◄──── Mip level view\n"); + uint32_t level = word1.f.BASE_LEVEL; + uint32_t level_width = std::max(1u, (full_width + 1) >> level); + uint32_t level_height = std::max(1u, static_cast((word2.f.HEIGHT + 1) >> level)); + printf(" Effective Dimensions = %ux%u (level %u)\n", level_width, level_height, level); + } else { + printf(" Mode = 
FULL MIPMAP CHAIN\n");
+    }
+  }
+  printf("===============================================\n\n");
+}
+
+void ImageManagerGfx12::printChannelSelect(uint32_t sel) const {
+  switch(sel) {
+    case 0: printf("(SEL_0)\n"); break;
+    case 1: printf("(SEL_1)\n"); break;
+    case 4: printf("(SEL_X/R)\n"); break;
+    case 5: printf("(SEL_Y/G)\n"); break;
+    case 6: printf("(SEL_Z/B)\n"); break;
+    case 7: printf("(SEL_W/A)\n"); break;
+    default: printf("(UNKNOWN)\n"); break;
+  }
+}
+
+void ImageManagerGfx12::printResourceType(uint32_t type) const {
+  switch(type) {
+    case 8: printf("(1D)\n"); break;
+    case 9: printf("(2D)\n"); break;
+    case 10: printf("(3D)\n"); break;
+    case 11: printf("(CUBE)\n"); break;
+    case 12: printf("(1D_ARRAY/1DB)\n"); break;
+    case 13: printf("(2D_ARRAY)\n"); break;
+    case 14: printf("(2D_MSAA)\n"); break;
+    case 15: printf("(2D_MSAA_ARRAY)\n"); break;
+    default: printf("(UNKNOWN=%u)\n", type); break;
+  }
+}
+
+// Prints a label for the SW_MODE field. This manager stores the Addr3SwizzleMode
+// index chosen in GetAddrlibSurfaceInfoNv there, so the labels follow that enum
+// (ADDR3_LINEAR = 0 .. ADDR3_256KB_3D = 7) rather than the older numbering.
+void ImageManagerGfx12::printSwizzleMode(uint32_t sw_mode) const {
+  switch (sw_mode) {
+    case 0: printf("(LINEAR)\n"); break;
+    case 1: printf("(256B_2D)\n"); break;
+    case 2: printf("(4KB_2D)\n"); break;
+    case 3: printf("(64KB_2D)\n"); break;
+    case 4: printf("(256KB_2D)\n"); break;
+    case 5: printf("(4KB_3D)\n"); break;
+    case 6: printf("(64KB_3D)\n"); break;
+    case 7: printf("(256KB_3D)\n"); break;
+    default: printf("(UNKNOWN=%u)\n", sw_mode); break;
+  }
+}
+
+// Turns level_view (whose SRD was already copied from mipmap_array) into a view
+// of the single level mip_level by pinning BASE_LEVEL and LAST_LEVEL to it.
+// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when the level does not exist and
+// HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED for 1DB resources.
+hsa_status_t ImageManagerGfx12::PopulateMipLevelSrd(
+    MipmappedArray& level_view,
+    const MipmappedArray& mipmap_array,
+    uint32_t mip_level) const {
+
+  // Only levels that exist in the parent chain can be viewed.
+  if (mip_level >= mipmap_array.num_levels) {
+    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+  }
+
+  // 1DB resources carry a buffer descriptor (SQ_BUF_RSRC_*), which has no
+  // BASE_LEVEL/LAST_LEVEL fields; patching it as an image descriptor would
+  // overwrite unrelated buffer bits.
+  if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
+    return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
+  }
+
+  // SRD already copied from parent, just modify BASE_LEVEL/LAST_LEVEL fields
+  uint32_t* srd_words = reinterpret_cast<uint32_t*>(level_view.srd);
+
+  // GFX12 SRD WORDs 1 and 3 hold the BASE_LEVEL and LAST_LEVEL fields
+  SQ_IMG_RSRC_WORD1* word1 = reinterpret_cast<SQ_IMG_RSRC_WORD1*>(&srd_words[1]);
+  SQ_IMG_RSRC_WORD3* word3 = reinterpret_cast<SQ_IMG_RSRC_WORD3*>(&srd_words[3]);
+
+  // Set both to same value - hardware samples only this level
+  word1->f.BASE_LEVEL = mip_level;
+  word3->f.LAST_LEVEL = mip_level;
+
+
debug_print("Set SRD mip selection: BASE_LEVEL=%u, LAST_LEVEL=%u", mip_level, mip_level); + + return HSA_STATUS_SUCCESS; +} + } // namespace image } // namespace rocr diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx12.h b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx12.h index 085dee9c94..2bc6c7a805 100755 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx12.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_gfx12.h @@ -60,6 +60,7 @@ class ImageManagerGfx12 : public ImageManagerKv { virtual hsa_status_t CalculateImageSizeAndAlignment( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, hsa_ext_image_data_layout_t image_data_layout, + uint32_t num_mipmap_levels, size_t image_data_row_pitch, size_t image_data_slice_pitch, hsa_ext_image_data_info_t& image_info) const; @@ -80,9 +81,26 @@ class ImageManagerGfx12 : public ImageManagerKv { /// @brief Fill image backing storage using agent copy. virtual hsa_status_t FillImage(const Image& image, const void* pattern, const hsa_ext_image_region_t& region); + + /// @brief Fill mipmap structure with device specific mipmapped array object. + virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const; + + /// @brief Fill mipmap structure with pre-computed AMD metadata descriptor. 
+ virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const; + + /// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields + virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view, + const MipmappedArray& mipmap_array, uint32_t mip_level) const; + + virtual void printSRDDetailed(const uint32_t* srd) const; + virtual void printChannelSelect(uint32_t sel) const; + virtual void printResourceType(uint32_t type) const; + virtual void printSwizzleMode(uint32_t sw_mode) const; + protected: uint32_t GetAddrlibSurfaceInfoNv(hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, + uint32_t num_mipmap_levels, Image::TileMode tileMode, size_t image_data_row_pitch, size_t image_data_slice_pitch, diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_kv.cpp b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_kv.cpp index 1bf4cc592a..85468208f7 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_kv.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_kv.cpp @@ -220,6 +220,7 @@ void ImageManagerKv::GetImageInfoMaxDimension(hsa_agent_t component, hsa_status_t ImageManagerKv::CalculateImageSizeAndAlignment( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, hsa_ext_image_data_layout_t image_data_layout, + uint32_t num_mipmap_levels, size_t image_data_row_pitch, size_t image_data_slice_pitch, hsa_ext_image_data_info_t& image_info) const { @@ -719,6 +720,162 @@ hsa_status_t ImageManagerKv::FillImage(const Image& image, const void* pattern, return status; } +hsa_status_t ImageManagerKv::PopulateMipmapSrd(MipmappedArray& mipmap) const { + // Kv (GFX8) architecture does not support mipmaps + return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED; +} + +hsa_status_t ImageManagerKv::PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const { + // Kv (GFX8) architecture does not support 
mipmaps + return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED; +} + +void ImageManagerKv::printSRDDetailed(const uint32_t* srd) const { + if (!srd) { + printf("\n========== Image SRD (KV) - Detailed ==========\n"); + printf("ERROR: No SRD data provided.\n"); + printf("===============================================\n\n"); + return; + } + + printf("\n========== Image SRD (KV) - Detailed ==========\n"); + + // Print all 12 words with bit field annotations + for (int i = 0; i < 12; i++) { + printf("WORD %d: 0x%08x ", i, srd[i]); + + // Binary representation + printf("("); + for (int bit = 31; bit >= 0; bit--) { + printf("%d", (srd[i] >> bit) & 1); + if (bit % 4 == 0 && bit != 0) printf("_"); + } + printf(")\n"); + } + + // WORD 0: BASE_ADDRESS (bits 39:8) + SQ_IMG_RSRC_WORD0 word0; + word0.u32_all = srd[0]; + printf("\nWORD 0: BASE_ADDRESS (bits 39:8) = 0x%08x\n", word0.bits.base_address); + + // WORD 1: Contains BASE_ADDRESS_HI, MIN_LOD, DATA_FORMAT, NUM_FORMAT, MTYPE + SQ_IMG_RSRC_WORD1 word1; + word1.u32_all = srd[1]; + printf("WORD 1: BASE_ADDRESS_HI = 0x%02x\n", word1.bits.base_address_hi); + printf(" MIN_LOD = %u\n", word1.bits.min_lod); + printf(" DATA_FORMAT = %u\n", word1.bits.data_format); + printf(" NUM_FORMAT = %u\n", word1.bits.num_format); + printf(" MTYPE = %u\n", word1.bits.mtype); + + // Calculate full address (KV uses 40-bit shifted by 8) + uint64_t base_addr = ((uint64_t)word1.bits.base_address_hi << 40) | ((uint64_t)word0.bits.base_address << 8); + printf(" → Full Base Address = 0x%016lx\n", base_addr); + + // WORD 2: WIDTH, HEIGHT, PERF_MOD, INTERLACED + SQ_IMG_RSRC_WORD2 word2; + word2.u32_all = srd[2]; + printf("WORD 2: WIDTH = %u (actual: %u)\n", word2.bits.width, word2.bits.width + 1); + printf(" HEIGHT = %u (actual: %u)\n", word2.bits.height, word2.bits.height + 1); + printf(" PERF_MOD = %u\n", word2.bits.perf_mod); + printf(" INTERLACED = %u\n", word2.bits.interlaced); + + // WORD 3: Channel selectors, TILING_INDEX, POW2_PAD, 
TYPE, ATC + SQ_IMG_RSRC_WORD3 word3; + word3.u32_all = srd[3]; + printf("WORD 3: DST_SEL_X = %u ", word3.bits.dst_sel_x); + printChannelSelect(word3.bits.dst_sel_x); + printf(" DST_SEL_Y = %u ", word3.bits.dst_sel_y); + printChannelSelect(word3.bits.dst_sel_y); + printf(" DST_SEL_Z = %u ", word3.bits.dst_sel_z); + printChannelSelect(word3.bits.dst_sel_z); + printf(" DST_SEL_W = %u ", word3.bits.dst_sel_w); + printChannelSelect(word3.bits.dst_sel_w); + printf(" TILING_INDEX = %u ◄──── Tile configuration index\n", word3.bits.tiling_index); + printf(" POW2_PAD = %u ◄──── Power-of-2 padding\n", word3.bits.pow2_pad); + printf(" TYPE = %u ", word3.bits.type); + printResourceType(word3.bits.type); + printf(" ATC = %u ◄──── Address translation cache\n", word3.bits.atc); + + // WORD 4: DEPTH, PITCH + SQ_IMG_RSRC_WORD4 word4; + word4.u32_all = srd[4]; + printf("WORD 4: DEPTH = %u\n", word4.bits.depth); + printf(" PITCH = %u (actual: %u)\n", word4.bits.pitch, word4.bits.pitch + 1); + + // Calculate effective depth/pitch based on geometry + uint32_t type = word3.bits.type; + if (type == 10) { // 3D + printf(" → 3D Depth = %u (actual: %u)\n", word4.bits.depth, word4.bits.depth + 1); + } else if (type == 13 || type == 12) { // Arrays + printf(" → Array Size = %u (actual: %u)\n", word4.bits.depth, word4.bits.depth + 1); + } + + // WORD 5: LAST_ARRAY + SQ_IMG_RSRC_WORD5 word5; + word5.u32_all = srd[5]; + printf("WORD 5: LAST_ARRAY = %u ◄──── Last array slice\n", word5.bits.last_array); + + // WORD 6-7: Usually zero for basic images + printf("WORD 6: Reserved = 0x%08x\n", srd[6]); + printf("WORD 7: Reserved = 0x%08x\n", srd[7]); + + // Additional information (HSA extension fields) + printf("WORD 8: CHANNEL_TYPE = 0x%08x\n", srd[8]); + printf("WORD 9: CHANNEL_ORDER = 0x%08x\n", srd[9]); + printf("WORD 10: WIDTH_ORIGINAL = 0x%08x\n", srd[10]); + printf("WORD 11: NUM_LEVELS = 0x%08x\n", srd[11]); + + // Mipmap analysis (KV architecture limitations) + printf("\nMIPMAP ANALYSIS:\n"); + 
printf(" Total Levels = %u\n", srd[11]);
+  printf(" Min LOD = %u ◄──── Minimum detail level\n", word1.bits.min_lod);
+  printf(" KV Architecture = LEGACY MIPMAP SUPPORT\n");
+  printf(" Note = KV lacks BASE_LEVEL/LAST_LEVEL fields\n");
+  printf(" Note = Mip level selection via shader only\n");
+  printf("===============================================\n\n");
+}
+
+void ImageManagerKv::printChannelSelect(uint32_t sel) const {
+  switch(sel) {
+    case 0: printf("(SEL_0)\n"); break;
+    case 1: printf("(SEL_1)\n"); break;
+    case 4: printf("(SEL_X/R)\n"); break;
+    case 5: printf("(SEL_Y/G)\n"); break;
+    case 6: printf("(SEL_Z/B)\n"); break;
+    case 7: printf("(SEL_W/A)\n"); break;
+    default: printf("(UNKNOWN)\n"); break;
+  }
+}
+
+void ImageManagerKv::printResourceType(uint32_t type) const {
+  switch(type) {
+    case 8: printf("(1D)\n"); break;
+    case 9: printf("(2D)\n"); break;
+    case 10: printf("(3D)\n"); break;
+    case 11: printf("(CUBE)\n"); break;
+    case 12: printf("(1D_ARRAY/1DB)\n"); break;
+    case 13: printf("(2D_ARRAY)\n"); break;
+    case 14: printf("(2D_MSAA)\n"); break;
+    case 15: printf("(2D_MSAA_ARRAY)\n"); break;
+    default: printf("(UNKNOWN=%u)\n", type); break;
+  }
+}
+
+void ImageManagerKv::printSwizzleMode(uint32_t sw_mode) const {
+  // KV architecture uses tiling modes instead of swizzle modes
+  // This function is not typically called for KV, but provided for completeness
+  printf("(TILING_MODE=%u)\n", sw_mode);
+}
+
+hsa_status_t ImageManagerKv::PopulateMipLevelSrd(
+    MipmappedArray& level_view,
+    const MipmappedArray& mipmap_array,
+    uint32_t mip_level) const {
+  // Kv does not support mipmaps, so no level view can be built. Report the same
+  // "unsupported" status as the Kv PopulateMipmapSrd stubs, not a runtime-state error.
+  return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
+}
+
 hsa_status_t ImageManagerKv::GetLocalMemoryRegion(hsa_region_t region,
                                                   void* data) {
   if (data == NULL) {
@@ -845,7 +1002,7 @@ bool ImageManagerKv::GetAddrlibSurfaceInfo(
   in.width = width;
   in.height = height;
   in.numSlices = num_slice;
-  in.pitchInElement = image_data_row_pitch / image_prop.element_size;
+
switch(desc.geometry) { case HSA_EXT_IMAGE_GEOMETRY_1D: case HSA_EXT_IMAGE_GEOMETRY_1DB: diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_kv.h b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_kv.h old mode 100755 new mode 100644 index 60b0fc0a4b..2e0b2305a3 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_kv.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_kv.h @@ -79,6 +79,7 @@ class ImageManagerKv : public ImageManager { virtual hsa_status_t CalculateImageSizeAndAlignment( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, hsa_ext_image_data_layout_t image_data_layout, + uint32_t num_mipmap_levels, size_t image_data_row_pitch, size_t image_data_slice_pitch, hsa_ext_image_data_info_t& image_info) const; @@ -116,6 +117,21 @@ class ImageManagerKv : public ImageManager { virtual hsa_status_t FillImage(const Image& image, const void* pattern, const hsa_ext_image_region_t& region); + /// @brief Fill mipmap structure with device specific mipmapped array object. + virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const; + + /// @brief Fill mipmap structure with pre-computed AMD metadata descriptor. 
+ virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const; + + /// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields + virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view, + const MipmappedArray& mipmap_array, uint32_t mip_level) const; + + virtual void printSRDDetailed(const uint32_t* srd) const; + virtual void printChannelSelect(uint32_t sel) const; + virtual void printResourceType(uint32_t type) const; + virtual void printSwizzleMode(uint32_t sw_mode) const; + protected: static hsa_status_t GetLocalMemoryRegion(hsa_region_t region, void* data); @@ -145,6 +161,8 @@ class ImageManagerKv : public ImageManager { ADDR_HANDLE addr_lib_; + virtual ADDR_HANDLE GetAddrLib() const override { return addr_lib_; } + hsa_agent_t agent_; uint32_t family_type_; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_nv.cpp b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_nv.cpp index 20ae6cb706..760657c0ea 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_nv.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_nv.cpp @@ -190,7 +190,7 @@ static FORMAT GetCombinedFormat(uint8_t fmt, uint8_t type) { return CFMT_INVALID; }; //----------------------------------------------------------------------------- -// End workaround +// End workaround //----------------------------------------------------------------------------- ImageManagerNv::ImageManagerNv() : ImageManagerKv() {} @@ -201,6 +201,7 @@ ImageManagerNv::~ImageManagerNv() {} hsa_status_t ImageManagerNv::CalculateImageSizeAndAlignment( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, hsa_ext_image_data_layout_t image_data_layout, + uint32_t num_mipmap_levels, size_t image_data_row_pitch, size_t image_data_slice_pitch, hsa_ext_image_data_info_t& image_info) const { @@ -216,9 +217,8 @@ hsa_status_t ImageManagerNv::CalculateImageSizeAndAlignment( desc.geometry != 
HSA_EXT_IMAGE_GEOMETRY_1DB)? Image::TileMode::TILED : Image::TileMode::LINEAR; } - if (GetAddrlibSurfaceInfoNv(component, desc, tileMode, - image_data_row_pitch, image_data_slice_pitch, out) == - (uint32_t)(-1)) { + if (GetAddrlibSurfaceInfoNv(component, desc, num_mipmap_levels, tileMode, + image_data_row_pitch, image_data_slice_pitch, out) == (uint32_t)(-1)) { return HSA_STATUS_ERROR; } @@ -319,7 +319,7 @@ hsa_status_t ImageManagerNv::PopulateImageSrd(Image& image, reinterpret_cast(&image.srd[3])->bits.TYPE = ImageLut().MapGeometry(image.desc.geometry); } - + // Imported metadata holds the offset to metadata, add the image base address. uintptr_t meta = uintptr_t(((SQ_IMG_RSRC_WORD7*)(&image.srd[7]))->bits.META_DATA_ADDRESS_HI) << 16; meta |= uintptr_t(((SQ_IMG_RSRC_WORD6*)(&image.srd[6]))->bits.META_DATA_ADDRESS) << 8; @@ -450,9 +450,8 @@ hsa_status_t ImageManagerNv::PopulateImageSrd(Image& image) const { ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; - uint32_t swizzleMode = GetAddrlibSurfaceInfoNv( - image.component, image.desc, image.tile_mode, - image.row_pitch, image.slice_pitch, out); + uint32_t swizzleMode = GetAddrlibSurfaceInfoNv(image.component, image.desc, + 1, image.tile_mode, image.row_pitch, image.slice_pitch, out); if (swizzleMode == (uint32_t)(-1)) { return HSA_STATUS_ERROR; } @@ -612,6 +611,7 @@ hsa_status_t ImageManagerNv::PopulateSamplerSrd(Sampler& sampler) const { uint32_t ImageManagerNv::GetAddrlibSurfaceInfoNv( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, + uint32_t num_mipmap_levels, Image::TileMode tileMode, size_t image_data_row_pitch, size_t image_data_slice_pitch, @@ -627,7 +627,9 @@ uint32_t ImageManagerNv::GetAddrlibSurfaceInfoNv( const uint32_t num_slice = static_cast( std::max(kMinNumSlice, std::max(desc.array_size, desc.depth))); - uint32_t minor_ver = MinorVerFromDevID(chip_id_); + // Minor version used for future GPU-specific optimizations (currently unused) + (void)MinorVerFromDevID(chip_id_); + 
ADDR2_COMPUTE_SURFACE_INFO_INPUT in = {0};
   in.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
   in.format = addrlib_format;
@@ -635,9 +637,8 @@ uint32_t ImageManagerNv::GetAddrlibSurfaceInfoNv(
   in.width = width;
   in.height = height;
   in.numSlices = num_slice;
-  // Custom Pitch is supported in gfx1030 and beyond
-  if (minor_ver >= 3)
-    in.pitchInElement = image_data_row_pitch / image_prop.element_size;
+  in.numMipLevels = num_mipmap_levels;
+
   switch (desc.geometry) {
     case HSA_EXT_IMAGE_GEOMETRY_1D:
     case HSA_EXT_IMAGE_GEOMETRY_1DB:
@@ -804,5 +805,425 @@ hsa_status_t ImageManagerNv::FillImage(const Image& image, const void* pattern,
   return status;
 }
 
+// Builds the hardware descriptor (SRD) for a whole mipmapped array: a buffer
+// descriptor for 1DB geometry, an image descriptor sized by AddrLib otherwise.
+hsa_status_t ImageManagerNv::PopulateMipmapSrd(MipmappedArray& mipmap) const {
+  ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap.desc.format, mipmap.desc.geometry);
+  assert(mipmap_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
+  assert(mipmap_prop.element_size != 0);
+  assert(mipmap.num_levels >= 1);
+
+  const void* mipmap_data_addr = mipmap.data;
+
+  if (IsLocalMemory(mipmap.data)) {
+    mipmap_data_addr = reinterpret_cast<const void*>(
+        reinterpret_cast<uintptr_t>(mipmap.data) - local_memory_base_address_);
+  }
+
+  if (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
+    SQ_BUF_RSRC_WORD0 word0;
+    SQ_BUF_RSRC_WORD1 word1;
+    SQ_BUF_RSRC_WORD2 word2;
+    SQ_BUF_RSRC_WORD3 word3;
+
+    word0.val = 0;
+    word0.f.BASE_ADDRESS = PtrLow32(mipmap_data_addr);
+
+    word1.val = 0;
+    word1.f.BASE_ADDRESS_HI = PtrHigh32(mipmap_data_addr);
+    word1.f.STRIDE = mipmap_prop.element_size;
+    word1.f.SWIZZLE_ENABLE = false;
+    word1.f.CACHE_SWIZZLE = false;
+
+    // Clear the whole word first, like the other words, so no descriptor bit is left indeterminate.
+    word2.val = 0;
+    word2.f.NUM_RECORDS = mipmap.desc.width * mipmap_prop.element_size;
+
+    const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order);
+    word3.val = 0;
+    word3.f.RESOURCE_LEVEL = 1;  // NV-specific resource level
+    word3.f.DST_SEL_X = swizzle.x;
+    word3.f.DST_SEL_Y = swizzle.y;
+    word3.f.DST_SEL_Z = swizzle.z;
+    word3.f.DST_SEL_W = swizzle.w;
+    word3.f.FORMAT =
GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type); + word3.f.INDEX_STRIDE = mipmap_prop.element_size; + word3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry); + + mipmap.srd[0] = word0.val; + mipmap.srd[1] = word1.val; + mipmap.srd[2] = word2.val; + mipmap.srd[3] = word3.val; + + mipmap.row_pitch = mipmap.desc.width * mipmap_prop.element_size; + mipmap.slice_pitch = mipmap.row_pitch; + } else { + SQ_IMG_RSRC_WORD0 word0; + SQ_IMG_RSRC_WORD1 word1; + SQ_IMG_RSRC_WORD2 word2; + SQ_IMG_RSRC_WORD3 word3; + SQ_IMG_RSRC_WORD4 word4; + SQ_IMG_RSRC_WORD5 word5; + SQ_IMG_RSRC_WORD5 word6; + SQ_IMG_RSRC_WORD5 word7; + + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; + + // pMipInfo not needed - set to nullptr and AddrLib will ignore it + out.pMipInfo = nullptr; + + uint32_t swizzleMode = GetAddrlibSurfaceInfoNv( + mipmap.component, mipmap.desc, mipmap.num_levels, + mipmap.tile_mode, mipmap.row_pitch, mipmap.slice_pitch, out); + if (swizzleMode == (uint32_t)(-1)) { + return HSA_STATUS_ERROR; + } + mipmap.addr_output.addr2 = out; + mipmap.size = out.surfSize; + + assert((out.bpp / 8) == mipmap_prop.element_size); + + const size_t row_pitch_size = out.pitch * mipmap_prop.element_size; + + word0.val = 0; + word0.f.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr); + + word1.val = 0; + word1.f.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr); + word1.f.MIN_LOD = 0; + word1.f.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type); + // Only take the lowest 2 bits of (mipmap.desc.width - 1) + word1.f.WIDTH = BitSelect<0, 1>(mipmap.desc.width - 1); + + word2.val = 0; + // Take the high 12 bits of (mipmap.desc.width - 1) + word2.f.WIDTH_HI = BitSelect<2, 13>(mipmap.desc.width - 1); + word2.f.HEIGHT = mipmap.desc.height ? 
mipmap.desc.height - 1 : 0; + word2.f.RESOURCE_LEVEL = 1; + + const Swizzle swizzle = ImageLut().MapSwizzle(mipmap.desc.format.channel_order); + word3.val = 0; + word3.f.DST_SEL_X = swizzle.x; + word3.f.DST_SEL_Y = swizzle.y; + word3.f.DST_SEL_Z = swizzle.z; + word3.f.DST_SEL_W = swizzle.w; + word3.f.SW_MODE = swizzleMode; + word3.f.BASE_LEVEL = 0; + word3.f.LAST_LEVEL = mipmap.num_levels - 1; + word3.f.BC_SWIZZLE = GetBcSwizzle(swizzle); + word3.f.TYPE = ImageLut().MapGeometry(mipmap.desc.geometry); + + const bool mipmap_array = + (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA || + mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA || + mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH); + const bool mipmap_3d = (mipmap.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D); + + word4.val = 0; + word4.f.DEPTH = + (mipmap_array) + ? std::max(mipmap.desc.array_size, static_cast(1)) - 1 + : (mipmap_3d) ? mipmap.desc.depth - 1 : 0; + uint32_t minor_ver = MinorVerFromDevID(chip_id_); + // For 1d, 2d and 2d-msaa in gfx1030 and beyond this is pitch-1 + if ((minor_ver >= 3) && !mipmap_array && !mipmap_3d) + word4.f.PITCH = out.pitch - 1; + + word5.val = 0; + word5.f.MAX_MIP = mipmap.num_levels - 1; + word6.val = 0; + word7.val = 0; + + mipmap.srd[0] = word0.val; + mipmap.srd[1] = word1.val; + mipmap.srd[2] = word2.val; + mipmap.srd[3] = word3.val; + mipmap.srd[4] = word4.val; + mipmap.srd[5] = word5.val; + mipmap.srd[6] = word6.val; + mipmap.srd[7] = word7.val; + + mipmap.row_pitch = row_pitch_size; + mipmap.slice_pitch = out.sliceSize; + } + + mipmap.srd[8] = mipmap.desc.format.channel_type; + mipmap.srd[9] = mipmap.desc.format.channel_order; + mipmap.srd[10] = static_cast(mipmap.desc.width); + + // Mipmap-specific auxiliary fields + mipmap.srd[11] = mipmap.num_levels; + + return HSA_STATUS_SUCCESS; +} + +void ImageManagerNv::printSRDDetailed(const uint32_t* srd) const { + if (!srd) { + printf("\n========== Image SRD (NV/GFX10) - Detailed ==========\n"); + printf("ERROR: 
No SRD data provided.\n"); + printf("===============================================\n\n"); + return; + } + + printf("\n========== Image SRD (NV/GFX10) - Detailed ==========\n"); + + // Print all 12 words with bit field annotations + for (int i = 0; i < 12; i++) { + printf("WORD %d: 0x%08x ", i, srd[i]); + + // Binary representation + printf("("); + for (int bit = 31; bit >= 0; bit--) { + printf("%d", (srd[i] >> bit) & 1); + if (bit % 4 == 0 && bit != 0) printf("_"); + } + printf(")\n"); + } + + // WORD 0: BASE_ADDRESS (bits 39:8) + SQ_IMG_RSRC_WORD0 word0; + word0.val = srd[0]; + printf("\nWORD 0: BASE_ADDRESS (bits 39:8) = 0x%08x\n", word0.f.BASE_ADDRESS); + + // WORD 1: Contains BASE_ADDRESS_HI, MIN_LOD, FORMAT, WIDTH (bits 1:0) + SQ_IMG_RSRC_WORD1 word1; + word1.val = srd[1]; + printf("WORD 1: BASE_ADDRESS_HI = 0x%02x\n", word1.f.BASE_ADDRESS_HI); + printf(" MIN_LOD = %u\n", word1.f.MIN_LOD); + printf(" FORMAT = %u ◄──── Combined format/type\n", word1.f.FORMAT); + printf(" WIDTH (bits 1:0) = %u\n", word1.f.WIDTH); + + // Calculate full address (NV uses 40-bit shifted by 8) + uint64_t base_addr = ((uint64_t)word1.f.BASE_ADDRESS_HI << 40) | ((uint64_t)word0.f.BASE_ADDRESS << 8); + printf(" → Full Base Address = 0x%016lx\n", base_addr); + + // WORD 2: WIDTH_HI, HEIGHT, RESOURCE_LEVEL + SQ_IMG_RSRC_WORD2 word2; + word2.val = srd[2]; + printf("WORD 2: WIDTH_HI (bits 13:2) = %u\n", word2.f.WIDTH_HI); + printf(" HEIGHT = %u\n", word2.f.HEIGHT); + printf(" RESOURCE_LEVEL = %u ◄──── NV-specific field\n", word2.f.RESOURCE_LEVEL); + + // Calculate full width (NV uses 14 bits split: 2 in WORD1 + 12 in WORD2) + uint32_t full_width = word1.f.WIDTH | (word2.f.WIDTH_HI << 2); + printf(" → Full Width = %u (actual: %u)\n", full_width, full_width + 1); + printf(" → Full Height = %u (actual: %u)\n", word2.f.HEIGHT, word2.f.HEIGHT + 1); + + // WORD 3: Channel selectors, SW_MODE, BASE_LEVEL, LAST_LEVEL, BC_SWIZZLE, TYPE + SQ_IMG_RSRC_WORD3 word3; + word3.val = srd[3]; + printf("WORD 
3: DST_SEL_X = %u ", word3.f.DST_SEL_X); + printChannelSelect(word3.f.DST_SEL_X); + printf(" DST_SEL_Y = %u ", word3.f.DST_SEL_Y); + printChannelSelect(word3.f.DST_SEL_Y); + printf(" DST_SEL_Z = %u ", word3.f.DST_SEL_Z); + printChannelSelect(word3.f.DST_SEL_Z); + printf(" DST_SEL_W = %u ", word3.f.DST_SEL_W); + printChannelSelect(word3.f.DST_SEL_W); + printf(" SW_MODE = %u ", word3.f.SW_MODE); + printSwizzleMode(word3.f.SW_MODE); + printf(" BASE_LEVEL = %u ◄──── Current base level\n", word3.f.BASE_LEVEL); + printf(" LAST_LEVEL = %u ◄──── Current last level\n", word3.f.LAST_LEVEL); + printf(" BC_SWIZZLE = %u ◄──── Border color swizzle\n", word3.f.BC_SWIZZLE); + printf(" TYPE = %u ", word3.f.TYPE); + printResourceType(word3.f.TYPE); + + // WORD 4: DEPTH, optionally PITCH + SQ_IMG_RSRC_WORD4 word4; + word4.val = srd[4]; + printf("WORD 4: DEPTH = %u\n", word4.f.DEPTH); + + // Calculate effective depth based on geometry and chip version + uint32_t type = word3.f.TYPE; + uint32_t minor_ver = MinorVerFromDevID(chip_id_); + + if (type == 10) { // 3D + printf(" → 3D Depth = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1); + } else if (type == 13 || type == 12) { // Arrays + printf(" → Array Size = %u (actual: %u)\n", word4.f.DEPTH, word4.f.DEPTH + 1); + } else if ((minor_ver >= 3) && (type == 8 || type == 9 || type == 14)) { // 1D/2D/2D_MSAA in GFX1030+ + printf(" PITCH = %u (actual: %u) ◄──── GFX1030+ pitch\n", word4.f.PITCH, word4.f.PITCH + 1); + } + + // WORD 5-7: Usually zero for basic images + printf("WORD 5: Reserved = 0x%08x\n", srd[5]); + printf("WORD 6: Reserved = 0x%08x\n", srd[6]); + printf("WORD 7: Reserved = 0x%08x\n", srd[7]); + + // Additional information (HSA extension fields) + printf("WORD 8: CHANNEL_TYPE = 0x%08x\n", srd[8]); + printf("WORD 9: CHANNEL_ORDER = 0x%08x\n", srd[9]); + printf("WORD 10: WIDTH_ORIGINAL = 0x%08x\n", srd[10]); + printf("WORD 11: NUM_LEVELS = 0x%08x\n", srd[11]); + + // Mipmap analysis + if (word3.f.LAST_LEVEL > 
word3.f.BASE_LEVEL || word3.f.LAST_LEVEL > 0) { + printf("\nMIPMAP ANALYSIS:\n"); + printf(" Total Levels = %u\n", srd[11]); + printf(" Min LOD = %u\n", word1.f.MIN_LOD); + printf(" Active Range = [%u, %u]\n", word3.f.BASE_LEVEL, word3.f.LAST_LEVEL); + printf(" Resource Level = %u\n", word2.f.RESOURCE_LEVEL); + if (word3.f.BASE_LEVEL == word3.f.LAST_LEVEL) { + printf(" Mode = SINGLE LEVEL VIEW ◄──── Mip level view\n"); + uint32_t level = word3.f.BASE_LEVEL; + uint32_t level_width = std::max(1u, (full_width + 1) >> level); + uint32_t level_height = std::max(1u, static_cast((word2.f.HEIGHT + 1) >> level)); + printf(" Effective Dimensions = %ux%u (level %u)\n", level_width, level_height, level); + } else { + printf(" Mode = FULL MIPMAP CHAIN\n"); + } + } + printf("===============================================\n\n"); +} + +void ImageManagerNv::printChannelSelect(uint32_t sel) const { + switch(sel) { + case 0: printf("(SEL_0)\n"); break; + case 1: printf("(SEL_1)\n"); break; + case 4: printf("(SEL_X/R)\n"); break; + case 5: printf("(SEL_Y/G)\n"); break; + case 6: printf("(SEL_Z/B)\n"); break; + case 7: printf("(SEL_W/A)\n"); break; + default: printf("(UNKNOWN)\n"); break; + } +} + +void ImageManagerNv::printResourceType(uint32_t type) const { + switch(type) { + case 8: printf("(1D)\n"); break; + case 9: printf("(2D)\n"); break; + case 10: printf("(3D)\n"); break; + case 11: printf("(CUBE)\n"); break; + case 12: printf("(1D_ARRAY/1DB)\n"); break; + case 13: printf("(2D_ARRAY)\n"); break; + case 14: printf("(2D_MSAA)\n"); break; + case 15: printf("(2D_MSAA_ARRAY)\n"); break; + default: printf("(UNKNOWN=%u)\n", type); break; + } +} + +void ImageManagerNv::printSwizzleMode(uint32_t sw_mode) const { + // NV/GFX10 swizzle modes + if (sw_mode == 0) { + printf("(LINEAR)\n"); + } else if (sw_mode < 5) { + printf("(SW_256B_%u)\n", sw_mode); + } else if (sw_mode < 9) { + printf("(SW_4KB_%u)\n", sw_mode - 4); + } else if (sw_mode < 13) { + printf("(SW_64KB_%u)\n", sw_mode - 8); + 
} else if (sw_mode < 22) { + printf("(SW_VAR_%u)\n", sw_mode - 12); + } else { + printf("(UNKNOWN=%u)\n", sw_mode); + } +} + +hsa_status_t ImageManagerNv::PopulateMipLevelSrd( + MipmappedArray& level_view, + const MipmappedArray& mipmap_array, + uint32_t mip_level) const { + + // SRD already copied from parent, just modify BASE_LEVEL/LAST_LEVEL fields + uint32_t* srd_words = reinterpret_cast(level_view.srd); + + // WORD3 has BASE_LEVEL and LAST_LEVEL fields + SQ_IMG_RSRC_WORD3* word3 = reinterpret_cast(&srd_words[3]); + + // Set both to same value - hardware samples only this level + word3->f.BASE_LEVEL = mip_level; + word3->f.LAST_LEVEL = mip_level; + + debug_print("Set SRD mip selection: BASE_LEVEL=%u, LAST_LEVEL=%u", mip_level, mip_level); + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t ImageManagerNv::PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const { + const metadata_amd_nv_t* desc_nv = reinterpret_cast(desc); + const void* mipmap_data_addr = mipmap_array.data; + + ImageProperty mipmap_prop = ImageLut().MapFormat(mipmap_array.desc.format, mipmap_array.desc.geometry); + if (mipmap_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED || mipmap_prop.element_size == 0) { + return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED; + } + + const Swizzle swizzle = ImageLut().MapSwizzle(mipmap_array.desc.format.channel_order); + + if (IsLocalMemory(mipmap_array.data)) { + mipmap_data_addr = reinterpret_cast( + reinterpret_cast(mipmap_array.data) - local_memory_base_address_); + } + + // Copy the pre-computed SRD words 0-7 from metadata + mipmap_array.srd[0] = desc_nv->word0.u32All; + mipmap_array.srd[1] = desc_nv->word1.u32All; + mipmap_array.srd[2] = desc_nv->word2.u32All; + mipmap_array.srd[3] = desc_nv->word3.u32All; + mipmap_array.srd[4] = desc_nv->word4.u32All; + mipmap_array.srd[5] = desc_nv->word5.u32All; + mipmap_array.srd[6] = desc_nv->word6.u32All; + mipmap_array.srd[7] = desc_nv->word7.u32All; + + // Override 
specific fields after copying + uint32_t hwPixelSize = ImageLut().GetPixelSize(mipmap_prop.data_format, mipmap_prop.data_type); + if (mipmap_prop.element_size != hwPixelSize) { + return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED; + } + + reinterpret_cast(&mipmap_array.srd[0])->bits.BASE_ADDRESS = PtrLow40Shift8(mipmap_data_addr); + reinterpret_cast(&mipmap_array.srd[1])->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(mipmap_data_addr); + reinterpret_cast(&mipmap_array.srd[1])->bits.FORMAT = GetCombinedFormat(mipmap_prop.data_format, mipmap_prop.data_type); + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_X = swizzle.x; + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_Y = swizzle.y; + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_Z = swizzle.z; + reinterpret_cast(&mipmap_array.srd[3])->bits.DST_SEL_W = swizzle.w; + reinterpret_cast(&mipmap_array.srd[5])->bits.MAX_MIP = mipmap_array.num_levels - 1; + + if (mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA || + mipmap_array.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D) { + reinterpret_cast(&mipmap_array.srd[3])->bits.TYPE = + ImageLut().MapGeometry(mipmap_array.desc.geometry); + } + + // Looks like this is only used for CPU copies. 
+ mipmap_array.row_pitch = 0; + mipmap_array.slice_pitch = 0; + + // Store mipmap-specific metadata + mipmap_array.srd[8] = mipmap_array.desc.format.channel_type; + mipmap_array.srd[9] = mipmap_array.desc.format.channel_order; + mipmap_array.srd[10] = static_cast(mipmap_array.desc.width); + mipmap_array.srd[11] = mipmap_array.num_levels; + + // Allocate and populate pMipInfo from metadata mip_offsets (ADDR2 for Nv) + ADDR2_MIP_INFO* mip_info_storage = new ADDR2_MIP_INFO[mipmap_array.num_levels]; + memset(mip_info_storage, 0, sizeof(ADDR2_MIP_INFO) * mipmap_array.num_levels); + + // Extract per-level information from mip_offsets array + for (uint32_t level = 0; level < mipmap_array.num_levels; level++) { + // mip_offsets contains offset bits [39:8], shift left by 8 to get actual byte offset + mip_info_storage[level].offset = static_cast(desc_nv->mip_offsets[level]) << 8; + + // Calculate dimensions for this level (halve at each level) + mip_info_storage[level].pitch = std::max(1u, static_cast(mipmap_array.desc.width >> level)); + mip_info_storage[level].height = std::max(1u, static_cast(mipmap_array.desc.height >> level)); + mip_info_storage[level].depth = std::max(1u, static_cast(mipmap_array.desc.depth >> level)); + } + + // Store pMipInfo in addr_output for later use by PopulateMipLevelSrd + mipmap_array.addr_output.addr2.pMipInfo = mip_info_storage; + + // Total size calculation from metadata + uint32_t last_level = mipmap_array.num_levels - 1; + uint64_t last_level_size = mip_info_storage[last_level].pitch * + mip_info_storage[last_level].height * + mip_info_storage[last_level].depth * + mipmap_prop.element_size; + mipmap_array.size = mip_info_storage[last_level].offset + last_level_size; + + return HSA_STATUS_SUCCESS; +} + } // namespace image } // namespace rocr diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_nv.h b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_nv.h index 078f2935a0..230f7a1464 100755 --- 
a/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_nv.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager_nv.h @@ -40,8 +40,8 @@ // //////////////////////////////////////////////////////////////////////////////// -#ifndef EXT_IMAGE_IMAGE_MANAGER_NV_H_ -#define EXT_IMAGE_IMAGE_MANAGER_NV_H_ +#ifndef EXT_IMAGE_IMAGE_MANAGER_NV_H_ +#define EXT_IMAGE_IMAGE_MANAGER_NV_H_ #include "addrlib/inc/addrinterface.h" #include "image_manager_kv.h" @@ -59,6 +59,7 @@ class ImageManagerNv : public ImageManagerKv { virtual hsa_status_t CalculateImageSizeAndAlignment( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, hsa_ext_image_data_layout_t image_data_layout, + uint32_t num_mipmap_levels, size_t image_data_row_pitch, size_t image_data_slice_pitch, hsa_ext_image_data_info_t& image_info) const; @@ -79,13 +80,30 @@ class ImageManagerNv : public ImageManagerKv { /// @brief Fill image backing storage using agent copy. virtual hsa_status_t FillImage(const Image& image, const void* pattern, const hsa_ext_image_region_t& region); + + /// @brief Fill mipmap structure with device specific mipmapped array object. + virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array) const; + + /// @brief Fill mipmap structure with pre-computed AMD metadata descriptor. 
+ virtual hsa_status_t PopulateMipmapSrd(MipmappedArray& mipmap_array, const metadata_amd_t* desc) const; + + /// @brief Create mip level view using SRD BASE_LEVEL/LAST_LEVEL fields + virtual hsa_status_t PopulateMipLevelSrd(MipmappedArray& level_view, + const MipmappedArray& mipmap_array, uint32_t mip_level) const; + + virtual void printSRDDetailed(const uint32_t* srd) const; + virtual void printChannelSelect(uint32_t sel) const; + virtual void printResourceType(uint32_t type) const; + virtual void printSwizzleMode(uint32_t sw_mode) const; + protected: uint32_t GetAddrlibSurfaceInfoNv(hsa_agent_t component, - const hsa_ext_image_descriptor_t& desc, - Image::TileMode tileMode, - size_t image_data_row_pitch, - size_t image_data_slice_pitch, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const; + const hsa_ext_image_descriptor_t& desc, + uint32_t num_mipmap_levels, + Image::TileMode tileMode, + size_t image_data_row_pitch, + size_t image_data_slice_pitch, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const; bool IsLocalMemory(const void* address) const; @@ -95,4 +113,4 @@ class ImageManagerNv : public ImageManagerKv { } // namespace image } // namespace rocr -#endif // EXT_IMAGE_IMAGE_MANAGER_NV_H_ +#endif // EXT_IMAGE_IMAGE_MANAGER_NV_H_ diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/image/image_runtime.cpp index 37f9fa61fe..85e114545d 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_runtime.cpp @@ -44,11 +44,15 @@ #include #include +#include +#include #include +#include #include "core/inc/runtime.h" #include "core/inc/hsa_internal.h" #include "core/inc/hsa_ext_amd_impl.h" +#include "core/inc/exceptions.h" #include "resource.h" #include "image_manager_kv.h" #include "image_manager_ai.h" @@ -57,9 +61,96 @@ #include "image_manager_gfx12.h" #include "device_info.h" + +#define SINGLE_MIP_LEVEL 1 + namespace rocr { 
namespace image { + static inline uint32_t ComputeMaxMipLevels(const hsa_ext_image_descriptor_t& d) { + uint32_t w = d.width ? d.width : 1; + uint32_t h = d.height ? d.height : 1; + uint32_t depth = d.depth ? d.depth : 1; + uint32_t dim_max = w; + switch (d.geometry) { + case HSA_EXT_IMAGE_GEOMETRY_1D: + case HSA_EXT_IMAGE_GEOMETRY_1DA: + case HSA_EXT_IMAGE_GEOMETRY_1DB: + dim_max = w; break; + case HSA_EXT_IMAGE_GEOMETRY_2D: + case HSA_EXT_IMAGE_GEOMETRY_2DA: + case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH: + case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH: + dim_max = std::max(w, h); break; + case HSA_EXT_IMAGE_GEOMETRY_3D: + dim_max = std::max(std::max(w, h), depth); break; + default: + break; + } + uint32_t levels = 0; + while (dim_max > 0) { ++levels; dim_max >>= 1; } + return (levels == 0) ? 1 : levels; + } + +hsa_status_t ImageRuntime::GetMipmapArraySizeAndAlignment( + hsa_agent_t component, + const hsa_ext_image_descriptor_t& desc, + uint32_t num_mipmap_levels, + hsa_ext_image_data_layout_t layout, + size_t row_pitch, + size_t slice_pitch, + size_t& size_out, + size_t& alignment_out) { + size_out = 0; + alignment_out = 0; + + if (num_mipmap_levels == 0 || num_mipmap_levels > ComputeMaxMipLevels(desc)) + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + + // Validate the image format and geometry. + uint32_t capability = 0; + hsa_status_t status = + GetImageCapability(component, desc.format, desc.geometry, capability); + if (status != HSA_STATUS_SUCCESS) { + return status; + } + + if (capability == 0) { + return static_cast( + HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED); + } + + const hsa_ext_image_geometry_t geometry = desc.geometry; + uint32_t max_width = 0; + uint32_t max_height = 0; + uint32_t max_depth = 0; + uint32_t max_array_size = 0; + + ImageManager* manager = image_manager(component); + + // Validate the image dimension. 
+ manager->GetImageInfoMaxDimension(component, geometry, max_width, max_height, + max_depth, max_array_size); + + if (desc.width > max_width || desc.height > max_height || + desc.depth > max_depth || desc.array_size > max_array_size) { + return static_cast( + HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED); + } + + hsa_ext_image_data_info_t mipmap_info = {0}; + status = manager->CalculateImageSizeAndAlignment(component, desc, layout, + num_mipmap_levels, row_pitch, slice_pitch, mipmap_info); + if (HSA_STATUS_SUCCESS != status) { + return status; + } + + alignment_out = mipmap_info.alignment; + size_out = mipmap_info.size; + + return HSA_STATUS_SUCCESS; +} + hsa_status_t FindKernelArgPool(hsa_amd_memory_pool_t pool, void* data) { assert(data != nullptr); @@ -162,9 +253,6 @@ ImageRuntime* ImageRuntime::instance() { } instance = CreateSingleton(); - if (instance == NULL) { - return NULL; - } // UnloadCallback = &ext_image::ImageRuntime::DestroySingleton; } @@ -178,13 +266,15 @@ ImageRuntime* ImageRuntime::CreateSingleton() { if (HSA_STATUS_SUCCESS != instance->blit_kernel_.Initialize()) { instance->Cleanup(); delete instance; - return NULL; + throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES, + "ImageRuntime: Failed to initialize blit kernel"); } if (HSA_STATUS_SUCCESS != HSA::hsa_iterate_agents(CreateImageManager, instance)) { instance->Cleanup(); delete instance; - return NULL; + throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES, + "ImageRuntime: Failed to create image managers"); } assert(instance->kernarg_pool_.handle != 0); @@ -350,8 +440,9 @@ hsa_status_t ImageRuntime::GetImageSizeAndAlignment( HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED); } - return manager->CalculateImageSizeAndAlignment(component, desc, - image_data_layout, image_data_row_pitch, image_data_slice_pitch, image_info); + return manager->CalculateImageSizeAndAlignment( + component, desc, image_data_layout, SINGLE_MIP_LEVEL, + image_data_row_pitch, image_data_slice_pitch, 
image_info); } hsa_status_t ImageRuntime::CreateImageHandle( @@ -421,7 +512,7 @@ hsa_status_t ImageRuntime::CreateImageHandleWithLayout( if(image_layout->version!=1) return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED; - + uint32_t id; HSA::hsa_agent_get_info(component, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CHIP_ID, &id); @@ -448,6 +539,64 @@ hsa_status_t ImageRuntime::CreateImageHandleWithLayout( return HSA_STATUS_SUCCESS; } +hsa_status_t ImageRuntime::CreateMipmapArrayHandleWithLayout( + hsa_agent_t component, const hsa_ext_image_descriptor_t& mipmap_descriptor, + const hsa_amd_image_descriptor_t* image_layout, + const void* image_data, const hsa_access_permission_t access_permission, + uint32_t num_mipmap_levels, + hsa_ext_image_t& image_handle) { + + image_handle.handle = 0; + + if (!IsMultipleOf(image_data, 256)) { + return HSA_STATUS_ERROR_INVALID_ALLOCATION; + } + + if (image_layout->version != 1) { + return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED; + } + + uint32_t id; + HSA::hsa_agent_get_info(component, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CHIP_ID, &id); + + if (image_layout->deviceID != (0x1002 << 16 | id)) { + return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED; + } + + if (num_mipmap_levels == 0) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + const metadata_amd_t* desc = reinterpret_cast(image_layout); + + MipmappedArray* mipmap_array = MipmappedArray::Create(component); + if (!mipmap_array) { + return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + } + + mipmap_array->component = component; + mipmap_array->desc = mipmap_descriptor; + mipmap_array->permission = access_permission; + mipmap_array->num_levels = num_mipmap_levels; + mipmap_array->data = const_cast(image_data); + mipmap_array->flags = 0; + + ImageManager* manager = image_manager(component); + if (!manager) { + MipmappedArray::Destroy(mipmap_array); + return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + } + + hsa_status_t status = 
manager->PopulateMipmapSrd(*mipmap_array, desc); + if (status != HSA_STATUS_SUCCESS) { + MipmappedArray::Destroy(mipmap_array); + return status; + } + + image_handle.handle = mipmap_array->Convert(); + return HSA_STATUS_SUCCESS; +} + hsa_status_t ImageRuntime::DestroyImageHandle( const hsa_ext_image_t& image_handle) { const Image* image = Image::Convert(image_handle.handle); @@ -574,6 +723,154 @@ hsa_status_t ImageRuntime::DestroySamplerHandle( return HSA_STATUS_SUCCESS; } +hsa_status_t ImageRuntime::CreateMipmapArrayHandle( + hsa_agent_t component, const hsa_ext_image_descriptor_t& mipmap_descriptor, + const void* image_data, const hsa_access_permission_t access_permission, + uint32_t num_mipmap_levels, + const hsa_ext_image_data_layout_t mipmap_layout, + size_t image_data_row_pitch, size_t image_data_slice_pitch, + hsa_ext_image_t& image_handle) { + image_handle.handle = 0; + if (mipmap_descriptor.width == 0 || num_mipmap_levels == 0) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + ImageManager* manager = image_manager(component); + if (!manager) return HSA_STATUS_ERROR_INVALID_AGENT; + + // Validate mipmap array size and alignment requirements + size_t required_size = 0; + size_t required_alignment = 0; + hsa_status_t status = GetMipmapArraySizeAndAlignment( + component, mipmap_descriptor, num_mipmap_levels, mipmap_layout, image_data_row_pitch, + image_data_slice_pitch, required_size, required_alignment); + if (status != HSA_STATUS_SUCCESS) { + return status; + } + + // Verify image_data alignment + assert(image_data != NULL); + assert(IsMultipleOf(image_data, required_alignment)); + + // Create a new mipmapped array object + MipmappedArray* mipmap_array = MipmappedArray::Create(component); + if (!mipmap_array) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + + // Determine the tile mode + // 1DB (1D buffered) geometry MUST always be LINEAR per HSA spec + // LINEAR layout forces linear swizzle mode (required by API) + // TILED allows AddrLib to use internal 
heuristics to select optimal swizzle mode + if (mipmap_descriptor.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) { + // 1DB always uses linear addressing per HSA specification + mipmap_array->tile_mode = Image::TileMode::LINEAR; + } else if (mipmap_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR) { + // Explicit LINEAR layout forces linear swizzle mode + mipmap_array->tile_mode = Image::TileMode::LINEAR; + } else { + // OPAQUE layout: Let AddrLib choose the best swizzle mode + mipmap_array->tile_mode = Image::TileMode::TILED; + } + + debug_print("Tile mode = %u (0: LINEAR, 1: TILED)", mipmap_array->tile_mode); + + // Initialize the mipmapped array object + mipmap_array->component = component; + mipmap_array->data = const_cast(image_data); + mipmap_array->desc = mipmap_descriptor; + mipmap_array->permission = access_permission; + mipmap_array->num_levels = num_mipmap_levels; + mipmap_array->flags = 0; + + manager->PopulateMipmapSrd(*mipmap_array); + debug_print("Populating mipmapped array SRD..."); + if (core::Runtime::runtime_singleton_->flag().image_print_srd()) + mipmap_array->printSRD(); + + manager->printSRDDetailed(mipmap_array->srd); + + // assert(mipmap_array->size == required_size); + image_handle.handle = mipmap_array->Convert(); + debug_print("output handle = %lu", image_handle.handle); + return HSA_STATUS_SUCCESS; +} + +hsa_status_t ImageRuntime::DestroyMipmapArrayHandle( + const hsa_ext_image_t& image_handle) { + const MipmappedArray* mipmap_array = MipmappedArray::Convert(image_handle.handle); + + if (mipmap_array == NULL) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + MipmappedArray::Destroy(const_cast(mipmap_array)); + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t ImageRuntime::GetMipmapArrayLevelHandle( + hsa_agent_t component, const hsa_ext_image_t& mipmapped_array, + uint32_t mip_level, hsa_ext_image_t& level_image_out) { + + level_image_out.handle = 0; + + // Get GPU architecture version + uint32_t chip_id; + hsa_status_t status = 
GetGPUAsicID(component, &chip_id); + if (status != HSA_STATUS_SUCCESS) { + return status; + } + uint32_t major_ver = MajorVerFromDevID(chip_id); + if (major_ver < 9) { + debug_print("ERROR: Mip level views not supported on GFX%u hardware\n", major_ver); + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + // Validate mip level + if (mip_level < 0) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + // Convert handle to internal object and perform basic sanity. + rocr::image::MipmappedArray* array = + rocr::image::MipmappedArray::Convert(mipmapped_array.handle); + if (!array || array->num_levels == 0 || mip_level >= array->num_levels) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + debug_print("Creating mip level %u view for %u level mipmap\n", + mip_level, array->num_levels); + + // Create a view that references the parent mipmap array + MipmappedArray* level_view = MipmappedArray::Create(component); + if (!level_view) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + + // Copy entire parent structure (srd is a fixed array, so it's deep-copied automatically) + *level_view = *array; + + // Modify SRD to select only the specific mip level + ImageManager* manager = image_manager(component); + if (!manager) { + MipmappedArray::Destroy(level_view); + return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + } + + status = manager->PopulateMipLevelSrd(*level_view, *array, mip_level); + if (status != HSA_STATUS_SUCCESS) { + MipmappedArray::Destroy(level_view); + return status; + } + + debug_print("Created mip level view using SRD fields"); + if (core::Runtime::runtime_singleton_->flag().image_print_srd()) + level_view->printSRD(); + + manager->printSRDDetailed(level_view->srd); + + // Return handle + level_image_out.handle = level_view->Convert(); + return HSA_STATUS_SUCCESS; +} + ImageRuntime::ImageRuntime() : cpu_l2_cache_size_(0), kernarg_pool_({0}) {} diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/image_runtime.h 
b/projects/rocr-runtime/runtime/hsa-runtime/image/image_runtime.h index c8386fe1f1..ba85432629 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/image_runtime.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/image_runtime.h @@ -103,6 +103,14 @@ class ImageRuntime { const void* image_data, const hsa_access_permission_t access_permission, hsa_ext_image_t& image); + /// @brief Create mipmapped array object with AMD-specific layout and return its handle. + hsa_status_t CreateMipmapArrayHandleWithLayout( + hsa_agent_t component, const hsa_ext_image_descriptor_t& mipmap_descriptor, + const hsa_amd_image_descriptor_t* image_layout, + const void* image_data, const hsa_access_permission_t access_permission, + uint32_t num_mipmap_levels, + hsa_ext_image_t& image_handle); + /// @brief Destroy the device image object referenced by the handle. hsa_status_t DestroyImageHandle(const hsa_ext_image_t& image); @@ -137,6 +145,34 @@ class ImageRuntime { /// @brief Destroy the device sampler object referenced by the handle. hsa_status_t DestroySamplerHandle(hsa_ext_sampler_t& sampler); + /// @brief Create device Mipmap array object and return its handle + hsa_status_t CreateMipmapArrayHandle( + hsa_agent_t component, const hsa_ext_image_descriptor_t& mipmap_descriptor, + const void* image_data, const hsa_access_permission_t access_permission, + uint32_t num_mipmap_levels, + const hsa_ext_image_data_layout_t mipmap_layout, + size_t image_data_row_pitch, size_t image_data_slice_pitch, + hsa_ext_image_t& image_handle); + + /// @brief - Helper function to compute mipmapped surface size / alignment & max levels. + hsa_status_t GetMipmapArraySizeAndAlignment( + hsa_agent_t component, + const hsa_ext_image_descriptor_t& desc, + uint32_t num_mipmap_levels, + hsa_ext_image_data_layout_t layout, + size_t row_pitch, + size_t slice_pitch, + size_t& size_out, + size_t& alignment_out); + + /// @brief Destroy the mipmapped array object referenced by the handle. 
+ hsa_status_t DestroyMipmapArrayHandle(const hsa_ext_image_t& image_handle); + + /// @brief Get the handle for a specific mipmap level in a mipmapped array. + hsa_status_t GetMipmapArrayLevelHandle( + hsa_agent_t agent, const hsa_ext_image_t& mipmapped_array, + uint32_t mip_level, hsa_ext_image_t& level_image_out); + ImageManager* image_manager(hsa_agent_t agent) { std::map::iterator it = image_managers_.find(agent.handle); return (it != image_managers_.end()) ? it->second : NULL; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/image/resource.h b/projects/rocr-runtime/runtime/hsa-runtime/image/resource.h index 309b96635d..3f1893a631 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/image/resource.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/image/resource.h @@ -49,6 +49,7 @@ #include "inc/hsa.h" #include "inc/hsa_ext_image.h" +#include "addrlib/inc/addrinterface.h" #include "util.h" @@ -97,20 +98,21 @@ typedef struct ImageProperty { /// @brief Structure to represent an HSA image object. typedef struct Image { -private: - Image() { +protected: + Image() + : data(nullptr), + row_pitch(0), + slice_pitch(0) { component.handle = 0; permission = HSA_ACCESS_PERMISSION_RO; - data = NULL; std::memset(srd, 0, sizeof(srd)); std::memset(&desc, 0, sizeof(desc)); - row_pitch = slice_pitch = 0; tile_mode = LINEAR; } - ~Image() {} + virtual ~Image() {} -public: + public: typedef enum TileMode { LINEAR, TILED @@ -127,7 +129,11 @@ public: /// @brief Convert from HSA handle to vendor representation. static Image* Convert(uint64_t handle) { - return reinterpret_cast(handle - offsetof(Image, srd)); + // Compute offset manually to avoid offsetof warning with virtual destructor + Image* dummy = nullptr; + const ptrdiff_t srd_offset = + reinterpret_cast(&dummy->srd) - reinterpret_cast(dummy); + return reinterpret_cast(handle - srd_offset); } // Vendor specific image object. 
@@ -202,6 +208,61 @@ public: hsa_ext_sampler_descriptor_v2_t desc; } Sampler; +/// @brief Structure representing a mipmapped image array. +typedef struct MipmappedArray : public Image { +private: + MipmappedArray() + : size(0), + num_levels(0), + flags(0) { + component.handle = 0; + std::memset(srd, 0, sizeof(srd)); + std::memset(&desc, 0, sizeof(desc)); + permission = HSA_ACCESS_PERMISSION_RO; + std::memset(&addr_output, 0, sizeof(addr_output)); + tile_mode = LINEAR; + } + + ~MipmappedArray() {} + +public: + /// @brief Create a MipmappedArray. + /// Only internal metadata is allocated; image data must be provided by the user. + static MipmappedArray* Create(hsa_agent_t agent); + + /// @brief Destroy a MipmappedArray. + static void Destroy(const MipmappedArray* array); + + /// @brief Convert from vendor representation to HSA handle. + uint64_t Convert() const { return reinterpret_cast(srd); } + + /// @brief Convert from HSA handle to vendor representation. + static MipmappedArray* Convert(uint64_t handle) { + // Compute offset manually to avoid offsetof warning with virtual destructor + MipmappedArray* dummy = nullptr; + const ptrdiff_t srd_offset = + reinterpret_cast(&dummy->srd) - reinterpret_cast(dummy); + return reinterpret_cast(handle - srd_offset); + } + + // Total size of the allocated memory. + size_t size; + + // Number of mipmap levels. + uint32_t num_levels; + + // Reserved + uint32_t flags; + + // Cached surface info. 
+ union { + ADDR_COMPUTE_SURFACE_INFO_OUTPUT addr1; // Pre-GFX9 versions + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT addr2; // GFX9 through GFX11 + ADDR3_COMPUTE_SURFACE_INFO_OUTPUT addr3; // GFX12 and later (NOTE(review): previously labelled GFX10 and later; confirm against addrlib) + } addr_output; + +} MipmappedArray; + } // namespace image } // namespace rocr #endif // HSA_RUNTIME_EXT_IMAGE_RESOURCE_H diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_image.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_image.h index 8f58d3a5f2..86b9294437 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_image.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_image.h @@ -356,6 +356,56 @@ typedef struct hsa_ext_image_descriptor_s { hsa_ext_image_format_t format; } hsa_ext_image_descriptor_t; +/** + * @brief Implementation independent image descriptor (Version 2). + * + * @details This version adds mipmap support, allowing both regular images + * (mipmap_levels = 0 or 1) and mipmapped arrays (mipmap_levels > 1) to be + * created with a single unified API. + */ +typedef struct hsa_ext_image_descriptor_v2_s { + /** + * Image geometry. + */ + hsa_ext_image_geometry_t geometry; + /** + * Width of the image, in components. + */ + size_t width; + /** + * Height of the image, in components. Only used if the geometry is + * ::HSA_EXT_IMAGE_GEOMETRY_2D, ::HSA_EXT_IMAGE_GEOMETRY_3D, + * HSA_EXT_IMAGE_GEOMETRY_2DA, HSA_EXT_IMAGE_GEOMETRY_2DDEPTH, or + * HSA_EXT_IMAGE_GEOMETRY_2DADEPTH, otherwise must be 0. + */ + size_t height; + /** + * Depth of the image, in components. Only used if the geometry is + * ::HSA_EXT_IMAGE_GEOMETRY_3D, otherwise must be 0. + */ + size_t depth; + /** + * Number of image layers in the image array. Only used if the geometry is + * ::HSA_EXT_IMAGE_GEOMETRY_1DA, ::HSA_EXT_IMAGE_GEOMETRY_2DA, or + * HSA_EXT_IMAGE_GEOMETRY_2DADEPTH, otherwise must be 0. + */ + size_t array_size; + /** + * Image format. + */ + hsa_ext_image_format_t format; + /** + * Number of mipmap levels.
+ * - 0 or 1: Regular single-level image (default behavior) + * - >1: Mipmapped array with multiple levels + * + * When mipmap_levels > 1, the image is treated as a complete mipmap chain. + * The maximum valid value is determined by the image dimensions and can be + * queried using ::hsa_ext_image_data_get_info_v2. + */ + size_t mipmap_levels; +} hsa_ext_image_descriptor_v2_t; + /** * @brief Image capability. */ @@ -663,6 +713,48 @@ hsa_status_t HSA_API hsa_ext_image_data_get_info_with_layout( size_t image_data_slice_pitch, hsa_ext_image_data_info_t *image_data_info); +/** + * @brief Retrieve image data requirements with unified mipmap support (V2 API). + * + * @details This is a unified API that handles both regular images (mipmap_levels = 0 or 1) + * and mipmapped arrays (mipmap_levels > 1). + * + * For regular images: + * - Set image_descriptor->mipmap_levels to 0 or 1 + * - Returns size/alignment for a single image level + * + * For mipmapped arrays: + * - Set image_descriptor->mipmap_levels to desired level count (> 1) + * - Returns total size/alignment for all mip levels combined + * - The maximum valid mipmap_levels is computed from image dimensions + * + * @param[in] agent Agent that will access the image. + * + * @param[in] image_descriptor Pointer to a V2 image descriptor. Must not be NULL. + * + * @param[in] access_permission Access permission when the image is accessed by the agent. + * + * @param[out] image_data_info Memory location where the runtime stores the size and + * alignment requirements. Must not be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been initialized. + * + * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The image format is not + * supported for the specified access permission. 
+ * + * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The image dimensions are not + * supported for the specified access permission. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, + * @p mipmap_levels exceeds maximum for image dimensions, @p access_permission is invalid, + * or @p image_data_info is NULL. + */ +hsa_status_t HSA_API hsa_ext_image_data_get_info_v2( + hsa_agent_t agent, const hsa_ext_image_descriptor_v2_t* image_descriptor, + hsa_access_permission_t access_permission, hsa_ext_image_data_info_t* image_data_info); + /** * @brief Creates an agent specific image handle to an image with an * opaque image data layout. @@ -864,6 +956,105 @@ hsa_status_t HSA_API hsa_ext_image_destroy( hsa_agent_t agent, hsa_ext_image_t image); +/** + * @brief Creates an agent specific image handle with unified mipmap support (V2 API). + * + * @details This is a unified API that handles both regular images (mipmap_levels = 0 or 1) + * and mipmapped arrays (mipmap_levels > 1). This simplifies the API surface and aligns + * with modern graphics API conventions where all images are conceptually mipmapped. + * + * For regular images: + * - Set image_descriptor->mipmap_levels to 0 or 1 + * - Behavior is identical to ::hsa_ext_image_create + * + * For mipmapped arrays: + * - Set image_descriptor->mipmap_levels to the desired level count (> 1) + * - Behavior is identical to ::hsa_amd_mipmap_array_create + * - The image_data must contain all mip levels laid out sequentially + * + * @param[in] agent Agent to be associated with the image handle created. + * + * @param[in] image_descriptor Pointer to a V2 image descriptor. Must not be NULL. + * + * @param[in] image_data Image data buffer allocated according to size and alignment + * requirements from ::hsa_ext_image_data_get_info_v2. Must not be NULL. + * + * @param[in] access_permission Access permission of the image when accessed by agent. 
+ * + * @param[out] image Pointer to memory location where the HSA runtime stores the + * newly created image handle. Must not be NULL. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. + * + * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED The agent does not support + * the image format for the specified access permission. + * + * @retval ::HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED The agent does not support + * the image dimensions for the specified access permission. + * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image_descriptor is NULL, @p image_data + * is NULL, @p image_data does not have valid alignment, @p access_permission is invalid, + * @p mipmap_levels exceeds maximum for image dimensions, or @p image is NULL. + * + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES The HSA runtime failed to allocate + * required resources. + */ +hsa_status_t HSA_API hsa_ext_image_create_v2(hsa_agent_t agent, + const hsa_ext_image_descriptor_v2_t* image_descriptor, + const void* image_data, + hsa_access_permission_t access_permission, + hsa_ext_image_t* image); + +/** + * @brief Destroys an image handle created with ::hsa_ext_image_create_v2. + * + * @details This function can destroy both regular images and mipmapped arrays + * created with ::hsa_ext_image_create_v2. It does not free the image_data memory, + * which remains the responsibility of the caller. + * + * @param[in] agent Agent associated with the image handle. + * + * @param[in] image Image handle to destroy. + * + * @retval ::HSA_STATUS_SUCCESS The function has been executed successfully. + * + * @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been initialized. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT The agent is invalid. 
+ * + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT @p image is invalid. + */ +hsa_status_t HSA_API hsa_ext_image_destroy_v2(hsa_agent_t agent, hsa_ext_image_t image); + +/** + * @brief Create an image view for a specific mip level of a mipmapped array. + * + * @param[in] agent : GPU agent + * @param[in] mipmapped_array : Pointer to the mipmapped array handle previously + * created by ::hsa_ext_image_create_v2 with mipmap_levels > 1 + * @param[in] mip_level : Level index (0 = base). Must be < array's num levels. + * @param[out] level_image_out : Output image handle for the level view + * + * @details + * - Dimensions are clamped to at least 1 when shifting (right shift per level). + * - Row/slice pitches follow underlying layout; for tiled images internal + * SRD setup derives pitches; for linear layout the base pitches may + * be adjusted if required per level (future enhancement). + * - The view inherits access permissions from the parent array. + * + * @retval ::HSA_STATUS_SUCCESS + * @retval ::HSA_STATUS_ERROR_INVALID_ARGUMENT (null pointers, bad level, bad handle) + * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES (allocation of view metadata failed) + */ +hsa_status_t HSA_API hsa_ext_image_mipmap_array_get_level(hsa_agent_t agent, + const hsa_ext_image_t* mipmapped_array, + uint32_t mip_level, + hsa_ext_image_t* level_image_out); + /** * @brief Copies a portion of one image (the source) to another image (the * destination).