//////////////////////////////////////////////////////////////////////////////// // // The University of Illinois/NCSA // Open Source License (NCSA) // // Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved. // // Developed by: // // AMD Research and AMD HSA Software Development // // Advanced Micro Devices, Inc. // // www.amd.com // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to // deal with the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: // // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimers. // - Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimers in // the documentation and/or other materials provided with the distribution. // - Neither the names of Advanced Micro Devices, Inc, // nor the names of its contributors may be used to endorse or promote // products derived from this Software without specific prior written // permission. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS WITH THE SOFTWARE. 
// //////////////////////////////////////////////////////////////////////////////// #define NOMINMAX #include "image_manager_nv.h" #include #include #include #include "inc/hsa_ext_amd.h" #include "core/inc/hsa_internal.h" #include "addrlib/src/core/addrlib.h" #include "image_runtime.h" #include "resource.h" #include "resource_nv.h" #include "util.h" #include "device_info.h" namespace rocr { namespace image { ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD0) ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD1) ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD2) ASSERT_SIZE_UINT32(SQ_BUF_RSRC_WORD3) ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD0) ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD1) ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD2) ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD3) ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD4) ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD5) ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD6) ASSERT_SIZE_UINT32(SQ_IMG_RSRC_WORD7) ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD0) ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD1) ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD2) ASSERT_SIZE_UINT32(SQ_IMG_SAMP_WORD3) //----------------------------------------------------------------------------- // Workaround switch to combined format/type codes and missing gfx10 // specific look up table. Only covers types used in image_lut_kv.cpp. //----------------------------------------------------------------------------- struct formatconverstion_t { FMT fmt; type type; FORMAT format; }; // Format/Type to combined format code table. // Sorted and indexed to allow fast searches. 
static const formatconverstion_t FormatLUT[] = { {FMT_1_5_5_5, TYPE_UNORM, CFMT_1_5_5_5_UNORM}, // 0 {FMT_10_10_10_2, TYPE_UNORM, CFMT_10_10_10_2_UNORM}, // 1 {FMT_10_10_10_2, TYPE_SNORM, CFMT_10_10_10_2_SNORM}, // 2 {FMT_10_10_10_2, TYPE_UINT, CFMT_10_10_10_2_UINT}, // 3 {FMT_10_10_10_2, TYPE_SINT, CFMT_10_10_10_2_SINT}, // 4 {FMT_16, TYPE_UNORM, CFMT_16_UNORM}, // 5 {FMT_16, TYPE_SNORM, CFMT_16_SNORM}, // 6 {FMT_16, TYPE_UINT, CFMT_16_UINT}, // 7 {FMT_16, TYPE_SINT, CFMT_16_SINT}, // 8 {FMT_16, TYPE_FLOAT, CFMT_16_FLOAT}, // 9 {FMT_16_16, TYPE_UNORM, CFMT_16_16_UNORM}, // 10 {FMT_16_16, TYPE_SNORM, CFMT_16_16_SNORM}, // 11 {FMT_16_16, TYPE_UINT, CFMT_16_16_UINT}, // 12 {FMT_16_16, TYPE_SINT, CFMT_16_16_SINT}, // 13 {FMT_16_16, TYPE_FLOAT, CFMT_16_16_FLOAT}, // 14 {FMT_16_16_16_16, TYPE_UNORM, CFMT_16_16_16_16_UNORM}, // 15 {FMT_16_16_16_16, TYPE_SNORM, CFMT_16_16_16_16_SNORM}, // 16 {FMT_16_16_16_16, TYPE_UINT, CFMT_16_16_16_16_UINT}, // 17 {FMT_16_16_16_16, TYPE_SINT, CFMT_16_16_16_16_SINT}, // 18 {FMT_16_16_16_16, TYPE_FLOAT, CFMT_16_16_16_16_FLOAT}, // 19 {FMT_2_10_10_10, TYPE_UNORM, CFMT_2_10_10_10_UNORM}, // 20 {FMT_2_10_10_10, TYPE_SNORM, CFMT_2_10_10_10_SNORM}, // 21 {FMT_2_10_10_10, TYPE_UINT, CFMT_2_10_10_10_UINT}, // 22 {FMT_2_10_10_10, TYPE_SINT, CFMT_2_10_10_10_SINT}, // 23 {FMT_24_8, TYPE_UNORM, CFMT_24_8_UNORM}, // 24 {FMT_24_8, TYPE_UINT, CFMT_24_8_UINT}, // 25 {FMT_32, TYPE_UINT, CFMT_32_UINT}, // 26 {FMT_32, TYPE_SINT, CFMT_32_SINT}, // 27 {FMT_32, TYPE_FLOAT, CFMT_32_FLOAT}, // 28 {FMT_32_32, TYPE_UINT, CFMT_32_32_UINT}, // 29 {FMT_32_32, TYPE_SINT, CFMT_32_32_SINT}, // 30 {FMT_32_32, TYPE_FLOAT, CFMT_32_32_FLOAT}, // 31 {FMT_32_32_32, TYPE_UINT, CFMT_32_32_32_UINT}, // 32 {FMT_32_32_32, TYPE_SINT, CFMT_32_32_32_SINT}, // 33 {FMT_32_32_32, TYPE_FLOAT, CFMT_32_32_32_FLOAT}, // 34 {FMT_32_32_32_32, TYPE_UINT, CFMT_32_32_32_32_UINT}, // 35 {FMT_32_32_32_32, TYPE_SINT, CFMT_32_32_32_32_SINT}, // 36 {FMT_32_32_32_32, TYPE_FLOAT, 
CFMT_32_32_32_32_FLOAT}, // 37 {FMT_5_5_5_1, TYPE_UNORM, CFMT_5_5_5_1_UNORM}, // 38 {FMT_5_6_5, TYPE_UNORM, CFMT_5_6_5_UNORM}, // 39 {FMT_8, TYPE_UNORM, CFMT_8_UNORM}, // 40 {FMT_8, TYPE_SNORM, CFMT_8_SNORM}, // 41 {FMT_8, TYPE_UINT, CFMT_8_UINT}, // 42 {FMT_8, TYPE_SINT, CFMT_8_SINT}, // 43 {FMT_8, TYPE_SRGB, CFMT_8_SRGB}, // 44 {FMT_8_24, TYPE_UNORM, CFMT_8_24_UNORM}, // 45 {FMT_8_24, TYPE_UINT, CFMT_8_24_UINT}, // 46 {FMT_8_8, TYPE_UNORM, CFMT_8_8_UNORM}, // 47 {FMT_8_8, TYPE_SNORM, CFMT_8_8_SNORM}, // 48 {FMT_8_8, TYPE_UINT, CFMT_8_8_UINT}, // 49 {FMT_8_8, TYPE_SINT, CFMT_8_8_SINT}, // 50 {FMT_8_8, TYPE_SRGB, CFMT_8_8_SRGB}, // 51 {FMT_8_8_8_8, TYPE_UNORM, CFMT_8_8_8_8_UNORM}, // 52 {FMT_8_8_8_8, TYPE_SNORM, CFMT_8_8_8_8_SNORM}, // 53 {FMT_8_8_8_8, TYPE_UINT, CFMT_8_8_8_8_UINT}, // 54 {FMT_8_8_8_8, TYPE_SINT, CFMT_8_8_8_8_SINT}, // 55 {FMT_8_8_8_8, TYPE_SRGB, CFMT_8_8_8_8_SRGB} // 56 }; static const int FormatLUTSize = sizeof(FormatLUT)/sizeof(formatconverstion_t); //Index in FormatLUT to start search, indexed by FMT enum. 
// NOTE(review): in this copy of the file several casts appear without a
// template argument (`reinterpret_cast(...)`, `static_cast(...)`,
// `const_cast(...)`). They are left exactly as found; confirm against the
// authoritative source before building.

// One start index into FormatLUT per FMT value. The value 57 is used for FMT
// values that have no rows in FormatLUT.
static const int FormatEntryPoint[] = { 57, 40, 5, 47, 26, 10, 57, 57, 1, 20, 52, 29, 15, 32, 35, 57, 39, 0, 38, 57, 45, 24 };

// Maps a (fmt, type) pair to a combined FORMAT code using FormatEntryPoint
// and FormatLUT. `fmt` must be a valid index into FormatEntryPoint (asserted).
// The search window is [start, min(start + 6, FormatLUTSize)).
static FORMAT GetCombinedFormat(uint8_t fmt, uint8_t type) {
  assert(fmt < sizeof(FormatEntryPoint)/sizeof(int) && "FMT out of range.");
  int start = FormatEntryPoint[fmt];
  int stop = std::min(start + 6, FormatLUTSize); // Only 6 types are used in image_kv_lut.cpp
  // NOTE(review): the text is truncated inside the for-statement below. The
  // remainder of GetCombinedFormat and the beginning of the next definition
  // are not visible here. The statements that follow (using `out`, `desc`,
  // `image_data_layout`, `image_info`, ...) belong to that later definition,
  // which validates caller-supplied pitches and reports size/alignment.
  for(int i=start; i> 3) * out.pitch;
  size_t slicePitch = rowPitch * out.height;
  // For non-1DB linear images, a non-zero caller pitch must match the pitch
  // computed above.
  if (desc.geometry != HSA_EXT_IMAGE_GEOMETRY_1DB && image_data_layout == HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR && ((image_data_row_pitch && (rowPitch != image_data_row_pitch)) || (image_data_slice_pitch && (slicePitch != image_data_slice_pitch)))) {
    return static_cast( HSA_EXT_STATUS_ERROR_IMAGE_PITCH_UNSUPPORTED);
  }
  image_info.size = out.surfSize;
  assert(image_info.size != 0);
  image_info.alignment = out.baseAlign;
  assert(image_info.alignment != 0);
  return HSA_STATUS_SUCCESS;
}

// Always returns true; `address` is not inspected.
bool ImageManagerNv::IsLocalMemory(const void* address) const { return true; }

// Fills image.srd for an image whose descriptor words are supplied by the
// caller in `descriptor`.
//
// The eight words from `descriptor` are copied into image.srd[0..7], then the
// address, format and swizzle fields are overwritten for this image's data
// pointer and format. image.row_pitch / image.slice_pitch are set to 0 and
// srd[8..10] receive channel type, channel order and width.
//
// Returns HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED when the format maps
// to an unsupported capability or a zero element size, or (non-1DB images)
// when the element size differs from ImageLut().GetPixelSize(); otherwise
// HSA_STATUS_SUCCESS.
hsa_status_t ImageManagerNv::PopulateImageSrd(Image& image, const metadata_amd_t* descriptor) const {
  const metadata_amd_nv_t* desc = reinterpret_cast(descriptor);
  const void* image_data_addr = image.data;
  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  if ((image_prop.cap == HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED) || (image_prop.element_size == 0)) return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
  const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
  // Rebase the data pointer by local_memory_base_address_.
  if (IsLocalMemory(image.data)) {
    image_data_addr = reinterpret_cast( reinterpret_cast(image.data) - local_memory_base_address_);
  }
  // Start from the caller-supplied words; the fields patched below replace
  // only part of them.
  image.srd[0] = desc->word0.u32All;
  image.srd[1] = desc->word1.u32All;
  image.srd[2] = desc->word2.u32All;
  image.srd[3] = desc->word3.u32All;
  image.srd[4] = desc->word4.u32All;
  image.srd[5] = desc->word5.u32All;
  image.srd[6] = desc->word6.u32All;
  image.srd[7] = desc->word7.u32All;
  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    // 1DB images use the buffer resource layout (SQ_BUF_RSRC_*). Word 2 is
    // left as imported.
    SQ_BUF_RSRC_WORD0 word0;
    SQ_BUF_RSRC_WORD1 word1;
    SQ_BUF_RSRC_WORD3 word3;
    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow32(image_data_addr);
    word1.val = image.srd[1];
    word1.f.BASE_ADDRESS_HI = PtrHigh32(image_data_addr);
    word1.f.STRIDE = image_prop.element_size;
    word3.val = image.srd[3];
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    word3.f.INDEX_STRIDE = image_prop.element_size;
    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[3] = word3.val;
  } else {
    // All other geometries use the image resource layout (SQ_IMG_RSRC_*),
    // patched in place inside image.srd.
    uint32_t hwPixelSize = ImageLut().GetPixelSize(image_prop.data_format, image_prop.data_type);
    if (image_prop.element_size != hwPixelSize) {
      return (hsa_status_t)HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED;
    }
    reinterpret_cast(&image.srd[0])->bits.BASE_ADDRESS = PtrLow40Shift8(image_data_addr);
    reinterpret_cast(&image.srd[1])->bits.BASE_ADDRESS_HI = PtrHigh64Shift40(image_data_addr);
    reinterpret_cast(&image.srd[1])->bits.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    reinterpret_cast(&image.srd[3])->bits.DST_SEL_X = swizzle.x;
    reinterpret_cast(&image.srd[3])->bits.DST_SEL_Y = swizzle.y;
    reinterpret_cast(&image.srd[3])->bits.DST_SEL_Z = swizzle.z;
    reinterpret_cast(&image.srd[3])->bits.DST_SEL_W = swizzle.w;
    // Only the 1D / 1DA geometries have TYPE rewritten; other geometries keep
    // the imported TYPE.
    if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA || image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1D) {
      reinterpret_cast(&image.srd[3])->bits.TYPE = ImageLut().MapGeometry(image.desc.geometry);
    }
    // Imported metadata holds the offset to metadata, add the image base address.
    uintptr_t meta = uintptr_t(((SQ_IMG_RSRC_WORD7*)(&image.srd[7]))->bits.META_DATA_ADDRESS_HI) << 16;
    meta |= uintptr_t(((SQ_IMG_RSRC_WORD6*)(&image.srd[6]))->bits.META_DATA_ADDRESS) << 8;
    meta += reinterpret_cast(image_data_addr);
    ((SQ_IMG_RSRC_WORD6*)(&image.srd[6]))->bits.META_DATA_ADDRESS = PtrLow16Shift8((void*)meta);
    ((SQ_IMG_RSRC_WORD7*)(&image.srd[7]))->bits.META_DATA_ADDRESS_HI = PtrHigh64Shift16((void*)meta);
  }
  // Looks like this is only used for CPU copies.
  image.row_pitch = 0;
  image.slice_pitch = 0;
  // Used by HSAIL shader ABI
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast(image.desc.width);
  return HSA_STATUS_SUCCESS;
}

// Picks the TEX_BC_SWIZZLE value for a destination-select swizzle. The choice
// is keyed on which of the four outputs (checked in the order w, x, y, z)
// selects SEL_X; if none does, TEX_BC_Swizzle_XYZW is returned.
static TEX_BC_SWIZZLE GetBcSwizzle(const Swizzle& swizzle) {
  SEL r = (SEL)swizzle.x;
  SEL g = (SEL)swizzle.y;
  SEL b = (SEL)swizzle.z;
  SEL a = (SEL)swizzle.w;
  TEX_BC_SWIZZLE bcSwizzle = TEX_BC_Swizzle_XYZW;
  if (a == SEL_X) {
    // Have to use either TEX_BC_Swizzle_WZYX or TEX_BC_Swizzle_WXYZ
    //
    // For the pre-defined border color values (white, opaque black,
    // transparent black), the only thing that matters is that the alpha
    // channel winds up in the correct place (because the RGB channels are
    // all the same) so either of these TEX_BC_Swizzle enumerations will
    // work. Not sure what happens with border color palettes.
    if (b == SEL_Y) {
      // ABGR
      bcSwizzle = TEX_BC_Swizzle_WZYX;
    } else if ((r == SEL_X) && (g == SEL_X) && (b == SEL_X)) {
      // RGBA
      bcSwizzle = TEX_BC_Swizzle_XYZW;
    } else {
      // ARGB
      bcSwizzle = TEX_BC_Swizzle_WXYZ;
    }
  } else if (r == SEL_X) {
    // Have to use either TEX_BC_Swizzle_XYZW or TEX_BC_Swizzle_XWYZ
    if (g == SEL_Y) {
      // RGBA
      bcSwizzle = TEX_BC_Swizzle_XYZW;
    } else if ((g == SEL_X) && (b == SEL_X) && (a == SEL_W)) {
      // RGBA
      bcSwizzle = TEX_BC_Swizzle_XYZW;
    } else {
      // RAGB
      bcSwizzle = TEX_BC_Swizzle_XWYZ;
    }
  } else if (g == SEL_X) {
    // GRAB, have to use TEX_BC_Swizzle_YXWZ
    bcSwizzle = TEX_BC_Swizzle_YXWZ;
  } else if (b == SEL_X) {
    // BGRA, have to use TEX_BC_Swizzle_ZYXW
    bcSwizzle = TEX_BC_Swizzle_ZYXW;
  }
  return bcSwizzle;
}

// Builds image.srd from scratch from image.desc and image.data, and sets
// image.row_pitch / image.slice_pitch.
//
// The format is expected to be supported (asserted, not checked at runtime).
// Returns HSA_STATUS_ERROR when GetAddrlibSurfaceInfoNv() reports failure for
// a non-1DB image; otherwise HSA_STATUS_SUCCESS.
hsa_status_t ImageManagerNv::PopulateImageSrd(Image& image) const {
  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);
  const void* image_data_addr = image.data;
  // Rebase the data pointer by local_memory_base_address_.
  if (IsLocalMemory(image.data)) {
    image_data_addr = reinterpret_cast( reinterpret_cast(image.data) - local_memory_base_address_);
  }
  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    // 1DB images use the four-word buffer resource layout.
    SQ_BUF_RSRC_WORD0 word0;
    SQ_BUF_RSRC_WORD1 word1;
    SQ_BUF_RSRC_WORD2 word2;
    SQ_BUF_RSRC_WORD3 word3;
    word0.val = 0;
    word0.f.BASE_ADDRESS = PtrLow32(image_data_addr);
    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh32(image_data_addr);
    word1.f.STRIDE = image_prop.element_size;
    word1.f.SWIZZLE_ENABLE = false;
    word1.f.CACHE_SWIZZLE = false;
    // NOTE(review): word2.val is never zeroed, unlike the other words; this is
    // only safe if NUM_RECORDS covers the whole word - confirm.
    word2.f.NUM_RECORDS = image.desc.width * image_prop.element_size;
    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.RESOURCE_LEVEL = 1;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    word3.f.INDEX_STRIDE = image_prop.element_size;
    word3.f.TYPE = ImageLut().MapGeometry(image.desc.geometry);
    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;
    image.row_pitch = image.desc.width * image_prop.element_size;
    image.slice_pitch = image.row_pitch;
  } else {
    // All other geometries use the eight-word image resource layout.
    SQ_IMG_RSRC_WORD0 word0;
    SQ_IMG_RSRC_WORD1 word1;
    SQ_IMG_RSRC_WORD2 word2;
    SQ_IMG_RSRC_WORD3 word3;
    SQ_IMG_RSRC_WORD4 word4;
    SQ_IMG_RSRC_WORD5 word5;
    // NOTE(review): word6 and word7 are declared with the WORD5 type. Both are
    // only ever set to 0 below, but the type looks like a copy/paste slip.
    SQ_IMG_RSRC_WORD5 word6;
    SQ_IMG_RSRC_WORD5 word7;
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
    // The returned swizzle mode doubles as the status: (uint32_t)(-1) means
    // the surface query failed.
    uint32_t swizzleMode = GetAddrlibSurfaceInfoNv( image.component, image.desc, image.tile_mode, image.row_pitch, image.slice_pitch, out);
    if (swizzleMode == (uint32_t)(-1)) {
      return HSA_STATUS_ERROR;
    }
    assert((out.bpp / 8) == image_prop.element_size);
    const size_t row_pitch_size = out.pitch * image_prop.element_size;
    // NOTE(review): word0.val is never zeroed, unlike words 1-7; this is only
    // safe if BASE_ADDRESS covers the whole word - confirm.
    word0.f.BASE_ADDRESS = PtrLow40Shift8(image_data_addr);
    word1.val = 0;
    word1.f.BASE_ADDRESS_HI = PtrHigh64Shift40(image_data_addr);
    word1.f.MIN_LOD = 0;
    word1.f.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    // Only take the lowest 2 bits of (image.desc.width - 1)
    word1.f.WIDTH = BitSelect<0, 1>(image.desc.width - 1);
    word2.val = 0;
    // Take the high 12 bits of (image.desc.width - 1)
    word2.f.WIDTH_HI = BitSelect<2, 13>(image.desc.width - 1);
    word2.f.HEIGHT = image.desc.height ? image.desc.height - 1 : 0;
    word2.f.RESOURCE_LEVEL = 1;
    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    word3.val = 0;
    word3.f.DST_SEL_X = swizzle.x;
    word3.f.DST_SEL_Y = swizzle.y;
    word3.f.DST_SEL_Z = swizzle.z;
    word3.f.DST_SEL_W = swizzle.w;
    word3.f.SW_MODE = swizzleMode;
    word3.f.BC_SWIZZLE = GetBcSwizzle(swizzle);
    word3.f.TYPE = ImageLut().MapGeometry(image.desc.geometry);
    const bool image_array = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA || image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DA || image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_2DADEPTH);
    const bool image_3d = (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_3D);
    word4.val = 0;
    // DEPTH holds (array_size - 1) for array images, (depth - 1) for 3D
    // images, and 0 otherwise.
    word4.f.DEPTH = (image_array) // Doesn't hurt but isn't array_size already >0?
                        ? std::max(image.desc.array_size, static_cast(1)) - 1
                        : (image_3d) ? image.desc.depth - 1 : 0;
    uint32_t minor_ver = MinorVerFromDevID(chip_id_);
    // For 1d, 2d and 2d-msaa in gfx1030 and beyond this is pitch-1
    if ((minor_ver >= 3) && !image_array && !image_3d) word4.f.PITCH = out.pitch - 1;
    word5.val = 0;
    word6.val = 0;
    word7.val = 0;
    image.srd[0] = word0.val;
    image.srd[1] = word1.val;
    image.srd[2] = word2.val;
    image.srd[3] = word3.val;
    image.srd[4] = word4.val;
    image.srd[5] = word5.val;
    image.srd[6] = word6.val;
    image.srd[7] = word7.val;
    image.row_pitch = row_pitch_size;
    image.slice_pitch = out.sliceSize;
  }
  // Words 8-10 carry channel type, channel order and width.
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast(image.desc.width);
  return HSA_STATUS_SUCCESS;
}

// Switches an already populated image to `new_format`: stores the format in
// image.desc, then rewrites only the FORMAT and DST_SEL_* fields of the
// existing SRD (word 3 for 1DB images; words 1 and 3 otherwise) and refreshes
// srd[8..10]. The new format is expected to be supported (asserted only).
// Always returns HSA_STATUS_SUCCESS.
hsa_status_t ImageManagerNv::ModifyImageSrd( Image& image, hsa_ext_image_format_t& new_format) const {
  image.desc.format = new_format;
  ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  assert(image_prop.element_size != 0);
  if (image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    SQ_BUF_RSRC_WORD3* word3 = reinterpret_cast(&image.srd[3]);
    word3->bits.DST_SEL_X = swizzle.x;
    word3->bits.DST_SEL_Y = swizzle.y;
    word3->bits.DST_SEL_Z = swizzle.z;
    word3->bits.DST_SEL_W = swizzle.w;
    word3->bits.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
  } else {
    SQ_IMG_RSRC_WORD1* word1 = reinterpret_cast(&image.srd[1]);
    word1->bits.FORMAT = GetCombinedFormat(image_prop.data_format, image_prop.data_type);
    const Swizzle swizzle = ImageLut().MapSwizzle(image.desc.format.channel_order);
    SQ_IMG_RSRC_WORD3* word3 = reinterpret_cast(&image.srd[3]);
    word3->bits.DST_SEL_X = swizzle.x;
    word3->bits.DST_SEL_Y = swizzle.y;
    word3->bits.DST_SEL_Z = swizzle.z;
    word3->bits.DST_SEL_W = swizzle.w;
  }
  image.srd[8] = image.desc.format.channel_type;
  image.srd[9] = image.desc.format.channel_order;
  image.srd[10] = static_cast(image.desc.width);
  return HSA_STATUS_SUCCESS;
}

// Builds the four sampler SRD words in sampler.srd from sampler.desc.
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for an addressing or filter mode
// not handled by the switches below; otherwise HSA_STATUS_SUCCESS.
hsa_status_t ImageManagerNv::PopulateSamplerSrd(Sampler& sampler) const {
  const hsa_ext_sampler_descriptor_t sampler_descriptor = sampler.desc;
  SQ_IMG_SAMP_WORD0 word0;
  SQ_IMG_SAMP_WORD1 word1;
  SQ_IMG_SAMP_WORD2 word2;
  SQ_IMG_SAMP_WORD3 word3;
  word0.u32All = 0;
  // The addressing mode is chosen once for X and then copied to Y and Z.
  switch (sampler_descriptor.address_mode) {
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE:
      word0.bits.CLAMP_X = static_cast(SQ_TEX_CLAMP_LAST_TEXEL);
      break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER:
      word0.bits.CLAMP_X = static_cast(SQ_TEX_CLAMP_BORDER);
      break;
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT:
      word0.bits.CLAMP_X = static_cast(SQ_TEX_MIRROR);
      break;
    // UNDEFINED is treated the same as REPEAT.
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED:
    case HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT:
      word0.bits.CLAMP_X = static_cast(SQ_TEX_WRAP);
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  word0.bits.CLAMP_Y = word0.bits.CLAMP_X;
  word0.bits.CLAMP_Z = word0.bits.CLAMP_X;
  word0.bits.FORCE_UNNORMALIZED = (sampler_descriptor.coordinate_mode == HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED);
  word1.u32All = 0;
  word1.bits.MAX_LOD = 4095;
  word2.u32All = 0;
  // The same XY filter is used for magnification and minification.
  switch (sampler_descriptor.filter_mode) {
    case HSA_EXT_SAMPLER_FILTER_MODE_NEAREST:
      word2.bits.XY_MAG_FILTER = static_cast(SQ_TEX_XY_FILTER_POINT);
      break;
    case HSA_EXT_SAMPLER_FILTER_MODE_LINEAR:
      word2.bits.XY_MAG_FILTER = static_cast(SQ_TEX_XY_FILTER_BILINEAR);
      break;
    default:
      return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }
  word2.bits.XY_MIN_FILTER = word2.bits.XY_MAG_FILTER;
  word2.bits.Z_FILTER = SQ_TEX_Z_FILTER_NONE;
  word2.bits.MIP_FILTER = SQ_TEX_MIP_FILTER_NONE;
  word3.u32All = 0;
  // TODO: check this bit with HSAIL spec.
  word3.bits.BORDER_COLOR_TYPE = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
  sampler.srd[0] = word0.u32All;
  sampler.srd[1] = word1.u32All;
  sampler.srd[2] = word2.u32All;
  sampler.srd[3] = word3.u32All;
  return HSA_STATUS_SUCCESS;
}

// Asks addrlib for the preferred swizzle mode and the surface layout of the
// image described by `desc`, writing the layout to `out`.
//
// Returns the chosen swizzle mode, or (uint32_t)(-1) if either addrlib call
// fails or the computed surface size is 0.
//
// `image_data_row_pitch` is only consulted when the device minor version is
// >= 3. `image_data_slice_pitch` is not referenced in this body.
uint32_t ImageManagerNv::GetAddrlibSurfaceInfoNv( hsa_agent_t component, const hsa_ext_image_descriptor_t& desc, Image::TileMode tileMode, size_t image_data_row_pitch, size_t image_data_slice_pitch, ADDR2_COMPUTE_SURFACE_INFO_OUTPUT& out) const {
  const ImageProperty image_prop = GetImageProperty(component, desc.format, desc.geometry);
  const AddrFormat addrlib_format = GetAddrlibFormat(image_prop);
  const uint32_t width = static_cast(desc.width);
  const uint32_t height = static_cast(desc.height);
  // Slice count is the larger of array_size and depth, but never below 1.
  static const size_t kMinNumSlice = 1;
  const uint32_t num_slice = static_cast( std::max(kMinNumSlice, std::max(desc.array_size, desc.depth)));
  uint32_t minor_ver = MinorVerFromDevID(chip_id_);
  ADDR2_COMPUTE_SURFACE_INFO_INPUT in = {0};
  in.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
  in.format = addrlib_format;
  in.bpp = static_cast(image_prop.element_size) * 8;
  in.width = width;
  in.height = height;
  in.numSlices = num_slice;
  // Custom Pitch is supported in gfx1030 and beyond
  // NOTE(review): image_prop.element_size is used as a divisor without a zero
  // check in this function - confirm callers only pass supported formats.
  if (minor_ver >= 3) in.pitchInElement = image_data_row_pitch / image_prop.element_size;
  // Geometries not listed leave in.resourceType at its zero-initialized value.
  switch (desc.geometry) {
    case HSA_EXT_IMAGE_GEOMETRY_1D:
    case HSA_EXT_IMAGE_GEOMETRY_1DB:
    case HSA_EXT_IMAGE_GEOMETRY_1DA:
      in.resourceType = ADDR_RSRC_TEX_1D;
      break;
    case HSA_EXT_IMAGE_GEOMETRY_2D:
    case HSA_EXT_IMAGE_GEOMETRY_2DDEPTH:
    case HSA_EXT_IMAGE_GEOMETRY_2DA:
    case HSA_EXT_IMAGE_GEOMETRY_2DADEPTH:
      in.resourceType = ADDR_RSRC_TEX_2D;
      break;
    case HSA_EXT_IMAGE_GEOMETRY_3D:
      in.resourceType = ADDR_RSRC_TEX_3D;
      break;
  }
  in.flags.texture = 1;
  // Mirror the surface description into the preferred-setting query.
  ADDR2_GET_PREFERRED_SURF_SETTING_INPUT prefSettingsInput = { 0 };
  ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT prefSettingsOutput = { 0 };
  prefSettingsInput.size = sizeof(prefSettingsInput);
  prefSettingsInput.flags = in.flags;
  prefSettingsInput.bpp = in.bpp;
  prefSettingsInput.format = in.format;
  prefSettingsInput.width = in.width;
  prefSettingsInput.height = in.height;
  prefSettingsInput.numFrags = in.numFrags;
  prefSettingsInput.numSamples = in.numSamples;
  prefSettingsInput.numMipLevels = in.numMipLevels;
  prefSettingsInput.numSlices = in.numSlices;
  prefSettingsInput.resourceLoction = ADDR_RSRC_LOC_UNDEF;
  prefSettingsInput.resourceType = in.resourceType;
  // Disallow all swizzles but linear.
  if (tileMode == Image::TileMode::LINEAR) {
    prefSettingsInput.forbiddenBlock.macroThin4KB = 1;
    prefSettingsInput.forbiddenBlock.macroThick4KB = 1;
    prefSettingsInput.forbiddenBlock.macroThin64KB = 1;
    prefSettingsInput.forbiddenBlock.macroThick64KB = 1;
    prefSettingsInput.forbiddenBlock.micro = 1;
    prefSettingsInput.forbiddenBlock.var = 1;
  }
  // but don't ever allow the 256b swizzle modes
  //prefSettingsInput.forbiddenBlock.micro = 1;
  // and don't allow variable-size block modes
  //prefSettingsInput.forbiddenBlock.var = 1;
  if (ADDR_OK != Addr2GetPreferredSurfaceSetting(addr_lib_, &prefSettingsInput, &prefSettingsOutput)) {
    return (uint32_t)(-1);
  }
  in.swizzleMode = prefSettingsOutput.swizzleMode;
  out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
  if (ADDR_OK != Addr2ComputeSurfaceInfo(addr_lib_, &in, &out)) {
    return (uint32_t)(-1);
  }
  if (out.surfSize == 0) {
    return (uint32_t)(-1);
  }
  return in.swizzleMode;
}

// Fills `region` of `image` with `pattern` through the blit kernel.
//
// The image's SRD is modified in place for the duration of the blit (the
// const is cast away) and restored before returning:
//   - UNORM_SHORT_101010 images get DST_SEL_W forced to SEL_0;
//   - sRGB channel orders get the pattern's first three components passed
//     through LinearToStandardRGB() and FORMAT switched to TYPE_UNORM.
//
// Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES if the blit queue is unavailable;
// otherwise the status reported by the blit kernel's FillImage().
hsa_status_t ImageManagerNv::FillImage(const Image& image, const void* pattern, const hsa_ext_image_region_t& region) {
  if (BlitQueueInit().queue_ == NULL) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }
  Image* image_view = const_cast(&image);
  // A non-NULL pointer below records which SRD word was patched and therefore
  // needs restoring after the blit.
  SQ_BUF_RSRC_WORD3* word3_buff = NULL;
  SQ_IMG_RSRC_WORD3* word3_image = NULL;
  uint32_t dst_sel_w_original = 0;
  if (image_view->desc.format.channel_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010) {
    // Force GPU to ignore the last two bits (alpha bits).
    if (image_view->desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DB) {
      word3_buff = reinterpret_cast(&image_view->srd[3]);
      dst_sel_w_original = word3_buff->bits.DST_SEL_W;
      word3_buff->bits.DST_SEL_W = SEL_0;
    } else {
      word3_image = reinterpret_cast(&image_view->srd[3]);
      dst_sel_w_original = word3_image->bits.DST_SEL_W;
      word3_image->bits.DST_SEL_W = SEL_0;
    }
  }
  SQ_IMG_RSRC_WORD1* word1 = NULL;
  uint32_t num_format_original = 0;
  const void* new_pattern = pattern;
  float fill_value[4] = {0};
  switch (image_view->desc.format.channel_order) {
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX:
    case HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA: {
      // KV and CZ don't have write support for SRGBA image, so convert pattern
      // to standard form and treat the image as RGBA image.
      // The pattern is read as four floats; the fourth is copied unconverted.
      const float* pattern_f = reinterpret_cast(pattern);
      fill_value[0] = LinearToStandardRGB(pattern_f[0]);
      fill_value[1] = LinearToStandardRGB(pattern_f[1]);
      fill_value[2] = LinearToStandardRGB(pattern_f[2]);
      fill_value[3] = pattern_f[3];
      new_pattern = fill_value;
      ImageProperty image_prop = ImageLut().MapFormat(image.desc.format, image.desc.geometry);
      // NOTE(review): this patches srd[1] as an image-resource word regardless
      // of geometry, whereas the 1DB paths elsewhere in this file keep FORMAT
      // in buffer word 3 - confirm sRGB 1DB images cannot reach here.
      word1 = reinterpret_cast(&image_view->srd[1]);
      num_format_original = word1->bits.FORMAT;
      word1->bits.FORMAT = GetCombinedFormat(image_prop.data_format, TYPE_UNORM);
    } break;
    default:
      break;
  }
  hsa_status_t status = ImageRuntime::instance()->blit_kernel().FillImage( blit_queue_, blit_code_catalog_, *image_view, new_pattern, region);
  // Revert back original configuration.
  if (word3_buff != NULL) {
    word3_buff->bits.DST_SEL_W = dst_sel_w_original;
  }
  if (word3_image != NULL) {
    word3_image->bits.DST_SEL_W = dst_sel_w_original;
  }
  if (word1 != NULL) {
    word1->bits.FORMAT = num_format_original;
  }
  return status;
}

}  // namespace image
}  // namespace rocr