Dosyalar
rocm-systems/projects/rocr-runtime/runtime/hsa-runtime/image/image_manager.cpp
T
Apurv Mishra be375c2dbf rocr: Add support for Mipmapped Array (#1847)
SWDEV-539526 - Add support for Mipmapped Array in Rocr

Add support for Mipmapped Array functionality in the Rocr runtime, enabling GPU applications to work with multi-level texture mipmaps. The implementation introduces new public APIs for creating, querying, and managing mipmapped arrays across different GPU architectures.

Signed-off-by: Apurv Mishra <Apurv.Mishra@amd.com>
Co-authored-by: Shweta Khatri <shweta.khatri@amd.com>
Co-authored-by: taosang2 <tao.sang@amd.com>
2026-01-08 17:14:39 -06:00

761 satır
26 KiB
C++

////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "inc/hsa_ext_amd.h"
#include "inc/hsa_ext_image.h"
#include "core/inc/hsa_ext_amd_impl.h"
#include "image_manager.h"
#include "image_runtime.h"
#include <assert.h>
#include <algorithm>
#include <climits>
#include <cmath>
#include <cstring>
namespace rocr {
namespace image {
// Allocates storage for an Image from the runtime's kernarg memory pool,
// constructs the object in place and grants `agent` access to the allocation.
//
// Returns the new Image, or a null pointer when the allocation or the access
// grant fails. In the latter case the object is destroyed and its storage
// released before returning.
Image* Image::Create(hsa_agent_t agent) {
  void* storage = nullptr;
  hsa_status_t result = AMD::hsa_amd_memory_pool_allocate(
      ImageRuntime::instance()->kernarg_pool(), sizeof(Image), 0, &storage);
  assert(result == HSA_STATUS_SUCCESS);
  if (result != HSA_STATUS_SUCCESS) {
    return nullptr;
  }

  // The pool hands back raw memory; run the constructor on it explicitly.
  Image* const created = new (storage) Image();

  result = AMD::hsa_amd_agents_allow_access(1, &agent, nullptr, created);
  if (result == HSA_STATUS_SUCCESS) {
    return created;
  }

  Image::Destroy(created);
  return nullptr;
}
void Image::Destroy(const Image* image) {
assert(image != NULL);
image->~Image();
hsa_status_t status = AMD::hsa_amd_memory_pool_free(const_cast<Image*>(image));
assert(status == HSA_STATUS_SUCCESS);
}
// Allocates storage for a Sampler from the runtime's kernarg memory pool,
// constructs the object in place and grants `agent` access to the allocation.
//
// Returns the new Sampler, or a null pointer when the allocation or the
// access grant fails. In the latter case the object is destroyed and its
// storage released before returning.
Sampler* Sampler::Create(hsa_agent_t agent) {
  void* storage = nullptr;
  hsa_status_t result = AMD::hsa_amd_memory_pool_allocate(
      ImageRuntime::instance()->kernarg_pool(), sizeof(Sampler), 0, &storage);
  if (result != HSA_STATUS_SUCCESS) {
    return nullptr;
  }

  // The pool hands back raw memory; run the constructor on it explicitly.
  Sampler* const created = new (storage) Sampler();

  result = AMD::hsa_amd_agents_allow_access(1, &agent, nullptr, created);
  if (result == HSA_STATUS_SUCCESS) {
    return created;
  }

  Sampler::Destroy(created);
  return nullptr;
}
void Sampler::Destroy(const Sampler* sampler) {
assert(sampler != NULL);
sampler->~Sampler();
hsa_status_t status = AMD::hsa_amd_memory_pool_free(const_cast<Sampler*>(sampler));
assert(status == HSA_STATUS_SUCCESS);
}
// Allocates storage for a MipmappedArray from the runtime's kernarg memory
// pool, constructs the object in place and grants `agent` access to the
// allocation.
//
// Returns the new MipmappedArray, or a null pointer when the allocation or
// the access grant fails. In the latter case the object is destroyed and its
// storage released before returning.
MipmappedArray* MipmappedArray::Create(hsa_agent_t agent) {
  void* storage = nullptr;
  hsa_status_t result = AMD::hsa_amd_memory_pool_allocate(
      ImageRuntime::instance()->kernarg_pool(), sizeof(MipmappedArray), 0,
      &storage);
  assert(result == HSA_STATUS_SUCCESS);
  if (result != HSA_STATUS_SUCCESS) {
    return nullptr;
  }

  // The pool hands back raw memory; run the constructor on it explicitly.
  MipmappedArray* const created = new (storage) MipmappedArray();

  // Make the freshly constructed object reachable by the requesting agent.
  result = AMD::hsa_amd_agents_allow_access(1, &agent, nullptr, created);
  if (result == HSA_STATUS_SUCCESS) {
    return created;
  }

  MipmappedArray::Destroy(created);
  return nullptr;
}
void MipmappedArray::Destroy(const MipmappedArray* mipmapped_array) {
assert(mipmapped_array != NULL);
mipmapped_array->~MipmappedArray();
hsa_status_t status = AMD::hsa_amd_memory_pool_free(
const_cast<MipmappedArray*>(mipmapped_array));
assert(status == HSA_STATUS_SUCCESS);
}
// Construction and destruction need no work beyond what the members do for
// themselves.
ImageManager::ImageManager() = default;

ImageManager::~ImageManager() = default;
// Copies texels from a linear buffer into a region of dst_image.
//
// The buffer is wrapped in a temporary Image that borrows dst_image's agent
// and descriptor, points at src_memory and uses the caller-supplied pitches;
// the transfer itself is delegated to CopyImage.
//
//   src_memory       start of the source buffer (read only).
//   src_row_pitch    source row pitch, stored as the temporary's row_pitch.
//   src_slice_pitch  source slice pitch, stored as the temporary's slice_pitch.
//   dst_image        destination image.
//   image_region     destination offset and extent of the copy.
//
// Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES if the temporary Image cannot be
// created, otherwise whatever CopyImage returns.
hsa_status_t ImageManager::CopyBufferToImage(
    const void* src_memory, size_t src_row_pitch, size_t src_slice_pitch,
    const Image& dst_image, const hsa_ext_image_region_t& image_region) {
  Image* src_image = Image::Create(dst_image.component);
  // Image::Create reports failure with a null pointer; do not dereference it.
  if (src_image == nullptr) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  src_image->component = dst_image.component;
  src_image->desc = dst_image.desc;
  // The temporary is only ever read through, so dropping const here is safe.
  src_image->data = const_cast<void*>(src_memory);
  src_image->permission = HSA_ACCESS_PERMISSION_RO;
  src_image->row_pitch = src_row_pitch;
  src_image->slice_pitch = src_slice_pitch;

  // The buffer is read from its start; the region offset applies to the
  // destination image only.
  const hsa_dim3_t dst_origin = image_region.offset;
  const hsa_dim3_t src_origin = {0};
  const hsa_dim3_t copy_size = image_region.range;

  const hsa_status_t status = ImageManager::CopyImage(
      dst_image, *src_image, dst_origin, src_origin, copy_size);

  Image::Destroy(src_image);
  return status;
}
// Copies texels from a region of src_image into a linear buffer.
//
// The buffer is wrapped in a temporary Image that borrows src_image's agent
// and descriptor, points at dst_memory and uses the caller-supplied pitches;
// the transfer itself is delegated to CopyImage.
//
//   src_image        source image.
//   dst_memory       start of the destination buffer.
//   dst_row_pitch    destination row pitch, stored as the temporary's row_pitch.
//   dst_slice_pitch  destination slice pitch, stored as the temporary's
//                    slice_pitch.
//   image_region     source offset and extent of the copy.
//
// Returns HSA_STATUS_ERROR_OUT_OF_RESOURCES if the temporary Image cannot be
// created, otherwise whatever CopyImage returns.
hsa_status_t ImageManager::CopyImageToBuffer(
    const Image& src_image, void* dst_memory, size_t dst_row_pitch,
    size_t dst_slice_pitch, const hsa_ext_image_region_t& image_region) {
  // Treat buffer as image since we don't tile our image anyway.
  Image* dst_image = Image::Create(src_image.component);
  // Image::Create reports failure with a null pointer; do not dereference it.
  if (dst_image == nullptr) {
    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
  }

  dst_image->component = src_image.component;
  dst_image->desc = src_image.desc;  // the width, height, depth is ignored.
  dst_image->data = dst_memory;
  dst_image->permission = HSA_ACCESS_PERMISSION_WO;
  dst_image->row_pitch = dst_row_pitch;
  dst_image->slice_pitch = dst_slice_pitch;

  // The buffer is written from its start; the region offset applies to the
  // source image only.
  const hsa_dim3_t dst_origin = {0};
  const hsa_dim3_t src_origin = image_region.offset;
  const hsa_dim3_t copy_size = image_region.range;

  const hsa_status_t status = ImageManager::CopyImage(
      *dst_image, src_image, dst_origin, src_origin, copy_size);

  Image::Destroy(dst_image);
  return status;
}
// Copies a region of texels from src_image to dst_image with CPU code, either
// row by row with std::memcpy or pixel by pixel when an sRGB conversion is
// required.
//
// dst_origin / src_origin give the first texel of the region in each image and
// size gives the region extent (x texels per row, y rows, z slices).
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when the two formats differ in
// channel order or channel type, except for the UNORM_INT8 SRGBA <-> RGBA
// pair, which is converted on the fly. Returns HSA_STATUS_SUCCESS otherwise.
//
// NOTE(review): format support and a non-zero element size are verified with
// assert() only, so they are not enforced when asserts are compiled out.
hsa_status_t ImageManager::CopyImage(const Image& dst_image,
const Image& src_image,
const hsa_dim3_t& dst_origin,
const hsa_dim3_t& src_origin,
const hsa_dim3_t size) {
// Look up the per-format properties (capability and element size) of both
// images.
ImageProperty dst_image_prop = GetImageProperty(
dst_image.component, dst_image.desc.format, dst_image.desc.geometry);
assert(dst_image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
const size_t dst_element_size = dst_image_prop.element_size;
assert(dst_element_size != 0);
ImageProperty src_image_prop = GetImageProperty(
src_image.component, src_image.desc.format, src_image.desc.geometry);
assert(src_image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
const size_t src_element_size = src_image_prop.element_size;
assert(src_element_size != 0);
const hsa_ext_image_format_t src_format = src_image.desc.format;
const hsa_ext_image_channel_order32_t src_order = src_format.channel_order;
const hsa_ext_image_channel_type32_t src_type = src_format.channel_type;
const hsa_ext_image_format_t dst_format = dst_image.desc.format;
const hsa_ext_image_channel_order32_t dst_order = dst_format.channel_order;
const hsa_ext_image_channel_type32_t dst_type = dst_format.channel_type;
// At most one of these flags ends up set; both stay false for a plain copy.
bool linear_to_standard_rgb = false;
bool standard_to_linear_rgb = false;
if ((src_order != dst_order) || (src_type != dst_type)) {
// Source and destination format must be the same, except for
// SRGBA <--> RGBA images.
if ((src_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8) &&
(dst_type == HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8)) {
if ((src_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA) &&
(dst_order == HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA)) {
standard_to_linear_rgb = true;
} else if ((src_order == HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA) &&
(dst_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA)) {
linear_to_standard_rgb = true;
} else {
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
} else {
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
}
// Source and destination format should be the same so the element size
// should be same too.
const size_t element_size = src_element_size;
// row_pitch and slice_pitch in bytes.
// Each pitch is the larger of the value stored in the image and the tightly
// packed size of the copied region. For 1DA geometry the packed slice size is
// a single row rather than size.y rows.
const size_t dst_row_pitch =
std::max(dst_image.row_pitch, size.x * element_size);
const size_t dst_slice_pitch = std::max(
dst_image.slice_pitch,
dst_row_pitch *
(dst_image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ? 1 : size.y));
const size_t src_row_pitch =
std::max(src_image.row_pitch, size.x * element_size);
const size_t src_slice_pitch = std::max(
src_image.slice_pitch,
src_row_pitch *
(src_image.desc.geometry == HSA_EXT_IMAGE_GEOMETRY_1DA ? 1 : size.y));
// The offsets and the row length start out in texels and are scaled to bytes
// below.
size_t src_offset = src_origin.x;
size_t dst_offset = dst_origin.x;
size_t copy_size = size.x;
// Calculate the source offset in bytes.
src_offset *= element_size;
src_offset += src_row_pitch * src_origin.y;
src_offset += src_slice_pitch * src_origin.z;
// Calculate the destination offset in bytes.
dst_offset *= element_size;
dst_offset += dst_row_pitch * dst_origin.y;
dst_offset += dst_slice_pitch * dst_origin.z;
// Number of bytes in one row of the copied region.
copy_size *= element_size;
// Get destination and source memory.
unsigned char* dst = static_cast<unsigned char*>(dst_image.data);
const unsigned char* src = static_cast<const unsigned char*>(src_image.data);
if (!linear_to_standard_rgb && !standard_to_linear_rgb) {
// Copy the memory by row.
for (size_t slice = 0; slice < size.z; ++slice) {
size_t src_offset_temp = src_offset + slice * src_slice_pitch;
size_t dst_offset_temp = dst_offset + slice * dst_slice_pitch;
for (size_t rows = 0; rows < size.y; ++rows) {
std::memcpy((dst + dst_offset_temp), (src + src_offset_temp),
copy_size);
src_offset_temp += src_row_pitch;
dst_offset_temp += dst_row_pitch;
}
}
} else {
// Copy per pixel between RGBA-SRGBA images.
// Each of R, G and B is normalized, run through the transfer function and
// converted back to 8 bits; alpha is copied through unchanged.
for (size_t slice = 0; slice < size.z; ++slice) {
size_t src_offset_temp = src_offset + slice * src_slice_pitch;
size_t dst_offset_temp = dst_offset + slice * dst_slice_pitch;
for (size_t rows = 0; rows < size.y; ++rows) {
const uint8_t* src_pixel = src + src_offset_temp;
uint8_t* dst_pixel = dst + dst_offset_temp;
if (linear_to_standard_rgb) {
for (size_t cols = 0; cols < size.x; ++cols) {
dst_pixel[0] =
Denormalize(LinearToStandardRGB(Normalize(src_pixel[0]))); // R
dst_pixel[1] =
Denormalize(LinearToStandardRGB(Normalize(src_pixel[1]))); // G
dst_pixel[2] =
Denormalize(LinearToStandardRGB(Normalize(src_pixel[2]))); // B
dst_pixel[3] = src_pixel[3]; // A
src_pixel += element_size;
dst_pixel += element_size;
}
} else {
assert(standard_to_linear_rgb);
for (size_t cols = 0; cols < size.x; ++cols) {
dst_pixel[0] =
Denormalize(StandardToLinearRGB(Normalize(src_pixel[0]))); // R
dst_pixel[1] =
Denormalize(StandardToLinearRGB(Normalize(src_pixel[1]))); // G
dst_pixel[2] =
Denormalize(StandardToLinearRGB(Normalize(src_pixel[2]))); // B
dst_pixel[3] = src_pixel[3]; // A
src_pixel += element_size;
dst_pixel += element_size;
}
}
src_offset_temp += src_row_pitch;
dst_offset_temp += dst_row_pitch;
}
}
}
return HSA_STATUS_SUCCESS;
}
// Converts a 32-bit float to a 16-bit half-float bit pattern.
//
// The conversion truncates the mantissa (no rounding) and handles the special
// ranges as follows:
//   - +/-0                        -> signed zero
//   - +/-Inf                      -> signed infinity (0x7c00)
//   - NaN with the quiet bit set  -> 0x7e00, other NaNs -> 0x7c01 (plus sign)
//   - exponent too large          -> clamped to the largest finite half (0x7bff)
//   - exponent below 2^-24        -> flushed to signed zero (this includes all
//                                    32-bit subnormals)
//   - 2^-24 <= |in| < 2^-14       -> half subnormal
//   - everything else             -> half normal
uint16_t ImageManager::FloatToHalf(float in) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "FloatToHalf expects a 32-bit float");

  // Copy the bits out with memcpy; reading the inactive member of a union is
  // not defined behaviour in C++.
  uint32_t bits = 0;
  std::memcpy(&bits, &in, sizeof(bits));

  const uint16_t sign_bit_16 = static_cast<uint16_t>((bits >> 16) & 0x8000);
  const uint32_t exp_32 = (bits >> 23) & 0xff;
  const uint32_t mantissa_32 = bits & 0x7fffff;

  if (exp_32 == 0 && mantissa_32 == 0) {
    // Zero.
    return sign_bit_16;
  }

  if (exp_32 == 0xff) {
    if (mantissa_32 == 0) {
      // Inf.
      return static_cast<uint16_t>(sign_bit_16 | 0x7c00);
    }
    if ((mantissa_32 & 0x400000) != 0) {
      // Quiet NaN.
      return static_cast<uint16_t>(sign_bit_16 | 0x7e00);
    }
    // Signal NaN.
    return static_cast<uint16_t>(sign_bit_16 | 0x7c01);
  }

  // Biased 32-bit exponents that delimit the ranges a half can represent.
  const uint32_t kMaxExpNormal = 0x477fe000 >> 23;     // 65504.
  const uint32_t kMinExpNormal = 0x38800000 >> 23;     // 2^-14;
  const uint32_t kMinExpSubnormal = 0x33800000 >> 23;  // 2^-24.

  if (exp_32 > kMaxExpNormal) {
    // Half overflow.
    // TODO: clamp it to max half float or +Inf.
    return static_cast<uint16_t>(sign_bit_16 | 0x7bff);
  }

  if (exp_32 < kMinExpSubnormal) {
    // Half underflow.
    return sign_bit_16;
  }

  if (exp_32 < kMinExpNormal) {
    // Half subnormal: make the implicit leading one explicit, then shift it
    // right by the distance between the value's exponent and 2^-14.
    return static_cast<uint16_t>(
        sign_bit_16 |
        ((0x0400 | (mantissa_32 >> 13)) >> (127 - exp_32 - 14)));
  }

  // Half normal: re-bias the exponent from 127 to 15 and keep the top ten
  // mantissa bits.
  return static_cast<uint16_t>(
      sign_bit_16 | (((exp_32 - 127 + 15) << 10) | (mantissa_32 >> 13)));
}
// Maps an 8-bit unsigned value onto the range [0.0, 1.0].
// The two end points are returned exactly; every other value is divided by
// UINT8_MAX and clamped to the range.
float ImageManager::Normalize(uint8_t u_val) {
  if (u_val == 0) {
    return 0.0f;
  }
  if (u_val == UINT8_MAX) {
    return 1.0f;
  }

  const float ratio =
      static_cast<float>(u_val) / static_cast<float>(UINT8_MAX);
  const float floored = std::max(ratio, 0.0f);
  return std::min(floored, 1.0f);
}
// Converts a normalized float back to an 8-bit unsigned value: scales by
// UINT8_MAX, rounds with lrintf and clamps the result to [0, UINT8_MAX].
uint8_t ImageManager::Denormalize(float f_val) {
  const unsigned long max_code = UINT8_MAX;

  const long rounded = lrintf(max_code * f_val);
  // Clamp the low end while still signed, so negatives become zero instead of
  // wrapping around in the unsigned conversion below.
  const long non_negative = std::max(rounded, 0l);

  return std::min(static_cast<unsigned long>(non_negative), max_code);
}
// Converts one sRGB-encoded channel value to linear RGB.
//
// Map SRGB value to RGB color space based on HSA Programmers Reference
// Manual version 1.0 Provisional, chapter 7.1.4.1.2 Standard RGB (s-Form).
//
// Values at or below 0.04045 use the linear segment (s / 12.92); larger values
// use the power segment ((s + 0.055) / 1.055) ^ 2.4. The input is not clamped.
// The arithmetic is done in double with double-precision constants, matching
// LinearToStandardRGB, and the result is narrowed to float on return.
float ImageManager::StandardToLinearRGB(float s_val) {
  const double encoded = static_cast<double>(s_val);

  const double linear = (encoded <= 0.04045)
                            ? (encoded / 12.92)
                            : std::pow((encoded + 0.055) / 1.055, 2.4);

  return static_cast<float>(linear);
}
// Converts one linear RGB channel value to its sRGB encoding.
//
// Map RGB value to SRGB color space based on HSA Programmers Reference
// Manual version 1.0 Provisional, chapter 7.1.4.1.2 Standard RGB (s-Form).
//
// NaN is treated as 0. Values above 1 yield 1 and values below 0 yield 0.
// Values below 0.0031308 use the linear segment (12.92 * l); the rest use
// 1.055 * l^(5/12) - 0.055.
float ImageManager::LinearToStandardRGB(float l_val) {
  double linear = static_cast<double>(l_val);

#if (defined(WIN32) || defined(_WIN32))
  const bool not_a_number = (_isnan(linear) != 0);
#else
  const bool not_a_number = std::isnan(linear);
#endif
  if (not_a_number) {
    linear = 0.0;
  }

  if (linear > 1.0) {
    return 1.0f;
  }
  if (linear < 0.0) {
    return 0.0f;
  }
  if (linear < 0.0031308) {
    return static_cast<float>(12.92 * linear);
  }

  const double encoded = (1.055 * pow(linear, 5.0 / 12.0)) - 0.055;
  return static_cast<float>(encoded);
}
// Converts a fill pattern into the in-memory encoding of one image element.
//
//   format       channel order and channel type of the target image.
//   pattern_in   four 32-bit components indexed as R, G, B, A (kR..kA below).
//                They are read as float, int32_t or uint32_t depending on the
//                channel type.
//   pattern_out  receives the encoded element; only the channels present in
//                the channel order are written.
//
// For the sRGB channel orders the R, G and B components are first passed
// through LinearToStandardRGB; alpha is left as is.
//
// NOTE(review): an unknown channel order or channel type only trips an
// assert; with asserts compiled out nothing is written for it.
void ImageManager::FormatPattern(const hsa_ext_image_format_t& format,
const void* pattern_in, void* pattern_out) {
// Positions of the components inside pattern_in.
const int kR = 0;
const int kG = 1;
const int kB = 2;
const int kA = 3;
// index[c] is the pattern_in component that goes into output channel c;
// num_channel is how many output channels the order has.
int index[4] = {0};
int num_channel = 0;
switch (format.channel_order) {
case HSA_EXT_IMAGE_CHANNEL_ORDER_A:
index[0] = kA;
num_channel = 1;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_R:
case HSA_EXT_IMAGE_CHANNEL_ORDER_RX:
index[0] = kR;
num_channel = 1;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_RG:
case HSA_EXT_IMAGE_CHANNEL_ORDER_RGX:
index[0] = kR;
index[1] = kG;
num_channel = 2;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_RA:
index[0] = kR;
index[1] = kA;
num_channel = 2;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_RGB:
case HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX:
case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB:
case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX:
index[0] = kR;
index[1] = kG;
index[2] = kB;
num_channel = 3;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA:
case HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA:
index[0] = kR;
index[1] = kG;
index[2] = kB;
index[3] = kA;
num_channel = 4;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA:
case HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA:
index[0] = kB;
index[1] = kG;
index[2] = kR;
index[3] = kA;
num_channel = 4;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB:
index[0] = kA;
index[1] = kR;
index[2] = kG;
index[3] = kB;
num_channel = 4;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR:
index[0] = kA;
index[1] = kB;
index[2] = kG;
index[3] = kR;
num_channel = 4;
break;
case HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY:
case HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE:
case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH:
case HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL:
index[0] = kR;
num_channel = 1;
break;
default:
assert(false && "Should not reach here.");
break;
}
// Three typed views of the same input pattern; which one is read depends on
// the channel type handled below.
const float* pattern_in_f = NULL;
const int32_t* pattern_in_i32 = NULL;
const uint32_t* pattern_in_ui32 = NULL;
// Scratch copy used when the pattern must be sRGB-encoded first.
float new_pattern_in_f[4] = { 0 };
if ((format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB) ||
(format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX) ||
(format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA) ||
(format.channel_order == HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA)) {
// sRGB orders: encode R, G and B, pass alpha through, and read the float
// view from the scratch copy from here on. All four input floats are read
// regardless of the channel count.
// NOTE(review): the integer views stay NULL on this path, so an sRGB order
// combined with a SIGNED_INT*/UNSIGNED_INT* channel type would dereference
// NULL below. Presumably sRGB orders are only paired with normalized types;
// confirm against the callers.
pattern_in_f = reinterpret_cast<const float*>(pattern_in);
new_pattern_in_f[0] = LinearToStandardRGB(pattern_in_f[0]);
new_pattern_in_f[1] = LinearToStandardRGB(pattern_in_f[1]);
new_pattern_in_f[2] = LinearToStandardRGB(pattern_in_f[2]);
new_pattern_in_f[3] = pattern_in_f[3];
pattern_in_f = reinterpret_cast<const float*>(new_pattern_in_f);
} else {
pattern_in_f = reinterpret_cast<const float*>(pattern_in);
pattern_in_i32 = reinterpret_cast<const int32_t*>(pattern_in);
pattern_in_ui32 = reinterpret_cast<const uint32_t*>(pattern_in);
}
// Encode one output channel per iteration. The packed SHORT_* types write all
// of their fields in one go and return from the first iteration.
for (int c = 0; c < num_channel; ++c) {
switch (format.channel_type) {
// Signed normalized: scale by the type's maximum, round, then clamp to
// [-max - 1, max].
case HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8: {
int8_t* pattern_out_i8 = reinterpret_cast<int8_t*>(pattern_out);
const long kScale = INT8_MAX;
const long conv = lrintf(kScale * pattern_in_f[index[c]]);
pattern_out_i8[c] = std::min(std::max(conv, -kScale - 1l), kScale);
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16: {
int16_t* pattern_out_i16 = reinterpret_cast<int16_t*>(pattern_out);
const long kScale = INT16_MAX;
const long conv = lrintf(kScale * pattern_in_f[index[c]]);
pattern_out_i16[c] = std::min(std::max(conv, -kScale - 1l), kScale);
} break;
// Unsigned normalized: scale by the type's maximum, round, then clamp to
// [0, max].
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8: {
uint8_t* pattern_out_ui8 = reinterpret_cast<uint8_t*>(pattern_out);
const unsigned long kScale = UINT8_MAX;
const long conv = lrintf(kScale * pattern_in_f[index[c]]);
pattern_out_ui8[c] =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16: {
uint16_t* pattern_out_ui16 = reinterpret_cast<uint16_t*>(pattern_out);
const unsigned long kScale = UINT16_MAX;
const long conv = lrintf(kScale * pattern_in_f[index[c]]);
pattern_out_ui16[c] =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24: {
// NOTE(review): the value is stored through a 24-bit bit-field, so the
// exact bytes written depend on the compiler's bit-field layout, and
// indexing by c advances by sizeof(Order24) per channel.
typedef struct Order24 { uint32_t r : 24; } Order24;
Order24* pattern_out_u24 = reinterpret_cast<Order24*>(pattern_out);
const unsigned long kScale = 0xffffff;
const long conv = lrintf(kScale * pattern_in_f[index[c]]);
pattern_out_u24[c].r =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
} break;
// Packed types: the first three output channels are written into the r, g
// and b bit-fields of a single struct, then the function returns (the
// trailing break is never reached).
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555: {
typedef struct Order555 {
uint32_t b : 5;
uint32_t g : 5;
uint32_t r : 5;
} Order555;
Order555* pattern_out_u555 = reinterpret_cast<Order555*>(pattern_out);
const unsigned long kScale = 0x1f;
long conv = lrintf(kScale * pattern_in_f[index[0]]);
pattern_out_u555->r =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
conv = lrintf(kScale * pattern_in_f[index[1]]);
pattern_out_u555->g =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
conv = lrintf(kScale * pattern_in_f[index[2]]);
pattern_out_u555->b =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
return;
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565: {
typedef struct Order565 {
uint32_t b : 5;
uint32_t g : 6;
uint32_t r : 5;
} Order565;
Order565* pattern_out_u565 = reinterpret_cast<Order565*>(pattern_out);
// The scale changes per field: 5 bits for r and b, 6 bits for g.
unsigned long scale = 0x1f;
long conv = lrintf(scale * pattern_in_f[index[0]]);
pattern_out_u565->r =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), scale);
scale = 0x3f;
conv = lrintf(scale * pattern_in_f[index[1]]);
pattern_out_u565->g =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), scale);
scale = 0x1f;
conv = lrintf(scale * pattern_in_f[index[2]]);
pattern_out_u565->b =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), scale);
return;
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010: {
typedef struct Order101010 {
uint32_t b : 10;
uint32_t g : 10;
uint32_t r : 10;
} Order101010;
Order101010* pattern_out_u101010 =
reinterpret_cast<Order101010*>(pattern_out);
const unsigned long kScale = 0x3ff;
long conv = lrintf(kScale * pattern_in_f[index[0]]);
pattern_out_u101010->r =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
conv = lrintf(kScale * pattern_in_f[index[1]]);
pattern_out_u101010->g =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
conv = lrintf(kScale * pattern_in_f[index[2]]);
pattern_out_u101010->b =
std::min(static_cast<unsigned long>(std::max(conv, 0l)), kScale);
return;
} break;
// Integer types: the 32-bit input component is assigned directly, which
// narrows it by plain conversion (no clamping) for the 8- and 16-bit types.
case HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8: {
int8_t* pattern_out_i8 = reinterpret_cast<int8_t*>(pattern_out);
pattern_out_i8[c] = pattern_in_i32[index[c]];
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16: {
int16_t* pattern_out_i16 = reinterpret_cast<int16_t*>(pattern_out);
pattern_out_i16[c] = pattern_in_i32[index[c]];
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32: {
int32_t* pattern_out_i32 = reinterpret_cast<int32_t*>(pattern_out);
pattern_out_i32[c] = pattern_in_i32[index[c]];
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8: {
uint8_t* pattern_out_ui8 = reinterpret_cast<uint8_t*>(pattern_out);
pattern_out_ui8[c] = pattern_in_ui32[index[c]];
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16: {
uint16_t* pattern_out_ui16 = reinterpret_cast<uint16_t*>(pattern_out);
pattern_out_ui16[c] = pattern_in_ui32[index[c]];
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32: {
uint32_t* pattern_out_ui32 = reinterpret_cast<uint32_t*>(pattern_out);
pattern_out_ui32[c] = pattern_in_ui32[index[c]];
} break;
case HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT: {
// Each component is encoded as a 16-bit half float by FloatToHalf.
uint16_t* pattern_out_ui16 = reinterpret_cast<uint16_t*>(pattern_out);
pattern_out_ui16[c] = FloatToHalf(pattern_in_f[index[c]]);
break;
}
case HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT: {
float* pattern_out_f = reinterpret_cast<float*>(pattern_out);
pattern_out_f[c] = pattern_in_f[index[c]];
} break;
default:
assert(false && "Should not reach here.");
break;
}
}
}
// Fills a region of an image with one pattern value using CPU writes.
//
// `pattern` is handed to FormatPattern, which encodes it for the image's
// format; the encoded element is then replicated over every texel of
// `region` (offset plus range), stepping through rows and slices with the
// image's own row_pitch and slice_pitch.
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT if `pattern` or the image data
// pointer is null, or if the format's element size is zero or does not fit
// the local staging buffer; HSA_STATUS_SUCCESS otherwise.
hsa_status_t ImageManager::FillImage(const Image& image, const void* pattern,
                                     const hsa_ext_image_region_t& region) {
  // Both pointers are dereferenced below, so reject null up front.
  if (pattern == nullptr || image.data == nullptr) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const hsa_dim3_t origin = region.offset;
  const hsa_dim3_t size = region.range;

  ImageProperty image_prop =
      GetImageProperty(image.component, image.desc.format, image.desc.geometry);
  assert(image_prop.cap != HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED);
  const size_t element_size = image_prop.element_size;
  assert(element_size != 0);

  // Staging buffer for one encoded element (four 32-bit channels at most).
  char fill_value[4 * sizeof(int)] = {0};

  // The asserts above vanish when asserts are compiled out; enforce the same
  // conditions at run time so the copy below can never read past fill_value.
  if (element_size == 0 || element_size > sizeof(fill_value)) {
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  const size_t row_pitch = image.row_pitch;
  const size_t slice_pitch = image.slice_pitch;

  // Map memory.
  unsigned char* fill_mem = static_cast<unsigned char*>(image.data);

  FormatPattern(image.desc.format, pattern, fill_value);

  // Byte offset of the region's first texel.
  size_t offset = origin.x * element_size;
  offset += row_pitch * origin.y;
  offset += slice_pitch * origin.z;

  // Fill the image memory with the pattern, one texel at a time.
  for (size_t slice = 0; slice < size.z; ++slice) {
    size_t row_offset = offset + slice * slice_pitch;
    for (size_t row = 0; row < size.y; ++row) {
      size_t pix_offset = row_offset;
      for (size_t column = 0; column < size.x; ++column) {
        std::memcpy(fill_mem + pix_offset, fill_value, element_size);
        pix_offset += element_size;
      }
      row_offset += row_pitch;
    }
  }

  return HSA_STATUS_SUCCESS;
}
} // namespace image
} // namespace rocr