rocDecode resize sample implementation (#285)
* WIP: resize kernels * hipified kernels for scaling from cuda * updated videodecodeRGB sample for scaling * added stream parameter to kernels * add scale kernels using tex2d and NN * enable tex2D kernels * add NN resize kernels * fixed scaling kernels * fixed tex2D scaling kernel for UV scaling * minor formatting * address review comments --------- Co-authored-by: Aryan Salmanpour <aryan.salmanpour@amd.com>
This commit is contained in:
@@ -87,7 +87,9 @@ if(HIP_FOUND AND FFMPEG_FOUND AND ROCDECODE_FOUND)
|
||||
list(APPEND SOURCES ${PROJECT_SOURCE_DIR}
|
||||
videodecrgb.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../utils/rocvideodecode/roc_video_dec.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../utils/colorspace_kernels.cpp)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../utils/colorspace_kernels.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../utils/resize_kernels.cpp)
|
||||
|
||||
add_executable(${PROJECT_NAME} ${SOURCES})
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17")
|
||||
target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST})
|
||||
|
||||
@@ -40,6 +40,7 @@ THE SOFTWARE.
|
||||
#include "video_demuxer.h"
|
||||
#include "roc_video_dec.h"
|
||||
#include "colorspace_kernels.h"
|
||||
#include "resize_kernels.h"
|
||||
|
||||
FILE *fpOut = nullptr;
|
||||
enum OutputFormatEnum {
|
||||
@@ -158,34 +159,70 @@ std::queue<uint8_t*> frame_queue[frame_buffers_size];
|
||||
std::mutex mutex[frame_buffers_size];
|
||||
std::condition_variable cv[frame_buffers_size];
|
||||
|
||||
void ColorSpaceConversionThread(std::atomic<bool>& continue_processing, bool convert_to_rgb, OutputSurfaceInfo **surf_info, OutputFormatEnum e_output_format,
|
||||
uint8_t *p_rgb_dev_mem, bool dump_output_frames, std::string &output_file_path, RocVideoDecoder &viddec) {
|
||||
size_t rgb_image_size;
|
||||
void ColorSpaceConversionThread(std::atomic<bool>& continue_processing, bool convert_to_rgb, Dim *p_resize_dim, OutputSurfaceInfo **surf_info, OutputSurfaceInfo **res_surf_info,
|
||||
OutputFormatEnum e_output_format, uint8_t *p_rgb_dev_mem, uint8_t *p_resize_dev_mem, bool dump_output_frames, std::string &output_file_path, RocVideoDecoder &viddec) {
|
||||
|
||||
size_t rgb_image_size, resize_image_size;
|
||||
hipError_t hip_status = hipSuccess;
|
||||
int current_frame_index = 0;
|
||||
uint8_t *frame;
|
||||
|
||||
while (continue_processing || !frame_queue[current_frame_index].empty()) {
|
||||
OutputSurfaceInfo *p_surf_info;
|
||||
uint8_t *out_frame;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex[current_frame_index]);
|
||||
cv[current_frame_index].wait(lock, [&] {return !frame_queue[current_frame_index].empty() || !continue_processing;});
|
||||
if (!continue_processing && frame_queue[current_frame_index].empty()) {
|
||||
break;
|
||||
}
|
||||
p_surf_info = *surf_info;
|
||||
// Get the current frame at the curren_buffer index for processing
|
||||
frame = frame_queue[current_frame_index].front();
|
||||
frame_queue[current_frame_index].pop();
|
||||
out_frame = frame;
|
||||
}
|
||||
if (p_resize_dim->w && p_resize_dim->h && *res_surf_info) {
|
||||
// check if the resize dims are different from output dims
|
||||
// resize is needed since output dims are different from resize dims
|
||||
// TODO:: the below code assumes NV12/P016 for decoded output surface. Modify to take other surface formats in future
|
||||
if (((*surf_info)->output_width != p_resize_dim->w) || ((*surf_info)->output_height != p_resize_dim->h)) {
|
||||
resize_image_size = p_resize_dim->w * (p_resize_dim->h + (p_resize_dim->h >> 1)) * (*surf_info)->bytes_per_pixel;
|
||||
if (p_resize_dev_mem == nullptr && resize_image_size > 0) {
|
||||
hip_status = hipMalloc(&p_resize_dev_mem, resize_image_size);
|
||||
if (hip_status != hipSuccess) {
|
||||
std::cerr << "ERROR: hipMalloc failed to allocate the device memory for the output!" << hip_status << std::endl;
|
||||
return;
|
||||
}
|
||||
}
|
||||
// call resize kernel
|
||||
if ((*surf_info)->bytes_per_pixel == 2) {
|
||||
ResizeP016(p_resize_dev_mem, p_resize_dim->w * 2, p_resize_dim->w, p_resize_dim->h, frame, (*surf_info)->output_pitch, (*surf_info)->output_width,
|
||||
(*surf_info)->output_height, (frame + (*surf_info)->output_vstride * (*surf_info)->output_pitch), nullptr, viddec.GetStream());
|
||||
} else {
|
||||
ResizeNv12(p_resize_dev_mem, p_resize_dim->w, p_resize_dim->w, p_resize_dim->h, frame, (*surf_info)->output_pitch, (*surf_info)->output_width,
|
||||
(*surf_info)->output_height, (frame + (*surf_info)->output_vstride * (*surf_info)->output_pitch), nullptr, viddec.GetStream());
|
||||
}
|
||||
(*res_surf_info)->output_width = p_resize_dim->w;
|
||||
(*res_surf_info)->output_height = p_resize_dim->h;
|
||||
(*res_surf_info)->output_pitch = p_resize_dim->w * (*surf_info)->bytes_per_pixel;
|
||||
(*res_surf_info)->output_vstride = p_resize_dim->h;
|
||||
(*res_surf_info)->output_surface_size_in_bytes = (*res_surf_info)->output_pitch * (p_resize_dim->h + (p_resize_dim->h >> 1));
|
||||
(*res_surf_info)->mem_type = OUT_SURFACE_MEM_DEV_COPIED;
|
||||
p_surf_info = *res_surf_info;
|
||||
out_frame = p_resize_dev_mem;
|
||||
}
|
||||
}
|
||||
|
||||
if (convert_to_rgb) {
|
||||
int rgb_width;
|
||||
if ((*surf_info)->bit_depth == 8) {
|
||||
rgb_width = ((*surf_info)->output_width + 1) & ~1; // has to be a multiple of 2 for hip colorconvert kernels
|
||||
rgb_image_size = ((e_output_format == bgr) || (e_output_format == rgb)) ? rgb_width * (*surf_info)->output_height * 3 : rgb_width * (*surf_info)->output_height * 4;
|
||||
if (p_surf_info->bit_depth == 8) {
|
||||
rgb_width = (p_surf_info->output_width + 1) & ~1; // has to be a multiple of 2 for hip colorconvert kernels
|
||||
rgb_image_size = ((e_output_format == bgr) || (e_output_format == rgb)) ? rgb_width * p_surf_info->output_height * 3 : rgb_width * p_surf_info->output_height * 4;
|
||||
} else {
|
||||
rgb_width = ((*surf_info)->output_width + 1) & ~1;
|
||||
rgb_image_size = ((e_output_format == bgr) || (e_output_format == rgb)) ? rgb_width * (*surf_info)->output_height * 3 : ((e_output_format == bgr48) || (e_output_format == rgb48)) ?
|
||||
rgb_width * (*surf_info)->output_height * 6 : rgb_width * (*surf_info)->output_height * 8;
|
||||
rgb_width = (p_surf_info->output_width + 1) & ~1;
|
||||
rgb_image_size = ((e_output_format == bgr) || (e_output_format == rgb)) ? rgb_width * p_surf_info->output_height * 3 : ((e_output_format == bgr48) || (e_output_format == rgb48)) ?
|
||||
rgb_width * p_surf_info->output_height * 6 : rgb_width * p_surf_info->output_height * 8;
|
||||
}
|
||||
if (p_rgb_dev_mem == nullptr) {
|
||||
hip_status = hipMalloc(&p_rgb_dev_mem, rgb_image_size);
|
||||
@@ -194,18 +231,17 @@ void ColorSpaceConversionThread(std::atomic<bool>& continue_processing, bool con
|
||||
return;
|
||||
}
|
||||
}
|
||||
ColorConvertYUV2RGB(frame, *surf_info, p_rgb_dev_mem, e_output_format, viddec.GetStream());
|
||||
ColorConvertYUV2RGB(out_frame, p_surf_info, p_rgb_dev_mem, e_output_format, viddec.GetStream());
|
||||
}
|
||||
if (dump_output_frames) {
|
||||
if (convert_to_rgb)
|
||||
DumpRGBImage(output_file_path, p_rgb_dev_mem, *surf_info, rgb_image_size);
|
||||
DumpRGBImage(output_file_path, p_rgb_dev_mem, p_surf_info, rgb_image_size);
|
||||
else
|
||||
viddec.SaveFrameToFile(output_file_path, frame, *surf_info);
|
||||
viddec.SaveFrameToFile(output_file_path, out_frame, p_surf_info);
|
||||
}
|
||||
|
||||
cv[current_frame_index].notify_one();
|
||||
current_frame_index = (current_frame_index + 1) % frame_buffers_size;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -216,11 +252,13 @@ int main(int argc, char **argv) {
|
||||
bool convert_to_rgb = false;
|
||||
int device_id = 0;
|
||||
Rect crop_rect = {};
|
||||
Dim resize_dim = {};
|
||||
Rect *p_crop_rect = nullptr;
|
||||
size_t rgb_image_size;
|
||||
uint32_t rgb_image_stride;
|
||||
hipError_t hip_status = hipSuccess;
|
||||
uint8_t *p_rgb_dev_mem = nullptr;
|
||||
uint8_t *p_resize_dev_mem = nullptr;
|
||||
OutputSurfaceMemoryType mem_type = OUT_SURFACE_MEM_DEV_INTERNAL;
|
||||
OutputFormatEnum e_output_format = native;
|
||||
int rgb_width;
|
||||
@@ -268,6 +306,16 @@ int main(int argc, char **argv) {
|
||||
p_crop_rect = &crop_rect;
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-resize")) {
|
||||
if (++i == argc || 2 != sscanf(argv[i], "%dx%d", &resize_dim.w, &resize_dim.h)) {
|
||||
ShowHelpAndExit("-resize");
|
||||
}
|
||||
if (resize_dim.w % 2 == 1 || resize_dim.h % 2 == 1) {
|
||||
std::cout << "Resizing dimensions must have width and height of even numbers" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-of")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-of");
|
||||
@@ -302,13 +350,13 @@ int main(int argc, char **argv) {
|
||||
uint8_t *p_frame = nullptr;
|
||||
int64_t pts = 0;
|
||||
OutputSurfaceInfo *surf_info;
|
||||
OutputSurfaceInfo *resize_surf_info = nullptr;
|
||||
uint32_t width, height;
|
||||
double total_dec_time = 0;
|
||||
convert_to_rgb = e_output_format != native;
|
||||
|
||||
std::atomic<bool> continue_processing(true);
|
||||
std::thread color_space_conversion_thread(ColorSpaceConversionThread, std::ref(continue_processing), std::ref(convert_to_rgb), &surf_info, std::ref(e_output_format),
|
||||
std::ref(p_rgb_dev_mem), std::ref(dump_output_frames), std::ref(output_file_path), std::ref(viddec));
|
||||
std::thread color_space_conversion_thread(ColorSpaceConversionThread, std::ref(continue_processing), std::ref(convert_to_rgb), &resize_dim, &surf_info, &resize_surf_info, std::ref(e_output_format),
|
||||
std::ref(p_rgb_dev_mem), std::ref(p_resize_dev_mem), std::ref(dump_output_frames), std::ref(output_file_path), std::ref(viddec));
|
||||
|
||||
auto startTime = std::chrono::high_resolution_clock::now();
|
||||
do {
|
||||
@@ -318,6 +366,10 @@ int main(int argc, char **argv) {
|
||||
std::cerr << "Error: Failed to get Output Image Info!" << std::endl;
|
||||
break;
|
||||
}
|
||||
if (resize_dim.w && resize_dim.h && !resize_surf_info) {
|
||||
resize_surf_info = new OutputSurfaceInfo;
|
||||
memcpy(resize_surf_info, surf_info, sizeof(OutputSurfaceInfo));
|
||||
}
|
||||
|
||||
int last_index = 0;
|
||||
for (int i = 0; i < n_frames_returned; i++) {
|
||||
@@ -386,6 +438,9 @@ int main(int argc, char **argv) {
|
||||
std::cout << info_message << total_dec_time / n_frame << std::endl;
|
||||
std::cout << "info: avg FPS: " << (n_frame / total_dec_time) * 1000 << std::endl;
|
||||
}
|
||||
if (resize_surf_info != nullptr) {
|
||||
delete resize_surf_info;
|
||||
}
|
||||
} catch (const std::exception &ex) {
|
||||
std::cout << ex.what() << std::endl;
|
||||
exit(1);
|
||||
|
||||
@@ -0,0 +1,328 @@
|
||||
/*
|
||||
Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "resize_kernels.h"
|
||||
#include "roc_video_dec.h"
|
||||
|
||||
#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT
|
||||
/**
|
||||
* @brief low level HIP kernel for Resize using tex2d
|
||||
*
|
||||
* @tparam YuvUnitx2
|
||||
* @param tex_y - text2D object Y pointer
|
||||
* @param tex_uv - text2D object UV pointer
|
||||
* @param p_dst - dst Y pointer
|
||||
* @param p_dst_uv - dst UV pointer
|
||||
* @param pitch - dst pitch
|
||||
* @param width - dst width
|
||||
* @param height - dst height
|
||||
* @param fx_scale - xscale
|
||||
* @param fy_scale - yscale
|
||||
* @return
|
||||
*/
|
||||
|
||||
template<typename YuvUnitx2>
|
||||
static __global__ void ResizeHip(hipTextureObject_t tex_y, hipTextureObject_t tex_uv,
|
||||
uint8_t *p_dst, uint8_t *p_dst_uv, int pitch, int width, int height,
|
||||
float fx_scale, float fy_scale)
|
||||
{
|
||||
int ix = blockIdx.x * blockDim.x + threadIdx.x,
|
||||
iy = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (ix >= width / 2 || iy >= height / 2) {
|
||||
return;
|
||||
}
|
||||
|
||||
int x = ix * 2, y = iy * 2;
|
||||
typedef decltype(YuvUnitx2::x) YuvUnit;
|
||||
const int max_yuv_value = (1 << (sizeof(YuvUnit) * 8)) - 1;
|
||||
*(YuvUnitx2 *)(p_dst + y * pitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
|
||||
(YuvUnit)(tex2D<float>(tex_y, x * fx_scale, y * fy_scale) * max_yuv_value),
|
||||
(YuvUnit)(tex2D<float>(tex_y, (x + 1) * fx_scale, y * fy_scale) * max_yuv_value)
|
||||
};
|
||||
y++;
|
||||
*(YuvUnitx2 *)(p_dst + y * pitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
|
||||
(YuvUnit)(tex2D<float>(tex_y, x * fx_scale, y * fy_scale) * max_yuv_value),
|
||||
(YuvUnit)(tex2D<float>(tex_y, (x + 1) * fx_scale, y * fy_scale) * max_yuv_value)
|
||||
};
|
||||
float2 uv = tex2D<float2>(tex_uv, ix * fx_scale, iy * fy_scale + 0.5f);
|
||||
*(YuvUnitx2 *)(p_dst_uv + iy * pitch + ix * 2 * sizeof(YuvUnit)) = YuvUnitx2{ (YuvUnit)(uv.x * max_yuv_value), (YuvUnit)(uv.y * max_yuv_value) };
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief low level HIP kernel for Resize using nearest neighbor interpolation
|
||||
*
|
||||
* @tparam YuvUnitx2
|
||||
* @param p_src - src Y pointer
|
||||
* @param p_src_uv - src UV pointer
|
||||
* @param src_pitch - src pitch
|
||||
* @param p_dst - dst Y pointer
|
||||
* @param p_dst_uv - dst UV pointer
|
||||
* @param pitch - dst pitch
|
||||
* @param width - dst width
|
||||
* @param height - dst height
|
||||
* @param fx_scale - xscale
|
||||
* @param fy_scale - yscale
|
||||
* @return
|
||||
*/
|
||||
|
||||
template<typename YuvUnitx2>
|
||||
static __global__ void ResizeHip(uint8_t *p_src, uint8_t *p_src_uv, int src_pitch,
|
||||
uint8_t *p_dst, uint8_t *p_dst_uv, int pitch, int width, int height, float fx_scale, float fy_scale) {
|
||||
|
||||
int ix = blockIdx.x * blockDim.x + threadIdx.x,
|
||||
iy = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (ix >= width / 2 || iy >= height / 2) {
|
||||
return;
|
||||
}
|
||||
|
||||
int x = ix * 2, y = iy * 2;
|
||||
typedef decltype(YuvUnitx2::x) YuvUnit;
|
||||
uint8_t *p_src_y = p_src + src_pitch * static_cast<uint32_t>(fmaf(y, fy_scale, 0.5 * fy_scale));
|
||||
*(YuvUnitx2 *)(p_dst + y * pitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
|
||||
*(YuvUnit *)(p_src_y + static_cast<uint>(fmaf(x, fx_scale, 0.5 * fx_scale)) * sizeof(YuvUnit)),
|
||||
*(YuvUnit *)(p_src_y + static_cast<uint>(fmaf(x + 1, fx_scale, 0.5 * fx_scale) * sizeof(YuvUnit)))
|
||||
};
|
||||
y++;
|
||||
p_src_y = p_src + src_pitch * static_cast<uint32_t>(fmaf(y, fy_scale, 0.5 * fy_scale));
|
||||
*(YuvUnitx2 *)(p_dst + y * pitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
|
||||
*(YuvUnit *)(p_src_y + static_cast<uint>(fmaf(x, fx_scale, 0.5 * fx_scale)) * sizeof(YuvUnit)),
|
||||
*(YuvUnit *)(p_src_y + static_cast<uint>(fmaf(x + 1, fx_scale, 0.5 * fx_scale)) * sizeof(YuvUnit))
|
||||
};
|
||||
YuvUnit *p_uv = (YuvUnit *) (p_src_uv + static_cast<uint>(fmaf(ix, fx_scale, fx_scale * 0.5)) * sizeof(YuvUnit) * 2 +
|
||||
src_pitch * static_cast<uint>(fmaf(iy, fy_scale, 0.5 * fy_scale)));
|
||||
*(YuvUnitx2 *)(p_dst_uv + iy * pitch + ix * 2 * sizeof(YuvUnit)) = YuvUnitx2{ (YuvUnit)p_uv[0], (YuvUnit)p_uv[1] };
|
||||
}
|
||||
|
||||
|
||||
template <typename YuvUnitx2>
|
||||
static void Resize(unsigned char *p_dst, unsigned char* p_dst_uv, int dst_pitch, int dst_width, int dst_height,
|
||||
unsigned char *p_src, unsigned char *p_src_uv, int src_pitch, int src_width, int src_height, hipStream_t hip_stream) {
|
||||
#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT
|
||||
hipResourceDesc res_desc = {};
|
||||
res_desc.resType = hipResourceTypePitch2D;
|
||||
res_desc.res.pitch2D.devPtr = p_src;
|
||||
res_desc.res.pitch2D.desc = hipCreateChannelDesc<decltype(YuvUnitx2::x)>();
|
||||
res_desc.res.pitch2D.width = src_width;
|
||||
res_desc.res.pitch2D.height = src_height;
|
||||
res_desc.res.pitch2D.pitchInBytes = src_pitch;
|
||||
|
||||
hipTextureDesc tex_desc = {};
|
||||
tex_desc.filterMode = hipFilterModeLinear;
|
||||
tex_desc.readMode = hipReadModeNormalizedFloat;
|
||||
|
||||
hipTextureObject_t tex_y=0;
|
||||
HIP_API_CALL(hipCreateTextureObject(&tex_y, &res_desc, &tex_desc, NULL));
|
||||
|
||||
res_desc.res.pitch2D.devPtr = p_src_uv;
|
||||
res_desc.res.pitch2D.desc = hipCreateChannelDesc<YuvUnitx2>();
|
||||
res_desc.res.pitch2D.width = src_width >> 1;
|
||||
res_desc.res.pitch2D.height = src_height / 2;
|
||||
|
||||
hipTextureObject_t tex_uv=0;
|
||||
HIP_API_CALL(hipCreateTextureObject(&tex_uv, &res_desc, &tex_desc, NULL));
|
||||
|
||||
ResizeHip<YuvUnitx2> <<<dim3((dst_width + 31) / 32, (dst_height + 31) / 32), dim3(16, 16), 0, hip_stream >>>(tex_y, tex_uv, p_dst, p_dst_uv,
|
||||
dst_pitch, dst_width, dst_height, 1.0f * src_width / dst_width, 1.0f * src_height / dst_height);
|
||||
|
||||
HIP_API_CALL(hipDestroyTextureObject(tex_y));
|
||||
HIP_API_CALL(hipDestroyTextureObject(tex_uv));
|
||||
#else
|
||||
ResizeHip<YuvUnitx2> <<<dim3((dst_width + 31) / 32, (dst_height + 31) / 32), dim3(16, 16), 0, hip_stream >>>(p_src, p_src_uv, src_pitch, p_dst, p_dst_uv,
|
||||
dst_pitch, dst_width, dst_height, 1.0f * src_width / dst_width, 1.0f * src_height / dst_height);
|
||||
#endif
|
||||
}
|
||||
|
||||
void ResizeNv12(unsigned char *p_dst_nv12, int dst_pitch, int dst_width, int dst_height, unsigned char *p_src_nv12,
|
||||
int src_pitch, int src_width, int src_height, unsigned char* p_src_nv12_uv, unsigned char* p_dst_nv12_uv, hipStream_t hip_stream)
|
||||
{
|
||||
unsigned char* p_src_uv = p_src_nv12_uv ? p_src_nv12_uv : p_src_nv12 + (src_pitch*src_height);
|
||||
unsigned char* p_dst_uv = p_dst_nv12_uv ? p_dst_nv12_uv : p_dst_nv12 + (dst_pitch*dst_height);
|
||||
return Resize<uchar2>(p_dst_nv12, p_dst_uv, dst_pitch, dst_width, dst_height, p_src_nv12, p_src_uv, src_pitch, src_width, src_height, hip_stream);
|
||||
}
|
||||
|
||||
|
||||
void ResizeP016(unsigned char *p_dst_p016, int dst_pitch, int dst_width, int dst_height, unsigned char *p_src_p016,
|
||||
int src_pitch, int src_width, int src_height, unsigned char* p_src_p016_uv, unsigned char* p_dst_p016_uv, hipStream_t hip_stream)
|
||||
{
|
||||
unsigned char* p_src_uv = p_src_p016_uv ? p_src_p016_uv : p_src_p016 + (src_pitch*src_height);
|
||||
unsigned char* p_dst_uv = p_dst_p016_uv ? p_dst_p016_uv : p_dst_p016 + (dst_pitch*dst_height);
|
||||
return Resize<ushort2>(p_dst_p016, p_dst_uv, dst_pitch, dst_width, dst_height, p_src_p016, p_src_uv, src_pitch, src_width, src_height, hip_stream);
|
||||
}
|
||||
|
||||
#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT
|
||||
static __global__ void Scale_tex2D(hipTextureObject_t tex_src, uint8_t *p_dst, int pitch, int width,
|
||||
int height, float fx_scale, float fy_scale) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x,
|
||||
y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (x >= width || y >= height) {
|
||||
return;
|
||||
}
|
||||
|
||||
*(unsigned char*)(p_dst + (y * pitch) + x) = (unsigned char)(fminf((tex2D<float>(tex_src, x * fx_scale, y * fy_scale)) * 255.0f, 255.0f));
|
||||
}
|
||||
|
||||
static __global__ void Scale_UV_tex2D(hipTextureObject_t tex_src, uint8_t *p_dst, int pitch, int width,
|
||||
int height, float fx_scale, float fy_scale) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x,
|
||||
y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (x >= width || y >= height){
|
||||
return;
|
||||
}
|
||||
float2 uv = tex2D<float2>(tex_src, x * fx_scale, y * fy_scale);
|
||||
uchar2 dst_uv = uchar2{ (unsigned char)(fminf(uv.x * 255.0f, 255.0f)), (unsigned char)(fminf(uv.y * 255.0f, 255.0f)) };
|
||||
|
||||
*(uchar2*)(p_dst + (y * pitch) + 2 * x) = dst_uv;
|
||||
}
|
||||
#endif
|
||||
|
||||
static __global__ void Scale(uint8_t *p_src, int src_pitch, uint8_t *p_dst, int pitch, int width,
|
||||
int height, float fx_scale, float fy_scale) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x,
|
||||
y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (x >= width || y >= height){
|
||||
return;
|
||||
}
|
||||
|
||||
// do nearest neighbor interpolation
|
||||
uint8_t *p_src_xy = p_src + src_pitch * static_cast<uint>(fmaf(y, fy_scale, 0.5 * fy_scale)) + static_cast<uint>(fmaf(x, fx_scale, 0.5*fx_scale));
|
||||
*(uint8_t*)(p_dst + (y * pitch) + x) = *p_src_xy;
|
||||
}
|
||||
|
||||
static __global__ void Scale_UV(uint8_t *p_src, int src_pitch, uint8_t *p_dst, int pitch, int width,
|
||||
int height, float fx_scale, float fy_scale) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x,
|
||||
y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (x >= width || y >= height) {
|
||||
return;
|
||||
}
|
||||
// do nearest neighbor interpolation
|
||||
uint8_t *p_src_uv = p_src + src_pitch * static_cast<uint>(fmaf(y , fy_scale, 0.5 * fy_scale)) + static_cast<uint>(fmaf(x, fx_scale, 0.5 * fx_scale)) * 2;
|
||||
uchar2 dst_uv = uchar2{ p_src_uv[0], p_src_uv[1] };
|
||||
*(uchar2*)(p_dst + (y * pitch) + 2 * x) = dst_uv;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Resize a single plane of Y/U/V or UV interleaved (reserved for future)
|
||||
*
|
||||
* @param dp_dst - dest pointer
|
||||
* @param dst_pitch - Pitch of the dst plane
|
||||
* @param dst_width - Width of the dst plane
|
||||
* @param dst_height - Height of the dst plane
|
||||
* @param dp_src - source pointer
|
||||
* @param src_pitch - source pitch
|
||||
* @param src_width - source width
|
||||
* @param src_height - source height
|
||||
* @param b_resize_uv - to resize UV plance or not
|
||||
* @param hip_stream - Stream for launching the kernel
|
||||
*/
|
||||
void ResizeYUVHipLaunchKernel(uint8_t *dp_dst, int dst_pitch, int dst_width, int dst_height, uint8_t *dp_src, int src_pitch,
|
||||
int src_width, int src_height, bool b_resize_uv, hipStream_t hip_stream) {
|
||||
|
||||
#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT
|
||||
hipResourceDesc res_desc = {};
|
||||
res_desc.resType = hipResourceTypePitch2D;
|
||||
res_desc.res.pitch2D.devPtr = dp_src;
|
||||
res_desc.res.pitch2D.desc = b_resize_uv ? hipCreateChannelDesc<uchar2>() : hipCreateChannelDesc<unsigned char>();
|
||||
res_desc.res.pitch2D.width = src_width;
|
||||
res_desc.res.pitch2D.height = src_height;
|
||||
res_desc.res.pitch2D.pitchInBytes = src_pitch;
|
||||
|
||||
hipTextureDesc tex_desc = {};
|
||||
tex_desc.filterMode = hipFilterModeLinear;
|
||||
tex_desc.readMode = hipReadModeNormalizedFloat;
|
||||
|
||||
tex_desc.addressMode[0] = hipAddressModeClamp;
|
||||
tex_desc.addressMode[1] = hipAddressModeClamp;
|
||||
tex_desc.addressMode[2] = hipAddressModeClamp;
|
||||
|
||||
hipTextureObject_t tex_src = 0;
|
||||
HIP_API_CALL(hipCreateTextureObject(&tex_src, &res_desc, &tex_desc, NULL));
|
||||
|
||||
dim3 blockSize(16, 16, 1);
|
||||
dim3 gridSize(((uint32_t)dst_width + blockSize.x - 1) / blockSize.x, ((uint32_t)dst_height + blockSize.y - 1) / blockSize.y, 1);
|
||||
|
||||
if (b_resize_uv){
|
||||
Scale_UV_tex2D <<<gridSize, blockSize, 0, hip_stream >>>(tex_src, dp_dst,
|
||||
dst_pitch, dst_width, dst_height, 1.0f * src_width / dst_width, 1.0f * src_height / dst_height);
|
||||
}
|
||||
else{
|
||||
Scale_tex2D <<<gridSize, blockSize, 0, hip_stream >>>(tex_src, dp_dst,
|
||||
dst_pitch, dst_width, dst_height, 1.0f * src_width / dst_width, 1.0f * src_height / dst_height);
|
||||
}
|
||||
|
||||
HIP_API_CALL(hipGetLastError());
|
||||
HIP_API_CALL(hipDestroyTextureObject(tex_src));
|
||||
#else
|
||||
dim3 blockSize(16, 16, 1);
|
||||
dim3 gridSize(((uint32_t)dst_width + blockSize.x - 1) / blockSize.x, ((uint32_t)dst_height + blockSize.y - 1) / blockSize.y, 1);
|
||||
|
||||
if (b_resize_uv) {
|
||||
Scale_UV <<<gridSize, blockSize, 0, hip_stream >>>(dp_src, src_pitch, dp_dst,
|
||||
dst_pitch, dst_width, dst_height, 1.0f * src_width / dst_width, 1.0f * src_height / dst_height);
|
||||
}
|
||||
else {
|
||||
Scale <<<gridSize, blockSize, 0, hip_stream >>>(dp_src, src_pitch, dp_dst,
|
||||
dst_pitch, dst_width, dst_height, 1.0f * src_width / dst_width, 1.0f * src_height / dst_height);
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void ResizeYUV420(uint8_t *p_dst_y,
|
||||
uint8_t* p_dst_u,
|
||||
uint8_t* p_dst_v,
|
||||
int dst_pitch_y,
|
||||
int dst_pitch_uv,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
uint8_t *p_src_y,
|
||||
uint8_t* p_src_u,
|
||||
uint8_t* p_src_v,
|
||||
int src_pitch_y,
|
||||
int src_pitch_uv,
|
||||
int src_width,
|
||||
int src_height,
|
||||
bool b_nv12,
|
||||
hipStream_t hip_stream) {
|
||||
|
||||
int uv_width_dst = (dst_width + 1) >> 1;
|
||||
int uv_height_dst = (dst_width + 1) >> 1;
|
||||
int uv_width_src = (src_width + 1) >> 1;
|
||||
int uv_height_src = (src_height + 1) >> 1;
|
||||
|
||||
// Scale Y plane
|
||||
ResizeYUVHipLaunchKernel(p_dst_y, dst_pitch_y, dst_width, dst_height, p_src_y, src_pitch_y, src_width, src_height, 0, hip_stream);
|
||||
if (b_nv12) {
|
||||
ResizeYUVHipLaunchKernel(p_dst_u, dst_pitch_uv, uv_width_dst, uv_height_dst, p_src_u, src_pitch_uv, uv_width_src, uv_height_src, b_nv12, hip_stream);
|
||||
} else {
|
||||
ResizeYUVHipLaunchKernel(p_dst_u, dst_pitch_uv, uv_width_dst, uv_height_dst, p_src_u, src_pitch_uv, uv_width_src, uv_height_src, b_nv12, hip_stream);
|
||||
ResizeYUVHipLaunchKernel(p_dst_v, dst_pitch_uv, uv_width_dst, uv_height_dst, p_src_v, src_pitch_uv, uv_width_src, uv_height_src, b_nv12, hip_stream);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
/*
|
||||
Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function to resize both planes of an NV12 image
|
||||
*
|
||||
*
|
||||
* @param p_dst_nv12 - destination pointer Y plane
|
||||
* @param dst_pitch - destination pitch
|
||||
* @param dst_width - destination width
|
||||
* @param dst_height - destination height
|
||||
* @param p_src_nv12 - source pointer
|
||||
* @param src_pitch - source pitch
|
||||
* @param src_width - source width
|
||||
* @param src_height - source height
|
||||
* @param p_src_nv12_uv - source pointer of UV plane
|
||||
* @param hip_stream - Stream for launching the kernel
|
||||
*/
|
||||
void ResizeNv12(uint8_t *p_dst_nv12, int dst_pitch, int dst_width, int dst_height, uint8_t *p_src_nv12,
|
||||
int src_pitch, int src_width, int src_height, unsigned char* p_src_nv12_uv, unsigned char* p_dst_nv12_uv, hipStream_t hip_stream);
|
||||
|
||||
/**
|
||||
* @brief
|
||||
*
|
||||
* @param p_dst_p016
|
||||
* @param dst_pitch
|
||||
* @param dst_width
|
||||
* @param dst_height
|
||||
* @param p_src_p016
|
||||
* @param src_pitch
|
||||
* @param src_width
|
||||
* @param src_height
|
||||
* @param p_src_p016_uv
|
||||
* @param p_dst_p016_uv
|
||||
* @param hip_stream - Stream for launching the kernel
|
||||
*/
|
||||
void ResizeP016(uint8_t *p_dst_p016, int dst_pitch, int dst_width, int dst_height, uint8_t *p_src_p016, int src_pitch,
|
||||
int src_width, int src_height, unsigned char* p_src_p016_uv, unsigned char* p_dst_p016_uv, hipStream_t hip_stream);
|
||||
|
||||
/**
|
||||
* @brief Function to resize 420 YUV image
|
||||
*
|
||||
* @param p_dst_y - Destination Y plane pointer
|
||||
* @param p_dst_u - Destination U plane pointer
|
||||
* @param p_dst_v - Destination V plane pointer
|
||||
* @param dst_pitch_y - Destination Pitch Y
|
||||
* @param dst_pitch_uv - Destination Pitch UV
|
||||
* @param dst_width - Destination Width
|
||||
* @param dst_height - Destination Height
|
||||
* @param p_src_y - Src Y plane pointer
|
||||
* @param p_src_u - Src U plane pointer
|
||||
* @param p_src_v - Src V plane pointer
|
||||
* @param src_pitch_y - Src Pitch Y
|
||||
* @param src_pitch_uv - Src Pitch UV
|
||||
* @param src_width - Src Width
|
||||
* @param src_height - Src Height
|
||||
* @param b_nv12 - Is uv interleaved?
|
||||
* @param hip_stream - Stream for launching the kernel
|
||||
*/
|
||||
void ResizeYUV420(uint8_t *p_dst_y, uint8_t* p_dst_u, uint8_t* p_dst_v, int dst_pitch_y, int dst_pitch_uv,
|
||||
int dst_width, int dst_height, uint8_t *p_src_y, uint8_t* p_src_u, uint8_t* p_src_v,
|
||||
int src_pitch_y, int src_pitch_uv, int src_width, int src_height, bool b_nv12 = false, hipStream_t hip_stream = nullptr);
|
||||
|
||||
/**
|
||||
* @brief The function to launch ResizeYUV HIP kernel
|
||||
*
|
||||
* @param dp_dst - dest pointer
|
||||
* @param dst_pitch - Pitch of the dst plane
|
||||
* @param dst_width - Width of the dst plane
|
||||
* @param dst_height - Height of the dst plane
|
||||
* @param dp_src - source pointer
|
||||
* @param src_pitch - source pitch
|
||||
* @param src_width - source width
|
||||
* @param src_height - source height
|
||||
* @param b_resize_uv - to resize UV plance or not
|
||||
* @param hip_stream - Stream for launching the kernel
|
||||
*/
|
||||
void ResizeYUVHipLaunchKernel(uint8_t *dp_dst, int dst_pitch, int dst_width, int dst_height, uint8_t *dp_src, int src_pitch,
|
||||
int src_width, int src_height, bool b_resize_uv = false, hipStream_t hip_stream = nullptr);
|
||||
@@ -124,6 +124,10 @@ struct Rect {
|
||||
int bottom;
|
||||
};
|
||||
|
||||
struct Dim {
|
||||
int w, h;
|
||||
};
|
||||
|
||||
static inline int align(int value, int alignment) {
|
||||
return (value + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user