From 664d2808acbb3f87f36c3fcd7f70d5caf6140973 Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Thu, 20 Mar 2025 11:53:42 -0400 Subject: [PATCH] Enable and fix compiler warnings (#136) --- CMakeLists.txt | 2 +- src/amd_detail/rocjpeg_api_trace.cpp | 6 +- src/rocjpeg_commons.h | 9 --- src/rocjpeg_decoder.cpp | 7 +- src/rocjpeg_hip_kernels.cpp | 104 +++++++++++++-------------- src/rocjpeg_parser.cpp | 4 +- src/rocjpeg_vaapi_decoder.cpp | 9 ++- src/rocjpeg_vaapi_decoder.h | 3 +- 8 files changed, 65 insertions(+), 79 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a0f61f73c4..6a965d25ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -173,7 +173,7 @@ if(HIP_FOUND AND Libva_FOUND) # rocJPEG.so add_library(${PROJECT_NAME} SHARED ${SOURCES}) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17 -Wall") target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST}) set_target_properties(${PROJECT_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/src/amd_detail/rocjpeg_api_trace.cpp b/src/amd_detail/rocjpeg_api_trace.cpp index b15c5b376a..0393ca28c8 100644 --- a/src/amd_detail/rocjpeg_api_trace.cpp +++ b/src/amd_detail/rocjpeg_api_trace.cpp @@ -70,10 +70,6 @@ template <> struct dispatch_table_info { \ static constexpr auto import_func = &ROCPROFILER_REGISTER_IMPORT_FUNC(NAME); \ }; -constexpr auto ComputeTableSize(size_t num_funcs) { - return (num_funcs * sizeof(void*)) + sizeof(uint64_t); -} - ROCJPEG_DEFINE_DISPATCH_TABLE_INFO(RocJpegDispatchTable, rocjpeg) #endif @@ -81,7 +77,7 @@ template void ToolInit(Tp* table) { #if ROCJPEG_ROCPROFILER_REGISTER > 0 auto table_array = std::array{static_cast(table)}; auto lib_id = rocprofiler_register_library_indentifier_t{}; - auto rocp_reg_status = rocprofiler_register_library_api_table( + rocprofiler_register_library_api_table( dispatch_table_info::name, dispatch_table_info::import_func, dispatch_table_info::version, table_array.data(), table_array.size(), &lib_id); #else diff --git a/src/rocjpeg_commons.h b/src/rocjpeg_commons.h index 28a8403971..c25025c680 100644 --- a/src/rocjpeg_commons.h +++ b/src/rocjpeg_commons.h @@ -64,15 +64,6 @@ THE SOFTWARE. } \ } -static bool GetEnv(const char *name, char *value, size_t valueSize) { - const char *v = getenv(name); - if (v) { - strncpy(value, v, valueSize); - value[valueSize - 1] = 0; - } - return v ? true : false; -} - static inline int align(int value, int alignment) { return (value + alignment - 1) & ~(alignment - 1); } diff --git a/src/rocjpeg_decoder.cpp b/src/rocjpeg_decoder.cpp index 85ff7b47dc..1ac3aef9a0 100644 --- a/src/rocjpeg_decoder.cpp +++ b/src/rocjpeg_decoder.cpp @@ -28,6 +28,9 @@ RocJpegDecoder::RocJpegDecoder(RocJpegBackend backend, int device_id) : RocJpegDecoder::~RocJpegDecoder() { if (hip_stream_) { hipError_t hip_status = hipStreamDestroy(hip_stream_); + if (hip_status != hipSuccess) { + ERR("ERROR: Failed to destroy the HIP stream!"); + } } } @@ -44,7 +47,6 @@ RocJpegDecoder::~RocJpegDecoder() { * - ROCJPEG_STATUS_INVALID_PARAMETER if the requested device_id is not found. */ RocJpegStatus RocJpegDecoder::InitHIP(int device_id) { - hipError_t hip_status = hipSuccess; CHECK_HIP(hipGetDeviceCount(&num_devices_)); if (num_devices_ < 1) { ERR("ERROR: Failed to find any GPU!"); @@ -80,7 +82,7 @@ RocJpegStatus RocJpegDecoder::InitializeDecoder() { } if (backend_ == ROCJPEG_BACKEND_HARDWARE) { std::string gpu_uuid(hip_dev_prop_.uuid.bytes, sizeof(hip_dev_prop_.uuid.bytes)); - rocjpeg_status = jpeg_vaapi_decoder_.InitializeDecoder(hip_dev_prop_.name, hip_dev_prop_.gcnArchName, device_id_, gpu_uuid); + rocjpeg_status = jpeg_vaapi_decoder_.InitializeDecoder(hip_dev_prop_.name, device_id_, gpu_uuid); if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { ERR("ERROR: Failed to initialize the VA-API JPEG decoder!"); return rocjpeg_status; @@ -104,7 +106,6 @@ RocJpegStatus RocJpegDecoder::InitializeDecoder() { */ RocJpegStatus RocJpegDecoder::Decode(RocJpegStreamHandle jpeg_stream_handle, const RocJpegDecodeParams *decode_params, RocJpegImage *destination) { std::lock_guard lock(mutex_); - RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_SUCCESS; if (jpeg_stream_handle == nullptr || decode_params == nullptr || destination == nullptr) { return ROCJPEG_STATUS_INVALID_PARAMETER; } diff --git a/src/rocjpeg_hip_kernels.cpp b/src/rocjpeg_hip_kernels.cpp index 2f128f1101..67fd1a0113 100644 --- a/src/rocjpeg_hip_kernels.cpp +++ b/src/rocjpeg_hip_kernels.cpp @@ -49,12 +49,12 @@ __device__ __forceinline__ float4 hipUnpack(uint32_t src) { return make_float4(hipUnpack0(src), hipUnpack1(src), hipUnpack2(src), hipUnpack3(src)); } -__global__ void ColorConvertYUV444ToRGBKernel(uint32_t dst_width, uint32_t dst_height, uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, +__global__ void ColorConvertYUV444ToRGBKernel(uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_y_image, const uint8_t *src_u_image, const uint8_t *src_v_image, uint32_t src_yuv_image_stride_in_bytes, uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_yuv_image_stride_in_bytes_comp) { - int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + uint32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + uint32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; if ((x < dst_width_comp) && (y < dst_height_comp)) { uint32_t src_y0_idx = y * src_yuv_image_stride_in_bytes_comp + (x << 3); @@ -250,18 +250,18 @@ void ColorConvertYUV444ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t ds uint32_t src_yuv_image_stride_in_bytes_comp = src_yuv_image_stride_in_bytes * 2; ColorConvertYUV444ToRGBKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), - dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image, + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_image, dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_yuv_image, src_yuv_image + src_u_image_offset, src_yuv_image + src_v_image_offset, src_yuv_image_stride_in_bytes, dst_width_comp, dst_height_comp, src_yuv_image_stride_in_bytes_comp); } -__global__ void ColorConvertYUV444ToRGBPlanarKernel(uint32_t dst_width, uint32_t dst_height, uint8_t *dst_image_r, uint8_t *dst_image_g, uint8_t *dst_image_b, uint32_t dst_image_stride_in_bytes, +__global__ void ColorConvertYUV444ToRGBPlanarKernel(uint8_t *dst_image_r, uint8_t *dst_image_g, uint8_t *dst_image_b, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_y_image, const uint8_t *src_u_image, const uint8_t *src_v_image, uint32_t src_yuv_image_stride_in_bytes, uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_yuv_image_stride_in_bytes_comp) { - int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + uint32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + uint32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; if ((x < dst_width_comp) && (y < dst_height_comp)) { uint32_t src_y0_idx = y * src_yuv_image_stride_in_bytes_comp + (x << 3); @@ -480,18 +480,18 @@ void ColorConvertYUV444ToRGBPlanar(hipStream_t stream, uint32_t dst_width, uint3 uint32_t src_yuv_image_stride_in_bytes_comp = src_yuv_image_stride_in_bytes * 2; ColorConvertYUV444ToRGBPlanarKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), - dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image_r, dst_image_g, dst_image_b, + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_image_r, dst_image_g, dst_image_b, dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_yuv_image, src_yuv_image + src_u_image_offset, src_yuv_image + src_v_image_offset, src_yuv_image_stride_in_bytes, dst_width_comp, dst_height_comp, src_yuv_image_stride_in_bytes_comp); } -__global__ void ColorConvertYUV440ToRGBKernel(uint32_t dst_width, uint32_t dst_height, uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, +__global__ void ColorConvertYUV440ToRGBKernel(uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_y_image, const uint8_t *src_u_image, const uint8_t *src_v_image, uint32_t src_yuv_image_stride_in_bytes, uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_yuv_image_stride_in_bytes_comp) { - int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + uint32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + uint32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; if ((x < dst_width_comp) && (y < dst_height_comp)) { uint32_t src_y0_idx = y * src_yuv_image_stride_in_bytes_comp + (x << 3); @@ -684,18 +684,18 @@ void ColorConvertYUV440ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t ds uint32_t src_yuv_image_stride_in_bytes_comp = src_yuv_image_stride_in_bytes * 2; ColorConvertYUV440ToRGBKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), - dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image, + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_image, dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_yuv_image, src_yuv_image + src_u_image_offset, src_yuv_image + src_v_image_offset, src_yuv_image_stride_in_bytes, dst_width_comp, dst_height_comp, src_yuv_image_stride_in_bytes_comp); } -__global__ void ColorConvertYUV440ToRGBPlanarKernel(uint32_t dst_width, uint32_t dst_height, uint8_t *dst_image_r, uint8_t *dst_image_g, uint8_t *dst_image_b, uint32_t dst_image_stride_in_bytes, +__global__ void ColorConvertYUV440ToRGBPlanarKernel(uint8_t *dst_image_r, uint8_t *dst_image_g, uint8_t *dst_image_b, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_y_image, const uint8_t *src_u_image, const uint8_t *src_v_image, uint32_t src_yuv_image_stride_in_bytes, uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_yuv_image_stride_in_bytes_comp) { - int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + uint32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + uint32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; if ((x < dst_width_comp) && (y < dst_height_comp)) { uint32_t src_y0_idx = y * src_yuv_image_stride_in_bytes_comp + (x << 3); @@ -911,19 +911,19 @@ void ColorConvertYUV440ToRGBPlanar(hipStream_t stream, uint32_t dst_width, uint3 uint32_t src_yuv_image_stride_in_bytes_comp = src_yuv_image_stride_in_bytes * 2; ColorConvertYUV440ToRGBPlanarKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), - dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image_r, dst_image_g, dst_image_b, + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_image_r, dst_image_g, dst_image_b, dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_yuv_image, src_yuv_image + src_u_image_offset, src_yuv_image + src_v_image_offset, src_yuv_image_stride_in_bytes, dst_width_comp, dst_height_comp, src_yuv_image_stride_in_bytes_comp); } -__global__ void ColorConvertYUYVToRGBKernel(uint32_t dst_width, uint32_t dst_height, +__global__ void ColorConvertYUYVToRGBKernel( uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_image, uint32_t src_image_stride_in_bytes, uint32_t src_image_stride_in_bytes_comp, uint32_t dst_width_comp, uint32_t dst_height_comp) { - int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + uint32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + uint32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; if ((x < dst_width_comp) && (y < dst_height_comp)) { uint32_t l0_idx = y * src_image_stride_in_bytes_comp + (x << 4); @@ -1127,17 +1127,17 @@ void ColorConvertYUYVToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_ uint32_t src_image_stride_in_bytes_comp = src_image_stride_in_bytes * 2; ColorConvertYUYVToRGBKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), - dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image, + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_image, dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_image, src_image_stride_in_bytes, src_image_stride_in_bytes_comp, dst_width_comp, dst_height_comp); } -__global__ void ColorConvertYUYVToRGBPlanarKernel(uint32_t dst_width, uint32_t dst_height, +__global__ void ColorConvertYUYVToRGBPlanarKernel( uint8_t *dst_image_r, uint8_t *dst_image_g, uint8_t *dst_image_b, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_image, uint32_t src_image_stride_in_bytes, uint32_t src_image_stride_in_bytes_comp, uint32_t dst_width_comp, uint32_t dst_height_comp) { - int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + uint32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + uint32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; if ((x < dst_width_comp) && (y < dst_height_comp)) { uint32_t l0_idx = y * src_image_stride_in_bytes_comp + (x << 4); @@ -1369,19 +1369,19 @@ void ColorConvertYUYVToRGBPlanar(hipStream_t stream, uint32_t dst_width, uint32_ uint32_t src_image_stride_in_bytes_comp = src_image_stride_in_bytes * 2; ColorConvertYUYVToRGBPlanarKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), - dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image_r, dst_image_g, dst_image_b, + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_image_r, dst_image_g, dst_image_b, dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_image, src_image_stride_in_bytes, src_image_stride_in_bytes_comp, dst_width_comp, dst_height_comp); } -__global__ void ColorConvertNV12ToRGBKernel(uint32_t dst_width, uint32_t dst_height, +__global__ void ColorConvertNV12ToRGBKernel( uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes, const uint8_t *src_chroma_image, uint32_t src_chroma_image_stride_in_bytes, uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_luma_image_stride_in_bytes_comp) { - int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + uint32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + uint32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; if ((x < dst_width_comp) && (y < dst_height_comp)) { uint32_t src_y0_idx = y * src_luma_image_stride_in_bytes_comp + (x << 3); @@ -1605,19 +1605,19 @@ void ColorConvertNV12ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_ uint32_t src_luma_image_stride_in_bytes_comp = src_luma_image_stride_in_bytes * 2; ColorConvertNV12ToRGBKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), - dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image, dst_image_stride_in_bytes, + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_image, dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_luma_image, src_luma_image_stride_in_bytes, src_chroma_image, src_chroma_image_stride_in_bytes, dst_width_comp, dst_height_comp, src_luma_image_stride_in_bytes_comp); } -__global__ void ColorConvertNV12ToRGBPlanarKernel(uint32_t dst_width, uint32_t dst_height, +__global__ void ColorConvertNV12ToRGBPlanarKernel( uint8_t *dst_image_r, uint8_t *dst_image_g, uint8_t *dst_image_b, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes, const uint8_t *src_chroma_image, uint32_t src_chroma_image_stride_in_bytes, uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_luma_image_stride_in_bytes_comp) { - int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + uint32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + uint32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; if ((x < dst_width_comp) && (y < dst_height_comp)) { uint32_t src_y0_idx = y * src_luma_image_stride_in_bytes_comp + (x << 3); @@ -1865,19 +1865,19 @@ void ColorConvertNV12ToRGBPlanar(hipStream_t stream, uint32_t dst_width, uint32_ uint32_t src_luma_image_stride_in_bytes_comp = src_luma_image_stride_in_bytes * 2; ColorConvertNV12ToRGBPlanarKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), - dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image_r, dst_image_g, dst_image_b, dst_image_stride_in_bytes, + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_image_r, dst_image_g, dst_image_b, dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_luma_image, src_luma_image_stride_in_bytes, src_chroma_image, src_chroma_image_stride_in_bytes, dst_width_comp, dst_height_comp, src_luma_image_stride_in_bytes_comp); } -__global__ void ColorConvertYUV400ToRGBKernel(uint32_t dst_width, uint32_t dst_height, +__global__ void ColorConvertYUV400ToRGBKernel( uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes, uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_luma_image_stride_in_bytes_comp) { - int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + uint32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + uint32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; if ((x < dst_width_comp) && (y < dst_height_comp)) { uint32_t src_y0_idx = y * src_luma_image_stride_in_bytes_comp + (x << 3); @@ -1959,19 +1959,19 @@ void ColorConvertYUV400ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t ds uint32_t src_luma_image_stride_in_bytes_comp = src_luma_image_stride_in_bytes * 2; ColorConvertYUV400ToRGBKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), - dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image, dst_image_stride_in_bytes, + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_image, dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_luma_image, src_luma_image_stride_in_bytes, dst_width_comp, dst_height_comp, src_luma_image_stride_in_bytes_comp); } -__global__ void ColorConvertYUV400ToRGBPlanarKernel(uint32_t dst_width, uint32_t dst_height, +__global__ void ColorConvertYUV400ToRGBPlanarKernel( uint8_t *dst_image_r, uint8_t *dst_image_g, uint8_t *dst_image_b, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_luma_image, uint32_t src_luma_image_stride_in_bytes, uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_luma_image_stride_in_bytes_comp) { - int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + uint32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + uint32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; if ((x < dst_width_comp) && (y < dst_height_comp)) { uint32_t src_y0_idx = y * src_luma_image_stride_in_bytes_comp + (x << 3); @@ -2022,7 +2022,7 @@ void ColorConvertYUV400ToRGBPlanar(hipStream_t stream, uint32_t dst_width, uint3 uint32_t src_luma_image_stride_in_bytes_comp = src_luma_image_stride_in_bytes * 2; ColorConvertYUV400ToRGBPlanarKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), - dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image_r, dst_image_g, dst_image_b, dst_image_stride_in_bytes, + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_image_r, dst_image_g, dst_image_b, dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_luma_image, src_luma_image_stride_in_bytes, dst_width_comp, dst_height_comp, src_luma_image_stride_in_bytes_comp); @@ -2031,8 +2031,8 @@ void ColorConvertYUV400ToRGBPlanar(hipStream_t stream, uint32_t dst_width, uint3 __global__ void ColorConvertRGBAToRGBKernel(uint32_t dst_width, uint32_t dst_height, uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, const uint8_t *src_image, uint32_t src_image_stride_in_bytes) { - int x = (hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x) * 8; - int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + uint32_t x = (hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x) * 8; + uint32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; if (x >= dst_width || y >= dst_height) { return; @@ -2083,8 +2083,8 @@ __global__ void ConvertInterleavedUVToPlanarUVKernel(uint32_t dst_width, uint32_ uint8_t *dst_image1, uint8_t *dst_image2, uint32_t dst_image_stride_in_bytes, const uint8_t *src_image, uint32_t src_image_stride_in_bytes) { - int32_t x = (hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x) * 8; - int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + uint32_t x = (hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x) * 8; + uint32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; if (x >= dst_width || y >= dst_height) { return; @@ -2133,13 +2133,13 @@ void ConvertInterleavedUVToPlanarUV(hipStream_t stream, uint32_t dst_width, uint } -__global__ void ExtractYFromPackedYUYVKernel(uint32_t dst_width, uint32_t dst_height, +__global__ void ExtractYFromPackedYUYVKernel(uint32_t dst_height, uint8_t *destination_y, uint32_t dst_luma_stride_in_bytes, const uint8_t *src_image, uint32_t src_image_stride_in_bytes, uint32_t dst_width_comp) { - int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + uint32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + uint32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; if (x < dst_width_comp && y < dst_height) { uint32_t src_idx = y * src_image_stride_in_bytes + (x << 4); @@ -2179,17 +2179,17 @@ void ExtractYFromPackedYUYV(hipStream_t stream, uint32_t dst_width, uint32_t dst uint32_t dst_width_comp = (dst_width + 7) / 8; ExtractYFromPackedYUYVKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), - dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, destination_y, + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_height, destination_y, dst_luma_stride_in_bytes, src_image, src_image_stride_in_bytes, dst_width_comp); } -__global__ void ConvertPackedYUYVToPlanarYUVKernel(uint32_t dst_width, uint32_t dst_height, +__global__ void ConvertPackedYUYVToPlanarYUVKernel(uint32_t dst_height, uint8_t *destination_y, uint8_t *destination_u, uint8_t *destination_v, uint32_t dst_luma_stride_in_bytes, uint32_t dst_chroma_stride_in_bytes, const uint8_t *src_image, uint32_t src_image_stride_in_bytes, uint32_t dst_width_comp) { - int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + uint32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + uint32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; if ((x < dst_width_comp && y < dst_height)) { uint32_t src_idx = y * src_image_stride_in_bytes + (x << 4); @@ -2241,6 +2241,6 @@ void ConvertPackedYUYVToPlanarYUV(hipStream_t stream, uint32_t dst_width, uint32 uint32_t dst_width_comp = (dst_width + 7) / 8; ConvertPackedYUYVToPlanarYUVKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), - dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, destination_y, destination_u, + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_height, destination_y, destination_u, destination_v, dst_luma_stride_in_bytes, dst_chroma_stride_in_bytes, src_image, src_image_stride_in_bytes, dst_width_comp); } \ No newline at end of file diff --git a/src/rocjpeg_parser.cpp b/src/rocjpeg_parser.cpp index 08b7b29282..3280edcd0a 100644 --- a/src/rocjpeg_parser.cpp +++ b/src/rocjpeg_parser.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. #include "rocjpeg_parser.h" RocJpegStreamParser::RocJpegStreamParser() : stream_{nullptr}, stream_end_{nullptr}, stream_length_{0}, - jpeg_stream_parameters_{{}} { + jpeg_stream_parameters_{} { } RocJpegStreamParser::~RocJpegStreamParser() { @@ -337,7 +337,7 @@ bool RocJpegStreamParser::ParseSOS() { jpeg_stream_parameters_.slice_parameter_buffer.num_components = num_components; stream_ += 3; - for (int32_t i = 0; i < num_components; i++) { + for (uint32_t i = 0; i < num_components; i++) { component_id = *stream_++; table = *stream_++; jpeg_stream_parameters_.slice_parameter_buffer.components[i].component_selector = component_id; diff --git a/src/rocjpeg_vaapi_decoder.cpp b/src/rocjpeg_vaapi_decoder.cpp index cceea99d89..ed18ad5046 100644 --- a/src/rocjpeg_vaapi_decoder.cpp +++ b/src/rocjpeg_vaapi_decoder.cpp @@ -294,7 +294,7 @@ bool RocJpegVaapiMemoryPool::SetSurfaceAsIdle(VASurfaceID surface_id) { */ RocJpegVappiDecoder::RocJpegVappiDecoder(int device_id) : device_id_{device_id}, drm_fd_{-1}, min_picture_width_{64}, min_picture_height_{64}, max_picture_width_{4096}, max_picture_height_{4096}, supports_modifiers_{false}, va_display_{0}, va_config_attrib_{{}}, va_config_id_{0}, va_profile_{VAProfileJPEGBaseline}, - vaapi_mem_pool_(std::make_unique()), current_vcn_jpeg_spec_{0}, va_picture_parameter_buf_id_{0}, va_quantization_matrix_buf_id_{0}, va_huffmantable_buf_id_{0}, + vaapi_mem_pool_(std::make_unique()), current_vcn_jpeg_spec_{}, va_picture_parameter_buf_id_{0}, va_quantization_matrix_buf_id_{0}, va_huffmantable_buf_id_{0}, va_slice_param_buf_id_{0}, va_slice_data_buf_id_{0} {}; /** @@ -344,12 +344,11 @@ RocJpegVappiDecoder::~RocJpegVappiDecoder() { * and other necessary parameters. It also sets up the VAAPI display and creates the decoder configuration. * * @param device_name The name of the device. - * @param gcn_arch_name The name of the GCN architecture. * @param device_id The ID of the device. * @param gpu_uuid The UUID of the GPU. * @return The status of the initialization process. */ -RocJpegStatus RocJpegVappiDecoder::InitializeDecoder(std::string device_name, std::string gcn_arch_name, int device_id, std::string& gpu_uuid) { +RocJpegStatus RocJpegVappiDecoder::InitializeDecoder(std::string device_name, int device_id, std::string& gpu_uuid) { device_id_ = device_id; std::vector visible_devices; GetVisibleDevices(visible_devices); @@ -788,14 +787,14 @@ RocJpegStatus RocJpegVappiDecoder::SubmitDecodeBatched(JpegStreamParameters *jpe CHECK_VAAPI(vaCreateSurfaces(va_display_, surface_format, key.width, key.height, mem_pool_entry.va_surface_ids.data(), mem_pool_entry.va_surface_ids.size(), surface_attribs.data(), supports_modifiers_ ? 2 : 1)); mem_pool_entry.image_width = key.width; mem_pool_entry.image_height = key.height; - for (int i = 0; i < mem_pool_entry.va_surface_ids.size(); i++) { + for (size_t i = 0; i < mem_pool_entry.va_surface_ids.size(); i++) { surface_ids[indices[i]] = mem_pool_entry.va_surface_ids[i]; } mem_pool_entry.hip_interops.resize(indices.size(), HipInteropDeviceMem()); mem_pool_entry.entry_status = kBusy; CHECK_ROCJPEG(vaapi_mem_pool_->AddPoolEntry(key.pixel_format, mem_pool_entry)); } else { - for (int i = 0; i < mem_pool_entry.va_surface_ids.size(); i++) { + for (size_t i = 0; i < mem_pool_entry.va_surface_ids.size(); i++) { surface_ids[indices[i]] = mem_pool_entry.va_surface_ids[i]; } } diff --git a/src/rocjpeg_vaapi_decoder.h b/src/rocjpeg_vaapi_decoder.h index 9751fda82b..c9ac454b7a 100644 --- a/src/rocjpeg_vaapi_decoder.h +++ b/src/rocjpeg_vaapi_decoder.h @@ -277,12 +277,11 @@ public: /** * @brief Initializes the decoder with the specified device, GCN architecture, and device ID. * @param device_name The name of the device. - * @param gcn_arch_name The name of the GCN architecture. * @param device_id The ID of the device. * @param gpu_uuid The UUID of the GPU. * @return The status of the initialization. */ - RocJpegStatus InitializeDecoder(std::string device_name, std::string gcn_arch_name, int device_id, std::string& gpu_uuid); + RocJpegStatus InitializeDecoder(std::string device_name, int device_id, std::string& gpu_uuid); /** * @brief Submits a JPEG stream for decoding.