diff --git a/CHANGELOG.md b/CHANGELOG.md index dbe32e1d5e..2a54375a24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ Documentation for rocJPEG is available at [https://rocm.docs.amd.com/projects/rocJPEG/en/latest/](https://rocm.docs.amd.com/projects/rocJPEG/en/latest/) -## rocjpeg 1.0.0 for ROCm 7.0.0 +## rocjpeg 1.1.0 for ROCm 7.0.0 ## Added * cmake config files diff --git a/CMakeLists.txt b/CMakeLists.txt index 3b19379b76..97fffdfdca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,7 +42,7 @@ endif() # rocjpeg Version # NOTE: package version and rocjpeg_version.h is generated with this version -set(VERSION "1.0.0") +set(VERSION "1.1.0") # Set Project Version and Language project(rocjpeg VERSION ${VERSION} LANGUAGES CXX) diff --git a/samples/rocjpeg_samples_utils.h b/samples/rocjpeg_samples_utils.h index 3975ec62f2..86af011947 100644 --- a/samples/rocjpeg_samples_utils.h +++ b/samples/rocjpeg_samples_utils.h @@ -331,30 +331,30 @@ public: switch (subsampling) { case ROCJPEG_CSS_444: num_channels = 3; - output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment); + channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)); break; case ROCJPEG_CSS_440: num_channels = 3; - output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); - channel_sizes[2] = channel_sizes[1] = align(output_image.pitch[0] * ((is_roi_valid ? roi_height : heights[0]) >> 1), mem_alignment); + output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment); + channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)); + channel_sizes[2] = channel_sizes[1] = output_image.pitch[0] * ((is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)) >> 1); break; case ROCJPEG_CSS_422: num_channels = 1; - output_image.pitch[0] = (is_roi_valid ? roi_width : widths[0]) * 2; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + output_image.pitch[0] = (is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment)) * 2; + channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)); break; case ROCJPEG_CSS_420: num_channels = 2; - output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); - channel_sizes[1] = align(output_image.pitch[1] * ((is_roi_valid ? roi_height : heights[0]) >> 1), mem_alignment); + output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment); + channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)); + channel_sizes[1] = output_image.pitch[1] * ((is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)) >> 1); break; case ROCJPEG_CSS_400: num_channels = 1; - output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment); + channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)); break; default: std::cout << "Unknown chroma subsampling!" << std::endl; @@ -364,32 +364,32 @@ public: case ROCJPEG_OUTPUT_YUV_PLANAR: if (subsampling == ROCJPEG_CSS_400) { num_channels = 1; - output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment); + channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)); } else { num_channels = 3; - output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - output_image.pitch[1] = is_roi_valid ? roi_width : widths[1]; - output_image.pitch[2] = is_roi_valid ? roi_width : widths[2]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); - channel_sizes[1] = align(output_image.pitch[1] * (is_roi_valid ? roi_height : heights[1]), mem_alignment); - channel_sizes[2] = align(output_image.pitch[2] * (is_roi_valid ? roi_height : heights[2]), mem_alignment); + output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment); + output_image.pitch[1] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[1], mem_alignment); + output_image.pitch[2] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[2], mem_alignment); + channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)); + channel_sizes[1] = output_image.pitch[1] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[1], mem_alignment)); + channel_sizes[2] = output_image.pitch[2] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[2], mem_alignment)); } break; case ROCJPEG_OUTPUT_Y: num_channels = 1; - output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment); + channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)); break; case ROCJPEG_OUTPUT_RGB: num_channels = 1; - output_image.pitch[0] = (is_roi_valid ? roi_width : widths[0]) * 3; - channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + output_image.pitch[0] = (is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment)) * 3; + channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)); break; case ROCJPEG_OUTPUT_RGB_PLANAR: num_channels = 3; - output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0]; - channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment); + output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment); + channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)); break; default: std::cout << "Unknown output format!" << std::endl; @@ -567,9 +567,9 @@ public: return; } - uint32_t channel0_size = output_image->pitch[0] * heights[0]; - uint32_t channel1_size = output_image->pitch[1] * heights[1]; - uint32_t channel2_size = output_image->pitch[2] * heights[2]; + uint32_t channel0_size = output_image->pitch[0] * align(heights[0], mem_alignment); + uint32_t channel1_size = output_image->pitch[1] * align(heights[1], mem_alignment); + uint32_t channel2_size = output_image->pitch[2] * align(heights[2], mem_alignment); uint32_t output_image_size = channel0_size + channel1_size + channel2_size; @@ -628,7 +628,7 @@ public: } private: - static const int mem_alignment = 4 * 1024 * 1024; + static const int mem_alignment = 16; /** * @brief Shows the help message and exits. * diff --git a/src/rocjpeg_vaapi_decoder.cpp b/src/rocjpeg_vaapi_decoder.cpp index 6164d52d40..07db79da73 100644 --- a/src/rocjpeg_vaapi_decoder.cpp +++ b/src/rocjpeg_vaapi_decoder.cpp @@ -429,6 +429,8 @@ void RocJpegVappiDecoder::GetNumJpegCores() { uint32_t major_version = 0, minor_version = 0; uint32_t num_jpeg_cores = 0; int error_code = 0; + const char *enable_vcn_hw_csc_str = std::getenv("ROCJPEG_ENABLE_VCN_HW_CSC"); + bool enable_vcn_hw_csc = (enable_vcn_hw_csc_str != nullptr && strcmp(enable_vcn_hw_csc_str, "1") == 0); if (amdgpu_device_initialize(drm_fd_, &major_version, &minor_version, &dev_handle)) { ERR("amdgpu_device_initialize failed!"); return; @@ -437,7 +439,8 @@ void RocJpegVappiDecoder::GetNumJpegCores() { if (!error_code) { current_vcn_jpeg_spec_.num_jpeg_cores = num_jpeg_cores; // Set the capabilities based on the number of JPEG cores - current_vcn_jpeg_spec_.can_roi_decode = current_vcn_jpeg_spec_.can_convert_to_rgb = (num_jpeg_cores >= 8); + current_vcn_jpeg_spec_.can_roi_decode = (num_jpeg_cores >= 8); + current_vcn_jpeg_spec_.can_convert_to_rgb = (num_jpeg_cores >= 8) && enable_vcn_hw_csc; } else { ERR("Failed to get the number of jpeg cores."); }