Add an environment variable to control whether VCN hardware color space conversion is enabled (#167)

* Add an environment variable to control whether VCN hardware color space conversion is enabled

* Update version

* Optimize the mem_alignment logic for HIP memory allocation
This commit is contained in:
Aryan Salmanpour
2025-07-24 22:05:31 -04:00
committad av GitHub
förälder 20ea5e95fe
incheckning 08003e0fd7
4 ändrade filer med 36 tillägg och 33 borttagningar
+1 -1
Visa fil
@@ -3,7 +3,7 @@
Documentation for rocJPEG is available at
[https://rocm.docs.amd.com/projects/rocJPEG/en/latest/](https://rocm.docs.amd.com/projects/rocJPEG/en/latest/)
## rocjpeg 1.0.0 for ROCm 7.0.0
## rocjpeg 1.1.0 for ROCm 7.0.0
## Added
* cmake config files
+1 -1
Visa fil
@@ -42,7 +42,7 @@ endif()
# rocjpeg Version
# NOTE: package version and rocjpeg_version.h is generated with this version
set(VERSION "1.0.0")
set(VERSION "1.1.0")
# Set Project Version and Language
project(rocjpeg VERSION ${VERSION} LANGUAGES CXX)
+30 -30
Visa fil
@@ -331,30 +331,30 @@ public:
switch (subsampling) {
case ROCJPEG_CSS_444:
num_channels = 3;
output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
break;
case ROCJPEG_CSS_440:
num_channels = 3;
output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
channel_sizes[2] = channel_sizes[1] = align(output_image.pitch[0] * ((is_roi_valid ? roi_height : heights[0]) >> 1), mem_alignment);
output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
channel_sizes[2] = channel_sizes[1] = output_image.pitch[0] * ((is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)) >> 1);
break;
case ROCJPEG_CSS_422:
num_channels = 1;
output_image.pitch[0] = (is_roi_valid ? roi_width : widths[0]) * 2;
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
output_image.pitch[0] = (is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment)) * 2;
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
break;
case ROCJPEG_CSS_420:
num_channels = 2;
output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
channel_sizes[1] = align(output_image.pitch[1] * ((is_roi_valid ? roi_height : heights[0]) >> 1), mem_alignment);
output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
channel_sizes[1] = output_image.pitch[1] * ((is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)) >> 1);
break;
case ROCJPEG_CSS_400:
num_channels = 1;
output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
break;
default:
std::cout << "Unknown chroma subsampling!" << std::endl;
@@ -364,32 +364,32 @@ public:
case ROCJPEG_OUTPUT_YUV_PLANAR:
if (subsampling == ROCJPEG_CSS_400) {
num_channels = 1;
output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
} else {
num_channels = 3;
output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
output_image.pitch[1] = is_roi_valid ? roi_width : widths[1];
output_image.pitch[2] = is_roi_valid ? roi_width : widths[2];
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
channel_sizes[1] = align(output_image.pitch[1] * (is_roi_valid ? roi_height : heights[1]), mem_alignment);
channel_sizes[2] = align(output_image.pitch[2] * (is_roi_valid ? roi_height : heights[2]), mem_alignment);
output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
output_image.pitch[1] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[1], mem_alignment);
output_image.pitch[2] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[2], mem_alignment);
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
channel_sizes[1] = output_image.pitch[1] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[1], mem_alignment));
channel_sizes[2] = output_image.pitch[2] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[2], mem_alignment));
}
break;
case ROCJPEG_OUTPUT_Y:
num_channels = 1;
output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
break;
case ROCJPEG_OUTPUT_RGB:
num_channels = 1;
output_image.pitch[0] = (is_roi_valid ? roi_width : widths[0]) * 3;
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
output_image.pitch[0] = (is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment)) * 3;
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
break;
case ROCJPEG_OUTPUT_RGB_PLANAR:
num_channels = 3;
output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
break;
default:
std::cout << "Unknown output format!" << std::endl;
@@ -567,9 +567,9 @@ public:
return;
}
uint32_t channel0_size = output_image->pitch[0] * heights[0];
uint32_t channel1_size = output_image->pitch[1] * heights[1];
uint32_t channel2_size = output_image->pitch[2] * heights[2];
uint32_t channel0_size = output_image->pitch[0] * align(heights[0], mem_alignment);
uint32_t channel1_size = output_image->pitch[1] * align(heights[1], mem_alignment);
uint32_t channel2_size = output_image->pitch[2] * align(heights[2], mem_alignment);
uint32_t output_image_size = channel0_size + channel1_size + channel2_size;
@@ -628,7 +628,7 @@ public:
}
private:
static const int mem_alignment = 4 * 1024 * 1024;
static const int mem_alignment = 16;
/**
* @brief Shows the help message and exits.
*
+4 -1
Visa fil
@@ -429,6 +429,8 @@ void RocJpegVappiDecoder::GetNumJpegCores() {
uint32_t major_version = 0, minor_version = 0;
uint32_t num_jpeg_cores = 0;
int error_code = 0;
const char *enable_vcn_hw_csc_str = std::getenv("ROCJPEG_ENABLE_VCN_HW_CSC");
bool enable_vcn_hw_csc = (enable_vcn_hw_csc_str != nullptr && strcmp(enable_vcn_hw_csc_str, "1") == 0);
if (amdgpu_device_initialize(drm_fd_, &major_version, &minor_version, &dev_handle)) {
ERR("amdgpu_device_initialize failed!");
return;
@@ -437,7 +439,8 @@ void RocJpegVappiDecoder::GetNumJpegCores() {
if (!error_code) {
current_vcn_jpeg_spec_.num_jpeg_cores = num_jpeg_cores;
// Set the capabilities based on the number of JPEG cores
current_vcn_jpeg_spec_.can_roi_decode = current_vcn_jpeg_spec_.can_convert_to_rgb = (num_jpeg_cores >= 8);
current_vcn_jpeg_spec_.can_roi_decode = (num_jpeg_cores >= 8);
current_vcn_jpeg_spec_.can_convert_to_rgb = (num_jpeg_cores >= 8) && enable_vcn_hw_csc;
} else {
ERR("Failed to get the number of jpeg cores.");
}