Add support for mapping GPU UUIDs to render nodes (#101)

This commit is contained in:
Aryan Salmanpour
2024-12-20 15:17:20 -05:00
committed by GitHub
parent 916f6ac806
commit 832cb88ccc
3 changed files with 102 additions and 43 deletions
+2 -1
View File
@@ -79,7 +79,8 @@ RocJpegStatus RocJpegDecoder::InitializeDecoder() {
return rocjpeg_status;
}
if (backend_ == ROCJPEG_BACKEND_HARDWARE) {
rocjpeg_status = jpeg_vaapi_decoder_.InitializeDecoder(hip_dev_prop_.name, hip_dev_prop_.gcnArchName, device_id_);
std::string gpu_uuid(hip_dev_prop_.uuid.bytes, sizeof(hip_dev_prop_.uuid.bytes));
rocjpeg_status = jpeg_vaapi_decoder_.InitializeDecoder(hip_dev_prop_.name, hip_dev_prop_.gcnArchName, device_id_, gpu_uuid);
if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) {
ERR("ERROR: Failed to initialize the VA-API JPEG decoder!");
return rocjpeg_status;
+86 -41
View File
@@ -359,9 +359,10 @@ RocJpegVappiDecoder::~RocJpegVappiDecoder() {
* @param device_name The name of the device.
* @param gcn_arch_name The name of the GCN architecture.
* @param device_id The ID of the device.
* @param gpu_uuid The UUID of the GPU.
* @return The status of the initialization process.
*/
RocJpegStatus RocJpegVappiDecoder::InitializeDecoder(std::string device_name, std::string gcn_arch_name, int device_id) {
RocJpegStatus RocJpegVappiDecoder::InitializeDecoder(std::string device_name, std::string gcn_arch_name, int device_id, std::string& gpu_uuid) {
device_id_ = device_id;
std::size_t pos = gcn_arch_name.find_first_of(":");
std::string gcn_arch_name_base = (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name;
@@ -379,29 +380,21 @@ RocJpegStatus RocJpegVappiDecoder::InitializeDecoder(std::string device_name, st
std::vector<int> visible_devices;
GetVisibleDevices(visible_devices);
GetGpuUuids();
int offset = 0;
if (gcn_arch_name_base.compare("gfx942") == 0) {
std::vector<ComputePartition> current_compute_partitions;
GetCurrentComputePartition(current_compute_partitions);
if (current_compute_partitions.empty()) {
//if the current_compute_partitions is empty then the default SPX mode is assumed.
if (device_id_ < visible_devices.size()) {
offset = visible_devices[device_id_] * 7;
} else {
offset = device_id_ * 7;
}
} else {
GetDrmNodeOffset(device_name, device_id_, visible_devices, current_compute_partitions, offset);
}
std::vector<ComputePartition> current_compute_partitions;
GetCurrentComputePartition(current_compute_partitions);
if (!current_compute_partitions.empty()) {
GetDrmNodeOffset(device_name, device_id_, visible_devices, current_compute_partitions, offset);
}
}
std::string drm_node = "/dev/dri/renderD";
if (device_id_ < visible_devices.size()) {
drm_node += std::to_string(128 + offset + visible_devices[device_id_]);
} else {
drm_node += std::to_string(128 + offset + device_id_);
}
int render_node_id = (gpu_uuids_to_render_nodes_map_.find(gpu_uuid) != gpu_uuids_to_render_nodes_map_.end()) ? gpu_uuids_to_render_nodes_map_[gpu_uuid] : 128;
drm_node += std::to_string(render_node_id + offset);
CHECK_ROCJPEG(InitVAAPI(drm_node));
CHECK_ROCJPEG(CreateDecoderConfig());
CHECK_ROCJPEG(CreateDecoderContext());
@@ -913,7 +906,12 @@ RocJpegStatus RocJpegVappiDecoder::GetHipInteropMem(VASurfaceID surface_id, HipI
* @param visible_devices_vector The vector to store the visible devices.
*/
void RocJpegVappiDecoder::GetVisibleDevices(std::vector<int>& visible_devices_vetor) {
char *visible_devices = std::getenv("HIP_VISIBLE_DEVICES");
// First, check if the ROCR_VISIBLE_DEVICES environment variable is present
char *visible_devices = std::getenv("ROCR_VISIBLE_DEVICES");
// If ROCR_VISIBLE_DEVICES is not present, check if HIP_VISIBLE_DEVICES is present
if (visible_devices == nullptr) {
visible_devices = std::getenv("HIP_VISIBLE_DEVICES");
}
if (visible_devices != nullptr) {
char *token = std::strtok(visible_devices,",");
while (token != nullptr) {
@@ -983,49 +981,54 @@ void RocJpegVappiDecoder::GetDrmNodeOffset(std::string device_name, uint8_t devi
if (!current_compute_partitions.empty()) {
switch (current_compute_partitions[0]) {
case kSpx:
if (device_id < visible_devices.size()) {
offset = visible_devices[device_id] * 7;
} else {
offset = device_id * 7;
}
offset = 0;
break;
case kDpx:
if (device_id < visible_devices.size()) {
offset = (visible_devices[device_id] / 2) * 6;
offset = (visible_devices[device_id] % 2);
} else {
offset = (device_id / 2) * 6;
offset = (device_id % 2);
}
break;
case kTpx:
// Please note that although there are only 6 XCCs per socket on MI300A,
// there are two dummy render nodes added by the driver.
// This needs to be taken into account when creating drm_node on each socket in TPX mode.
if (device_id < visible_devices.size()) {
offset = (visible_devices[device_id] / 3) * 5;
offset = (visible_devices[device_id] % 3);
} else {
offset = (device_id / 3) * 5;
offset = (device_id % 3);
}
break;
case kQpx:
if (device_id < visible_devices.size()) {
offset = (visible_devices[device_id] / 4) * 4;
offset = (visible_devices[device_id] % 4);
} else {
offset = (device_id / 4) * 4;
offset = (device_id % 4);
}
break;
case kCpx:
// Please note that both MI300A and MI300X have the same gfx_arch_name which is
// gfx942. Therefore we cannot use the gfx942 to identify MI300A.
// instead use the device name and look for MI300A
// Also, as explained above in the TPX mode section, we need to take into account
// the extra two dummy nodes when creating drm_node on each socket in CPX mode as well.
// Note: The MI300 series share the same gfx_arch_name (gfx942).
// Therefore, we cannot use gfx942 to distinguish between MI300A, MI308, etc.
// Instead, use the device name to identify MI300A, MI308, etc.
std::string mi300a = "MI300A";
size_t found_mi300a = device_name.find(mi300a);
if (found_mi300a != std::string::npos) {
std::string mi308 = "MI308";
size_t found_mi308 = device_name.find(mi308);
if (found_mi308 != std::string::npos) {
if (device_id < visible_devices.size()) {
offset = (visible_devices[device_id] / 6) * 2;
offset = (visible_devices[device_id] % 4);
} else {
offset = (device_id / 6) * 2;
offset = (device_id % 4);
}
} else if (found_mi300a != std::string::npos) {
if (device_id < visible_devices.size()) {
offset = (visible_devices[device_id] % 6);
} else {
offset = (device_id % 6);
}
} else {
if (device_id < visible_devices.size()) {
offset = (visible_devices[device_id] % 8);
} else {
offset = (device_id % 8);
}
}
break;
@@ -1048,4 +1051,46 @@ RocJpegStatus RocJpegVappiDecoder::SetSurfaceAsIdle(VASurfaceID surface_id) {
return ROCJPEG_STATUS_INVALID_PARAMETER;
}
return ROCJPEG_STATUS_SUCCESS;
}
/**
 * @brief Retrieves GPU UUIDs and maps them to render node IDs.
 *
 * Enumerates the entries of /dev/dri and, for every entry whose name starts with
 * "renderD", parses the number that follows the prefix (e.g., 128 from renderD128)
 * and reads the first line of /sys/class/drm/renderD<N>/device/unique_id. Each
 * non-empty unique ID is stored in gpu_uuids_to_render_nodes_map_ as the key, with
 * the parsed render node number as the value. Existing entries in the map are kept;
 * an entry with the same unique ID is overwritten.
 *
 * This is a best-effort scan and never throws: a render node that cannot be parsed
 * or whose unique_id cannot be read is skipped, and a failure while enumerating
 * /dev/dri itself (missing path, not a directory, I/O error) simply ends the scan
 * with whatever has been collected so far.
 */
void RocJpegVappiDecoder::GetGpuUuids() {
    const std::string dri_path = "/dev/dri";
    const std::string render_node_prefix = "renderD";
    try {
        // is_directory() also covers the "path does not exist" case.
        if (!fs::is_directory(dri_path)) {
            return;
        }
        // Constructing and advancing the iterator can throw fs::filesystem_error,
        // which is why the whole loop (not only its body) sits inside a try block.
        for (const auto& entry : fs::directory_iterator(dri_path, fs::directory_options::skip_permission_denied)) {
            try {
                const std::string filename = entry.path().filename().string();
                // Only names that start with "renderD" are render nodes.
                if (filename.compare(0, render_node_prefix.size(), render_node_prefix) != 0) {
                    continue;
                }
                // Extract the integer part from the render node name (e.g., 128 from renderD128).
                // std::stoi throws if no digits follow the prefix; that entry is then skipped.
                const int render_id = std::stoi(filename.substr(render_node_prefix.size()));
                // Open the file directly instead of probing with fs::exists() first:
                // a missing or unreadable file just leaves the stream closed.
                const std::string unique_id_path = "/sys/class/drm/" + filename + "/device/unique_id";
                std::ifstream unique_id_file(unique_id_path);
                std::string unique_id;
                if (unique_id_file.is_open() && std::getline(unique_id_file, unique_id) && !unique_id.empty()) {
                    // Map the unique GPU UUID to the render node ID
                    gpu_uuids_to_render_nodes_map_[unique_id] = render_id;
                }
            } catch (const std::exception&) {
                // A malformed or unreadable entry must not stop the scan; move on to the next one.
            }
        }
    } catch (const std::exception&) {
        // Enumerating /dev/dri failed; keep the mappings gathered so far.
    }
}
+14 -1
View File
@@ -282,9 +282,10 @@ public:
* @param device_name The name of the device.
* @param gcn_arch_name The name of the GCN architecture.
* @param device_id The ID of the device.
* @param gpu_uuid The UUID of the GPU.
* @return The status of the initialization.
*/
RocJpegStatus InitializeDecoder(std::string device_name, std::string gcn_arch_name, int device_id);
RocJpegStatus InitializeDecoder(std::string device_name, std::string gcn_arch_name, int device_id, std::string& gpu_uuid);
/**
* @brief Submits a JPEG stream for decoding.
@@ -355,6 +356,14 @@ private:
VABufferID va_huffmantable_buf_id_; // The VAAPI Huffman table buffer ID
VABufferID va_slice_param_buf_id_; // The VAAPI slice parameter buffer ID
VABufferID va_slice_data_buf_id_; // The VAAPI slice data buffer ID
/**
 * @brief A map that associates GPU unique IDs with their DRM render node numbers.
 *
 * Keys are the strings read by GetGpuUuids() from
 * /sys/class/drm/renderD<N>/device/unique_id; values are the integer <N> parsed from the
 * render node name (e.g., 128 for renderD128), not a zero-based index.
 * InitializeDecoder() looks up the GPU UUID it is given in this map to build the
 * /dev/dri/renderD<N> path and falls back to 128 when the UUID is not present.
 */
std::unordered_map<std::string, int> gpu_uuids_to_render_nodes_map_;
/**
* @brief Initializes the VAAPI with the specified DRM node.
@@ -408,6 +417,10 @@ private:
void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector<int>& visible_devices,
std::vector<ComputePartition> &current_compute_partitions,
int &offset);
/**
 * @brief Retrieves GPU UUIDs and maps them to render node IDs.
 *
 * Scans /dev/dri for entries named renderD<N>, reads the first line of
 * /sys/class/drm/renderD<N>/device/unique_id for each one, and stores the
 * non-empty results in gpu_uuids_to_render_nodes_map_ (unique ID -> <N>).
 * Render nodes whose number cannot be parsed or whose unique_id cannot be
 * read are skipped.
 */
void GetGpuUuids();
};
#endif // ROC_JPEG_VAAPI_DECODER_H_