Optimize finding the compute partition mode (#493)

[ROCm/rocdecode commit: 6ada77a9ad]
Этот коммит содержится в:
Aryan Salmanpour
2025-01-18 10:12:02 -05:00
коммит произвёл GitHub
родитель a6d3c91163
Коммит df70c8eb87
2 изменённых файла: 95 добавлений и 104 удаления
+93 -101
Просмотреть файл
@@ -558,11 +558,8 @@ rocDecStatus VaContext::GetVaContext(int device_id, uint32_t *va_ctx_id) {
GetVisibleDevices(visible_devices);
int offset = 0;
std::vector<ComputePartition> current_compute_partitions;
GetCurrentComputePartition(current_compute_partitions);
if (!current_compute_partitions.empty()) {
GetDrmNodeOffset(va_contexts_[va_ctx_idx].hip_dev_prop.name, va_contexts_[va_ctx_idx].device_id, visible_devices, current_compute_partitions, offset);
}
ComputePartition current_compute_partition = (gpu_uuids_to_compute_partition_map_.find(gpu_uuid) != gpu_uuids_to_compute_partition_map_.end()) ? gpu_uuids_to_compute_partition_map_[gpu_uuid] : kSpx;
GetDrmNodeOffset(va_contexts_[va_ctx_idx].hip_dev_prop.name, va_contexts_[va_ctx_idx].device_id, visible_devices, current_compute_partition, offset);
std::string drm_node = "/dev/dri/renderD";
int render_node_id = (gpu_uuids_to_render_nodes_map_.find(gpu_uuid) != gpu_uuids_to_render_nodes_map_.end()) ? gpu_uuids_to_render_nodes_map_[gpu_uuid] : 128;
@@ -840,127 +837,122 @@ void VaContext::GetVisibleDevices(std::vector<int>& visible_devices_vetor) {
}
}
void VaContext::GetCurrentComputePartition(std::vector<ComputePartition> &current_compute_partitions) {
std::string search_path = "/sys/devices/";
std::string partition_file = "current_compute_partition";
std::error_code ec;
if (fs::exists(search_path)) {
for (auto it = fs::recursive_directory_iterator(search_path, fs::directory_options::skip_permission_denied); it != fs::recursive_directory_iterator(); ) {
try {
if (it->path().filename() == partition_file) {
std::ifstream file(it->path());
if (file.is_open()) {
std::string partition;
std::getline(file, partition);
if (partition.compare("SPX") == 0 || partition.compare("spx") == 0) {
current_compute_partitions.push_back(kSpx);
} else if (partition.compare("DPX") == 0 || partition.compare("dpx") == 0) {
current_compute_partitions.push_back(kDpx);
} else if (partition.compare("TPX") == 0 || partition.compare("tpx") == 0) {
current_compute_partitions.push_back(kTpx);
} else if (partition.compare("QPX") == 0 || partition.compare("qpx") == 0) {
current_compute_partitions.push_back(kQpx);
} else if (partition.compare("CPX") == 0 || partition.compare("cpx") == 0) {
current_compute_partitions.push_back(kCpx);
}
file.close();
}
}
++it;
} catch (fs::filesystem_error& e) {
it.increment(ec);
}
}
}
}
void VaContext::GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector<int>& visible_devices, ComputePartition current_compute_partition, int &offset) {
void VaContext::GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector<int>& visible_devices, std::vector<ComputePartition> &current_compute_partitions, int &offset) {
if (!current_compute_partitions.empty()) {
switch (current_compute_partitions[0]) {
case kSpx:
offset = 0;
break;
case kDpx:
switch (current_compute_partition) {
case kSpx:
offset = 0;
break;
case kDpx:
if (device_id < visible_devices.size()) {
offset = (visible_devices[device_id] % 2);
} else {
offset = (device_id % 2);
}
break;
case kTpx:
if (device_id < visible_devices.size()) {
offset = (visible_devices[device_id] % 3);
} else {
offset = (device_id % 3);
}
break;
case kQpx:
if (device_id < visible_devices.size()) {
offset = (visible_devices[device_id] % 4);
} else {
offset = (device_id % 4);
}
break;
case kCpx:
// Note: The MI300 series share the same gfx_arch_name (gfx942).
// Therefore, we cannot use gfx942 to distinguish between MI300X, MI300A etc.
// Instead, use the device name to identify MI300A etc.
std::string mi300a = "MI300A";
size_t found_mi300a = device_name.find(mi300a);
if (found_mi300a != std::string::npos) {
if (device_id < visible_devices.size()) {
offset = (visible_devices[device_id] % 2);
offset = (visible_devices[device_id] % 6);
} else {
offset = (device_id % 2);
offset = (device_id % 6);
}
break;
case kTpx:
} else {
if (device_id < visible_devices.size()) {
offset = (visible_devices[device_id] % 3);
offset = (visible_devices[device_id] % 8);
} else {
offset = (device_id % 3);
offset = (device_id % 8);
}
break;
case kQpx:
if (device_id < visible_devices.size()) {
offset = (visible_devices[device_id] % 4);
} else {
offset = (device_id % 4);
}
break;
case kCpx:
// Note: The MI300 series share the same gfx_arch_name (gfx942).
// Therefore, we cannot use gfx942 to distinguish between MI300X, MI300A etc.
// Instead, use the device name to identify MI300A etc.
std::string mi300a = "MI300A";
size_t found_mi300a = device_name.find(mi300a);
if (found_mi300a != std::string::npos) {
if (device_id < visible_devices.size()) {
offset = (visible_devices[device_id] % 6);
} else {
offset = (device_id % 6);
}
} else {
if (device_id < visible_devices.size()) {
offset = (visible_devices[device_id] % 8);
} else {
offset = (device_id % 8);
}
}
break;
}
}
break;
}
}
/**
* @brief Retrieves GPU UUIDs and maps them to render node IDs.
* @brief Retrieves GPU UUIDs and maps them to render node IDs and compute partitions.
*
* This function iterates through all render nodes in the /dev/dri directory,
* extracts the render node ID from the filename, and then reads the unique GPU
* UUID from the corresponding sysfs path. It maps each unique GPU UUID to its
* corresponding render node ID and stores this mapping in the gpu_uuids_to_render_nodes_map_.
* Additionally, it maps the unique GPU UUID to the current compute partition if available.
*/
void VaContext::GetGpuUuids() {
std::string dri_path = "/dev/dri";
// Iterate through all render nodes
for (const auto& entry : fs::directory_iterator(dri_path, fs::directory_options::skip_permission_denied)) {
try {
std::string filename = entry.path().filename().string();
// Check if the file name starts with "renderD"
if (filename.find("renderD") == 0) {
// Extract the integer part from the render node name (e.g., 128 from renderD128)
int render_id = std::stoi(filename.substr(7));
std::string sys_device_path = "/sys/class/drm/" + filename + "/device";
if (fs::exists(sys_device_path)) {
std::string unique_id_path = sys_device_path + "/unique_id";
if (fs::exists(unique_id_path)) {
std::ifstream unique_id_file(unique_id_path);
if (fs::exists(dri_path)) {
for (const auto& entry : fs::directory_iterator(dri_path, fs::directory_options::skip_permission_denied)) {
try {
std::string filename = entry.path().filename().string();
// Check if the file name starts with "renderD"
if (filename.find("renderD") == 0) {
// Extract the integer part from the render node name (e.g., 128 from renderD128)
int render_id = std::stoi(filename.substr(7));
std::string sys_device_path = "/sys/class/drm/" + filename + "/device";
if (fs::exists(sys_device_path)) {
std::string unique_id_path = sys_device_path + "/unique_id";
std::string unique_id;
if (unique_id_file.is_open() && std::getline(unique_id_file, unique_id)) {
if (!unique_id.empty()) {
// Map the unique GPU UUID to the render node ID
gpu_uuids_to_render_nodes_map_[unique_id] = render_id;
if (fs::exists(unique_id_path)) {
std::ifstream unique_id_file(unique_id_path);
if (unique_id_file.is_open() && std::getline(unique_id_file, unique_id)) {
if (!unique_id.empty()) {
// Map the unique GPU UUID to the render node ID
gpu_uuids_to_render_nodes_map_[unique_id] = render_id;
}
}
unique_id_file.close();
}
if (!unique_id.empty()) {
unique_id_path = sys_device_path + "/current_compute_partition";
if (fs::exists(unique_id_path)) {
std::ifstream unique_id_file(unique_id_path);
std::string partition;
ComputePartition current_compute_partition = kSpx;
if (unique_id_file.is_open() && std::getline(unique_id_file, partition)) {
if (!partition.empty()) {
if (partition.compare("SPX") == 0 || partition.compare("spx") == 0) {
current_compute_partition = kSpx;
} else if (partition.compare("DPX") == 0 || partition.compare("dpx") == 0) {
current_compute_partition = kDpx;
} else if (partition.compare("TPX") == 0 || partition.compare("tpx") == 0) {
current_compute_partition = kTpx;
} else if (partition.compare("QPX") == 0 || partition.compare("qpx") == 0) {
current_compute_partition = kQpx;
} else if (partition.compare("CPX") == 0 || partition.compare("cpx") == 0) {
current_compute_partition = kCpx;
}
// Map the unique GPU UUID to the compute partition
gpu_uuids_to_compute_partition_map_[unique_id] = current_compute_partition;
}
unique_id_file.close();
}
}
}
}
}
} catch (const std::exception& e) {
// If an exception occurs, continue with the next entry
continue;
}
} catch (const std::exception& e) {
// If an exception occurs, continue with the next entry
continue;
}
}
}
}
+2 -3
Просмотреть файл
@@ -153,7 +153,7 @@ private:
* retrieve the render node index for a given GPU UUID.
*/
std::unordered_map<std::string, int> gpu_uuids_to_render_nodes_map_;
std::unordered_map<std::string, ComputePartition> gpu_uuids_to_compute_partition_map_;
VaContext();
VaContext(const VaContext&) = delete;
VaContext& operator = (const VaContext) = delete;
@@ -162,7 +162,6 @@ private:
rocDecStatus InitHIP(int device_id, hipDeviceProp_t& hip_dev_prop);
rocDecStatus InitVAAPI(int va_ctx_idx, std::string drm_node);
void GetVisibleDevices(std::vector<int>& visible_devices_vetor);
void GetCurrentComputePartition(std::vector<ComputePartition> &current_compute_partitions);
void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector<int>& visible_devices, std::vector<ComputePartition> &current_compute_partitions, int &offset);
void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector<int>& visible_devices, ComputePartition current_compute_partition, int &offset);
void GetGpuUuids();
};