[SWDEV-558895] Fix rsmi_event_notification_get segfaulting (#738)
Signed-off-by: adapryor <Adam.pryor@amd.com>
[ROCm/amdsmi commit: ce016f0dcb]
This commit is contained in:
@@ -206,6 +206,9 @@ GPU: 0
|
||||
|
||||
### Resolved Issues
|
||||
|
||||
- **Fixed event monitoring segfaults causing RDC to crash**.
|
||||
- Added mutex locking around access to the device event notification file pointer.
|
||||
|
||||
- **Fixed an issue where amdsmi_get_gpu_od_volt_info() returned a reference to a Python object**.
|
||||
- The returned dictionary now contains values in all fields.
|
||||
|
||||
|
||||
@@ -7357,8 +7357,23 @@ rsmi_event_notification_get(int timeout_ms,
|
||||
return;
|
||||
}
|
||||
|
||||
FILE *anon_fp =
|
||||
smi.devices()[fd_indx_to_dev_id[i]]->evt_notif_anon_file_ptr();
|
||||
const uint32_t dv_ind = fd_indx_to_dev_id[i];
|
||||
auto& dev = *smi.devices()[dv_ind];
|
||||
|
||||
// Ensure protected access of anon_fp
|
||||
amd::smi::pthread_wrap pw(*amd::smi::GetMutex(dv_ind));
|
||||
amd::smi::ScopedPthread lock(pw);
|
||||
|
||||
FILE *anon_fp = dev.evt_notif_anon_file_ptr();
|
||||
if (!anon_fp) {
|
||||
std::ostringstream ss;
|
||||
ss << "Null evt_notif_anon_file_ptr() for dv_ind=" << dv_ind;
|
||||
LOG_ERROR(ss);
|
||||
continue;
|
||||
}
|
||||
|
||||
flockfile(anon_fp); // serialize stdio on this stream
|
||||
|
||||
data_item =
|
||||
reinterpret_cast<rsmi_evt_notification_data_t *>(&data[*num_elem]);
|
||||
|
||||
@@ -7614,6 +7629,7 @@ rsmi_event_notification_get(int timeout_ms,
|
||||
data_item =
|
||||
reinterpret_cast<rsmi_evt_notification_data_t *>(&data[*num_elem]);
|
||||
}
|
||||
funlockfile(anon_fp); // paired with flockfile; RAII unlock of device mutex on scope exit
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user