Change default mutex to thread only (#104)
Most sysfs reads do not require cross-process level mutex, and writes to sysfs
should be protected by the kernel already.
Users can still switch to the old behavior by setting the environment variable AMDSMI_MUTEX_CROSS_PROCESS=1.
[ROCm/amdsmi commit: 8e74024b11]
此提交包含在:
+10
-6
@@ -13,11 +13,11 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
|
||||
|
||||
- **Added TVIOL_ACTIVE to `amd-smi monitor`**.
|
||||
Added temperature violation active or not status to `amd-smi monitor`. TVIOL_ACTIVE will be displayed as below:
|
||||
- True if active
|
||||
- False if not active
|
||||
- N/A if not supported.
|
||||
- True if active
|
||||
- False if not active
|
||||
- N/A if not supported.
|
||||
|
||||
Example CLI output:
|
||||
Example CLI output:
|
||||
```shell
|
||||
$ amd-smi monitor --viol
|
||||
GPU PVIOL TVIOL TVIOL_ACTIVE PHOT_TVIOL VR_TVIOL HBM_TVIOL
|
||||
@@ -31,7 +31,7 @@ GPU PVIOL TVIOL TVIOL_ACTIVE PHOT_TVIOL VR_TVIOL HBM_TVIOL
|
||||
7 100 % 0 % False 0 % 0 % 0 %
|
||||
```
|
||||
|
||||
- **Added support for GPU metrics 1.7 to `amdsmi_get_gpu_metrics_info()`**
|
||||
- **Added support for GPU metrics 1.7 to `amdsmi_get_gpu_metrics_info()`**.
|
||||
Updated `amdsmi_get_gpu_metrics_info()` and structure `amdsmi_gpu_metrics_t` to include new fields for XGMI Link Status, graphics clocks below host limit (per XCP), and VRAM max bandwidth:
|
||||
- `uint64_t vram_max_bandwidth` - VRAM max bandwidth at max memory clock (GB/s)
|
||||
- `uint16_t xgmi_link_status[MAX_NUM_XGMI_LINKS]` - XGMI link status, 1=Up 0=Down
|
||||
@@ -145,7 +145,11 @@ Python API now accepts `sensor_ind` as an optional argument, does not impact pr
|
||||
|
||||
- **Deprecated enum `AMDSMI_NORMAL_STRING_LENGTH` in favor of `AMDSMI_MAX_STRING_LENGTH`**.
|
||||
|
||||
- **Changed amdsmi_vram_vendor_type_t enum names impacting amdsmi_vram_info_t structure**.
|
||||
- **Changed to use a thread-only mutex by default**.
|
||||
Most sysfs reads do not require a cross-process mutex, and writes to sysfs should already be protected by the kernel.
|
||||
Users can still switch to the old behavior by setting the environment variable `AMDSMI_MUTEX_CROSS_PROCESS=1`.
|
||||
|
||||
- **Changed `amdsmi_vram_vendor_type_t` enum names impacting `amdsmi_vram_info_t` structure**.
|
||||
This change also impacts usage of the `vram_vendor` output of `amdsmi_get_gpu_vram_info()`.
|
||||
|
||||
- **Changed `amdsmi_nps_caps_t` struct impacting `amdsmi_memory_partition_config_t`, `amdsmi_accelerator_partition_t`, `amdsmi_accelerator_partition_profile_config_t`**.
|
||||
|
||||
@@ -44,7 +44,8 @@ THE SOFTWARE.
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
#include "rocm_smi/rocm_smi_main.h"
|
||||
|
||||
#define THREAD_ONLY_ENV_VAR "RSMI_MUTEX_THREAD_ONLY"
|
||||
// Default to a thread-only mutex unless the environment variable AMDSMI_MUTEX_CROSS_PROCESS is set to 1
|
||||
#define PROCESS_CROSS_PROCESS_ENV_VAR "AMDSMI_MUTEX_CROSS_PROCESS"
|
||||
#define MUTEX_TIME_OUT_ENV_VAR "RSMI_MUTEX_TIMEOUT"
|
||||
#define DEFAULT_MUTEX_TIMEOUT_SECONDS 5
|
||||
|
||||
@@ -141,7 +142,7 @@ shared_mutex_t shared_mutex_init(const char *name, mode_t mode, bool retried) {
|
||||
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
|
||||
|
||||
if (GetEnvVarUInteger(THREAD_ONLY_ENV_VAR) == 1 || smi.is_thread_only_mutex()) {
|
||||
if (GetEnvVarUInteger(PROCESS_CROSS_PROCESS_ENV_VAR) != 1 || smi.is_thread_only_mutex()) {
|
||||
return init_thread_safe_only(name);
|
||||
}
|
||||
|
||||
@@ -296,7 +297,7 @@ shared_mutex_t shared_mutex_init(const char *name, mode_t mode, bool retried) {
|
||||
|
||||
int shared_mutex_close(shared_mutex_t mutex) {
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
|
||||
const bool is_thread_only = GetEnvVarUInteger(THREAD_ONLY_ENV_VAR) == 1 ||
|
||||
const bool is_thread_only = GetEnvVarUInteger(PROCESS_CROSS_PROCESS_ENV_VAR) != 1 ||
|
||||
smi.is_thread_only_mutex();
|
||||
if (is_thread_only) {
|
||||
delete mutex.ptr;
|
||||
@@ -317,7 +318,7 @@ int shared_mutex_close(shared_mutex_t mutex) {
|
||||
|
||||
int shared_mutex_destroy(shared_mutex_t mutex) {
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
|
||||
const bool is_thread_only = GetEnvVarUInteger(THREAD_ONLY_ENV_VAR) == 1 ||
|
||||
const bool is_thread_only = GetEnvVarUInteger(PROCESS_CROSS_PROCESS_ENV_VAR) != 1 ||
|
||||
smi.is_thread_only_mutex();
|
||||
if ((errno = pthread_mutex_destroy(mutex.ptr))) {
|
||||
perror("pthread_mutex_destroy");
|
||||
|
||||
新增問題並參考
封鎖使用者