diff --git a/projects/amdsmi/CHANGELOG.md b/projects/amdsmi/CHANGELOG.md index 8036e3ea2c..fe8eed7903 100644 --- a/projects/amdsmi/CHANGELOG.md +++ b/projects/amdsmi/CHANGELOG.md @@ -13,11 +13,11 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr - **Added TVIOL_ACTIVE to `amd-smi monitor`**. Added temperature violation active or not status to `amd-smi monitor`. TVIOL_ACTIVE will be displayed as below: - - True if active - - False if not active - - N/A if not supported. + - True if active + - False if not active + - N/A if not supported. - Example CLI output: + Example CLI output: ```shell $ amd-smi monitor --viol GPU PVIOL TVIOL TVIOL_ACTIVE PHOT_TVIOL VR_TVIOL HBM_TVIOL @@ -31,7 +31,7 @@ GPU PVIOL TVIOL TVIOL_ACTIVE PHOT_TVIOL VR_TVIOL HBM_TVIOL 7 100 % 0 % False 0 % 0 % 0 % ``` -- **Added support for GPU metrics 1.7 to `amdsmi_get_gpu_metrics_info()`** +- **Added support for GPU metrics 1.7 to `amdsmi_get_gpu_metrics_info()`**. Updated `amdsmi_get_gpu_metrics_info()` and structure `amdsmi_gpu_metrics_t` to include new fields for XGMI Link Status, graphics clocks below host limit (per XCP), and VRAM max bandwidth: - `uint64_t vram_max_bandwidth` - VRAM max bandwidth at max memory clock (GB/s) - `uint16_t xgmi_link_status[MAX_NUM_XGMI_LINKS]` - XGMI link statis, 1=Up 0=Down @@ -145,7 +145,11 @@ Python API now accepts `sensor_ind` as an optional argument, does not imapact pr - **Depricated enum `AMDSMI_NORMAL_STRING_LENGTH` in favor of `AMDSMI_MAX_STRING_LENGTH`**. -- **Changed amdsmi_vram_vendor_type_t enum names impacting amdsmi_vram_info_t structure**. +- **Changed to use a thread-only mutex by default**. +Most sysfs reads do not require a cross-process mutex, and writes to sysfs should already be protected by the kernel. +Users can still switch back to the old cross-process mutex behavior by setting the environment variable `AMDSMI_MUTEX_CROSS_PROCESS=1`. 
+ +- **Changed `amdsmi_vram_vendor_type_t` enum names impacting `amdsmi_vram_info_t` structure**. This also change impacts usage of the vram_vendor output of `amdsmi_get_gpu_vram_info()` - **Changed `amdsmi_nps_caps_t` struct impacting `amdsmi_memory_partition_config_t`, `amdsmi_accelerator_partition_t`, `amdsmi_accelerator_partition_profile_config_t`**. diff --git a/projects/amdsmi/third_party/shared_mutex/shared_mutex.cc b/projects/amdsmi/third_party/shared_mutex/shared_mutex.cc index 38553850cd..808a7e4eda 100644 --- a/projects/amdsmi/third_party/shared_mutex/shared_mutex.cc +++ b/projects/amdsmi/third_party/shared_mutex/shared_mutex.cc @@ -44,7 +44,8 @@ THE SOFTWARE. #include "rocm_smi/rocm_smi_exception.h" #include "rocm_smi/rocm_smi_main.h" -#define THREAD_ONLY_ENV_VAR "RSMI_MUTEX_THREAD_ONLY" +// Default to thread only mutex unless export AMDSMI_MUTEX_CROSS_PROCESS=1 +#define PROCESS_CROSS_PROCESS_ENV_VAR "AMDSMI_MUTEX_CROSS_PROCESS" #define MUTEX_TIME_OUT_ENV_VAR "RSMI_MUTEX_TIMEOUT" #define DEFAULT_MUTEX_TIMEOUT_SECONDS 5 @@ -141,7 +142,7 @@ shared_mutex_t shared_mutex_init(const char *name, mode_t mode, bool retried) { amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); - if (GetEnvVarUInteger(THREAD_ONLY_ENV_VAR) == 1 || smi.is_thread_only_mutex()) { + if (GetEnvVarUInteger(PROCESS_CROSS_PROCESS_ENV_VAR) != 1 || smi.is_thread_only_mutex()) { return init_thread_safe_only(name); } @@ -296,7 +297,7 @@ shared_mutex_t shared_mutex_init(const char *name, mode_t mode, bool retried) { int shared_mutex_close(shared_mutex_t mutex) { amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); - const bool is_thread_only = GetEnvVarUInteger(THREAD_ONLY_ENV_VAR) == 1 || + const bool is_thread_only = GetEnvVarUInteger(PROCESS_CROSS_PROCESS_ENV_VAR) != 1 || smi.is_thread_only_mutex(); if (is_thread_only) { delete mutex.ptr; @@ -317,7 +318,7 @@ int shared_mutex_close(shared_mutex_t mutex) { int shared_mutex_destroy(shared_mutex_t mutex) { amd::smi::RocmSMI& smi = 
amd::smi::RocmSMI::getInstance(); - const bool is_thread_only = GetEnvVarUInteger(THREAD_ONLY_ENV_VAR) == 1 || + const bool is_thread_only = GetEnvVarUInteger(PROCESS_CROSS_PROCESS_ENV_VAR) != 1 || smi.is_thread_only_mutex(); if ((errno = pthread_mutex_destroy(mutex.ptr))) { perror("pthread_mutex_destroy");