Remove duplicate temperature functions

amdsmi_dev_get_temp_metric() already covers both of these functions:
amdsmi_get_temperature_measure(), by using AMDSMI_TEMP_CURRENT
and
amdsmi_get_temperature_limit(), by using AMDSMI_TEMP_CRITICAL
Remove those two functions.

It also merges amdsmi_get_power_limit() into
amdsmi_get_power_measure(): the power limit is now returned in the
new power_limit field of amdsmi_power_measure_t.

Change-Id: I40d4afeb2ec0ac7b64832729f36adfaae120c990
Этот коммит содержится в:
Bill(Shuzhou) Liu
2023-01-10 08:48:59 -06:00
родитель e217fff82c
Коммит ec48312c61
8 изменённых файлов: 42 добавлений и 451 удалений
+19 -23
Просмотреть файл
@@ -315,6 +315,7 @@ int main() {
power_measure.average_socket_power);
printf("\tEnergy accumulator: %d\n\n",
power_measure.energy_accumulator);
printf("\tGPU Power limit: %d\n\n", power_measure.power_limit);
// Get driver version
char version[AMDSMI_MAX_DRIVER_VERSION_LENGTH];
@@ -356,13 +357,6 @@ int main() {
printf(" %s: %d\n", ucode_name, fw_information.fw_info_list[j].fw_version);
}
// Get GPU power limit info
amdsmi_power_limit_t power_limit = {};
ret = amdsmi_get_power_limit(device_handles[j], &power_limit);
CHK_AMDSMI_RET(ret)
printf(" Output of amdsmi_get_power_limit:\n");
printf("\tGPU Power limit: %d\n\n", power_limit.limit);
// Get GFX clock measurements
amdsmi_clk_measure_t gfx_clk_values = {};
ret = amdsmi_get_clock_measure(device_handles[j], CLK_TYPE_GFX,
@@ -399,42 +393,44 @@ int main() {
printf("\tPCIe max speed: %d\n\n", pcie_caps_info.pcie_speed);
// Get VRAM temperature limit
amdsmi_temperature_limit_t mem_temp_limit = {};
ret = amdsmi_get_temperature_limit(
device_handles[j], TEMPERATURE_TYPE_VRAM, &mem_temp_limit);
int64_t temperature = 0;
ret = amdsmi_dev_get_temp_metric(
device_handles[j], TEMPERATURE_TYPE_VRAM,
AMDSMI_TEMP_CRITICAL, &temperature);
CHK_AMDSMI_RET(ret)
printf(" Output of amdsmi_get_temperature_limit:\n");
printf("\tGPU VRAM temp limit: %d\n", mem_temp_limit.limit);
printf(" Output of amdsmi_dev_get_temp_metric:\n");
printf("\tGPU VRAM temp limit: %d\n", temperature);
// Get GFX temperature limit
amdsmi_temperature_limit_t gfx_temp_limit = {};
ret = amdsmi_get_temperature_limit(
device_handles[j], TEMPERATURE_TYPE_EDGE, &gfx_temp_limit);
ret = amdsmi_dev_get_temp_metric(
device_handles[j], TEMPERATURE_TYPE_EDGE,
AMDSMI_TEMP_CRITICAL, &temperature);
CHK_AMDSMI_RET(ret)
printf("\tGPU GFX temp limit: %d\n\n", gfx_temp_limit.limit);
printf("\tGPU GFX temp limit: %d\n\n", temperature);
// Get temperature measurements
// amdsmi_temperature_t edge_temp, junction_temp, vram_temp,
// plx_temp;
amdsmi_temperature_t temp_measurements[4];
int64_t temp_measurements[4];
amdsmi_temperature_type_t temp_types[4] = {
TEMPERATURE_TYPE_EDGE, TEMPERATURE_TYPE_JUNCTION,
TEMPERATURE_TYPE_VRAM, TEMPERATURE_TYPE_PLX};
for (const auto &temp_type : temp_types) {
ret = amdsmi_get_temperature_measure(
ret = amdsmi_dev_get_temp_metric(
device_handles[j], temp_type,
AMDSMI_TEMP_CURRENT,
&temp_measurements[(int)(temp_type)]);
CHK_AMDSMI_RET(ret)
}
printf(" Output of amdsmi_get_temperature_measure:\n");
printf(" Output of amdsmi_dev_get_temp_metric:\n");
printf("\tGPU Edge temp measurement: %d\n",
temp_measurements[TEMPERATURE_TYPE_EDGE].cur_temp);
temp_measurements[TEMPERATURE_TYPE_EDGE]);
printf("\tGPU Junction temp measurement: %d\n",
temp_measurements[TEMPERATURE_TYPE_JUNCTION].cur_temp);
temp_measurements[TEMPERATURE_TYPE_JUNCTION]);
printf("\tGPU VRAM temp measurement: %d\n",
temp_measurements[TEMPERATURE_TYPE_VRAM].cur_temp);
temp_measurements[TEMPERATURE_TYPE_VRAM]);
printf("\tGPU PLX temp measurement: %d\n\n",
temp_measurements[TEMPERATURE_TYPE_PLX].cur_temp);
temp_measurements[TEMPERATURE_TYPE_PLX]);
// Get RAS features enabled
char block_names[14][10] = {"UMC", "SDMA", "GFX", "MMHUB",
+8 -7
Просмотреть файл
@@ -221,25 +221,26 @@ int main() {
.fw_version);
// Get temperature measurements
amdsmi_temperature_t temp_measurements[4];
int64_t temp_measurements[4];
amdsmi_temperature_type_t temp_types[4] = {
TEMPERATURE_TYPE_EDGE, TEMPERATURE_TYPE_JUNCTION,
TEMPERATURE_TYPE_VRAM, TEMPERATURE_TYPE_PLX};
for (const auto &temp_type : temp_types) {
ret = amdsmi_get_temperature_measure(
ret = amdsmi_dev_get_temp_metric(
device_handles[j], temp_type,
AMDSMI_TEMP_CURRENT,
&temp_measurements[(int)(temp_type)]);
CHK_AMDSMI_RET(ret)
}
printf(" Output of amdsmi_get_temperature_measure:\n");
printf(" Output of amdsmi_dev_get_temp_metric:\n");
printf("\tGPU Edge temp measurement: %d\n",
temp_measurements[TEMPERATURE_TYPE_EDGE].cur_temp);
temp_measurements[TEMPERATURE_TYPE_EDGE]);
printf("\tGPU Junction temp measurement: %d\n",
temp_measurements[TEMPERATURE_TYPE_JUNCTION].cur_temp);
temp_measurements[TEMPERATURE_TYPE_JUNCTION]);
printf("\tGPU VRAM temp measurement: %d\n",
temp_measurements[TEMPERATURE_TYPE_VRAM].cur_temp);
temp_measurements[TEMPERATURE_TYPE_VRAM]);
printf("\tGPU PLX temp measurement: %d\n\n",
temp_measurements[TEMPERATURE_TYPE_PLX].cur_temp);
temp_measurements[TEMPERATURE_TYPE_PLX]);
// Get bad pages
char bad_page_status_names[3][15] = {"RESERVED", "PENDING",
+2 -62
Просмотреть файл
@@ -358,28 +358,14 @@ typedef struct amdsmi_board_info {
char manufacturer_name[AMDSMI_NORMAL_STRING_LENGTH];
} amdsmi_board_info_t;
typedef struct amdsmi_temperature {
uint32_t cur_temp;
uint32_t reserved[7];
} amdsmi_temperature_t;
typedef struct amdsmi_temperature_limit {
uint32_t limit;
uint32_t reserved[7];
} amdsmi_temperature_limit_t;
typedef struct amdsmi_power_limit {
uint32_t limit;
uint32_t reserved[7];
} amdsmi_power_limit_t;
typedef struct amdsmi_power_measure {
uint32_t average_socket_power;
uint64_t energy_accumulator; // v1 mod. (32->64)
uint32_t voltage_gfx; // GFX voltage measurement in mV
uint32_t voltage_soc; // SOC voltage measurement in mV
uint32_t voltage_mem; // MEM voltage measurement in mV
uint32_t reserved[10];
uint32_t power_limit; // The power limit;
uint32_t reserved[9];
} amdsmi_power_measure_t;
typedef struct amdsmi_clk_measure {
@@ -3812,52 +3798,6 @@ amdsmi_get_power_measure(amdsmi_device_handle device_handle, amdsmi_power_measur
amdsmi_status_t
amdsmi_get_clock_measure(amdsmi_device_handle device_handle, amdsmi_clk_type_t clk_type, amdsmi_clk_measure_t *info);
/**
* @brief Returns temperature measurements of the GPU.
* The results are in °C.
*
* @param[in] device_handle Device which to query
*
* @param[in] temp_type Enum representing the temperature type to query.
*
* @param[out] info Reference to the temperature measured.
* Must be allocated by user.
*
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
*/
amdsmi_status_t
amdsmi_get_temperature_measure(amdsmi_device_handle device_handle, amdsmi_temperature_type_t temp_type, amdsmi_temperature_t *info);
/**
* @brief Returns temperature limit of the GPU.
* The results are in °C.
*
* @param[in] device_handle Device which to query
*
* @param[in] temp_type Enum representing the temperature type to query.
*
* @param[out] limit Reference to the temperature limit.
* Must be allocated by user.
*
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
*/
amdsmi_status_t
amdsmi_get_temperature_limit(amdsmi_device_handle device_handle, amdsmi_temperature_type_t temp_type, amdsmi_temperature_limit_t *limit);
/**
* @brief Returns power limit of the GPU.
* The results are in W.
*
* @param[in] device_handle Device which to query
*
* @param[out] limit Reference to the power limit.
* Must be allocated by user.
*
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
*/
amdsmi_status_t
amdsmi_get_power_limit(amdsmi_device_handle device_handle, amdsmi_power_limit_t *power);
/**
* @brief Returns the VRAM usage (both total and used memory)
* in MegaBytes.
-118
Просмотреть файл
@@ -571,49 +571,6 @@ try:
except AmdSmiException as e:
print(e)
```
## amdsmi_get_temperature_measure
Description: Returns the measurements of temperatures for the given GPU
Input parameters:
* `device_handle` device which to query
* `temperature_type` one of `AmdSmiTemperatureType` enum values:
Field | Description
---|---
`EDGE` | edge temperature type
`JUNCTION` | junction temperature type
`VRAM` | vram temperature type
`HBM_0` | HBM_0 temperature type
`HBM_1` | HBM_1 temperature type
`HBM_2` | HBM_2 temperature type
`HBM_3` | HBM_3 temperature type
`PLX` | PLX temperature type
Output: Dictionary with fields
Field | Description
---|---
`cur_temp`| temperature value for the given temperature type
Exceptions that can be thrown by `amdsmi_get_temperature_measure` function:
* `AmdSmiLibraryException`
* `AmdSmiRetryException`
* `AmdSmiParameterException`
Example:
```python
try:
devices = amdsmi_get_device_handles()
if len(devices) == 0:
print("No GPUs on machine")
else:
for device in devices:
temperature_measure = amdsmi_get_temperature_measure(device, AmdSmiTemperatureType.EDGE)
print(temperature_measure['cur_temp'])
except AmdSmiException as e:
print(e)
```
## amdsmi_get_clock_measure
Description: Returns the clock measure for the given GPU
@@ -665,81 +622,6 @@ try:
except AmdSmiException as e:
print(e)
```
## amdsmi_get_power_limit
Description: Returns the power limit for the given GPU
Input parameters:
* `device_handle` device which to query
Output: Dictionary with fields
Field | Description
---|---
`limit`| power limit
Exceptions that can be thrown by `amdsmi_get_power_limit` function:
* `AmdSmiLibraryException`
* `AmdSmiRetryException`
* `AmdSmiParameterException`
Example:
```python
try:
devices = amdsmi_get_device_handles()
if len(devices) == 0:
print("No GPUs on machine")
else:
for device in devices:
power_limit = amdsmi_get_power_limit(device)
print(power_limit['limit'])
except AmdSmiException as e:
print(e)
```
## amdsmi_get_temperature_limit
Description: Returns the temperature limits for the given GPU
Input parameters:
* `device_handle` device which to query
* `temperature_type` one of `AmdSmiTemperatureType` enum values:
Field | Description
---|---
`EDGE` | edge temperature type
`JUNCTION` | junction temperature type
`VRAM` | vram temperature type
`HBM_0` | HBM_0 temperature type
`HBM_1` | HBM_1 temperature type
`HBM_2` | HBM_2 temperature type
`HBM_3` | HBM_3 temperature type
`PLX` | PLX temperature type
Output: Dictionary with fields
Field | Description
---|---
`limit`| temperature limit for the given thermal domain
Exceptions that can be thrown by `amdsmi_get_temperature_limit` function:
* `AmdSmiLibraryException`
* `AmdSmiRetryException`
* `AmdSmiParameterException`
Example:
```python
try:
devices = amdsmi_get_device_handles()
if len(devices) == 0:
print("No GPUs on machine")
else:
for device in devices:
temperature_limit = amdsmi_get_temperature_limit(device, AmdSmiTemperatureType.EDGE)
print(temperature_limit['limit'])
except AmdSmiException as e:
print(e)
```
## amdsmi_get_pcie_link_status
Description: Returns the pcie link status for the given GPU
-3
Просмотреть файл
@@ -50,12 +50,9 @@ from .amdsmi_interface import amdsmi_get_gpu_activity
from .amdsmi_interface import amdsmi_get_vram_usage
from .amdsmi_interface import amdsmi_get_power_measure
from .amdsmi_interface import amdsmi_get_clock_measure
from .amdsmi_interface import amdsmi_get_temperature_measure
from .amdsmi_interface import amdsmi_get_pcie_link_status
from .amdsmi_interface import amdsmi_get_pcie_link_caps
from .amdsmi_interface import amdsmi_get_power_limit
from .amdsmi_interface import amdsmi_get_temperature_limit
from .amdsmi_interface import amdsmi_get_bad_page_info
# # Power Management
-63
Просмотреть файл
@@ -734,69 +734,6 @@ def amdsmi_get_clock_measure(
}
def amdsmi_get_temperature_measure(
device_handle: amdsmi_wrapper.amdsmi_device_handle,
temperature_type: amdsmi_wrapper.amdsmi_temperature_type_t,
) -> Dict[str, Any]:
if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle):
raise AmdSmiParameterException(
device_handle, amdsmi_wrapper.amdsmi_device_handle
)
if not isinstance(temperature_type, AmdSmiTemperatureType):
raise AmdSmiParameterException(temperature_type, AmdSmiTemperatureType)
temperature_measure = amdsmi_wrapper.amdsmi_temperature_t()
_check_res(
amdsmi_wrapper.amdsmi_get_temperature_measure(
device_handle,
amdsmi_wrapper.amdsmi_temperature_type_t(temperature_type),
ctypes.byref(temperature_measure),
)
)
return {"cur_temp": temperature_measure.cur_temp}
def amdsmi_get_power_limit(
device_handle: amdsmi_wrapper.amdsmi_device_handle,
) -> Dict[str, Any]:
if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle):
raise AmdSmiParameterException(
device_handle, amdsmi_wrapper.amdsmi_device_handle
)
power_limit = amdsmi_wrapper.amdsmi_power_limit_t()
_check_res(
amdsmi_wrapper.amdsmi_get_power_limit(
device_handle, ctypes.byref(power_limit))
)
return {"limit": power_limit.limit}
def amdsmi_get_temperature_limit(
device_handle: amdsmi_wrapper.amdsmi_device_handle,
temperature_type: amdsmi_wrapper.amdsmi_temperature_type_t,
) -> Dict[str, Any]:
if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle):
raise AmdSmiParameterException(
device_handle, amdsmi_wrapper.amdsmi_device_handle
)
if not isinstance(temperature_type, AmdSmiTemperatureType):
raise AmdSmiParameterException(temperature_type, AmdSmiTemperatureType)
temperature_limit = amdsmi_wrapper.amdsmi_temperature_limit_t()
_check_res(
amdsmi_wrapper.amdsmi_get_temperature_limit(
device_handle,
amdsmi_wrapper.amdsmi_temperature_type_t(temperature_type),
ctypes.byref(temperature_limit),
)
)
return {"limit": temperature_limit.limit}
def amdsmi_get_bad_page_info(
device_handle: amdsmi_wrapper.amdsmi_device_handle,
) -> Union[list, str]:
+6 -43
Просмотреть файл
@@ -653,33 +653,6 @@ amdsmi_board_info_t = struct_amdsmi_board_info
class struct_amdsmi_temperature(Structure):
pass
struct_amdsmi_temperature._pack_ = 1 # source:False
struct_amdsmi_temperature._fields_ = [
('cur_temp', ctypes.c_uint32),
('reserved', ctypes.c_uint32 * 7),
]
amdsmi_temperature_t = struct_amdsmi_temperature
class struct_amdsmi_temperature_limit(Structure):
pass
struct_amdsmi_temperature_limit._pack_ = 1 # source:False
struct_amdsmi_temperature_limit._fields_ = [
('limit', ctypes.c_uint32),
('reserved', ctypes.c_uint32 * 7),
]
amdsmi_temperature_limit_t = struct_amdsmi_temperature_limit
class struct_amdsmi_power_limit(Structure):
pass
struct_amdsmi_power_limit._pack_ = 1 # source:False
struct_amdsmi_power_limit._fields_ = [
('limit', ctypes.c_uint32),
('reserved', ctypes.c_uint32 * 7),
]
amdsmi_power_limit_t = struct_amdsmi_power_limit
class struct_amdsmi_power_measure(Structure):
pass
@@ -691,7 +664,8 @@ struct_amdsmi_power_measure._fields_ = [
('voltage_gfx', ctypes.c_uint32),
('voltage_soc', ctypes.c_uint32),
('voltage_mem', ctypes.c_uint32),
('reserved', ctypes.c_uint32 * 10),
('power_limit', ctypes.c_uint32),
('reserved', ctypes.c_uint32 * 9),
('PADDING_1', ctypes.c_ubyte * 4),
]
@@ -1733,15 +1707,6 @@ amdsmi_get_power_measure.argtypes = [amdsmi_device_handle, ctypes.POINTER(struct
amdsmi_get_clock_measure = _libraries['libamd_smi.so'].amdsmi_get_clock_measure
amdsmi_get_clock_measure.restype = amdsmi_status_t
amdsmi_get_clock_measure.argtypes = [amdsmi_device_handle, amdsmi_clk_type_t, ctypes.POINTER(struct_amdsmi_clk_measure)]
amdsmi_get_temperature_measure = _libraries['libamd_smi.so'].amdsmi_get_temperature_measure
amdsmi_get_temperature_measure.restype = amdsmi_status_t
amdsmi_get_temperature_measure.argtypes = [amdsmi_device_handle, amdsmi_temperature_type_t, ctypes.POINTER(struct_amdsmi_temperature)]
amdsmi_get_temperature_limit = _libraries['libamd_smi.so'].amdsmi_get_temperature_limit
amdsmi_get_temperature_limit.restype = amdsmi_status_t
amdsmi_get_temperature_limit.argtypes = [amdsmi_device_handle, amdsmi_temperature_type_t, ctypes.POINTER(struct_amdsmi_temperature_limit)]
amdsmi_get_power_limit = _libraries['libamd_smi.so'].amdsmi_get_power_limit
amdsmi_get_power_limit.restype = amdsmi_status_t
amdsmi_get_power_limit.argtypes = [amdsmi_device_handle, ctypes.POINTER(struct_amdsmi_power_limit)]
amdsmi_get_vram_usage = _libraries['libamd_smi.so'].amdsmi_get_vram_usage
amdsmi_get_vram_usage.restype = amdsmi_status_t
amdsmi_get_vram_usage.argtypes = [amdsmi_device_handle, ctypes.POINTER(struct_amdsmi_vram_info)]
@@ -1949,12 +1914,11 @@ __all__ = \
'amdsmi_get_fw_info', 'amdsmi_get_gpu_activity',
'amdsmi_get_minmax_bandwidth', 'amdsmi_get_pcie_link_caps',
'amdsmi_get_pcie_link_status', 'amdsmi_get_power_cap_info',
'amdsmi_get_power_limit', 'amdsmi_get_power_measure',
'amdsmi_get_power_measure',
'amdsmi_get_process_info', 'amdsmi_get_process_list',
'amdsmi_get_ras_block_features_enabled',
'amdsmi_get_socket_handles', 'amdsmi_get_socket_info',
'amdsmi_get_target_frequency_range',
'amdsmi_get_temperature_limit', 'amdsmi_get_temperature_measure',
'amdsmi_get_utilization_count', 'amdsmi_get_vbios_info',
'amdsmi_get_version', 'amdsmi_get_version_str',
'amdsmi_get_vram_usage', 'amdsmi_get_xgmi_info',
@@ -1972,7 +1936,7 @@ __all__ = \
'amdsmi_od_volt_curve_t', 'amdsmi_od_volt_freq_data',
'amdsmi_od_volt_freq_data_t', 'amdsmi_pcie_bandwidth',
'amdsmi_pcie_bandwidth_t', 'amdsmi_pcie_info_t',
'amdsmi_power_cap_info_t', 'amdsmi_power_limit_t',
'amdsmi_power_cap_info_t',
'amdsmi_power_measure_t', 'amdsmi_power_profile_preset_masks',
'amdsmi_power_profile_preset_masks__enumvalues',
'amdsmi_power_profile_preset_masks_t',
@@ -1986,11 +1950,10 @@ __all__ = \
'amdsmi_set_perf_determinism_mode', 'amdsmi_shut_down',
'amdsmi_socket_handle', 'amdsmi_status_string', 'amdsmi_status_t',
'amdsmi_stop_event_notification', 'amdsmi_sw_component_t',
'amdsmi_sw_component_t__enumvalues', 'amdsmi_temperature_limit_t',
'amdsmi_temperature_metric',
'amdsmi_temperature_metric__enumvalues',
'amdsmi_temperature_metric_t',
'amdsmi_temperature_metric_t__enumvalues', 'amdsmi_temperature_t',
'amdsmi_temperature_metric_t__enumvalues',
'amdsmi_temperature_type', 'amdsmi_temperature_type_t',
'amdsmi_temperature_type_t__enumvalues',
'amdsmi_topo_get_link_type', 'amdsmi_topo_get_link_weight',
@@ -2025,7 +1988,7 @@ __all__ = \
'struct_amdsmi_power_limit', 'struct_amdsmi_power_measure',
'struct_amdsmi_process_info', 'struct_amdsmi_process_info_0',
'struct_amdsmi_process_info_1', 'struct_amdsmi_temperature',
'struct_amdsmi_temperature_limit', 'struct_amdsmi_vbios_info',
'struct_amdsmi_vbios_info',
'struct_amdsmi_vram_info', 'struct_amdsmi_xgmi_info',
'struct_c__SA_amdsmi_counter_value_t',
'struct_c__SA_amdsmi_error_count_t',
+7 -132
Просмотреть файл
@@ -1467,29 +1467,6 @@ amdsmi_get_gpu_activity(amdsmi_device_handle device_handle, amdsmi_engine_usage_
return AMDSMI_STATUS_SUCCESS;
}
amdsmi_status_t
amdsmi_get_power_limit(amdsmi_device_handle device_handle, amdsmi_power_limit_t *power) {
AMDSMI_CHECK_INIT();
if (power == nullptr) {
return AMDSMI_STATUS_INVAL;
}
amd::smi::AMDSmiGPUDevice* gpu_device = nullptr;
amdsmi_status_t r = get_gpu_device_from_handle(device_handle, &gpu_device);
if (r != AMDSMI_STATUS_SUCCESS)
return r;
amdsmi_status_t status;
int power_limit;
status = smi_amdgpu_get_power_cap(gpu_device, &power_limit);
if (status != AMDSMI_STATUS_SUCCESS) {
return status;
}
power->limit = (uint16_t)(power_limit);
return AMDSMI_STATUS_SUCCESS;
}
amdsmi_status_t
amdsmi_get_clock_measure(amdsmi_device_handle device_handle, amdsmi_clk_type_t clk_type, amdsmi_clk_measure_t *info) {
AMDSMI_CHECK_INIT();
@@ -1545,115 +1522,6 @@ amdsmi_get_clock_measure(amdsmi_device_handle device_handle, amdsmi_clk_type_t c
return AMDSMI_STATUS_SUCCESS;
}
amdsmi_status_t
amdsmi_get_temperature_limit(amdsmi_device_handle device_handle, amdsmi_temperature_type_t temp_type, amdsmi_temperature_limit_t *temp) {
AMDSMI_CHECK_INIT();
if (temp == nullptr || temp_type > TEMPERATURE_TYPE__MAX) {
return AMDSMI_STATUS_INVAL;
}
amd::smi::AMDSmiGPUDevice* gpu_device = nullptr;
amdsmi_status_t r = get_gpu_device_from_handle(device_handle, &gpu_device);
if (r != AMDSMI_STATUS_SUCCESS)
return r;
amdsmi_status_t status;
std::string name;
std::string path;
switch (temp_type) {
case TEMPERATURE_TYPE_EDGE:
name = "edge";
break;
case TEMPERATURE_TYPE_JUNCTION:
name = "junction";
break;
case TEMPERATURE_TYPE_VRAM:
name = "mem";
break;
default:
return AMDSMI_STATUS_INVAL;
}
status = smi_amdgpu_find_hwmon_dir(gpu_device, &path);
if (status != AMDSMI_STATUS_SUCCESS) {
return status;
}
SMIGPUDEVICE_MUTEX(gpu_device->get_mutex())
for (int count = 1; ; count++) {
std::string local_path = path + "/temp" +
std::to_string(count);
std::string local_temp = local_path + "_label";
char f_name[10];
std::ifstream file(local_temp.c_str(), std::ifstream::in);
if (!file.is_open()) {
printf("Failed to open file: %s \n", local_temp.c_str());
return AMDSMI_STATUS_API_FAILED;
}
file.getline(f_name, 10);
if (!strstr(name.c_str(), f_name)) {
int readTemp = 0;
local_temp = local_path + "_crit";
std::ifstream file2(local_temp.c_str(), std::ifstream::in);
if (!file2.is_open()) {
printf("Failed to open file: %s \n", local_temp.c_str());
return AMDSMI_STATUS_API_FAILED;
}
file2.getline(f_name, 10);
if (!sscanf(f_name, "%d", &readTemp)) {
return AMDSMI_STATUS_API_FAILED;
}
temp->limit = (uint16_t)(readTemp / 1000);
break;
}
file.close();
}
return AMDSMI_STATUS_SUCCESS;
}
amdsmi_status_t
amdsmi_get_temperature_measure(amdsmi_device_handle device_handle, amdsmi_temperature_type_t temp_type, amdsmi_temperature_t *info) {
AMDSMI_CHECK_INIT();
if (info == nullptr || temp_type > TEMPERATURE_TYPE__MAX) {
return AMDSMI_STATUS_INVAL;
}
amdsmi_gpu_metrics_t metrics;
amd::smi::AMDSmiGPUDevice* gpu_device = nullptr;
amdsmi_status_t r = get_gpu_device_from_handle(device_handle, &gpu_device);
if (r != AMDSMI_STATUS_SUCCESS)
return r;
amdsmi_status_t status;
status = amdsmi_dev_get_gpu_metrics_info(device_handle, &metrics);
if (status != AMDSMI_STATUS_SUCCESS) {
return status;
}
switch (temp_type) {
case TEMPERATURE_TYPE_EDGE:
info->cur_temp = metrics.temperature_edge;
break;
case TEMPERATURE_TYPE_JUNCTION:
info->cur_temp = metrics.temperature_hotspot;
break;
case TEMPERATURE_TYPE_VRAM:
info->cur_temp = metrics.temperature_mem;
break;
case TEMPERATURE_TYPE_PLX:
info->cur_temp = metrics.temperature_vrsoc;
break;
default:
return AMDSMI_STATUS_INVAL;
}
return AMDSMI_STATUS_SUCCESS;
}
amdsmi_status_t
amdsmi_get_ras_block_features_enabled(amdsmi_device_handle device_handle, amdsmi_gpu_block block, amdsmi_ras_err_state_t *state) {
AMDSMI_CHECK_INIT();
@@ -1833,6 +1701,13 @@ amdsmi_get_power_measure(amdsmi_device_handle device_handle, amdsmi_power_measur
return status;
}
int power_limit = 0;
status = smi_amdgpu_get_power_cap(gpu_device, &power_limit);
if (status != AMDSMI_STATUS_SUCCESS) {
return status;
}
info->power_limit = power_limit;
info->voltage_gfx = voltage_read;
info->average_socket_power = metrics.average_socket_power;