Merge amd-staging into amd-master 20240411
Signed-off-by: guanyu12 <guanyu12@amd.com> Change-Id: I25ed71cca91a0d78110a995861cff93ba748e056
Этот коммит содержится в:
@@ -9,4 +9,10 @@ updates:
|
||||
directory: "/docs/sphinx" # Location of package manifests
|
||||
open-pull-requests-limit: 10
|
||||
schedule:
|
||||
interval: "daily"
|
||||
interval: "monthly"
|
||||
labels:
|
||||
- "documentation"
|
||||
- "dependencies"
|
||||
- "ci:docs-only"
|
||||
reviewers:
|
||||
- "samjwu"
|
||||
|
||||
@@ -79,7 +79,7 @@ endif()
|
||||
## Compiler flags
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} -Wall -Wextra -fno-rtti")
|
||||
if (${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "x86_64")
|
||||
if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64")
|
||||
set(CMAKE_CXX_FLAGS
|
||||
"${CMAKE_CXX_FLAGS} -m64 -msse -msse2")
|
||||
endif()
|
||||
|
||||
@@ -1290,7 +1290,7 @@ rsmi_status_t rsmi_dev_revision_get(uint32_t dv_ind, uint16_t *revision);
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
*
|
||||
*/
|
||||
rsmi_status_t rsmi_dev_sku_get(uint32_t dv_ind, char *sku);
|
||||
rsmi_status_t rsmi_dev_sku_get(uint32_t dv_ind, uint16_t *sku);
|
||||
|
||||
/**
|
||||
* @brief Get the device vendor id associated with the device with provided
|
||||
|
||||
@@ -175,13 +175,13 @@ Set options:
|
||||
--rasenable BLOCK ERRTYPE Enable RAS for specified block and error type
|
||||
--rasdisable BLOCK ERRTYPE Disable RAS for specified block and error type
|
||||
--rasinject BLOCK Inject RAS poison for specified block (ONLY WORKS ON
|
||||
UNSECURE BOARDS)
|
||||
UNSECURED BOARDS)
|
||||
|
||||
Reset options:
|
||||
-r, --resetclocks Reset clocks and OverDrive to default
|
||||
--resetfans Reset fans to automatic (driver) control
|
||||
--resetprofile Reset Power Profile back to default
|
||||
--resetpoweroverdrive Set the maximum GPU power back to the device deafult
|
||||
--resetpoweroverdrive Set the maximum GPU power back to the device default
|
||||
state
|
||||
--resetxgmierr Reset XGMI error count
|
||||
--resetperfdeterminism Disable performance determinism
|
||||
|
||||
@@ -418,9 +418,45 @@ def getMaxPower(device, silent=False):
|
||||
power_cap = c_uint64()
|
||||
ret = rocmsmi.rsmi_dev_power_cap_get(device, 0, byref(power_cap))
|
||||
if rsmi_ret_ok(ret, device, 'get_power_cap', silent):
|
||||
return power_cap.value / 1000000
|
||||
# take floor of result (round down to nearest integer)
|
||||
return float(power_cap.value / 1000000) // 1
|
||||
return -1
|
||||
|
||||
def getAllocatedMemoryPercent(device):
|
||||
""" Return dictionary of allocated memory (VRAM) of a given device
|
||||
Response of allocated_memory_vram dictionary:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
{
|
||||
'value': float allocated vram memory (floor of %) or 'N/A' (for rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED),
|
||||
'unit': '%',
|
||||
'combined': string (e.g. '30%') or 'N/A' (for rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED),
|
||||
'ret': rsmi_status_t.RSMI_STATUS_SUCCESS or rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED
|
||||
}
|
||||
|
||||
:param device: DRM device identifier
|
||||
"""
|
||||
allocated_memory_vram = {
|
||||
'value': "N/A",
|
||||
'unit': '%',
|
||||
'combined': "N/A",
|
||||
'ret': rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED
|
||||
}
|
||||
vram_used, vram_total = getMemInfo(device, 'vram', silent=True)
|
||||
mem_use_pct = 0
|
||||
if vram_used is None:
|
||||
return allocated_memory_vram
|
||||
if vram_used != None and vram_total != None and float(vram_total) != 0:
|
||||
# take floor of result (round down to nearest integer)
|
||||
mem_use_pct = (100 * (float(vram_used) / float(vram_total))) // 1
|
||||
allocated_memory_vram['value'] = mem_use_pct
|
||||
mem_use_pct = '{:<.0f}%'.format(mem_use_pct) # left aligned
|
||||
# values with no precision
|
||||
allocated_memory_vram['combined'] = mem_use_pct
|
||||
allocated_memory_vram['ret'] = rsmi_status_t.RSMI_STATUS_SUCCESS
|
||||
return allocated_memory_vram
|
||||
|
||||
|
||||
def getMemInfo(device, memType, silent=False):
|
||||
""" Returns a tuple of (memory_used, memory_total) of
|
||||
@@ -1958,14 +1994,7 @@ def showAllConcise(deviceList):
|
||||
gpu_busy = str(getGpuUse(device, silent)) + '%'
|
||||
else:
|
||||
gpu_busy = 'Unsupported'
|
||||
vram_used, vram_total = getMemInfo(device, 'vram', silent)
|
||||
mem_use_pct = 0
|
||||
if vram_used is None:
|
||||
mem_use_pct='Unsupported'
|
||||
if vram_used != None and vram_total != None and float(vram_total) != 0:
|
||||
mem_use_pct = round(float(100 * (float(vram_used) / float(vram_total))))
|
||||
mem_use_pct = '{:<.0f}%'.format(mem_use_pct) # left aligned
|
||||
# values with no precision
|
||||
allocated_mem_percent = getAllocatedMemoryPercent(device)
|
||||
|
||||
# Top Row - per device data
|
||||
values['card%s' % (str(device))] = [device, getNodeId(device),
|
||||
@@ -1975,7 +2004,7 @@ def showAllConcise(deviceList):
|
||||
combined_partition_data,
|
||||
sclk, mclk, fan, str(perf).lower(),
|
||||
str(pwrCap),
|
||||
str(mem_use_pct),
|
||||
allocated_mem_percent['combined'],
|
||||
str(gpu_busy)]
|
||||
|
||||
val_widths = {}
|
||||
@@ -2476,9 +2505,12 @@ def showMemUse(deviceList):
|
||||
avgMemBandwidth = c_uint16()
|
||||
printLogSpacer(' Current Memory Use ')
|
||||
for device in deviceList:
|
||||
allocated_mem_percent = getAllocatedMemoryPercent(device)
|
||||
printLog(device, 'GPU Memory Allocated (VRAM%)',
|
||||
int(allocated_mem_percent['value']))
|
||||
ret = rocmsmi.rsmi_dev_memory_busy_percent_get(device, byref(memoryUse))
|
||||
if rsmi_ret_ok(ret, device, '% memory use'):
|
||||
printLog(device, 'GPU memory use (%)', memoryUse.value)
|
||||
printLog(device, 'GPU Memory Read/Write Activity (%)', memoryUse.value)
|
||||
util_counters = getCoarseGrainUtil(device, "Memory Activity")
|
||||
if util_counters != -1:
|
||||
for ut_counter in util_counters:
|
||||
@@ -2981,7 +3013,7 @@ def showEvents(deviceList, eventTypes):
|
||||
if len(eventTypeList) == 0:
|
||||
eventTypeList = notification_type_names
|
||||
print2DArray([['DEVICE\t', 'TIME\t', 'TYPE\t', 'DESCRIPTION']])
|
||||
# Create a seperate thread for each GPU
|
||||
# Create a separate thread for each GPU
|
||||
for device in deviceList:
|
||||
try:
|
||||
_thread.start_new_thread(printEventList, (device, 1000, eventTypeList))
|
||||
@@ -3682,7 +3714,7 @@ def rsmi_ret_ok(my_ret, device=None, metric=None, silent=False):
|
||||
:param device: DRM device identifier
|
||||
:param my_ret: Return of RSMI call (rocm_smi_lib API)
|
||||
:param metric: Parameter of GPU currently being analyzed
|
||||
:param silent: Echo verbose error reponse.
|
||||
:param silent: Echo verbose error response.
|
||||
True silences err output, False does not silence err output (default).
|
||||
"""
|
||||
global RETCODE
|
||||
@@ -3868,7 +3900,7 @@ if __name__ == '__main__':
|
||||
groupActionReset.add_argument('--resetfans', help='Reset fans to automatic (driver) control', action='store_true')
|
||||
groupActionReset.add_argument('--resetprofile', help='Reset Power Profile back to default', action='store_true')
|
||||
groupActionReset.add_argument('--resetpoweroverdrive',
|
||||
help='Set the maximum GPU power back to the device deafult state',
|
||||
help='Set the maximum GPU power back to the device default state',
|
||||
action='store_true')
|
||||
groupActionReset.add_argument('--resetxgmierr', help='Reset XGMI error count', action='store_true')
|
||||
groupActionReset.add_argument('--resetperfdeterminism', help='Disable performance determinism', action='store_true')
|
||||
@@ -3920,7 +3952,7 @@ if __name__ == '__main__':
|
||||
groupAction.add_argument('--rasdisable', help='Disable RAS for specified block and error type', type=str, nargs=2,
|
||||
metavar=('BLOCK', 'ERRTYPE'))
|
||||
groupAction.add_argument('--rasinject',
|
||||
help='Inject RAS poison for specified block (ONLY WORKS ON UNSECURE BOARDS)', type=str,
|
||||
help='Inject RAS poison for specified block (ONLY WORKS ON UNSECURED BOARDS)', type=str,
|
||||
metavar='BLOCK', nargs=1)
|
||||
groupActionGpuReset.add_argument('--gpureset', help='Reset specified GPU (One GPU must be specified)',
|
||||
action='store_true')
|
||||
|
||||
@@ -67,11 +67,11 @@ rsmi_status_verbose_err_out = {
|
||||
rsmi_status_t.RSMI_STATUS_OUT_OF_RESOURCES: 'Unable to acquire memory or other resource',
|
||||
rsmi_status_t.RSMI_STATUS_INTERNAL_EXCEPTION: 'An internal exception was caught',
|
||||
rsmi_status_t.RSMI_STATUS_INPUT_OUT_OF_BOUNDS: 'Provided input is out of allowable or safe range',
|
||||
rsmi_status_t.RSMI_INITIALIZATION_ERROR: 'Error occured during rsmi initialization',
|
||||
rsmi_status_t.RSMI_INITIALIZATION_ERROR: 'Error occurred during rsmi initialization',
|
||||
rsmi_status_t.RSMI_STATUS_NOT_YET_IMPLEMENTED: 'Requested function is not implemented on this setup',
|
||||
rsmi_status_t.RSMI_STATUS_NOT_FOUND: 'Item searched for but not found',
|
||||
rsmi_status_t.RSMI_STATUS_INSUFFICIENT_SIZE: 'Insufficient resources available',
|
||||
rsmi_status_t.RSMI_STATUS_INTERRUPT: 'Interrupt occured during execution',
|
||||
rsmi_status_t.RSMI_STATUS_INTERRUPT: 'Interrupt occurred during execution',
|
||||
rsmi_status_t.RSMI_STATUS_UNEXPECTED_SIZE: 'Unexpected amount of data read',
|
||||
rsmi_status_t.RSMI_STATUS_NO_DATA: 'No data found for the given input',
|
||||
rsmi_status_t.RSMI_STATUS_UNEXPECTED_DATA: 'Unexpected data received',
|
||||
@@ -639,4 +639,4 @@ rsmi_power_type_dict = {
|
||||
0: 'AVERAGE',
|
||||
1: 'CURRENT SOCKET',
|
||||
0xFFFFFFFF: 'INVALID_POWER_TYPE'
|
||||
}
|
||||
}
|
||||
|
||||
@@ -99,11 +99,11 @@ rsmi_status_verbose_err_out = {
|
||||
rsmi_status_t.RSMI_STATUS_OUT_OF_RESOURCES: 'Unable to acquire memory or other resource',
|
||||
rsmi_status_t.RSMI_STATUS_INTERNAL_EXCEPTION: 'An internal exception was caught',
|
||||
rsmi_status_t.RSMI_STATUS_INPUT_OUT_OF_BOUNDS: 'Provided input is out of allowable or safe range',
|
||||
rsmi_status_t.RSMI_INITIALIZATION_ERROR: 'Error occured during rsmi initialization',
|
||||
rsmi_status_t.RSMI_INITIALIZATION_ERROR: 'Error occurred during rsmi initialization',
|
||||
rsmi_status_t.RSMI_STATUS_NOT_YET_IMPLEMENTED: 'Requested function is not implemented on this setup',
|
||||
rsmi_status_t.RSMI_STATUS_NOT_FOUND: 'Item searched for but not found',
|
||||
rsmi_status_t.RSMI_STATUS_INSUFFICIENT_SIZE: 'Insufficient resources available',
|
||||
rsmi_status_t.RSMI_STATUS_INTERRUPT: 'Interrupt occured during execution',
|
||||
rsmi_status_t.RSMI_STATUS_INTERRUPT: 'Interrupt occurred during execution',
|
||||
rsmi_status_t.RSMI_STATUS_UNEXPECTED_SIZE: 'Unexpected amount of data read',
|
||||
rsmi_status_t.RSMI_STATUS_NO_DATA: 'No data found for the given input',
|
||||
rsmi_status_t.RSMI_STATUS_UNEXPECTED_DATA: 'Unexpected data received',
|
||||
@@ -671,4 +671,4 @@ rsmi_power_type_dict = {
|
||||
0: 'AVERAGE',
|
||||
1: 'CURRENT SOCKET',
|
||||
0xFFFFFFFF: 'INVALID_POWER_TYPE'
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,19 +43,16 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <chrono>
|
||||
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi_test/functional/measure_api_execution_time.h"
|
||||
#include "rocm_smi_test/test_common.h"
|
||||
#include "rocm_smi_test/test_utils.h"
|
||||
|
||||
|
||||
TestMeasureApiExecutionTime::TestMeasureApiExecutionTime() : TestBase() {
|
||||
@@ -92,6 +89,8 @@ void TestMeasureApiExecutionTime::Run(void) {
|
||||
rsmi_temperature_metric_t met = RSMI_TEMP_CURRENT;
|
||||
rsmi_status_t ret;
|
||||
float repeat = 300.0;
|
||||
constexpr uint32_t kFAN_SPEED_ELAPSED_MS_BASE = (1000);
|
||||
constexpr uint32_t kMETRICS_ELAPSED_MS_BASE = (1500);
|
||||
bool skip = false;
|
||||
|
||||
TestBase::Run();
|
||||
@@ -102,13 +101,15 @@ void TestMeasureApiExecutionTime::Run(void) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto test_start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
auto prev = std::cout.precision(3);
|
||||
for (uint32_t dv_ind = 0; dv_ind < num_monitor_devs(); ++dv_ind) {
|
||||
PrintDeviceHeader(dv_ind);
|
||||
|
||||
//test execution time for rsmi_dev_fan_speed_get
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
for (int i=0; i < repeat; ++i){
|
||||
for (int i=0; i < static_cast<int>(repeat); ++i){
|
||||
ret = rsmi_dev_fan_speed_get(dv_ind, 0, &val_i64);
|
||||
|
||||
}
|
||||
@@ -123,14 +124,14 @@ void TestMeasureApiExecutionTime::Run(void) {
|
||||
|
||||
if (!skip) {
|
||||
std::cout << "\trsmi_dev_fan_speed_get execution time: " <<
|
||||
(float(duration.count()) / repeat) << " microseconds" << std::endl;
|
||||
EXPECT_LT(duration.count(), 1000 * repeat);
|
||||
(static_cast<float>(duration.count()) / repeat) << " microseconds" << std::endl;
|
||||
EXPECT_LT(duration.count(), (kFAN_SPEED_ELAPSED_MS_BASE * repeat));
|
||||
}
|
||||
skip = false;
|
||||
|
||||
//test execution time for rsmi_dev_temp_metric_get
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
for (int i=0; i < repeat; ++i){
|
||||
for (int i=0; i < static_cast<int>(repeat); ++i){
|
||||
ret = rsmi_dev_temp_metric_get(dv_ind, 0, met, &val_i64);
|
||||
}
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
@@ -142,14 +143,14 @@ void TestMeasureApiExecutionTime::Run(void) {
|
||||
}
|
||||
if (!skip) {
|
||||
std::cout << "\trsmi_dev_temp_metric_get execution time: " <<
|
||||
(float(duration.count()) / repeat ) << " microseconds" << std::endl;
|
||||
EXPECT_LT(duration.count(), 500 * repeat);
|
||||
(static_cast<float>(duration.count()) / repeat) << " microseconds" << std::endl;
|
||||
EXPECT_LT(duration.count(), (kMETRICS_ELAPSED_MS_BASE * repeat));
|
||||
}
|
||||
skip = false;
|
||||
|
||||
//test execution time for rsmi_dev_gpu_metrics_info_get
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
for (int i=0; i < repeat; ++i){
|
||||
for (int i=0; i < static_cast<int>(repeat); ++i){
|
||||
ret = rsmi_dev_gpu_metrics_info_get(dv_ind, &smu);
|
||||
}
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
@@ -161,42 +162,36 @@ void TestMeasureApiExecutionTime::Run(void) {
|
||||
}
|
||||
if (!skip) {
|
||||
std::cout << "\trsmi_dev_gpu_metrics_info_get execution time: " <<
|
||||
(float(duration.count()) / repeat ) << " microseconds" << std::endl;
|
||||
EXPECT_LT(duration.count(), 500 * repeat);
|
||||
(static_cast<float>(duration.count()) / repeat ) << " microseconds" << std::endl;
|
||||
EXPECT_LT(duration.count(), (kMETRICS_ELAPSED_MS_BASE * repeat));
|
||||
}
|
||||
skip = false;
|
||||
std::cout << "----------------------------------------------------------------------------" << std::endl;
|
||||
|
||||
auto val_ui16 = uint16_t(0);
|
||||
auto val_ui16 = static_cast<uint16_t>(0);
|
||||
auto status_code(rsmi_status_t::RSMI_STATUS_SUCCESS);
|
||||
auto start_api = std::chrono::high_resolution_clock::now();
|
||||
for (int i=0; i < repeat; ++i) {
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
for (int i=0; i < static_cast<int>(repeat); ++i){
|
||||
status_code = rsmi_dev_metrics_xcd_counter_get(dv_ind, &val_ui16);
|
||||
}
|
||||
auto stop_api = std::chrono::high_resolution_clock::now();
|
||||
auto duration_api = std::chrono::duration_cast<std::chrono::microseconds>(stop_api - start_api);
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
|
||||
if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){
|
||||
skip = true;
|
||||
}
|
||||
if (!skip) {
|
||||
std::cout << "\rsmi_dev_metrics_xcd_counter_get() execution time: "
|
||||
<< (float(duration_api.count()) / repeat) << " microseconds" << std::endl;
|
||||
EXPECT_LT(duration_api.count(), 500 * repeat);
|
||||
std::cout << "\trsmi_dev_metrics_xcd_counter_get() execution time: "
|
||||
<< (static_cast<float>(duration.count()) / repeat) << " microseconds" << std::endl;
|
||||
EXPECT_LT(duration.count(), (kMETRICS_ELAPSED_MS_BASE * repeat));
|
||||
}
|
||||
skip = false;
|
||||
std::cout << "----------------------------------------------------------------------------" << std::endl;
|
||||
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
|
||||
if (!skip) {
|
||||
std::cout << "\rTotal execution time (All APIs): "
|
||||
<< (float(duration_api.count()) / repeat) << " microseconds" << std::endl;
|
||||
EXPECT_LT(duration_api.count(), (500 * repeat));
|
||||
}
|
||||
skip = false;
|
||||
std::cout << "============================================================================" << std::endl;
|
||||
|
||||
}
|
||||
std::cout.precision(prev);
|
||||
|
||||
std::cout.precision(prev);
|
||||
auto test_stop = std::chrono::high_resolution_clock::now();
|
||||
auto test_duration = std::chrono::duration_cast<std::chrono::microseconds>(test_stop - test_start);
|
||||
|
||||
std::cout << "\n" << "============================================================================" << "\n";
|
||||
std::cout << " Total execution time (All APIs): "
|
||||
<< (static_cast<float>(test_duration.count()) / repeat) << " microseconds" << "\n";
|
||||
std::cout << "============================================================================" << "\n";
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user