[SWDEV-493274/SWDEV-514998] Add AMD SMI partition tests + Add Guest amd-smi static --partition (#127)

* [SWDEV-493274/SWDEV-514998] Add AMD SMI partition tests + Add Guest amd-smi static --partition

Changes:
    - Added amd-smi static --partition for guest systems
    - Added C++ tests for memory and compute (accelerator) partitions
    - Added Python tests for amdsmi_get_gpu_vram_info(),
       amdsmi_get_gpu_accelerator_partition_profile_config()
    - Updated Python tests for
      amdsmi_get_gpu_accelerator_partition_profile()
      Now includes more profile and resource detail
    - Added amdsmi_get_gpu_xcd_counter();
      Tests provided for both C++/Python APIs
    - Added AmdSmiVramType & AmdSmiVramVendor: they were missing and
      the Python tests required them.

Change-Id: Ib6549d8ccc5fb68726f38745b87c78f890186022
Signed-off-by: Charis Poag <Charis.Poag@amd.com>

[ROCm/amdsmi commit: 48cb5529d2]
Этот коммит содержится в:
Poag, Charis
2025-03-11 16:38:46 -05:00
коммит произвёл GitHub
родитель cb56b5e193
Коммит 267fa91e8a
30 изменённых файлов: 3505 добавлений и 399 удалений
+11 -11
Просмотреть файл
@@ -387,6 +387,8 @@ class AMDSMICommands():
args.cache = cache
if process_isolation:
args.process_isolation = process_isolation
if partition:
args.partition = partition
if clock:
args.clock = clock
# args.clock defaults to False, so if it was overwritten with an empty list, that indicates it was given as an argument but with an empty list
@@ -396,24 +398,22 @@ class AMDSMICommands():
# Store args that are applicable to the current platform
current_platform_args = ["asic", "bus", "vbios", "driver", "ras",
"vram", "cache", "board", "process_isolation",
"clock"]
"clock", "partition"]
current_platform_values = [args.asic, args.bus, args.vbios, args.driver, args.ras,
args.vram, args.cache, args.board, args.process_isolation,
args.clock]
args.clock, args.partition]
self.helpers.check_required_groups()
if self.helpers.is_linux() and self.helpers.is_baremetal():
if partition:
args.partition = partition
if limit:
args.limit = limit
if soc_pstate:
args.soc_pstate = soc_pstate
if xgmi_plpd:
args.xgmi_plpd = xgmi_plpd
current_platform_args += ["ras", "limit", "partition", "soc_pstate", "xgmi_plpd"]
current_platform_values += [args.ras, args.limit, args.partition, args.soc_pstate, args.xgmi_plpd]
current_platform_args += ["ras", "limit", "soc_pstate", "xgmi_plpd"]
current_platform_values += [args.ras, args.limit, args.soc_pstate, args.xgmi_plpd]
if self.helpers.is_linux() and not self.helpers.is_virtual_os():
if numa:
@@ -4240,7 +4240,7 @@ class AMDSMICommands():
if args.compute_partition in accelerator_profiles['profile_types']:
compute_partition = amdsmi_interface.AmdSmiComputePartitionType[args.compute_partition]
index = accelerator_profiles['profile_types'].index(args.compute_partition)
attempted_to_set = f"Attempted to set accelerator partition to {args.compute_partition} (profile #{accelerator_profiles['profile_indices'][int(index)]} on {gpu_string}"
attempted_to_set = f"Attempted to set accelerator partition to {args.compute_partition} (profile #{accelerator_profiles['profile_indices'][int(index)]}) on {gpu_string}"
amdsmi_interface.amdsmi_set_gpu_compute_partition(args.gpu, compute_partition)
self.logger.store_output(args.gpu, 'accelerator_partition', f"Successfully set accelerator partition to {args.compute_partition} (profile #{accelerator_profiles['profile_indices'][int(index)]})")
elif args.compute_partition in accelerator_profiles['profile_indices']:
@@ -4294,7 +4294,7 @@ class AMDSMICommands():
threads = []
k140secs = 140
string_out = f"Updating memory partition for gpu {gpu_id}"
string_out = f"Updating memory partition for GPU: {gpu_id}"
timesToRetryRestartErr = 1
self.helpers.increment_set_count()
@@ -4305,9 +4305,9 @@ class AMDSMICommands():
while timesToRetryRestartErr >= 0:
timesToRetryRestartErr -= 1
try:
if showProgressBar: # only show reload warning on 1st set
if showProgressBar: # we want to overwrite the previous progress bar
t1 = multiprocessing.Process(target=self.helpers.showProgressbar,
args=(string_out, k140secs,))
args=(string_out, k140secs, True,))
threads.append(t1)
t1.start()
memory_partition = amdsmi_interface.AmdSmiMemoryPartitionType[args.memory_partition]
@@ -4342,7 +4342,7 @@ class AMDSMICommands():
return
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_AMDGPU_RESTART_ERR:
# Try again on a failure -> work around for not being able to close libdrm
string_out = f"Trying again - Updating memory partition for gpu {gpu_id}"
string_out = f"Trying again - Updating memory partition for GPU: {gpu_id} "
for thread in threads:
thread.terminate()
thread.join()
+9 -5
Просмотреть файл
@@ -969,11 +969,15 @@ class AMDSMIHelpers():
continue
return pci_devices
def progressbar(self, it, prefix="", size=60, out=sys.stdout):
def progressbar(self, it, prefix="", size=60, out=sys.stdout, add_newline=False):
count = len(it)
if (add_newline):
print("{}\n".format(prefix),end='\r', file=out, flush=False)
else:
print("{}".format(prefix),end='\r', file=out, flush=False)
def show(j):
x = int(size*j/count)
print("{}[{}{}] {}/{} secs remain".format(prefix, u""*x, "."*(size-x), j, count),
print("[{}{}] {}/{} secs remain".format(u""*x, "."*(size-x), j, count),
end='\r', file=out, flush=True)
show(0)
for i, item in enumerate(it):
@@ -981,10 +985,10 @@ class AMDSMIHelpers():
show(i+1)
print("\n\n", end='\r', flush=True, file=out)
def showProgressbar(self, title="", timeInSeconds=13):
def showProgressbar(self, title="", timeInSeconds=13, add_newline=False):
if title != "":
title += ": "
for i in self.progressbar(range(timeInSeconds), title, 40):
title += " "
for i in self.progressbar(range(timeInSeconds), title, 40, add_newline=add_newline):
time.sleep(1)
def check_required_groups(self):
+1 -1
Просмотреть файл
@@ -695,10 +695,10 @@ class AMDSMIParser(argparse.ArgumentParser):
static_parser.add_argument('-R', '--process-isolation', action='store_true', required=False, help=process_isolation_help)
static_parser.add_argument('-r', '--ras', action='store_true', required=False, help=ras_help)
static_parser.add_argument('-C', '--clock', action='store', default=False, nargs='*', type=str, required=False, help=clock_help)
static_parser.add_argument('-p', '--partition', action='store_true', required=False, help=partition_help)
# Options to display on Hypervisors and Baremetal
if self.helpers.is_hypervisor() or self.helpers.is_baremetal():
static_parser.add_argument('-p', '--partition', action='store_true', required=False, help=partition_help)
static_parser.add_argument('-l', '--limit', action='store_true', required=False, help=limit_help)
static_parser.add_argument('-P', '--soc-pstate', action='store_true', required=False, help=soc_pstate_help)
static_parser.add_argument('-x', '--xgmi-plpd', action='store_true', required=False, help=xgmi_plpd_help)
+243 -5
Просмотреть файл
@@ -23,17 +23,18 @@
#include <pwd.h>
#include <sys/stat.h>
#include <unistd.h>
#include <inttypes.h>
#include <bitset>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <inttypes.h>
#include <vector>
#include <sstream>
#include "amd_smi/amdsmi.h"
#include "amd_smi/impl/amd_smi_utils.h"
#define CHK_AMDSMI_RET(RET) \
@@ -201,8 +202,62 @@ std::string print_unsigned_int(T value) {
return ss.str();
}
// Translate a compute (accelerator) partition enum value into its short
// display name ("SPX", "DPX", "TPX", "QPX" or "CPX"). Any value without a
// dedicated case, including AMDSMI_COMPUTE_PARTITION_INVALID, yields "N/A".
static const std::string
computePartitionString(amdsmi_compute_partition_type_t computeParitionType) {
  const char *label = "N/A";  // fallback for every unlisted enum value
  switch (computeParitionType) {
    case AMDSMI_COMPUTE_PARTITION_SPX: label = "SPX"; break;
    case AMDSMI_COMPUTE_PARTITION_DPX: label = "DPX"; break;
    case AMDSMI_COMPUTE_PARTITION_TPX: label = "TPX"; break;
    case AMDSMI_COMPUTE_PARTITION_QPX: label = "QPX"; break;
    case AMDSMI_COMPUTE_PARTITION_CPX: label = "CPX"; break;
    default: break;
  }
  return label;
}
// Reverse lookup table: partition name string -> compute partition enum.
// "N/A" maps to AMDSMI_COMPUTE_PARTITION_INVALID. Callers using .at() must
// pass one of the keys listed here; std::map::at throws std::out_of_range
// for any other string.
static const std::map<std::string, amdsmi_compute_partition_type_t>
mapStringToSMIComputePartitionTypes {
{"SPX", AMDSMI_COMPUTE_PARTITION_SPX},
{"DPX", AMDSMI_COMPUTE_PARTITION_DPX},
{"TPX", AMDSMI_COMPUTE_PARTITION_TPX},
{"QPX", AMDSMI_COMPUTE_PARTITION_QPX},
{"CPX", AMDSMI_COMPUTE_PARTITION_CPX},
{"N/A", AMDSMI_COMPUTE_PARTITION_INVALID}
};
// Translate a memory partition enum value into its short display name
// ("NPS1", "NPS2", "NPS4" or "NPS8"). Any value without a dedicated case,
// including AMDSMI_MEMORY_PARTITION_UNKNOWN, yields "N/A".
static const std::string
memoryPartitionString(amdsmi_memory_partition_type_t memoryParitionType) {
  const char *label = "N/A";  // fallback for every unlisted enum value
  switch (memoryParitionType) {
    case AMDSMI_MEMORY_PARTITION_NPS1: label = "NPS1"; break;
    case AMDSMI_MEMORY_PARTITION_NPS2: label = "NPS2"; break;
    case AMDSMI_MEMORY_PARTITION_NPS4: label = "NPS4"; break;
    case AMDSMI_MEMORY_PARTITION_NPS8: label = "NPS8"; break;
    default: break;
  }
  return label;
}
// Reverse lookup table: partition name string -> memory partition enum.
// "N/A" maps to AMDSMI_MEMORY_PARTITION_UNKNOWN. Callers using .at() must
// pass one of the keys listed here; std::map::at throws std::out_of_range
// for any other string.
static const std::map<std::string, amdsmi_memory_partition_type_t>
mapStringToSMIMemoryPartitionTypes {
{"NPS1", AMDSMI_MEMORY_PARTITION_NPS1},
{"NPS2", AMDSMI_MEMORY_PARTITION_NPS2},
{"NPS4", AMDSMI_MEMORY_PARTITION_NPS4},
{"NPS8", AMDSMI_MEMORY_PARTITION_NPS8},
{"N/A", AMDSMI_MEMORY_PARTITION_UNKNOWN}
};
int main() {
amdsmi_status_t ret;
amdsmi_status_t ret, ret_set;
const char *err_str;
// Init amdsmi for sockets and devices.
// Here we are only interested in AMD_GPUS.
@@ -248,6 +303,20 @@ int main() {
// For each device of the socket, get name and temperature.
for (uint32_t j = 0; j < device_count; j++) {
uint32_t device_cnt = 0;
ret = smi_amdgpu_get_device_count(&device_cnt);
CHK_AMDSMI_RET(ret)
std::cout << "Device Count: " << device_cnt << std::endl;
// Get device index
uint32_t device_index = 0;
ret = smi_amdgpu_get_device_index(processor_handles[j], &device_index);
CHK_AMDSMI_RET(ret)
std::cout << "Device Index: " << device_index << std::endl;
std::vector<amdsmi_processor_handle> p_handles(device_cnt);
ret = smi_amdgpu_get_processor_handle_by_index(device_index, &p_handles[j]);
// Get device type. Since the amdsmi is initialized with
// AMD_SMI_INIT_AMD_GPUS, the processor_type must be AMDSMI_PROCESSOR_TYPE_AMD_GPU.
processor_type_t processor_type = {};
@@ -286,6 +355,173 @@ int main() {
printf("\tAsic serial: 0x%s\n", asic_info.asic_serial);
printf("\tNum of Computes: %d\n\n", asic_info.num_of_compute_units);
bool is_power_management_enabled = false;
ret = amdsmi_is_gpu_power_management_enabled(processor_handles[j],
&is_power_management_enabled);
CHK_AMDSMI_RET(ret)
printf(" Output of amdsmi_is_gpu_power_management_enabled:\n");
printf("\tPower Management Enabled: %s\n\n",
(is_power_management_enabled ? "TRUE" : "FALSE"));
std::cout << " **Version 1: Accelerator/Compute Partition API Examples**\n";
char original_compute_partition[AMDSMI_MAX_STRING_LENGTH];
ret = amdsmi_get_gpu_compute_partition(processor_handles[j], original_compute_partition,
static_cast<uint32_t>(AMDSMI_MAX_STRING_LENGTH));
amdsmi_status_code_to_string(ret, &err_str);
if (ret == AMDSMI_STATUS_SUCCESS) {
CHK_AMDSMI_RET(ret)
std::cout << " Output of amdsmi_get_gpu_compute_partition:\n";
std::cout << "\tamdsmi_get_gpu_compute_partition(" << j << ", "
<< mapStringToSMIComputePartitionTypes.at(original_compute_partition) << "): "
<< err_str << "\n\n";
std::cout << "\tCompute Partition (original): "
<< original_compute_partition << "\n\n";
} else {
std::cout << "\tamdsmi_get_gpu_compute_partition(" << j << ", "
<< computePartitionString(AMDSMI_COMPUTE_PARTITION_INVALID) << "): "
<< err_str << "\n\n";
}
for (int partition = static_cast<int>(AMDSMI_COMPUTE_PARTITION_SPX);
partition <= static_cast<int>(AMDSMI_COMPUTE_PARTITION_CPX);
partition++) {
amdsmi_compute_partition_type_t updatePartition
= static_cast<amdsmi_compute_partition_type_t>(partition);
ret_set = amdsmi_set_gpu_compute_partition(processor_handles[j],
updatePartition);
amdsmi_status_code_to_string(ret_set, &err_str);
if (ret_set == AMDSMI_STATUS_SUCCESS) {
CHK_AMDSMI_RET(ret_set)
}
std::cout << "\tamdsmi_set_gpu_compute_partition(" << j << ", "
<< computePartitionString(updatePartition) << "): "
<< err_str << "\n\n";
// Get the current compute partition
char current_compute_partition[AMDSMI_MAX_STRING_LENGTH];
ret = amdsmi_get_gpu_compute_partition(processor_handles[j],
current_compute_partition,
static_cast<uint32_t>(AMDSMI_MAX_STRING_LENGTH));
amdsmi_status_code_to_string(ret, &err_str);
if (ret == AMDSMI_STATUS_SUCCESS) {
CHK_AMDSMI_RET(ret)
std::cout << " Output of amdsmi_get_gpu_compute_partition:\n";
std::cout << "\tamdsmi_get_gpu_compute_partition(" << j << ", "
<< computePartitionString(updatePartition) << "): "
<< err_str << "\n\n";
std::cout << "\tCompute Partition (current): "
<< current_compute_partition << "\n\n";
} else {
std::cout << "\tamdsmi_get_gpu_compute_partition(" << j << ", "
<< computePartitionString(AMDSMI_COMPUTE_PARTITION_INVALID) << "): "
<< err_str << "\n\n";
}
}
// return to original compute partition
amdsmi_compute_partition_type_t original_compute_partition_type;
if (ret == AMDSMI_STATUS_SUCCESS) {
original_compute_partition_type
= mapStringToSMIComputePartitionTypes.at(original_compute_partition);
} else {
original_compute_partition_type = AMDSMI_COMPUTE_PARTITION_INVALID;
}
std::cout << " Returning to original compute partition ("
<< computePartitionString(original_compute_partition_type) << ")\n";
auto ret_set = amdsmi_set_gpu_compute_partition(processor_handles[j],
original_compute_partition_type);
amdsmi_status_code_to_string(ret_set, &err_str);
if (ret_set == AMDSMI_STATUS_SUCCESS) {
CHK_AMDSMI_RET(ret_set)
}
std::cout << "\tamdsmi_set_gpu_compute_partition(" << j << ", "
<< computePartitionString(original_compute_partition_type) << "): "
<< err_str << "\n\n";
std::cout << " **Version 1: Memory Partition API Examples**\n";
char original_memory_partition[AMDSMI_MAX_STRING_LENGTH];
ret = amdsmi_get_gpu_memory_partition(processor_handles[j], original_memory_partition,
static_cast<uint32_t>(AMDSMI_MAX_STRING_LENGTH));
amdsmi_status_code_to_string(ret, &err_str);
if (ret == AMDSMI_STATUS_SUCCESS) {
CHK_AMDSMI_RET(ret)
std::cout << " Output of amdsmi_get_gpu_memory_partition:\n";
std::cout << "\tamdsmi_get_gpu_memory_partition(" << j << ", "
<< mapStringToSMIMemoryPartitionTypes.at(original_memory_partition) << "): "
<< err_str << "\n\n";
std::cout << "\tMemory Partition (original): "
<< original_memory_partition << "\n\n";
} else {
std::cout << "\tamdsmi_get_gpu_memory_partition(" << j << ", "
<< memoryPartitionString(AMDSMI_MEMORY_PARTITION_UNKNOWN) << "): "
<< err_str << "\n\n";
}
for (int partition = static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS1);
partition <= static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS8);
partition++) {
if (partition != static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS1)
&& partition != static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS2)
&& partition != static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS4)
&& partition != static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS8)) {
continue;
}
amdsmi_memory_partition_type_t updatePartition
= static_cast<amdsmi_memory_partition_type_t>(partition);
auto ret_set = amdsmi_set_gpu_memory_partition(processor_handles[j],
updatePartition);
amdsmi_status_code_to_string(ret_set, &err_str);
if (ret_set == AMDSMI_STATUS_SUCCESS) {
CHK_AMDSMI_RET(ret_set)
std::cout << " Output of amdsmi_set_gpu_memory_partition:\n";
}
std::cout << "\tamdsmi_set_gpu_memory_partition(" << j << ", "
<< memoryPartitionString(updatePartition) << "): "
<< err_str << "\n\n";
// Get the current memory partition
char current_memory_partition[AMDSMI_MAX_STRING_LENGTH];
ret = amdsmi_get_gpu_memory_partition(processor_handles[j],
current_memory_partition,
static_cast<uint32_t>(AMDSMI_MAX_STRING_LENGTH));
amdsmi_status_code_to_string(ret, &err_str);
if (ret == AMDSMI_STATUS_SUCCESS) {
CHK_AMDSMI_RET(ret)
std::cout << "\tamdsmi_get_gpu_memory_partition(" << j << ", "
<< memoryPartitionString(updatePartition) << "): "
<< err_str << "\n\n";
std::cout << "\tMemory Partition (current): "
<< current_memory_partition << "\n\n";
} else {
std::cout << "\tamdsmi_get_gpu_memory_partition(" << j << ", "
<< memoryPartitionString(AMDSMI_MEMORY_PARTITION_UNKNOWN) << "): "
<< err_str << "\n\n";
}
}
// return to original memory partition
amdsmi_memory_partition_type_t original_memory_partition_type;
if (ret == AMDSMI_STATUS_SUCCESS) {
original_memory_partition_type
= mapStringToSMIMemoryPartitionTypes.at(original_memory_partition);
} else {
original_memory_partition_type = AMDSMI_MEMORY_PARTITION_UNKNOWN;
}
std::cout << " Returning to original memory partition ("
<< memoryPartitionString(original_memory_partition_type)
<< ")\n";
ret_set = amdsmi_set_gpu_memory_partition(processor_handles[j],
original_memory_partition_type);
amdsmi_status_code_to_string(ret_set, &err_str);
if (ret_set == AMDSMI_STATUS_SUCCESS) {
CHK_AMDSMI_RET(ret_set)
}
std::cout << "\tamdsmi_set_gpu_compute_partition(" << j << ", "
<< memoryPartitionString(original_memory_partition_type) << "): "
<< err_str << "\n\n";
// TODO(amdsmi_team): Add V2 partition APIs
// Get VRAM info
amdsmi_vram_info_t vram_info = {};
ret = amdsmi_get_gpu_vram_info(processor_handles[j], &vram_info);
@@ -478,7 +714,7 @@ int main() {
block = (amdsmi_gpu_block_t)(block * 2)) {
ret = amdsmi_get_gpu_ras_block_features_enabled(processor_handles[j], block,
&state);
if (ret != AMDSMI_STATUS_API_FAILED) {
if (ret != AMDSMI_STATUS_API_FAILED && ret != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_AMDSMI_RET(ret)
}
@@ -520,7 +756,9 @@ int main() {
// Get ECC error counts
amdsmi_error_count_t err_cnt_info = {};
ret = amdsmi_get_gpu_total_ecc_count(processor_handles[j], &err_cnt_info);
CHK_AMDSMI_RET(ret)
if (ret != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_AMDSMI_RET(ret)
}
printf(" Output of amdsmi_get_gpu_total_ecc_count:\n");
printf("\tCorrectable errors: %lu\n", err_cnt_info.correctable_count);
printf("\tUncorrectable errors: %lu\n\n",
@@ -530,7 +768,7 @@ int main() {
ret = amdsmi_get_gpu_process_list(processor_handles[j], &num_process, nullptr);
CHK_AMDSMI_RET(ret)
if (!num_process) {
printf("No processes found.\n");
printf("amdsmi_get_gpu_process_list(): No processes found.\n\n");
} else {
std::cout << "Processes found: " << num_process << "\n";
amdsmi_proc_info_t process_info_list[num_process];
+20 -1
Просмотреть файл
@@ -407,7 +407,7 @@ typedef enum {
//!< work together with shared memory
AMDSMI_COMPUTE_PARTITION_QPX, //!< Quad GPU mode (QPX)- Quarter XCCs
//!< work together with shared memory
AMDSMI_COMPUTE_PARTITION_CPX, //!< Core mode (CPX)- Per-chip XCC with
AMDSMI_COMPUTE_PARTITION_CPX //!< Core mode (CPX)- Per-chip XCC with
//!< shared memory
} amdsmi_compute_partition_type_t;
@@ -5847,6 +5847,25 @@ amdsmi_get_power_cap_info(amdsmi_processor_handle processor_handle, uint32_t sen
*/
amdsmi_status_t amdsmi_get_pcie_info(amdsmi_processor_handle processor_handle, amdsmi_pcie_info_t *info);
/**
* @brief Returns the 'xcd_counter' from the GPU metrics associated with the device
*
* @ingroup tagAsicBoardInfo
*
* @platform{gpu_bm_linux} @platform{guest_1vf} @platform{guest_mvf}
*
* @param[in] processor_handle Device which to query
*
* @param[inout] xcd_count a pointer to uint16_t to which the device's
* 'xcd_counter' value will be stored. Must be allocated by user.
*
* @retval ::AMDSMI_STATUS_SUCCESS is returned upon successful call.
* ::AMDSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit
* does not exist for the given device.
*/
amdsmi_status_t amdsmi_get_gpu_xcd_counter(amdsmi_processor_handle processor_handle,
uint16_t *xcd_count);
/** @} End tagAsicBoardInfo */
/*****************************************************************************/
-1
Просмотреть файл
@@ -27,7 +27,6 @@
#include "amd_smi/impl/amd_smi_processor.h"
#include "amd_smi/impl/amd_smi_drm.h"
#include "shared_mutex.h" // NOLINT
#include "rocm_smi/rocm_smi_logger.h"
namespace amd {
namespace smi {
+61 -6
Просмотреть файл
@@ -26,10 +26,10 @@
#include <limits>
#include <type_traits>
#include <string>
#include <utility>
#include "amd_smi/amdsmi.h"
#include "amd_smi/impl/amd_smi_gpu_device.h"
#include "rocm_smi/rocm_smi_utils.h"
#define SMIGPUDEVICE_MUTEX(MUTEX) \
@@ -55,6 +55,63 @@ amdsmi_status_t smi_amdgpu_is_gpu_power_management_enabled(amd::smi::AMDSmiGPUDe
std::string smi_split_string(std::string str, char delim);
std::string smi_amdgpu_get_status_string(amdsmi_status_t ret, bool fullStatus);
/**
* @brief Get the device index given the processor handle.
*
* @details Given a processor handle @p processor_handle and a pointer to a
* uint32_t @p device_index, the matching device index is written to
* @p device_index.
*
* @param[in] processor_handle Device which to query
*
* @param[inout] device_index a pointer to uint32_t to which the matching device
* index will be stored
*
* @retval ::AMDSMI_STATUS_SUCCESS is returned upon successful call.
* ::AMDSMI_STATUS_INVAL is returned if user provides a null pointer
* for device_index.
* ::AMDSMI_STATUS_API_FAILED is returned if the corresponding device
* index for the processor handle cannot be found.
*/
amdsmi_status_t smi_amdgpu_get_device_index(amdsmi_processor_handle processor_handle,
uint32_t* device_index);
/**
* @brief Get total number of devices
*
* @details Given a pointer to a uint32_t @p total_num_devices, the total
* number of devices is written to it.
*
* @param[inout] total_num_devices a pointer to uint32_t to which the total number
* of devices will be stored
*
* @retval ::AMDSMI_STATUS_SUCCESS is returned upon successful call.
* ::AMDSMI_STATUS_INVAL is returned if user provides a null pointer
* for total_num_devices.
*/
amdsmi_status_t smi_amdgpu_get_device_count(uint32_t *total_num_devices);
/**
* @brief Get the processor handle given the device index.
*
* @details Given a uint32_t @p device_index and a pointer to
* a processor handle @p processor_handle, the device index will be used to
* find the processor handle of the device and store it in the provided pointer
*
* @param[in] device_index a uint32_t value used to find the corresponding
* processor handle
*
* @param[inout] processor_handle a pointer to amdsmi_processor_handle
* which the corresponding processor_handle will be stored
*
* @retval ::AMDSMI_STATUS_SUCCESS is returned upon successful call.
* ::AMDSMI_STATUS_INVAL is returned if user provides a null pointer
* for processor_handle.
* ::AMDSMI_STATUS_API_FAILED is returned if the device_index cannot
* be found.
*/
amdsmi_status_t smi_amdgpu_get_processor_handle_by_index(
uint32_t device_index,
amdsmi_processor_handle *processor_handle);
template<typename>
constexpr bool is_dependent_false_v = false;
@@ -72,8 +129,7 @@ constexpr T get_std_num_limit()
{
if constexpr (is_supported_type_v<T>) {
return std::numeric_limits<T>::max();
}
else {
} else {
return std::numeric_limits<T>::min();
static_assert(is_dependent_false_v<T>, "Error: Type not supported...");
}
@@ -98,12 +154,11 @@ constexpr T translate_umax_or_assign_value(U source_value, V target_value)
}
return result;
}
else {
} else {
static_assert(is_dependent_false_v<T>, "Error: Type not supported...");
}
return result;
}
#endif //
#endif // AMD_SMI_INCLUDE_AMD_SMI_UTILS_H_
+3
Просмотреть файл
@@ -94,6 +94,7 @@ from .amdsmi_interface import amdsmi_get_gpu_kfd_info
from .amdsmi_interface import amdsmi_get_power_cap_info
from .amdsmi_interface import amdsmi_get_gpu_vram_info
from .amdsmi_interface import amdsmi_get_gpu_cache_info
from .amdsmi_interface import amdsmi_get_gpu_xcd_counter
# # Microcode and VBIOS Information
from .amdsmi_interface import amdsmi_get_gpu_vbios_info
@@ -272,6 +273,8 @@ from .amdsmi_interface import AmdSmiLinkType
from .amdsmi_interface import AmdSmiUtilizationCounterType
from .amdsmi_interface import AmdSmiProcessorType
from .amdsmi_interface import AmdSmiVirtualizationMode
from .amdsmi_interface import AmdSmiVramType
from .amdsmi_interface import AmdSmiVramVendor
# Exceptions
from .amdsmi_exception import AmdSmiLibraryException
+47 -9
Просмотреть файл
@@ -460,6 +460,36 @@ class AmdSmiVirtualizationMode(IntEnum):
GUEST = amdsmi_wrapper.AMDSMI_VIRTUALIZATION_MODE_GUEST
PASSTHROUGH = amdsmi_wrapper.AMDSMI_VIRTUALIZATION_MODE_PASSTHROUGH
# IntEnum exposing the AMDSMI_VRAM_TYPE_* constants from amdsmi_wrapper under
# short Python names. MAX maps to the wrapper's AMDSMI_VRAM_TYPE__MAX value.
class AmdSmiVramType(IntEnum):
UNKNOWN = amdsmi_wrapper.AMDSMI_VRAM_TYPE_UNKNOWN
HBM = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM
HBM2 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM2
HBM2E = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM2E
HBM3 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM3
DDR2 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_DDR2
DDR3 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_DDR3
DDR4 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_DDR4
GDDR1 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR1
GDDR2 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR2
GDDR3 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR3
GDDR4 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR4
GDDR5 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR5
GDDR6 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR6
GDDR7 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR7
MAX = amdsmi_wrapper.AMDSMI_VRAM_TYPE__MAX
# IntEnum exposing the AMDSMI_VRAM_VENDOR_* constants from amdsmi_wrapper
# under short Python names, including an UNKNOWN member.
class AmdSmiVramVendor(IntEnum):
SAMSUNG = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_SAMSUNG
INFINEON = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_INFINEON
ELPIDA = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_ELPIDA
ETRON = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_ETRON
NANYA = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_NANYA
HYNIX = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_HYNIX
MOSEL = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_MOSEL
WINBOND = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_WINBOND
ESMT = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_ESMT
MICRON = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_MICRON
UNKNOWN = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_UNKNOWN
class AmdSmiEventReader:
def __init__(
@@ -2525,6 +2555,18 @@ def amdsmi_get_pcie_info(
return pcie_info_dict
def amdsmi_get_gpu_xcd_counter(processor_handle: amdsmi_wrapper.amdsmi_processor_handle) -> int:
    """Return the XCD counter reported by the library for ``processor_handle``.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        The value written by ``amdsmi_wrapper.amdsmi_get_gpu_xcd_counter``
        into a 16-bit unsigned integer, as a plain Python ``int``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` instance.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(processor_handle, amdsmi_wrapper.amdsmi_processor_handle)

    xcd_counter = ctypes.c_uint16()
    # NOTE(review): _check_res presumably raises on a non-success status code;
    # confirm against its definition elsewhere in this module.
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_xcd_counter(
            processor_handle, ctypes.byref(xcd_counter)
        )
    )

    # .value is a Python int, not a dict; the annotation above reflects that.
    return xcd_counter.value
def amdsmi_get_processor_handle_from_bdf(bdf):
bdf = _parse_bdf(bdf)
@@ -2958,15 +3000,11 @@ def amdsmi_get_gpu_accelerator_partition_profile(
length = profile.num_partitions
partition_ids = []
for i in range(profile.num_partitions):
partition_ids.append(partition_id_list[i])
last_element = 0
if length > 0:
last_element = length - 1
if ((partition_ids[last_element] == 0)
and not((profile_type_ret == str("SPX")) or (profile_type_ret == str("N/A")))):
partition_ids = "N/A"
#partition_id[0] will contain the partition id of each device
#BM/Guest will include this logic. Host will only display primary partition ids.
kPOSITION_OF_PARTITION_ID = 0
partition_ids.append(partition_id_list[kPOSITION_OF_PARTITION_ID])
mem_caps_list = []
if profile.memory_caps.nps_flags.nps1_cap == 1:
+4 -1
Просмотреть файл
@@ -2603,6 +2603,9 @@ amdsmi_get_power_cap_info.argtypes = [amdsmi_processor_handle, uint32_t, ctypes.
amdsmi_get_pcie_info = _libraries['libamd_smi.so'].amdsmi_get_pcie_info
amdsmi_get_pcie_info.restype = amdsmi_status_t
amdsmi_get_pcie_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_pcie_info_t)]
amdsmi_get_gpu_xcd_counter = _libraries['libamd_smi.so'].amdsmi_get_gpu_xcd_counter
amdsmi_get_gpu_xcd_counter.restype = amdsmi_status_t
amdsmi_get_gpu_xcd_counter.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_uint16)]
amdsmi_get_fw_info = _libraries['libamd_smi.so'].amdsmi_get_fw_info
amdsmi_get_fw_info.restype = amdsmi_status_t
amdsmi_get_fw_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_fw_info_t)]
@@ -3050,7 +3053,7 @@ __all__ = \
'amdsmi_get_gpu_virtualization_mode',
'amdsmi_get_gpu_volt_metric', 'amdsmi_get_gpu_vram_info',
'amdsmi_get_gpu_vram_usage', 'amdsmi_get_gpu_vram_vendor',
'amdsmi_get_gpu_xgmi_link_status',
'amdsmi_get_gpu_xcd_counter', 'amdsmi_get_gpu_xgmi_link_status',
'amdsmi_get_hsmp_metrics_table',
'amdsmi_get_hsmp_metrics_table_version', 'amdsmi_get_lib_version',
'amdsmi_get_link_metrics', 'amdsmi_get_link_topology_nearest',
+10
Просмотреть файл
@@ -200,6 +200,14 @@ class Device {
public:
explicit Device(std::string path, RocmSMI_env_vars const *e);
~Device(void);
typedef struct {
uint32_t card_index;
uint32_t drm_render_minor;
uint64_t bdfid;
uint64_t kfd_gpu_id;
uint32_t partition_id;
uint32_t smi_device_id;
} rsmi_device_identifiers_t;
void set_monitor(std::shared_ptr<Monitor> m) {monitor_ = m;}
std::string path(void) const {return path_;}
@@ -266,6 +274,8 @@ class Device {
void set_smi_device_id(uint32_t device_id) { m_device_id = device_id; }
void set_smi_partition_id(uint32_t partition_id) { m_partition_id = partition_id; }
static const char* get_type_string(DevInfoTypes type);
rsmi_status_t get_smi_device_identifiers(uint32_t device_id,
rsmi_device_identifiers_t *device_identifiers);
private:
std::shared_ptr<Monitor> monitor_;
+14 -2
Просмотреть файл
@@ -6577,6 +6577,16 @@ rsmi_dev_partition_id_get(uint32_t dv_ind, uint32_t *partition_id) {
if (ret == RSMI_STATUS_SUCCESS) {
*partition_id = static_cast<uint32_t>((pci_id >> 28) & 0xf);
}
std::ostringstream bdf_sstream;
bdf_sstream << std::hex << std::setfill('0') << std::setw(4)
<< ((pci_id >> 32) & 0xFFFFFFFF) << ":";
bdf_sstream << std::hex << std::setfill('0') << std::setw(2) << ((pci_id >> 8) & 0xFF) << ":";
bdf_sstream << std::hex << std::setfill('0') << std::setw(2) << ((pci_id >> 3) & 0xF8) << ".";
bdf_sstream << std::hex << std::setfill('0') << +(pci_id & 0x7);
bdf_sstream << "\nPartition ID ((pci_id >> 28) & 0xf): " << std::dec
<< static_cast<int>((pci_id >> 28) & 0xf);
bdf_sstream << "\nPartition ID (pci_id & 0x7): " << std::dec << static_cast<int>(pci_id & 0x7);
// std::cout << __PRETTY_FUNCTION__ << " BDF: " << bdf_sstream.str() << std::endl;
/**
* Fall back is required due to driver changes within KFD.
@@ -6603,9 +6613,11 @@ rsmi_dev_partition_id_get(uint32_t dv_ind, uint32_t *partition_id) {
<< " | Success"
<< " | Device #: " << dv_ind
<< " | Type: partition_id"
<< " | Data: " << *partition_id
<< " | Data: " << static_cast<int>(*partition_id)
<< " | Returning = "
<< getRSMIStatusString(RSMI_STATUS_SUCCESS) << " |";
<< getRSMIStatusString(RSMI_STATUS_SUCCESS) << " |"
<< "\n BDF: " << bdf_sstream.str() << std::endl;
// std::cout << ss.str() << std::endl;
LOG_INFO(ss);
return ret;
CATCH
+34 -1
Просмотреть файл
@@ -1020,7 +1020,7 @@ int Device::readDevInfoLine(DevInfoTypes type, std::string *line) {
<< get_type_string(type) << "), returning *line = "
<< *line;
LOG_INFO(ss);
fs.close();
return 0;
}
@@ -1103,6 +1103,7 @@ int Device::readDevInfoMultiLineStr(DevInfoTypes type,
while (std::getline(fs, line)) {
retVec->push_back(line);
}
fs.close();
if (retVec->empty()) {
ss << "Read devInfoMultiLineStr for DevInfoType ("
@@ -1771,6 +1772,38 @@ std::string Device::readBootPartitionState<rsmi_memory_partition_type_t>(
return boot_state;
}
// Fill *device_identifiers with the identifiers of the device at position
// device_id in the RocmSMI device list.
//
// Returns RSMI_STATUS_INVALID_ARGS when device_identifiers is null,
// RSMI_STATUS_NOT_SUPPORTED when device_id is not a valid position in the
// device list, and RSMI_STATUS_SUCCESS once the output struct is written.
rsmi_status_t Device::get_smi_device_identifiers(uint32_t device_id,
                          rsmi_device_identifiers_t *device_identifiers) {
  if (device_identifiers == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
  auto devices = smi.devices();

  // No device at this position in the list.
  if (device_id >= devices.size()) {
    return RSMI_STATUS_NOT_SUPPORTED;
  }

  rsmi_device_identifiers_t identifiers;
  identifiers.card_index = devices[device_id]->index();
  identifiers.drm_render_minor = devices[device_id]->drm_render_minor();
  identifiers.bdfid = devices[device_id]->bdfid();
  identifiers.kfd_gpu_id = devices[device_id]->kfd_gpu_id();
  identifiers.partition_id = devices[device_id]->m_partition_id;
  identifiers.smi_device_id = device_id;

  *device_identifiers = identifiers;
  return RSMI_STATUS_SUCCESS;
}
#undef RET_IF_NONZERO
} // namespace smi
+1 -1
Просмотреть файл
@@ -3387,7 +3387,7 @@ AMGpuMetricsPublicLatestTupl_t GpuMetricsBase_v13_t::copy_internal_to_external_m
metrics_public_init.vcn_activity[0] = metrics_public_init.average_mm_activity;
}
// average_mm_activity needs to not be UINT16_MAX and
// metrics_public_init.xcp_stats->vcn_busy[0] should also be UIN16_MAX
// metrics_public_init.xcp_stats->vcn_busy[0] should also be UINT16_MAX
if (metrics_public_init.average_mm_activity != UINT16_MAX
&& metrics_public_init.xcp_stats->vcn_busy[0] == UINT16_MAX) {
metrics_public_init.xcp_stats->vcn_busy[0] = metrics_public_init.average_mm_activity;
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+20 -15
Просмотреть файл
@@ -38,24 +38,23 @@
namespace amd {
namespace smi {
void closedir(DIR* /*ptr*/) {}
std::string AMDSmiDrm::find_file_in_folder(const std::string& folder,
const std::string& regex) {
std::string file_name;
using dir_ptr = std::unique_ptr<DIR, decltype(&closedir)>;
struct dirent *dir = nullptr;
DIR *drm_dir;
struct dirent *dir;
std::regex file_regex(regex);
auto drm_dir = dir_ptr(opendir(folder.c_str()), &closedir);
drm_dir = opendir(folder.c_str());
if (drm_dir == nullptr) return file_name;
std::cmatch m;
while ((dir = readdir(drm_dir.get())) != NULL) {
if (std::regex_search(dir->d_name, m, file_regex)) {
file_name = dir->d_name;
break;
}
while ((dir = readdir(drm_dir)) != nullptr) {
if (std::regex_search(dir->d_name, m, file_regex)) {
file_name = dir->d_name;
break;
}
}
closedir(drm_dir);
return file_name;
}
@@ -197,9 +196,9 @@ amdsmi_status_t AMDSmiDrm::cleanup() {
close(drm_fds_[i]);
}
drm_fds_.clear();
drm_paths_.clear();
drm_bdfs_.clear();
if (!drm_fds_.empty()) {drm_fds_.clear();}
if (!drm_paths_.empty()) {drm_paths_.clear();}
if (!drm_bdfs_.empty()) {drm_bdfs_.clear();}
lib_loader_.unload();
return AMDSMI_STATUS_SUCCESS;
}
@@ -306,9 +305,15 @@ amdsmi_status_t AMDSmiDrm::get_drm_fd_by_index(uint32_t gpu_index, uint32_t *fd_
}
amdsmi_status_t AMDSmiDrm::get_bdf_by_index(uint32_t gpu_index, amdsmi_bdf_t *bdf_info) const {
if (gpu_index + 1 > drm_bdfs_.size()) return AMDSMI_STATUS_NOT_SUPPORTED;
*bdf_info = drm_bdfs_[gpu_index];
std::ostringstream ss;
if (gpu_index + 1 > drm_bdfs_.size()) {
ss << __PRETTY_FUNCTION__ << " | gpu_index = " << gpu_index
<< "; \nReturning = AMDSMI_STATUS_NOT_SUPPORTED";
LOG_INFO(ss);
// std::cout << ss.str() << std::endl;
return AMDSMI_STATUS_NOT_SUPPORTED;
}
*bdf_info = drm_bdfs_[gpu_index];
ss << __PRETTY_FUNCTION__ << " | gpu_index = " << gpu_index
<< "; \nreceived bdf: Domain = " << bdf_info->domain_number
<< "; \nBus# = " << bdf_info->bus_number
+29 -6
Просмотреть файл
@@ -20,12 +20,6 @@
* THE SOFTWARE.
*/
#include "amd_smi/impl/amd_smi_gpu_device.h"
#include "amd_smi/impl/amd_smi_common.h"
#include "amd_smi/impl/fdinfo.h"
#include "rocm_smi/rocm_smi_kfd.h"
#include "rocm_smi/rocm_smi_utils.h"
#include <functional>
#include <map>
#include <memory>
@@ -33,6 +27,14 @@
#include <dirent.h>
#include <sys/types.h>
#include "amd_smi/impl/amd_smi_gpu_device.h"
#include "amd_smi/impl/amd_smi_common.h"
#include "amd_smi/impl/amd_smi_utils.h"
#include "amd_smi/impl/fdinfo.h"
#include "rocm_smi/rocm_smi_kfd.h"
#include "rocm_smi/rocm_smi_utils.h"
#include "rocm_smi/rocm_smi_logger.h"
namespace amd {
namespace smi {
@@ -61,11 +63,32 @@ amdsmi_status_t AMDSmiGPUDevice::get_drm_data() {
uint32_t fd = 0;
std::string path;
amdsmi_bdf_t bdf;
std::ostringstream ss;
ret = drm_.get_drm_fd_by_index(gpu_id_, &fd);
ss << __PRETTY_FUNCTION__ << " | gpu_id_: " << gpu_id_
<< "; fd: " << fd
<< "; drm_.get_drm_fd_by_index(gpu_id_, &fd): "
<< smi_amdgpu_get_status_string(ret, false) << std::endl;
// std::cout << ss.str();
LOG_DEBUG(ss);
if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED;
ret = drm_.get_drm_path_by_index(gpu_id_, &path);
ss << __PRETTY_FUNCTION__ << " | gpu_id_: " << gpu_id_
<< "; path: " << path
<< "; drm_.get_drm_fd_by_index(gpu_id_, &path): "
<< smi_amdgpu_get_status_string(ret, false) << std::endl;
// std::cout << ss.str();
LOG_DEBUG(ss);
if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED;
ret = drm_.get_bdf_by_index(gpu_id_, &bdf);
ss << __PRETTY_FUNCTION__ << " | gpu_id_: " << gpu_id_
<< "; domain: " << bdf.domain_number
<< "; bus: " << bdf.bus_number
<< "; device: " << bdf.device_number
<< "; drm_.get_drm_fd_by_index(gpu_id_, &bdf): "
<< smi_amdgpu_get_status_string(ret, false) << std::endl;
// std::cout << ss.str();
LOG_DEBUG(ss);
if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED;
bdf_ = bdf, path_ = path, fd_ = fd;
+1 -1
Просмотреть файл
@@ -46,7 +46,7 @@ amdsmi_status_t AMDSmiLibraryLoader::load(const char* filename) {
if (!libHandler_) {
char* error = dlerror();
std::cerr << "Fail to open " << filename <<": " << error
<< std::endl;
<< std::endl;
return AMDSMI_STATUS_FAIL_LOAD_MODULE;
}
}
+7 -10
Просмотреть файл
@@ -22,13 +22,13 @@
#include <sstream>
#include <iomanip>
#include <fstream>
#include "amd_smi/impl/amd_smi_system.h"
#include "amd_smi/impl/amd_smi_gpu_device.h"
#include "amd_smi/impl/amd_smi_common.h"
#include "amd_smi/impl/amd_smi_utils.h"
#include "rocm_smi/rocm_smi.h"
#include "rocm_smi/rocm_smi_main.h"
#include <fstream>
namespace amd {
namespace smi {
@@ -111,7 +111,6 @@ amdsmi_status_t AMDSmiSystem::init(uint64_t flags) {
}
#endif
return AMDSMI_STATUS_SUCCESS;
}
#ifdef ENABLE_ESMI_LIB
@@ -160,6 +159,7 @@ amdsmi_status_t AMDSmiSystem::populate_amd_cpus() {
#endif
amdsmi_status_t AMDSmiSystem::populate_amd_gpu_devices() {
AMDSmiSystem::cleanup();
// init rsmi
rsmi_driver_state_t state;
rsmi_status_t ret = rsmi_init(0);
@@ -262,18 +262,15 @@ amdsmi_status_t AMDSmiSystem::cleanup() {
}
#endif
if (init_flag_ & AMDSMI_INIT_AMD_GPUS) {
for (uint32_t i = 0; i < sockets_.size(); i++) {
delete sockets_[i];
}
processors_.clear();
sockets_.clear();
// we do not need to delete the sockets/processors, clear takes care of this
if (!processors_.empty()) {processors_.clear();}
if (!sockets_.empty()) {sockets_.clear();}
init_flag_ &= ~AMDSMI_INIT_AMD_GPUS;
amd::smi::AMDSmiSystem::getInstance().clean_up_drm();
rsmi_status_t ret = rsmi_shut_down();
if (ret != RSMI_STATUS_SUCCESS) {
return amd::smi::rsmi_to_amdsmi_status(ret);
}
drm_.cleanup();
}
return AMDSMI_STATUS_SUCCESS;
+267 -117
Просмотреть файл
@@ -20,7 +20,9 @@
* THE SOFTWARE.
*/
#include <amdgpu.h>
#include <limits.h>
#include <sys/ioctl.h>
#include <libdrm/amdgpu.h>
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
@@ -34,6 +36,7 @@
#include <xf86drmMode.h>
#include <dirent.h>
#include <sys/types.h>
#include <memory>
#include <random>
#include <fstream>
@@ -42,13 +45,13 @@
#include <cstdio>
#include <sstream>
#include <iterator>
#include <sys/ioctl.h>
#include <algorithm>
#include <limits.h>
#include "amd_smi/impl/amd_smi_utils.h"
#include "amd_smi/impl/amd_smi_system.h"
#include "shared_mutex.h" // NOLINT
#include "rocm_smi/rocm_smi_logger.h"
#include "rocm_smi/rocm_smi_utils.h"
std::string leftTrim(const std::string &s) {
if (!s.empty()) {
@@ -94,15 +97,33 @@ std::string removeString(const std::string origStr,
return modifiedStr;
}
void openFileAndModifyBuffer(std::string path, char *buff, size_t sizeOfBuff) {
// Zero-fills buffer[0 .. len) and then copies newString into it, truncating
// as needed so the result is always NUL-terminated within len bytes.
//
// Params:
//   buffer    - destination character array of at least len bytes
//   len       - capacity of buffer in bytes, including the terminator
//   newString - text to place in the buffer after it has been cleared
//
// A null buffer or a zero len is a no-op: there is no room for even the
// terminator, and computing `len - 1` on an unsigned zero would wrap around
// and let the copy run past the end of the buffer.
static void clearCharBufferAndReinitialize(char buffer[], uint32_t len, std::string newString) {
  if (buffer == nullptr || len == 0) {
    return;
  }

  std::fill(buffer, buffer + len, '\0');

  // Reserve the last byte for the terminator.
  const size_t copy_len = std::min(static_cast<size_t>(len) - 1, newString.length());
  if (copy_len > 0) {
    std::memcpy(buffer, newString.data(), copy_len);
  }
  buffer[copy_len] = '\0';
}
int openFileAndModifyBuffer(std::string path, char *buff, size_t sizeOfBuff,
bool trim_whitespace = true) {
bool errorDiscovered = false;
std::ifstream file(path, std::ifstream::in);
std::string contents = {std::istreambuf_iterator<char>{file}, std::istreambuf_iterator<char>{}};
memset(buff, 0, sizeof(char) * sizeOfBuff);
clearCharBufferAndReinitialize(buff, sizeOfBuff, contents);
if (!file.is_open()) {
errorDiscovered = true;
} else {
contents = trim(contents);
if (trim_whitespace) {
contents = amd::smi::trimAllWhiteSpace(contents);
}
// remove all new lines
contents.erase(std::remove(contents.begin(), contents.end(), '\n'), contents.cend());
}
file.close();
@@ -110,6 +131,9 @@ void openFileAndModifyBuffer(std::string path, char *buff, size_t sizeOfBuff) {
&& !contents.empty()) {
std::strncpy(buff, contents.c_str(), sizeOfBuff-1);
buff[sizeOfBuff-1] = '\0';
return 0;
} else {
return -1;
}
}
@@ -143,9 +167,6 @@ static bool isAMDGPU(std::string dev_path) {
amdsmi_status_t smi_amdgpu_find_hwmon_dir(amd::smi::AMDSmiGPUDevice *device, std::string* full_path)
{
if (!device->check_if_drm_is_supported()) {
return AMDSMI_STATUS_NOT_SUPPORTED;
}
if (full_path == nullptr) {
return AMDSMI_STATUS_API_FAILED;
}
@@ -181,9 +202,6 @@ amdsmi_status_t smi_amdgpu_find_hwmon_dir(amd::smi::AMDSmiGPUDevice *device, std
amdsmi_status_t smi_amdgpu_get_board_info(amd::smi::AMDSmiGPUDevice* device, amdsmi_board_info_t *info) {
if (!device->check_if_drm_is_supported()) {
return AMDSMI_STATUS_NOT_SUPPORTED;
}
SMIGPUDEVICE_MUTEX(device->get_mutex())
std::string model_number_path = "/sys/class/drm/" + device->get_gpu_path() + std::string("/device/product_number");
std::string product_serial_path = "/sys/class/drm/" + device->get_gpu_path() + std::string("/device/serial_number");
@@ -191,25 +209,34 @@ amdsmi_status_t smi_amdgpu_get_board_info(amd::smi::AMDSmiGPUDevice* device, amd
std::string manufacturer_name_path = "/sys/class/drm/" + device->get_gpu_path() + std::string("/device/manufacturer");
std::string product_name_path = "/sys/class/drm/" + device->get_gpu_path() + std::string("/device/product_name");
openFileAndModifyBuffer(model_number_path, info->model_number, AMDSMI_MAX_STRING_LENGTH);
openFileAndModifyBuffer(product_serial_path, info->product_serial, AMDSMI_MAX_STRING_LENGTH);
openFileAndModifyBuffer(fru_id_path, info->fru_id, AMDSMI_MAX_STRING_LENGTH);
openFileAndModifyBuffer(manufacturer_name_path, info->manufacturer_name, AMDSMI_MAX_STRING_LENGTH);
openFileAndModifyBuffer(product_name_path, info->product_name, AMDSMI_MAX_STRING_LENGTH);
auto ret_mod = openFileAndModifyBuffer(model_number_path, info->model_number,
AMDSMI_MAX_STRING_LENGTH);
auto ret_ser = openFileAndModifyBuffer(product_serial_path, info->product_serial,
AMDSMI_MAX_STRING_LENGTH);
auto ret_fru = openFileAndModifyBuffer(fru_id_path, info->fru_id, AMDSMI_MAX_STRING_LENGTH);
auto ret_man = openFileAndModifyBuffer(manufacturer_name_path, info->manufacturer_name,
AMDSMI_MAX_STRING_LENGTH);
auto ret_prod = openFileAndModifyBuffer(product_name_path, info->product_name,
AMDSMI_MAX_STRING_LENGTH, false);
std::ostringstream ss;
ss << __PRETTY_FUNCTION__ << "[Before correction] "
<< "Returning status = AMDSMI_STATUS_SUCCESS"
<< " | model_number_path = |" << model_number_path << "|\n"
<< "; info->model_number: |" << info->model_number << "|\n"
<< "; ret_mod = " << ret_mod << "|\n"
<< "\n product_serial_path = |" << product_serial_path << "|\n"
<< "; info->product_serial: |" << info->product_serial << "|\n"
<< "; ret_ser = " << ret_ser << "|\n"
<< "\n fru_id_path = |" << fru_id_path << "|\n"
<< "; info->fru_id: |" << info->fru_id << "|\n"
<< "; ret_fru = " << ret_fru << "|\n"
<< "\n manufacturer_name_path = |" << manufacturer_name_path << "|\n"
<< "; info->manufacturer_name: |" << info->manufacturer_name << "|\n"
<< "; ret_man = " << ret_man << "|\n"
<< "\n product_name_path = |" << product_name_path << "|\n"
<< "; info->product_name: |" << info->product_name << "|";
<< "; info->product_name: |" << info->product_name << "|"
<< "; ret_prod = " << ret_prod << "|\n";
LOG_INFO(ss);
return AMDSMI_STATUS_SUCCESS;
@@ -217,9 +244,6 @@ amdsmi_status_t smi_amdgpu_get_board_info(amd::smi::AMDSmiGPUDevice* device, amd
amdsmi_status_t smi_amdgpu_get_power_cap(amd::smi::AMDSmiGPUDevice* device, int *cap)
{
if (!device->check_if_drm_is_supported()) {
return AMDSMI_STATUS_NOT_SUPPORTED;
}
constexpr int DATA_SIZE = 16;
char val[DATA_SIZE];
std::string fullpath;
@@ -251,9 +275,6 @@ amdsmi_status_t smi_amdgpu_get_power_cap(amd::smi::AMDSmiGPUDevice* device, int
amdsmi_status_t smi_amdgpu_get_ranges(amd::smi::AMDSmiGPUDevice* device, amdsmi_clk_type_t domain,
int *max_freq, int *min_freq, int *num_dpm, int *sleep_state_freq)
{
if (!device->check_if_drm_is_supported()) {
return AMDSMI_STATUS_NOT_SUPPORTED;
}
SMIGPUDEVICE_MUTEX(device->get_mutex())
std::string fullpath = "/sys/class/drm/" + device->get_gpu_path() + "/device";
@@ -289,7 +310,7 @@ amdsmi_status_t smi_amdgpu_get_ranges(amd::smi::AMDSmiGPUDevice* device, amdsmi_
std::ifstream ranges(fullpath.c_str());
if (ranges.fail()) {
return AMDSMI_STATUS_API_FAILED;
return AMDSMI_STATUS_NOT_SUPPORTED;
}
unsigned int max, min, dpm, sleep_freq;
@@ -339,16 +360,13 @@ amdsmi_status_t smi_amdgpu_get_ranges(amd::smi::AMDSmiGPUDevice* device, amdsmi_
}
amdsmi_status_t smi_amdgpu_get_enabled_blocks(amd::smi::AMDSmiGPUDevice* device, uint64_t *enabled_blocks) {
if (!device->check_if_drm_is_supported()) {
return AMDSMI_STATUS_NOT_SUPPORTED;
}
SMIGPUDEVICE_MUTEX(device->get_mutex())
std::string fullpath = "/sys/class/drm/" + device->get_gpu_path() + "/device/ras/features";
std::string fullpath = "/sys/class/drm/" + device->get_gpu_path() + "/device/ras/features";
std::ifstream f(fullpath.c_str());
std::string tmp_str;
if (f.fail()) {
return AMDSMI_STATUS_API_FAILED;
return AMDSMI_STATUS_NOT_SUPPORTED;
}
std::string line;
@@ -372,9 +390,6 @@ amdsmi_status_t smi_amdgpu_get_enabled_blocks(amd::smi::AMDSmiGPUDevice* device,
amdsmi_status_t smi_amdgpu_get_bad_page_info(amd::smi::AMDSmiGPUDevice* device,
uint32_t *num_pages, amdsmi_retired_page_record_t *info) {
if (!device->check_if_drm_is_supported()) {
return AMDSMI_STATUS_NOT_SUPPORTED;
}
SMIGPUDEVICE_MUTEX(device->get_mutex())
std::string line;
std::vector<std::string> badPagesVec;
@@ -449,9 +464,6 @@ static uint32_t GetDeviceIndex(const std::string s) {
amdsmi_status_t smi_amdgpu_get_bad_page_threshold(amd::smi::AMDSmiGPUDevice* device,
uint32_t *threshold) {
if (!device->check_if_drm_is_supported()) {
return AMDSMI_STATUS_NOT_SUPPORTED;
}
SMIGPUDEVICE_MUTEX(device->get_mutex())
//TODO: Accessing the node requires root privileges, and its interface may need to be exposed in another path
@@ -475,9 +487,6 @@ amdsmi_status_t smi_amdgpu_get_bad_page_threshold(amd::smi::AMDSmiGPUDevice* dev
}
amdsmi_status_t smi_amdgpu_validate_ras_eeprom(amd::smi::AMDSmiGPUDevice* device) {
if (!device->check_if_drm_is_supported()) {
return AMDSMI_STATUS_NOT_SUPPORTED;
}
SMIGPUDEVICE_MUTEX(device->get_mutex())
//uint32_t index = GetDeviceIndex(device->get_gpu_path());
@@ -487,9 +496,6 @@ amdsmi_status_t smi_amdgpu_validate_ras_eeprom(amd::smi::AMDSmiGPUDevice* device
}
amdsmi_status_t smi_amdgpu_get_ecc_error_count(amd::smi::AMDSmiGPUDevice* device, amdsmi_error_count_t *err_cnt) {
if (!device->check_if_drm_is_supported()) {
return AMDSMI_STATUS_NOT_SUPPORTED;
}
SMIGPUDEVICE_MUTEX(device->get_mutex())
char str[10];
@@ -511,81 +517,26 @@ amdsmi_status_t smi_amdgpu_get_ecc_error_count(amd::smi::AMDSmiGPUDevice* device
return AMDSMI_STATUS_SUCCESS;
}
amdsmi_status_t smi_amdgpu_get_driver_version(amd::smi::AMDSmiGPUDevice* device, int *length, char *version) {
if (!device->check_if_drm_is_supported()) {
return AMDSMI_STATUS_NOT_SUPPORTED;
}
SMIGPUDEVICE_MUTEX(device->get_mutex())
amdsmi_status_t status = AMDSMI_STATUS_SUCCESS;
FILE *fp;
char *tmp, *ptr, *token;
char *ver = NULL;
int i = 0;
amdsmi_status_t status = AMDSMI_STATUS_SUCCESS;
size_t len;
if (length)
len = *length < AMDSMI_MAX_DRIVER_VERSION_LENGTH ? *length : AMDSMI_MAX_DRIVER_VERSION_LENGTH;
else
len = AMDSMI_MAX_DRIVER_VERSION_LENGTH;
std::string path = "/sys/module/amdgpu/version";
fp = fopen(path.c_str(), "r");
if (fp == nullptr){
fp = fopen("/proc/version", "r");
if (fp == nullptr) {
status = AMDSMI_STATUS_IO;
return status;
}
len = 0;
if (getline(&ver, &len, fp) <= 0) {
status = AMDSMI_STATUS_IO;
fclose(fp);
free(ver);
return status;
}
fclose(fp);
ptr = ver;
token = strtok_r(ptr, " ", &tmp);
if (!token) {
free(ver);
status = AMDSMI_STATUS_IO;
return status;
}
for (i = 0; i < 2; i++) {
ptr = strtok_r(NULL, " ", &tmp);
if (!ptr)
break;
}
if (i != 2 || !ptr) {
free(ver);
status = AMDSMI_STATUS_IO;
return status;
}
if (length)
len = *length < AMDSMI_MAX_DRIVER_VERSION_LENGTH ? *length :
AMDSMI_MAX_DRIVER_VERSION_LENGTH;
else
len = AMDSMI_MAX_DRIVER_VERSION_LENGTH;
strncpy(version, ptr, len);
free(ver);
if (*length <= 0 || version == nullptr) {
return AMDSMI_STATUS_INVAL;
} else {
if ((len = getline(&version, &len, fp)) <= 0)
status = AMDSMI_STATUS_IO;
fclose(fp);
if (length) {
*length = version[len-1] == '\n' ? static_cast<int>(len - 1) : static_cast<int>(len);
}
version[len-1] = version[len-1] == '\n' ? '\0' : version[len-1];
len = static_cast<size_t>(*length);
}
std::string empty = "";
std::strncpy(version, empty.c_str(), len-1);
openFileAndModifyBuffer("/sys/module/amdgpu/version",
version, static_cast<size_t>(len));
if (version[0] == '\0') {
openFileAndModifyBuffer("/proc/version", version, static_cast<size_t>(len));
if (version[0] == '\0') {
return AMDSMI_STATUS_IO;
}
}
return status;
}
@@ -621,17 +572,37 @@ amdsmi_status_t smi_amdgpu_get_market_name_from_dev_id(amd::smi::AMDSmiGPUDevice
return AMDSMI_STATUS_ARG_PTR_NULL;
}
std::ostringstream ss;
// requires libdrm being active
if (!device->check_if_drm_is_supported()) {
ss << __PRETTY_FUNCTION__ << " | DRM is not supported";
LOG_ERROR(ss);
return AMDSMI_STATUS_NOT_SUPPORTED;
}
uint32_t major_version, minor_version;
amdgpu_device_handle device_handle = nullptr;
std::string render_name = device->get_gpu_path();
int fd = -1;
std::string path = "/dev/dri/" + render_name;
uint32_t gpu_fd = device->get_gpu_fd();
if (render_name != "") {
fd = open(path.c_str(), O_RDWR | O_CLOEXEC);
} else {
market_name[0] = '\0';
close(fd);
return AMDSMI_STATUS_NOT_SUPPORTED;
}
ss << __PRETTY_FUNCTION__ << " | Render Name: "
<< render_name << "; path: " << path << "; fd: " << fd;
LOG_DEBUG(ss);
int ret = amdgpu_device_initialize(gpu_fd, &major_version, &minor_version, &device_handle);
int ret = amdgpu_device_initialize(fd, &major_version, &minor_version, &device_handle);
if (ret != 0) {
std::string empty = "";
std::strncpy(market_name, empty.c_str(), AMDSMI_256_LENGTH - 1);
amdgpu_device_deinitialize(device_handle);
close(fd);
return AMDSMI_STATUS_DRM_ERROR;
}
@@ -641,19 +612,17 @@ amdsmi_status_t smi_amdgpu_get_market_name_from_dev_id(amd::smi::AMDSmiGPUDevice
std::strncpy(market_name, name, AMDSMI_256_LENGTH - 1);
market_name[AMDSMI_256_LENGTH - 1] = '\0';
amdgpu_device_deinitialize(device_handle);
close(fd);
return AMDSMI_STATUS_SUCCESS;
}
amdgpu_device_deinitialize(device_handle);
close(fd);
return AMDSMI_STATUS_DRM_ERROR;
}
amdsmi_status_t smi_amdgpu_is_gpu_power_management_enabled(amd::smi::AMDSmiGPUDevice* device,
bool *enabled) {
if (!device->check_if_drm_is_supported()) {
return AMDSMI_STATUS_NOT_SUPPORTED;
}
if (enabled == nullptr) {
return AMDSMI_STATUS_API_FAILED;
}
@@ -713,3 +682,184 @@ std::string smi_amdgpu_get_status_string(amdsmi_status_t ret, bool fullStatus =
return std::string(err_str);
}
// TODO(amdsmi_team): Do we want to include these functions in header?
// Translates a processor handle into its flat device index.
//
// Devices are numbered by walking the sockets in the order returned by
// amdsmi_get_socket_handles() and, within each socket, the processors in the
// order returned by amdsmi_get_processor_handles().
//
// Params:
//   processor_handle - handle to look up
//   device_index     - out: flat index of the handle; left at UINT32_MAX
//                      whenever the lookup does not succeed
// Returns:
//   AMDSMI_STATUS_SUCCESS    - handle found, *device_index is valid
//   AMDSMI_STATUS_INVAL      - device_index is null
//   AMDSMI_STATUS_API_FAILED - no socket owns processor_handle
//   any other status         - propagated from socket/processor enumeration
amdsmi_status_t smi_amdgpu_get_device_index(amdsmi_processor_handle processor_handle,
                                            uint32_t *device_index) {
  std::ostringstream ss;
  if (device_index == nullptr) {
    return AMDSMI_STATUS_INVAL;
  }
  *device_index = std::numeric_limits<uint32_t>::max();  // set to max value for invalid readings

  uint32_t socket_count = 0;
  auto ret = amdsmi_get_socket_handles(&socket_count, nullptr);
  if (ret != AMDSMI_STATUS_SUCCESS) {
    return ret;
  }

  // data() is well-defined on an empty vector, unlike &sockets[0].
  std::vector<amdsmi_socket_handle> sockets(socket_count);
  ret = amdsmi_get_socket_handles(&socket_count, sockets.data());
  if (ret != AMDSMI_STATUS_SUCCESS) {
    return ret;
  }

  uint32_t current_device_index = 0;
  for (uint32_t i = 0; i < socket_count; i++) {
    // Socket info is only used for logging, so a failure here is tolerated;
    // the buffer is zero-initialised so the log never reads garbage.
    char socket_info[128] = {};
    ret = amdsmi_get_socket_info(sockets[i], sizeof(socket_info), socket_info);
    socket_info[sizeof(socket_info) - 1] = '\0';
    ss << __PRETTY_FUNCTION__ << " | Socket " << socket_info << "\n";
    LOG_DEBUG(ss);

    // Get the device count available for the socket.
    uint32_t device_count = 0;
    ret = amdsmi_get_processor_handles(sockets[i], &device_count, nullptr);
    if (ret != AMDSMI_STATUS_SUCCESS) {
      // A partial enumeration would yield a wrong flat index.
      return ret;
    }

    // Get all devices of the socket.
    std::vector<amdsmi_processor_handle> processor_handles(device_count);
    ret = amdsmi_get_processor_handles(sockets[i], &device_count, processor_handles.data());
    if (ret != AMDSMI_STATUS_SUCCESS) {
      return ret;
    }
    ss << __PRETTY_FUNCTION__ << " | Processor Count: " << device_count << "\n";
    LOG_DEBUG(ss);

    for (uint32_t j = 0; j < device_count; j++) {
      if (processor_handles[j] == processor_handle) {
        *device_index = current_device_index;
        ss << __PRETTY_FUNCTION__ << " | AMDSMI_STATUS_SUCCESS "
           << "Returning device_index: " << *device_index << "\nSocket #: " << i
           << "; Device #: " << j << "; current_device_index #: " << current_device_index
           << "\n";
        LOG_DEBUG(ss);
        return AMDSMI_STATUS_SUCCESS;
      }
      current_device_index++;
    }
  }

  ss << __PRETTY_FUNCTION__ << " | AMDSMI_STATUS_API_FAILED "
     << "Returning device_index: " << *device_index << "\n";
  LOG_DEBUG(ss);
  return AMDSMI_STATUS_API_FAILED;
}
// TODO(amdsmi_team): Do we want to include these functions in header?
// Counts the processors across every socket reported by
// amdsmi_get_socket_handles().
//
// Params:
//   total_num_devices - out: sum of the per-socket processor counts; left at
//                       UINT32_MAX whenever the count cannot be completed
// Returns:
//   AMDSMI_STATUS_SUCCESS - *total_num_devices is valid
//   AMDSMI_STATUS_INVAL   - total_num_devices is null
//   any other status      - propagated from socket/processor enumeration
amdsmi_status_t smi_amdgpu_get_device_count(uint32_t *total_num_devices) {
  std::ostringstream ss;
  if (total_num_devices == nullptr) {
    return AMDSMI_STATUS_INVAL;
  }
  // set to max value for invalid readings
  *total_num_devices = std::numeric_limits<uint32_t>::max();

  uint32_t socket_count = 0;
  auto ret = amdsmi_get_socket_handles(&socket_count, nullptr);
  if (ret != AMDSMI_STATUS_SUCCESS) {
    return ret;
  }

  // data() is well-defined on an empty vector, unlike &sockets[0].
  std::vector<amdsmi_socket_handle> sockets(socket_count);
  ret = amdsmi_get_socket_handles(&socket_count, sockets.data());
  if (ret != AMDSMI_STATUS_SUCCESS) {
    return ret;
  }

  uint32_t device_num = 0;
  for (uint32_t i = 0; i < socket_count; i++) {
    // Socket info is only used for logging, so a failure here is tolerated;
    // the buffer is zero-initialised so the log never reads garbage.
    char socket_info[128] = {};
    ret = amdsmi_get_socket_info(sockets[i], sizeof(socket_info), socket_info);
    socket_info[sizeof(socket_info) - 1] = '\0';
    ss << __PRETTY_FUNCTION__ << " | Socket " << socket_info << "\n";
    LOG_DEBUG(ss);

    // Only the count is needed, so the handles themselves are not fetched.
    uint32_t processor_count = 0;
    ret = amdsmi_get_processor_handles(sockets[i], &processor_count, nullptr);
    if (ret != AMDSMI_STATUS_SUCCESS) {
      // A partial total would be silently wrong; report the failure instead.
      return ret;
    }
    ss << __PRETTY_FUNCTION__ << " | Processor Count: " << processor_count << "\n";
    LOG_DEBUG(ss);

    device_num += processor_count;
  }

  *total_num_devices = device_num;
  ss << __PRETTY_FUNCTION__ << " | AMDSMI_STATUS_SUCCESS "
     << "Returning total_num_devices: " << *total_num_devices << "\n";
  LOG_DEBUG(ss);
  return AMDSMI_STATUS_SUCCESS;
}
// TODO(amdsmi_team): Do we want to include these functions in header?
// Translates a flat device index into its processor handle.
//
// Devices are numbered by walking the sockets in the order returned by
// amdsmi_get_socket_handles() and, within each socket, the processors in the
// order returned by amdsmi_get_processor_handles().
//
// Params:
//   device_index     - flat index to look up
//   processor_handle - out: handle at that index; untouched on failure
// Returns:
//   AMDSMI_STATUS_SUCCESS    - *processor_handle is valid
//   AMDSMI_STATUS_INVAL      - processor_handle is null
//   AMDSMI_STATUS_API_FAILED - device_index is beyond the last device
//   any other status         - propagated from socket/processor enumeration
amdsmi_status_t smi_amdgpu_get_processor_handle_by_index(
    uint32_t device_index,
    amdsmi_processor_handle *processor_handle) {
  std::ostringstream ss;
  if (processor_handle == nullptr) {
    return AMDSMI_STATUS_INVAL;
  }

  uint32_t socket_count = 0;
  auto ret = amdsmi_get_socket_handles(&socket_count, nullptr);
  if (ret != AMDSMI_STATUS_SUCCESS) {
    return ret;
  }

  // data() is well-defined on an empty vector, unlike &sockets[0].
  std::vector<amdsmi_socket_handle> sockets(socket_count);
  ret = amdsmi_get_socket_handles(&socket_count, sockets.data());
  if (ret != AMDSMI_STATUS_SUCCESS) {
    return ret;
  }

  uint32_t current_device_index = 0;
  for (uint32_t i = 0; i < socket_count; i++) {
    // Socket info is only used for logging, so a failure here is tolerated;
    // the buffer is zero-initialised so the log never reads garbage.
    char socket_info[128] = {};
    ret = amdsmi_get_socket_info(sockets[i], sizeof(socket_info), socket_info);
    socket_info[sizeof(socket_info) - 1] = '\0';
    ss << __PRETTY_FUNCTION__ << " | Socket " << socket_info << "\n";
    LOG_DEBUG(ss);

    // Get the device count available for the socket.
    uint32_t device_count = 0;
    ret = amdsmi_get_processor_handles(sockets[i], &device_count, nullptr);
    if (ret != AMDSMI_STATUS_SUCCESS) {
      // A partial enumeration would map the index to the wrong device.
      return ret;
    }

    // Get all devices of the socket.
    std::vector<amdsmi_processor_handle> processor_handles(device_count);
    ret = amdsmi_get_processor_handles(sockets[i], &device_count, processor_handles.data());
    if (ret != AMDSMI_STATUS_SUCCESS) {
      return ret;
    }
    ss << __PRETTY_FUNCTION__ << " | Processor Count: " << device_count << "\n";
    LOG_DEBUG(ss);

    for (uint32_t j = 0; j < device_count; j++) {
      if (current_device_index == device_index) {
        *processor_handle = processor_handles[j];
        ss << __PRETTY_FUNCTION__ << " | AMDSMI_STATUS_SUCCESS"
           << "\nReturning processor_handle for device_index: " << device_index
           << "\nSocket #: " << i << "; Device #: " << j
           << "; current_device_index #: " << current_device_index
           << "; processor_handle: " << *processor_handle
           << "; processor_handles[j]: " << processor_handles[j]
           << "\n";
        LOG_DEBUG(ss);
        return AMDSMI_STATUS_SUCCESS;
      }
      current_device_index++;
    }
  }

  ss << __PRETTY_FUNCTION__ << " | AMDSMI_STATUS_API_FAILED "
     << "Could not find matching processor_handle for device_index: " << device_index << "\n";
  LOG_DEBUG(ss);
  return AMDSMI_STATUS_API_FAILED;
}
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+51
Просмотреть файл
@@ -0,0 +1,51 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_COMPUTEPARTITION_READ_WRITE_H_
#define TESTS_AMD_SMI_TEST_FUNCTIONAL_COMPUTEPARTITION_READ_WRITE_H_
#include "../test_base.h"
// Functional test case for compute partition read/write, built on TestBase.
// Only declarations are visible here; the behaviour of each hook lives in
// the corresponding .cc file.
class TestComputePartitionReadWrite : public TestBase {
public:
// @Brief: Constructor for test case of TestComputePartitionReadWrite
TestComputePartitionReadWrite();
// @Brief: Destructor for test case of TestComputePartitionReadWrite
virtual ~TestComputePartitionReadWrite();
// @Brief: Set up the environment for measurement
virtual void SetUp();
// @Brief: Core measurement execution
virtual void Run();
// @Brief: Clean up and retrieve the resources
virtual void Close();
// @Brief: Display results
virtual void DisplayResults() const;
// @Brief: Display information about what this test does
virtual void DisplayTestInfo(void);
};
#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_COMPUTEPARTITION_READ_WRITE_H_
+10 -8
Просмотреть файл
@@ -35,6 +35,7 @@
#include "gpu_metrics_read.h"
#include "../test_common.h"
#include "rocm_smi/rocm_smi_utils.h"
#include "amd_smi/impl/amd_smi_utils.h"
TestGpuMetricsRead::TestGpuMetricsRead() : TestBase() {
@@ -101,6 +102,15 @@ void TestGpuMetricsRead::Run(void) {
}
}
} else {
auto temp_xcd_counter_value = uint16_t(0);
auto ret_xcd = amdsmi_get_gpu_xcd_counter(processor_handles_[i], &temp_xcd_counter_value);
IF_VERB(STANDARD) {
std::cout << "\t\t** amdsmi_get_gpu_xcd_counter(): "
<< smi_amdgpu_get_status_string(ret_xcd, false)
<< "\n\t\t** XCD Counter Value: "
<< temp_xcd_counter_value
<< "\n";
}
CHK_ERR_ASRT(err);
IF_VERB(STANDARD) {
std::cout << "METRIC TABLE HEADER:\n";
@@ -380,13 +390,5 @@ void TestGpuMetricsRead::Run(void) {
amdsmi_status_code_to_string(err, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_info(nullptr check): " << status_string << "\n";
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
// TODO(AMD_SMI_team): add xcd_counter_get for amd smi
// auto temp_xcd_counter_value = uint16_t(0);
// err = rsmi_dev_metrics_xcd_counter_get(i, &temp_xcd_counter_value);
// if (err != RSMI_STATUS_NOT_SUPPORTED) {
// CHK_ERR_ASRT(err);
// }
}
}
+26 -2
Просмотреть файл
@@ -22,11 +22,11 @@
#include <stdint.h>
#include <stddef.h>
#include <gtest/gtest.h>
#include <iostream>
#include <string>
#include <gtest/gtest.h>
#include <map>
#include "amd_smi/amdsmi.h"
#include "id_info_read.h"
#include "../test_common.h"
@@ -63,6 +63,15 @@ void TestIdInfoRead::Close() {
static const uint32_t kBufferLen = 80;
// Printable name for each amdsmi_virtualization_mode_t value listed below.
// Lookups for a value that is not listed must be handled by the caller.
static const std::map< amdsmi_virtualization_mode_t, std::string>
virtualization_mode_map = {
{AMDSMI_VIRTUALIZATION_MODE_UNKNOWN, "UNKNOWN"},
{AMDSMI_VIRTUALIZATION_MODE_BAREMETAL, "BAREMETAL"},
{ AMDSMI_VIRTUALIZATION_MODE_HOST, "HOST"},
{ AMDSMI_VIRTUALIZATION_MODE_GUEST, "GUEST"},
{AMDSMI_VIRTUALIZATION_MODE_PASSTHROUGH, "PASSTHROUGH"}
};
void TestIdInfoRead::Run(void) {
amdsmi_status_t err;
uint16_t id;
@@ -227,5 +236,20 @@ void TestIdInfoRead::Run(void) {
// Verify api support checking functionality is working
err = amdsmi_get_gpu_bdf_id(processor_handles_[i], nullptr);
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
// Verify api support checking functionality is working
err = amdsmi_get_gpu_virtualization_mode(processor_handles_[i], nullptr);
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
amdsmi_virtualization_mode_t vmode;
err = amdsmi_get_gpu_virtualization_mode(processor_handles_[i], &vmode);
ASSERT_EQ(err, AMDSMI_STATUS_SUCCESS);
IF_VERB(STANDARD) {
auto it = virtualization_mode_map.find(vmode);
if (it != virtualization_mode_map.end()) {
std::cout << "\t**Virtualization Mode: " << it->second << std::endl;
} else {
std::cout << "\t**Virtualization Mode: MAP TYPE UNKNOWN?" << std::endl;
}
}
}
}
+744
Просмотреть файл
@@ -0,0 +1,744 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <stdint.h>
#include <stddef.h>
#include <cstdint>
#include <iostream>
#include <string>
#include <map>
#include <limits>
#include "gtest/gtest.h"
#include "../test_base.h"
#include "../test_common.h"
#include "amd_smi/amdsmi.h"
#include "amd_smi/impl/amd_smi_utils.h"
#include "memorypartition_read_write.h"
// Partition-count limits, one per mode named in the identifier.
// NOTE(review): SPX/DPX/TPX/QPX look like compute-partition mode names; how
// these limits are used is not visible in this part of the file - confirm.
const uint32_t MAX_UNSUPPORTED_PARTITIONS = 0;
const uint32_t MAX_SPX_PARTITIONS = 1; // Single GPU node
const uint32_t MAX_DPX_PARTITIONS = 2;
const uint32_t MAX_TPX_PARTITIONS = 3;
const uint32_t MAX_QPX_PARTITIONS = 4;
// Constructs the test case and registers its title and description through
// the set_title()/set_description() helpers.
TestMemoryPartitionReadWrite::TestMemoryPartitionReadWrite() : TestBase() {
set_title("AMDSMI Memory Partition Read Test");
set_description("The memory partition tests verifies that the memory "
"partition settings can be read and updated properly.");
}
// Destructor; this class has nothing of its own to release.
TestMemoryPartitionReadWrite::~TestMemoryPartitionReadWrite(void) {
}
// Prepares the test by running the common TestBase setup; this test adds no
// setup steps of its own.
void TestMemoryPartitionReadWrite::SetUp(void) {
  TestBase::SetUp();
}
// Shows the test information by delegating to TestBase::DisplayTestInfo().
void TestMemoryPartitionReadWrite::DisplayTestInfo(void) {
TestBase::DisplayTestInfo();
}
// Shows the results by delegating to TestBase::DisplayResults(); this test
// adds no output of its own.
void TestMemoryPartitionReadWrite::DisplayResults(void) const {
  TestBase::DisplayResults();
}
// Tears the test down by delegating to TestBase::Close().
// NOTE(review): the earlier comment here said the base call closes handles
// opened by the test utilities and calls amdsmi_shut_down(), so it must run
// after any other cleanup. TestBase::Close() is not visible here - confirm.
void TestMemoryPartitionReadWrite::Close() {
TestBase::Close();
}
// Maps a memory partition mode to its display label ("NPS1", "NPS2", "NPS4"
// or "NPS8"); every other value yields "UNKNOWN".
static const std::string
memoryPartitionString(amdsmi_memory_partition_type_t memoryPartitionType) {
  const char *label = "UNKNOWN";
  if (memoryPartitionType == AMDSMI_MEMORY_PARTITION_NPS1) {
    label = "NPS1";
  } else if (memoryPartitionType == AMDSMI_MEMORY_PARTITION_NPS2) {
    label = "NPS2";
  } else if (memoryPartitionType == AMDSMI_MEMORY_PARTITION_NPS4) {
    label = "NPS4";
  } else if (memoryPartitionType == AMDSMI_MEMORY_PARTITION_NPS8) {
    label = "NPS8";
  }
  return label;
}
// Reverse lookup from a memory partition label to its enum value. Run() uses
// it to turn the mode string saved at the start of the test back into the
// value passed to the set call when restoring the original mode.
static const std::map<std::string, amdsmi_memory_partition_type_t>
    mapStringToRSMIMemoryPartitionTypes = {
        {"NPS1", AMDSMI_MEMORY_PARTITION_NPS1},
        {"NPS2", AMDSMI_MEMORY_PARTITION_NPS2},
        {"NPS4", AMDSMI_MEMORY_PARTITION_NPS4},
        {"NPS8", AMDSMI_MEMORY_PARTITION_NPS8},
};
void TestMemoryPartitionReadWrite::Run(void) {
amdsmi_status_t ret, err, ret_set;
constexpr uint32_t k255Len = 255;
constexpr uint32_t k0Len = 0;
char orig_memory_partition[k255Len];
char current_memory_partition[k255Len];
orig_memory_partition[0] = '\0';
current_memory_partition[0] = '\0';
amdsmi_memory_partition_config_t current_memory_config;
const uint32_t kMAX_UINT32 = std::numeric_limits<uint32_t>::max();
std::map<uint32_t, AcceleratorProfileConfig> orig_dev_config; // index, ProfileConfig
TestBase::Run();
if (setup_failed_) {
std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl;
return;
}
bool isVerbose = (this->verbosity() &&
this->verbosity() >= (this->TestBase::VERBOSE_STANDARD)) ? true: false;
// Save original memory partition settings (see orig_dev_config ^)
IF_VERB(STANDARD) {
std::cout << "\t**=========================================================\n";
std::cout << "\t**Save Original Compute Partition Settings ================\n";
std::cout << "\t**=========================================================\n";
}
auto initial_num_devices = num_monitor_devs();
for (uint32_t dv_ind = 0; dv_ind < initial_num_devices; ++dv_ind) {
if (dv_ind != 0) {
std::cout << "\n";
}
PrintDeviceHeader(processor_handles_[dv_ind]);
amdsmi_accelerator_partition_profile_t profile = {};
uint32_t partition_id[8] = {0, 0, 0, 0, 0, 0, 0, 0};
ret = amdsmi_get_gpu_accelerator_partition_profile(processor_handles_[dv_ind],
&profile, &partition_id[0]);
std::string nps_caps_str = "";
if ((profile.memory_caps.nps_flags.nps1_cap == 0
&& profile.memory_caps.nps_flags.nps2_cap == 0
&& profile.memory_caps.nps_flags.nps4_cap == 0
&& profile.memory_caps.nps_flags.nps8_cap == 0)) {
nps_caps_str = "N/A";
} else {
nps_caps_str.clear();
if (profile.memory_caps.nps_flags.nps1_cap) {
(nps_caps_str.empty()) ? nps_caps_str += "NPS1" : nps_caps_str += ", NPS1";
}
if (profile.memory_caps.nps_flags.nps2_cap) {
(nps_caps_str.empty()) ? nps_caps_str += "NPS2" : nps_caps_str += ", NPS2";
}
if (profile.memory_caps.nps_flags.nps4_cap) {
(nps_caps_str.empty()) ? nps_caps_str += "NPS4" : nps_caps_str += ", NPS4";
}
if (profile.memory_caps.nps_flags.nps8_cap) {
(nps_caps_str.empty()) ? nps_caps_str += "NPS8" : nps_caps_str += ", NPS8";
}
}
std::string profile_type_str = "N/A";
if (profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_SPX) {
profile_type_str = "SPX";
} else if (profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_DPX) {
profile_type_str = "DPX";
} else if (profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_TPX) {
profile_type_str = "TPX";
} else if (profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_QPX) {
profile_type_str = "QPX";
} else if (profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_CPX) {
profile_type_str = "CPX";
}
std::string partition_id_str = "";
for (int i = 0; i < 8; i++) {
partition_id_str += std::to_string(partition_id[i]);
if (i < 7) {
partition_id_str += ", ";
}
switch (profile.profile_type) {
case AMDSMI_ACCELERATOR_PARTITION_SPX:
EXPECT_LT(partition_id[i], MAX_SPX_PARTITIONS);
break;
case AMDSMI_ACCELERATOR_PARTITION_DPX:
EXPECT_LT(partition_id[i], MAX_DPX_PARTITIONS);
break;
case AMDSMI_ACCELERATOR_PARTITION_TPX:
EXPECT_LT(partition_id[i], MAX_TPX_PARTITIONS);
break;
case AMDSMI_ACCELERATOR_PARTITION_QPX:
EXPECT_LT(partition_id[i], MAX_QPX_PARTITIONS);
break;
case AMDSMI_ACCELERATOR_PARTITION_CPX: {
uint16_t num_xcd;
uint32_t max_xcps = 0;
ret = amdsmi_get_gpu_xcd_counter(processor_handles_[dv_ind], &num_xcd);
if (ret == AMDSMI_STATUS_SUCCESS) {
max_xcps = static_cast<uint32_t>(num_xcd);
}
EXPECT_LT(partition_id[i], max_xcps);
break;
}
case AMDSMI_ACCELERATOR_PARTITION_INVALID:
EXPECT_EQ(partition_id[i], MAX_UNSUPPORTED_PARTITIONS);
break;
default:
EXPECT_EQ(partition_id[i], MAX_UNSUPPORTED_PARTITIONS);
break;
}
}
IF_VERB(STANDARD) {
std::cout << "\t**amdsmi_get_gpu_accelerator_partition_profile(processor_handles_["
<< dv_ind << "], &profile, &partition_id[0]):\n"
<< "\t\t" << smi_amdgpu_get_status_string(ret, false)
<< "\n\t**Current profile.profile_type: "
<< profile_type_str
<< "\n\t**profile.num_partitions: "
<< (profile.num_partitions == kMAX_UINT32
? "N/A" : std::to_string(profile.num_partitions))
<< "\n\t**profile.memory_caps: "
<< nps_caps_str
<< "\n\t**profile.profile_index: "
<< (profile.profile_index == kMAX_UINT32
? "N/A" : std::to_string(profile.profile_index))
<< "\n\t**profile.num_resources: "
<< profile.num_resources
<< "\n\t**partition_id: "
<< partition_id_str
<< std::endl;
}
EXPECT_TRUE(ret == AMDSMI_STATUS_SUCCESS
|| ret == AMDSMI_STATUS_NOT_SUPPORTED);
amdsmi_accelerator_partition_profile_config_t profile_config = {};
ret = amdsmi_get_gpu_accelerator_partition_profile_config(processor_handles_[dv_ind],
&profile_config);
IF_VERB(STANDARD) {
std::cout << "\t**amdsmi_get_gpu_accelerator_partition_profile_config(processor_handles_["
<< dv_ind << "], &profile_config):\n"
<< "\t\t" << smi_amdgpu_get_status_string(ret, false)
<< "\n\t**profile_config.num_profiles: "
<< profile_config.num_profiles
<< "\n\t**profile_config.num_resource_profiles: "
<< profile_config.num_resource_profiles
<< std::endl;
}
AcceleratorProfileConfig original_profile_config =
getAvailableProfileConfigs(dv_ind, profile, profile_config, isVerbose);
orig_dev_config[dv_ind] = original_profile_config;
// waitForUserInput(); // watch for any errors
IF_VERB(STANDARD) {
std::cout << "\t**=========================================================\n";
std::cout << "\t**Checking valid profile Sets =============================\n";
std::cout << "\t**=========================================================\n";
}
int resource_index = 0;
for (uint32_t i = 0; i < profile_config.num_profiles; i++) {
auto current_profile = profile_config.profiles[i];
std::string profile_type_str = "N/A";
if (current_profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_SPX) {
profile_type_str = "SPX";
} else if (current_profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_DPX) {
profile_type_str = "DPX";
} else if (current_profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_TPX) {
profile_type_str = "TPX";
} else if (current_profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_QPX) {
profile_type_str = "QPX";
} else if (current_profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_CPX) {
profile_type_str = "CPX";
}
std::string nps_caps_str = "";
if ((current_profile.memory_caps.nps_flags.nps1_cap == 0
&& current_profile.memory_caps.nps_flags.nps2_cap == 0
&& current_profile.memory_caps.nps_flags.nps4_cap == 0
&& current_profile.memory_caps.nps_flags.nps8_cap == 0)) {
nps_caps_str = "N/A";
} else {
nps_caps_str.clear();
if (current_profile.memory_caps.nps_flags.nps1_cap) {
(nps_caps_str.empty()) ? nps_caps_str += "NPS1" : nps_caps_str += ", NPS1";
}
if (current_profile.memory_caps.nps_flags.nps2_cap) {
(nps_caps_str.empty()) ? nps_caps_str += "NPS2" : nps_caps_str += ", NPS2";
}
if (current_profile.memory_caps.nps_flags.nps4_cap) {
(nps_caps_str.empty()) ? nps_caps_str += "NPS4" : nps_caps_str += ", NPS4";
}
if (current_profile.memory_caps.nps_flags.nps8_cap) {
(nps_caps_str.empty()) ? nps_caps_str += "NPS8" : nps_caps_str += ", NPS8";
}
}
IF_VERB(STANDARD) {
std::cout << "\t**profile_config.profiles[" << i << "]:\n"
<< "\t\tprofile_type: " << profile_type_str
<< "\n\t\tnum_partitions: " << current_profile.num_partitions
<< "\n\t\tmemory_caps: " << nps_caps_str
<< "\n\t\tcurrent_profile.num_resources: " << current_profile.num_resources
<< std::endl;
}
for (auto j = 0; j < current_profile.num_resources; j++) {
auto rp = profile_config.resource_profiles[resource_index];
IF_VERB(STANDARD) {
std::cout << "\n\t\t\tprofile_index: " << current_profile.profile_index
<< "\n\t\t\tresource_index: " << resource_index
<< "\n\t\t\tprofile_config.resource_profiles[" << resource_index
<< "].resource_type: "
<< getResourceType(rp.resource_type)
<< "\n\t\t\tprofile_config.resource_profiles[" << resource_index
<< "].partition_resource: "
<< rp.partition_resource
<< "\n\t\t\tprofile_config.resource_profiles[" << resource_index
<< "].num_partitions_share_resource: "
<< rp.num_partitions_share_resource
<< std::endl;
}
resource_index++;
}
}
EXPECT_TRUE(ret == AMDSMI_STATUS_SUCCESS
|| ret == AMDSMI_STATUS_NOT_SUPPORTED);
if (ret == AMDSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**" << "amdsmi_get_gpu_accelerator_partition_profile_config: "
<< "Not supported on this machine" << std::endl;
}
continue;
}
}
// Run memory partition tests
uint32_t current_num_devices = 0;
smi_amdgpu_get_device_count(&current_num_devices);
IF_VERB(STANDARD) {
std::cout << "\t**Total Num Devices: " << current_num_devices << std::endl;
}
// Leaving for debug purposes - uncomment to test a specific number of devices
// uint32_t num_devices_to_test = promptNumDevicesToTest(current_num_devices);
uint32_t num_devices_to_test = current_num_devices;
for (uint32_t dv_ind = 0; dv_ind < num_devices_to_test; ++dv_ind) {
bool wasSetSuccess = false;
if (dv_ind != 0) {
IF_VERB(STANDARD) {
std::cout << std::endl;
}
}
PrintDeviceHeader(processor_handles_[dv_ind]);
// Standard checks to see if API is supported, before running full tests
ret = amdsmi_get_gpu_memory_partition(
processor_handles_[dv_ind], orig_memory_partition, k255Len);
if (ret == AMDSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**" << ": "
<< "Not supported on this machine" << std::endl;
}
continue;
} else {
CHK_ERR_ASRT(ret)
}
IF_VERB(STANDARD) {
std::cout << std::endl << "\t**Current Memory Partition: "
<< orig_memory_partition << std::endl;
}
if ((orig_memory_partition == nullptr) ||
(orig_memory_partition[0] == '\0')) {
std::cout << "***System memory partition value is not defined or received"
" unexpected data. Skip memory partition test." << std::endl;
continue;
}
ASSERT_TRUE(ret == AMDSMI_STATUS_SUCCESS);
// Verify api support checking functionality is working
constexpr uint32_t k2Len = 2;
char smallBuffer[k2Len];
err = amdsmi_get_gpu_memory_partition(processor_handles_[dv_ind], smallBuffer, k2Len);
uint32_t size = static_cast<uint32_t>(sizeof(smallBuffer)/sizeof(*smallBuffer));
ASSERT_EQ(err, AMDSMI_STATUS_INSUFFICIENT_SIZE);
ASSERT_EQ(k2Len, size);
if (err == AMDSMI_STATUS_INSUFFICIENT_SIZE) {
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Confirmed AMDSMI_STATUS_INSUFFICIENT_SIZE was returned "
<< "and size is 2, as requested." << std::endl;
}
}
// Verify api support checking functionality is working
err = amdsmi_get_gpu_memory_partition(processor_handles_[dv_ind], nullptr, k255Len);
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
if (err == AMDSMI_STATUS_INVAL) {
IF_VERB(STANDARD) {
std::cout << "\t**amdsmi_get_gpu_memory_partition(processor_handles_[" << dv_ind << "], "
<< "nullptr, 255): "
<< "Confirmed AMDSMI_STATUS_INVAL was returned."
<< std::endl;
}
}
err = amdsmi_get_gpu_memory_partition_config(processor_handles_[dv_ind], nullptr);
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
if (err == AMDSMI_STATUS_INVAL) {
IF_VERB(STANDARD) {
std::cout << "\t**amdsmi_get_gpu_memory_partition(processor_handles_[" << dv_ind
<< "], nullptr): Confirmed AMDSMI_STATUS_INVAL was returned."
<< std::endl;
}
}
// Verify api support checking functionality is working
err = amdsmi_get_gpu_memory_partition(processor_handles_[dv_ind], orig_memory_partition, k0Len);
ASSERT_TRUE(err == AMDSMI_STATUS_INVAL);
if (err == AMDSMI_STATUS_INVAL) {
IF_VERB(STANDARD) {
std::cout << "\t**amdsmi_get_gpu_memory_partition(processor_handles_[" << dv_ind << "], "
<< "orig_memory_partition, 0): "
<< "Confirmed AMDSMI_STATUS_INVAL was returned."
<< std::endl;
}
}
amdsmi_memory_partition_config_t* null_memory_partition_config = nullptr;
err = amdsmi_get_gpu_memory_partition_config(processor_handles_[dv_ind],
null_memory_partition_config);
ASSERT_TRUE((err == AMDSMI_STATUS_INVAL) ||
(err == AMDSMI_STATUS_NOT_SUPPORTED));
if (err == AMDSMI_STATUS_INVAL) {
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "amdsmi_get_gpu_memory_partition_config(processor_handles_[" << dv_ind << "], "
<< "nullptr): "
<< "Confirmed AMDSMI_STATUS_INVAL was returned."
<< std::endl;
}
}
/****************************************/
/* amdsmi_set_gpu_memory_partition(...) */
/****************************************/
// Verify api support checking functionality is working
amdsmi_memory_partition_type_t null_memory_partition = {};
err = amdsmi_set_gpu_memory_partition_mode(processor_handles_[dv_ind], null_memory_partition);
std::cout << "\t**amdsmi_set_gpu_memory_partition(amdsmi_set_gpu_memory_partition_mode"
<< "(processor_handles_[" << dv_ind << "], nullptr): "
<< smi_amdgpu_get_status_string(err, false) << "\n";
// Note: new_memory_partition is not set
ASSERT_TRUE(err == AMDSMI_STATUS_INVAL);
if (err == AMDSMI_STATUS_INVAL) {
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Confirmed AMDSMI_STATUS_INVAL was returned."
<< std::endl;
}
} else if (err == AMDSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**" << ": "
<< "amdsmi_set_gpu_memory_partition_mode not supported on this "
<< "device\n\t (if amdsmi_get_gpu_memory_partition works, "
<< "then likely need to set in bios)"
<< std::endl;
}
continue;
} else {
DISPLAY_AMDSMI_ERR(err)
}
ASSERT_FALSE(err == AMDSMI_STATUS_NO_PERM);
// Verify api support checking functionality is working
amdsmi_memory_partition_type_t new_memory_partition = AMDSMI_MEMORY_PARTITION_UNKNOWN;
err = amdsmi_set_gpu_memory_partition_mode(processor_handles_[dv_ind], new_memory_partition);
ASSERT_TRUE((err == AMDSMI_STATUS_INVAL) ||
(err == AMDSMI_STATUS_NOT_SUPPORTED) ||
(err == AMDSMI_STATUS_NO_PERM));
if (err == AMDSMI_STATUS_INVAL) {
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Confirmed AMDSMI_STATUS_INVAL was returned."
<< std::endl;
} else if (err == AMDSMI_STATUS_NO_PERM) {
DISPLAY_AMDSMI_ERR(err)
// tests should not continue if err is a permission issue
ASSERT_FALSE(err == AMDSMI_STATUS_NO_PERM);
} else {
DISPLAY_AMDSMI_ERR(err)
}
}
// Re-run original get, so we can reset to later
ret = amdsmi_get_gpu_memory_partition(processor_handles_[dv_ind],
orig_memory_partition, k255Len);
ASSERT_EQ(AMDSMI_STATUS_SUCCESS, ret);
for (int partition = static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS1);
partition <= static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS8);
partition++) {
ret_set = AMDSMI_STATUS_NOT_SUPPORTED;
wasSetSuccess = false;
new_memory_partition = static_cast<amdsmi_memory_partition_type_t>(partition);
if (new_memory_partition != AMDSMI_MEMORY_PARTITION_NPS1
&& new_memory_partition != AMDSMI_MEMORY_PARTITION_NPS2
&& new_memory_partition != AMDSMI_MEMORY_PARTITION_NPS4
&& new_memory_partition != AMDSMI_MEMORY_PARTITION_NPS8) {
continue; // skip unknown partition, this is already tested above ^
}
IF_VERB(STANDARD) {
std::cout << std::endl;
std::cout << "\t**"
<< "======== TEST AMDSMI_MEMORY_PARTITION_"
<< memoryPartitionString(new_memory_partition)
<< " ===============" << std::endl;
}
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Attempting to set memory partition to: "
<< memoryPartitionString(new_memory_partition) << std::endl;
}
auto ret_caps = amdsmi_get_gpu_memory_partition_config(processor_handles_[dv_ind],
&current_memory_config);
std::string memory_caps_str = "N/A";
if (ret_caps == AMDSMI_STATUS_SUCCESS) {
memory_caps_str.clear();
if (current_memory_config.partition_caps.nps_flags.nps1_cap) {
memory_caps_str += (memory_caps_str.empty() ? "NPS1" : ", NPS1");
}
if (current_memory_config.partition_caps.nps_flags.nps2_cap) {
memory_caps_str += (memory_caps_str.empty() ? "NPS2" : ", NPS2");
}
if (current_memory_config.partition_caps.nps_flags.nps4_cap) {
memory_caps_str += (memory_caps_str.empty() ? "NPS4" : ", NPS4");
}
if (current_memory_config.partition_caps.nps_flags.nps8_cap) {
memory_caps_str += (memory_caps_str.empty() ? "NPS8" : ", NPS8");
}
}
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "amdsmi_get_gpu_memory_partition_config(processor_handles_[" << dv_ind
<< "], current_memory_config): "
<< smi_amdgpu_get_status_string(ret_caps, false) << std::endl;
std::cout << "\t**" << "Available Memory Partition Capabilities: "
<< memory_caps_str << "\n"
<< "\t**" << "current_memory_partition_mode: "
<< memoryPartitionString(current_memory_config.mp_mode) << "\n"
<< "\t**" << "num_numa_ranges: "
<< current_memory_config.num_numa_ranges
<< std::endl;
}
ASSERT_TRUE((ret_caps == AMDSMI_STATUS_NOT_SUPPORTED) ||
(ret_caps == AMDSMI_STATUS_SUCCESS));
ret_set = amdsmi_set_gpu_memory_partition_mode(processor_handles_[dv_ind],
new_memory_partition);
IF_VERB(STANDARD) {
std::cout << "\t**" << "amdsmi_set_gpu_memory_partition_mode(processor_handles_["
<< dv_ind << "], " << memoryPartitionString(new_memory_partition) << "): "
<< smi_amdgpu_get_status_string(ret_set, false) << "\n";
}
if (ret_set == AMDSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**" << ": "
<< "Not supported on this machine" << std::endl;
}
break;
} else {
ASSERT_TRUE((ret_set == AMDSMI_STATUS_SUCCESS)
|| (ret_set == AMDSMI_STATUS_BUSY)
|| (ret_set == AMDSMI_STATUS_AMDGPU_RESTART_ERR)
|| (ret_set == AMDSMI_STATUS_INVAL)
|| (ret_set == AMDSMI_STATUS_NOT_SUPPORTED));
}
if (ret_set == AMDSMI_STATUS_SUCCESS) { // do not continue trying to reset
wasSetSuccess = true;
}
ret = amdsmi_get_gpu_memory_partition_config(processor_handles_[dv_ind],
&current_memory_config);
if (ret == AMDSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**" << "amdsmi_get_gpu_memory_partition_config(): "
<< "Not supported on this machine" << std::endl;
}
continue;
}
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Current memory partition: "
<< memoryPartitionString(current_memory_config.mp_mode)
<< std::endl;
}
if (wasSetSuccess) {
ASSERT_EQ(AMDSMI_STATUS_SUCCESS, ret_set);
ASSERT_STREQ(memoryPartitionString(new_memory_partition).c_str(),
memoryPartitionString(current_memory_config.mp_mode).c_str());
CHK_ERR_ASRT(ret_set)
} else {
ASSERT_NE(AMDSMI_STATUS_SUCCESS, ret_set);
ASSERT_STRNE(memoryPartitionString(new_memory_partition).c_str(),
memoryPartitionString(current_memory_config.mp_mode).c_str());
}
} // END MEMORY PARTITION FOR LOOP
/* TEST RETURN TO ORIGINAL MEMORY PARTITION SETTING */
IF_VERB(STANDARD) {
std::cout << std::endl;
std::cout << "\t**"
<< "=========== TEST RETURN TO ORIGINAL MEMORY PARTITION "
<< "SETTING (" << orig_memory_partition
<< ") ========" << std::endl;
}
ret = amdsmi_get_gpu_memory_partition_config(processor_handles_[dv_ind],
&current_memory_config);
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "amdsmi_get_gpu_memory_partition_config(processor_handles_[" << dv_ind
<< "], current_memory_config): "
<< smi_amdgpu_get_status_string(ret, false) << std::endl;
std::cout << "\t**"
<< "Current memory partition: "
<< memoryPartitionString(current_memory_config.mp_mode)
<< std::endl;
}
new_memory_partition
= mapStringToRSMIMemoryPartitionTypes.at(orig_memory_partition);
IF_VERB(STANDARD) {
std::cout << "\t**" << "Returning memory partition to: "
<< memoryPartitionString(new_memory_partition) << std::endl;
}
ret = amdsmi_set_gpu_memory_partition(processor_handles_[dv_ind], new_memory_partition);
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "amdsmi_set_gpu_memory_partition(processor_handles_[" << dv_ind
<< "], " << orig_memory_partition << "): "
<< smi_amdgpu_get_status_string(ret, false) << std::endl;
}
CHK_ERR_ASRT(ret)
ret = amdsmi_get_gpu_memory_partition(processor_handles_[dv_ind],
current_memory_partition, k255Len);
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\t**" << "Attempted to set memory partition: "
<< memoryPartitionString(new_memory_partition) << std::endl
<< "\t**" << "Current memory partition: "
<< current_memory_partition
<< std::endl;
}
ASSERT_EQ(AMDSMI_STATUS_SUCCESS, ret);
ASSERT_STREQ(orig_memory_partition, current_memory_partition);
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Confirmed prior memory partition (" << orig_memory_partition
<< ") is equal to current memory partition ("
<< current_memory_partition << ")" << std::endl;
}
} // END DEVICE FOR LOOP
// Restore original compute partition settings (see orig_dev_config ^)
IF_VERB(STANDARD) {
std::cout << "\t**=========================================================\n";
std::cout << "\t**Restore Original Compute Partition Settings =============\n";
std::cout << "\t**=========================================================\n";
}
initial_num_devices = num_monitor_devs();
for (uint32_t dv_ind = 0; dv_ind < initial_num_devices; ++dv_ind) {
if (dv_ind != 0) {
std::cout << "\n";
}
PrintDeviceHeader(processor_handles_[dv_ind]);
AcceleratorProfileConfig original_profile_config = orig_dev_config[dv_ind];
// Return to original profile
IF_VERB(STANDARD) {
std::cout << "\t**Device Index: " << dv_ind << std::endl
<< "\t**======== Return to original AMDSMI_ACCELERATOR_PARTITION_"
<< original_profile_config.original_profile_type_str
<< " (profile_index: "
<< (original_profile_config.original_profile_index == kMAX_UINT32
? "N/A" : std::to_string(original_profile_config.original_profile_index))
<< ")"
<< " ===============" << std::endl;
}
auto ret_set = amdsmi_set_gpu_accelerator_partition_profile(
processor_handles_[dv_ind],
original_profile_config.original_profile_index);
EXPECT_TRUE((ret_set == AMDSMI_STATUS_SETTING_UNAVAILABLE)
|| (ret_set== AMDSMI_STATUS_NO_PERM)
|| (ret_set == AMDSMI_STATUS_SUCCESS)
|| ret_set == AMDSMI_STATUS_BUSY
|| ret_set == AMDSMI_STATUS_NOT_SUPPORTED);
amdsmi_accelerator_partition_profile_t profile = {};
uint32_t partition_id[8] = {0, 0, 0, 0, 0, 0, 0, 0};
auto ret_get = amdsmi_get_gpu_accelerator_partition_profile(processor_handles_[dv_ind],
&profile, &partition_id[0]);
if (ret_get == AMDSMI_STATUS_SUCCESS && ret_set == AMDSMI_STATUS_SUCCESS) {
std::string profile_type_str = partition_types_map.at(profile.profile_type);
IF_VERB(STANDARD) {
std::cout << "\t**amdsmi_set_gpu_accelerator_partition_profile(processor_handles_["
<< dv_ind << "],"
<< "\n\t\t" << original_profile_config.original_profile_index
<< " (AMDSMI_ACCELERATOR_PARTITION_"
<< original_profile_config.original_profile_type_str
<< "): "
<< "\n\t\t" << smi_amdgpu_get_status_string(ret_set, false)
<< "\n\t**amdsmi_get_gpu_accelerator_partition_profile(processor_handles_["
<< dv_ind << "], &profile, &partition_id[0]):\n"
<< "\t\t" << smi_amdgpu_get_status_string(ret_get, false)
<< "\n\t**Current profile.profile_type: "
<< profile_type_str
<< "\n\t**profile.num_partitions: "
<< (profile.num_partitions == kMAX_UINT32
? "N/A" : std::to_string(profile.num_partitions))
<< "\n\t**profile.profile_index: "
<< (profile.profile_index == kMAX_UINT32
? "N/A" : std::to_string(profile.profile_index))
<< std::endl;
}
EXPECT_STREQ(partition_types_map.at(profile.profile_type).c_str(),
original_profile_config.original_profile_type_str.c_str());
EXPECT_EQ(profile.profile_type, original_profile_config.original_profile_type);
EXPECT_EQ(profile.profile_index, original_profile_config.original_profile_index);
} else {
IF_VERB(STANDARD) {
std::cout << "\t**Could not change or read profiles. "
<< "Skipping return to original profile on this device."
<< "\n\t**amdsmi_set_gpu_accelerator_partition_profile(): "
<< smi_amdgpu_get_status_string(ret_set, false)
<< "\n\t**amdsmi_get_gpu_accelerator_partition_profile(): "
<< smi_amdgpu_get_status_string(ret_get, false)
<< std::endl;
}
}
}
}
+51
Просмотреть файл
@@ -0,0 +1,51 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_MEMORYPARTITION_READ_WRITE_H_
#define TESTS_AMD_SMI_TEST_FUNCTIONAL_MEMORYPARTITION_READ_WRITE_H_
#include "../test_base.h"
// Functional test that reads the GPU memory partition settings, attempts to
// change them, and returns them to their original values.
class TestMemoryPartitionReadWrite : public TestBase {
 public:
  // Sets the test title and description.
  TestMemoryPartitionReadWrite();

  // Destroys the test object.
  virtual ~TestMemoryPartitionReadWrite();

  // Prepares the environment before Run() is called.
  virtual void SetUp(void);

  // Executes the body of the test.
  virtual void Run(void);

  // Cleans up and releases the resources used by the test.
  virtual void Close(void);

  // Displays the results of the test.
  virtual void DisplayResults(void) const;

  // Displays information about what this test does.
  virtual void DisplayTestInfo(void);
};
#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_MEMORYPARTITION_READ_WRITE_H_
+13 -2
Просмотреть файл
@@ -64,6 +64,8 @@
#include "functional/version_read.h"
#include "functional/mutual_exclusion.h"
#include "functional/init_shutdown_refcount.h"
#include "functional/memorypartition_read_write.h"
#include "functional/computepartition_read_write.h"
static AMDSMITstGlobals *sRSMIGlvalues = nullptr;
@@ -250,8 +252,17 @@ TEST(amdsmitstReadOnly, TestMutualExclusion) {
RunCustomTestEpilog(&tst);
}
*/
// TODO: add TestComputePartitionReadWrite
// TODO: add TestMemoryPartitionReadWrite
// Runs the compute partition read/write test through the generic test driver.
TEST(amdsmitstReadWrite, TestComputePartitionReadWrite) {
  TestComputePartitionReadWrite compute_partition_test;
  RunGenericTest(&compute_partition_test);
}
// Runs the memory partition read/write test through the generic test driver.
TEST(amdsmitstReadWrite, TestMemoryPartitionReadWrite) {
  TestMemoryPartitionReadWrite memory_partition_test;
  RunGenericTest(&memory_partition_test);
}
TEST(amdsmitstReadWrite, TestEvtNotifReadWrite) {
TestEvtNotifReadWrite tst;
RunGenericTest(&tst);
+159 -1
Просмотреть файл
@@ -20,12 +20,14 @@
* THE SOFTWARE.
*/
#include <gtest/gtest.h>
#include <cassert>
#include <limits>
#include "amd_smi/amdsmi.h"
#include "amd_smi/impl/amd_smi_utils.h"
#include "test_base.h"
#include "test_common.h"
#include <gtest/gtest.h>
static const int kOutputLineLength = 80;
static const char kLabelDelimiter[] = "####";
@@ -136,8 +138,21 @@ void TestBase::SetUp(uint64_t init_flags) {
void TestBase::PrintDeviceHeader(amdsmi_processor_handle dv_ind) {
amdsmi_status_t err;
uint16_t val_ui16;
uint32_t val_ui32;
amdsmi_asic_info_t info;
err = smi_amdgpu_get_device_count(&val_ui32);
CHK_ERR_ASRT(err)
IF_VERB(STANDARD) {
std::cout << "\t**Total Devices: " << val_ui32 << std::endl;
}
err = smi_amdgpu_get_device_index(dv_ind, &val_ui32);
CHK_ERR_ASRT(err)
IF_VERB(STANDARD) {
std::cout << "\t**AMD SMI Device index: " << val_ui32 << std::endl;
}
IF_VERB(STANDARD) {
std::cout << "\t**Device handle: " << dv_ind << std::endl;
}
@@ -168,6 +183,15 @@ void TestBase::PrintDeviceHeader(amdsmi_processor_handle dv_ind) {
}
}
amdsmi_asic_info_t asic_info;
err = amdsmi_get_gpu_asic_info(dv_ind, &asic_info);
CHK_ERR_ASRT(err)
IF_VERB(STANDARD) {
std::cout << "\t**Market name: " << asic_info.market_name << std::endl;
std::cout << "\t**ASIC serial: 0x" << std::hex << asic_info.asic_serial << std::endl;
std::cout << "\t**Target GFX Version: gfx" << asic_info.target_graphics_version << std::endl;
}
err = amdsmi_get_gpu_subsystem_id(dv_ind, &val_ui16);
CHK_ERR_ASRT(err)
IF_VERB(STANDARD) {
@@ -234,3 +258,137 @@ void TestBase::set_description(std::string d) {
}
}
// Captures the accelerator partition state of one device in an
// AcceleratorProfileConfig.
//
// device_index     index used only to label the verbose output.
// current_profile  profile currently active on the device; its type and index
//                  are stored as the "original" values.
// config           profile configuration; the type, label and index of each
//                  of its num_profiles entries are copied into the result.
// isVerbose        when true, a summary of the captured data goes to stdout.
//
// Labels come from partition_types_map / accelerator_types_map via map::at,
// so a profile type missing from those maps raises std::out_of_range.
TestBase::AcceleratorProfileConfig TestBase::getAvailableProfileConfigs(
    uint32_t device_index,
    amdsmi_accelerator_partition_profile_t current_profile,
    amdsmi_accelerator_partition_profile_config_t config,
    bool isVerbose) {
  AcceleratorProfileConfig snapshot = {};
  snapshot.number_of_profiles = config.num_profiles;
  snapshot.original_profile_type = current_profile.profile_type;
  snapshot.original_profile_index = current_profile.profile_index;
  snapshot.original_profile_type_str =
      partition_types_map.at(current_profile.profile_type);

  // Copy the type, label and index of every available profile.
  snapshot.available_profiles.clear();
  snapshot.available_profile_str.clear();
  snapshot.available_profile_indices.clear();
  snapshot.available_profiles.reserve(config.num_profiles);
  snapshot.available_profile_str.reserve(config.num_profiles);
  snapshot.available_profile_indices.reserve(config.num_profiles);
  for (uint32_t idx = 0; idx < config.num_profiles; ++idx) {
    const auto &entry = config.profiles[idx];
    snapshot.available_profiles.push_back(entry.profile_type);
    snapshot.available_profile_str.push_back(
        partition_types_map.at(entry.profile_type));
    snapshot.available_profile_indices.push_back(entry.profile_index);
  }

  // One line per profile; entries are separated by ", " and the last one is
  // terminated by a newline. Stays "N/A\n" when there are no profiles.
  std::string listing = "N/A\n";
  if (snapshot.number_of_profiles > 0) {
    listing.clear();
  }
  for (uint32_t idx = 0; idx < snapshot.number_of_profiles; ++idx) {
    const bool is_last = (idx + 1 >= snapshot.number_of_profiles);
    listing += "\n\t\t\tProfile[profile_index: ";
    listing += std::to_string(snapshot.available_profile_indices[idx]);
    listing += "]: ";
    listing += snapshot.available_profile_str[idx];
    listing += is_last ? "\n" : ", ";
  }

  if (!isVerbose) {
    return snapshot;
  }

  const uint32_t kMAX_UINT32 = std::numeric_limits<uint32_t>::max();
  const std::string original_index_str =
      (snapshot.original_profile_index == kMAX_UINT32)
          ? "N/A" : std::to_string(snapshot.original_profile_index);
  std::cout << "\t**[Device #" << device_index << "] Profile Configs: ";
  std::cout << "\n\t\t**Original Profile Index: "
            << original_index_str
            << "\n\t\t**Original Profile Type: "
            << snapshot.original_profile_type_str
            << "\n\t\t**Original profile: " << snapshot.original_profile_type
            << " (" << accelerator_types_map.at(snapshot.original_profile_type) << ")"
            << "\n\t\t**Number of Profiles: " << snapshot.number_of_profiles
            << "\n\t\t**Available_profiles: ";
  std::cout << listing;
  return snapshot;
}
// Debug aid: blocks until a newline (or EOF) is read from stdin, echoing
// every character read and re-printing the prompt before each read.
void TestBase::waitForUserInput() {
  while (true) {
    std::cout << "\n\t**Press any key to continue**" << std::endl;
    const int key = std::cin.get();
    if (key == EOF) {
      std::cout << "EOF detected. Exiting." << std::endl;
      break;
    }
    const char typed = static_cast<char>(key);
    std::cout << "User entered: " << typed << std::endl;
    if (typed == '\n') {
      break;
    }
  }
}
// Debug aid: asks on stdin how many devices should be tested.
//
// Reads characters up to the first newline, keeping only decimal digits (a
// message is printed for every other character). Returns the entered number
// when it lies in [0, current_num_devices]. Returns 0 -- which callers treat
// as "skip" -- on EOF, when no digit was entered, or when the number exceeds
// current_num_devices.
//
// The digits are accumulated by hand rather than with std::stoi, so an empty
// line or an over-long digit string can no longer throw
// std::invalid_argument / std::out_of_range.
uint32_t TestBase::promptNumDevicesToTest(uint32_t current_num_devices) {
  std::cout << "**How many devices would you like to test? (0 to skip): ";
  std::string devices_to_test = "";
  do {
    int input = std::cin.get();
    if (input == EOF) {
      std::cout << "EOF detected. Exiting." << std::endl;
      return 0;
    }
    char input_char = static_cast<char>(input);
    if (input_char == '\n') {
      break;
    }
    if (input_char >= '0' && input_char <= '9') {
      devices_to_test += input_char;
    } else {
      std::cout << "Invalid input. Please enter a number between 0 and "
                << current_num_devices << std::endl;
    }
  } while (true);

  if (devices_to_test.empty()) {
    std::cout << "Invalid input. Please enter a number between 0 and "
              << current_num_devices << std::endl;
    return 0;
  }

  // Stop as soon as the running value exceeds the limit: the 64-bit
  // accumulator then never grows past 10 * UINT32_MAX + 9, so it cannot
  // overflow however many digits were typed.
  uint64_t value = 0;
  for (char digit : devices_to_test) {
    value = value * 10 + static_cast<uint64_t>(digit - '0');
    if (value > current_num_devices) {
      std::cout << "Invalid input. Please enter a number between 0 and "
                << current_num_devices << std::endl;
      return 0;
    }
  }
  return static_cast<uint32_t>(value);
}
// Translates an accelerator partition resource type into a display name.
//
// @param resource_type  enumerator to translate.
// @return "XCC", "ENCODER", "DECODER", "DMA", "JPEG" or "MAX" for the
//         matching enumerator, and "N/A" for any other value.
std::string TestBase::getResourceType(amdsmi_accelerator_partition_resource_type_t resource_type) {
  switch (resource_type) {
    case AMDSMI_ACCELERATOR_XCC:
      return "XCC";
    case AMDSMI_ACCELERATOR_ENCODER:
      return "ENCODER";
    case AMDSMI_ACCELERATOR_DECODER:
      return "DECODER";
    case AMDSMI_ACCELERATOR_DMA:
      return "DMA";
    case AMDSMI_ACCELERATOR_JPEG:
      return "JPEG";
    case AMDSMI_ACCELERATOR_MAX:
      return "MAX";
    default:
      // Values outside the known enumerators get a placeholder label.
      return "N/A";
  }
}
+41
Просмотреть файл
@@ -26,6 +26,7 @@
#include <cstdint>
#include <string>
#include <vector>
#include <map>
#include "amd_smi/amdsmi.h"
// The maximum number of devices that can be monitored
@@ -98,6 +99,46 @@ class TestBase {
return num_iterations_;
}
// Short display label for each accelerator partition type.
// The INVALID enumerator is labelled "N/A"; the MAX enumerator is
// labelled "MAX".
const std::map<amdsmi_accelerator_partition_type_t, std::string> partition_types_map = {
{ AMDSMI_ACCELERATOR_PARTITION_INVALID, "N/A" },
{ AMDSMI_ACCELERATOR_PARTITION_SPX, "SPX" },
{ AMDSMI_ACCELERATOR_PARTITION_DPX, "DPX" },
{ AMDSMI_ACCELERATOR_PARTITION_TPX, "TPX" },
{ AMDSMI_ACCELERATOR_PARTITION_QPX, "QPX" },
{ AMDSMI_ACCELERATOR_PARTITION_CPX, "CPX" },
{ AMDSMI_ACCELERATOR_PARTITION_MAX, "MAX" },
};
// Maps each accelerator partition type to the spelled-out name of its
// enumerator (the key's identifier as a string). Looked up with .at() when
// the original profile type is printed in verbose output.
const std::map<amdsmi_accelerator_partition_type_t, std::string> accelerator_types_map = {
{ AMDSMI_ACCELERATOR_PARTITION_INVALID, "AMDSMI_ACCELERATOR_PARTITION_INVALID" },
{ AMDSMI_ACCELERATOR_PARTITION_SPX, "AMDSMI_ACCELERATOR_PARTITION_SPX" },
{ AMDSMI_ACCELERATOR_PARTITION_DPX, "AMDSMI_ACCELERATOR_PARTITION_DPX" },
{ AMDSMI_ACCELERATOR_PARTITION_TPX, "AMDSMI_ACCELERATOR_PARTITION_TPX" },
{ AMDSMI_ACCELERATOR_PARTITION_QPX, "AMDSMI_ACCELERATOR_PARTITION_QPX" },
{ AMDSMI_ACCELERATOR_PARTITION_CPX, "AMDSMI_ACCELERATOR_PARTITION_CPX" },
{ AMDSMI_ACCELERATOR_PARTITION_MAX, "AMDSMI_ACCELERATOR_PARTITION_MAX" },
};
// Summary of a device's accelerator partition profiles, as returned by
// getAvailableProfileConfigs().
struct AcceleratorProfileConfig {
// Partition type of the original profile; printed through
// accelerator_types_map in verbose output.
amdsmi_accelerator_partition_type_t original_profile_type;
// String printed as "Original Profile Type" in verbose output.
std::string original_profile_type_str;
// Index of the original profile; the maximum uint32_t value is printed
// as "N/A" in verbose output.
uint32_t original_profile_index;
// Number of entries read from available_profile_str and
// available_profile_indices when the profile list is formatted.
uint32_t number_of_profiles;
// NOTE(review): presumably parallel to the two vectors below, one entry
// per available profile - confirm against the code that fills it.
std::vector<amdsmi_accelerator_partition_type_t> available_profiles;
// Per-profile string, indexed together with available_profile_indices.
std::vector<std::string> available_profile_str;
// Per-profile index value, printed as "profile_index".
std::vector<uint32_t> available_profile_indices;
};
// Builds an AcceleratorProfileConfig for the device at device_index and
// returns it. When isVerbose is true, the original profile and the list of
// available profiles are also printed to stdout.
// NOTE(review): how current_profile and config populate the result is
// defined in the implementation file - confirm there.
AcceleratorProfileConfig getAvailableProfileConfigs(uint32_t device_index,
amdsmi_accelerator_partition_profile_t current_profile,
amdsmi_accelerator_partition_profile_config_t config,
bool isVerbose);
// Prompts on stdout and reads stdin until a newline is read or stdin
// reaches EOF.
void waitForUserInput();
// Prompts for the number of devices to test and reads it from stdin.
// Returns 0 on EOF or when the entered number exceeds current_num_devices.
uint32_t promptNumDevicesToTest(uint32_t current_num_devices);
// Returns a display name for resource_type ("XCC", "ENCODER", "DECODER",
// "DMA", "JPEG", "MAX"), or "N/A" for any other value.
std::string getResourceType(amdsmi_accelerator_partition_resource_type_t resource_type);
protected:
void MakeHeaderStr(const char *inStr, std::string *outStr) const;
void PrintDeviceHeader(amdsmi_processor_handle dv_ind);
+115
Просмотреть файл
@@ -121,6 +121,83 @@ class TestAmdSmiPythonInterface(unittest.TestCase):
print()
self.tearDown()
# amdsmi_get_gpu_vram_info should be supported on all ASICs
@handle_exceptions
def test_get_vram_info(self):
    """Print the VRAM details reported for every processor handle.

    For each handle this calls ``amdsmi_get_gpu_vram_info`` and prints the
    VRAM type, vendor, size, bit width and max bandwidth. The type and
    vendor values are translated to names through local lookup tables, so
    a value missing from a table raises ``KeyError``.
    """
    self.setUp()
    processors = amdsmi.amdsmi_get_processor_handles()
    self.assertGreaterEqual(len(processors), 1)
    self.assertLessEqual(len(processors), 32)

    # The lookup tables do not depend on the processor, so build them once
    # instead of on every loop iteration.
    vram_types = {
        amdsmi.AmdSmiVramType.UNKNOWN: "UNKNOWN",
        amdsmi.AmdSmiVramType.HBM: "HBM",
        amdsmi.AmdSmiVramType.HBM2: "HBM2",
        amdsmi.AmdSmiVramType.HBM2E: "HBM2E",
        amdsmi.AmdSmiVramType.HBM3: "HBM3",
        amdsmi.AmdSmiVramType.DDR2: "DDR2",
        amdsmi.AmdSmiVramType.DDR3: "DDR3",
        amdsmi.AmdSmiVramType.DDR4: "DDR4",
        amdsmi.AmdSmiVramType.GDDR1: "GDDR1",
        amdsmi.AmdSmiVramType.GDDR2: "GDDR2",
        amdsmi.AmdSmiVramType.GDDR3: "GDDR3",
        amdsmi.AmdSmiVramType.GDDR4: "GDDR4",
        amdsmi.AmdSmiVramType.GDDR5: "GDDR5",
        amdsmi.AmdSmiVramType.GDDR6: "GDDR6",
        amdsmi.AmdSmiVramType.GDDR7: "GDDR7",
        amdsmi.AmdSmiVramType.MAX: "MAX",
    }
    vram_vendors = {
        amdsmi.AmdSmiVramVendor.SAMSUNG: "SAMSUNG",
        amdsmi.AmdSmiVramVendor.INFINEON: "INFINEON",
        amdsmi.AmdSmiVramVendor.ELPIDA: "ELPIDA",
        amdsmi.AmdSmiVramVendor.ETRON: "ETRON",
        amdsmi.AmdSmiVramVendor.NANYA: "NANYA",
        amdsmi.AmdSmiVramVendor.HYNIX: "HYNIX",
        amdsmi.AmdSmiVramVendor.MOSEL: "MOSEL",
        amdsmi.AmdSmiVramVendor.WINBOND: "WINBOND",
        amdsmi.AmdSmiVramVendor.ESMT: "ESMT",
        amdsmi.AmdSmiVramVendor.MICRON: "MICRON",
        amdsmi.AmdSmiVramVendor.UNKNOWN: "UNKNOWN",
    }

    for i, processor in enumerate(processors):
        bdf = amdsmi.amdsmi_get_gpu_device_bdf(processor)
        print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
        print("\n###Test amdsmi_get_gpu_vram_info \n")
        vram_info = amdsmi.amdsmi_get_gpu_vram_info(processor)
        print(" vram_info['vram_type'] is: {}".format(
            vram_types[vram_info['vram_type']]))
        print(" vram_info['vram_vendor'] is: {}".format(
            vram_vendors[vram_info['vram_vendor']]))
        print(" vram_info['vram_size'] is: {} MB".format(
            vram_info['vram_size']))
        print(" vram_info['vram_bit_width'] is: {}".format(
            vram_info['vram_bit_width']))
        print(" vram_info['vram_max_bandwidth'] is: {} GB/s".format(
            vram_info['vram_max_bandwidth']))
    print()
    self.tearDown()
# amdsmi_get_gpu_xcd_counter should be supported on all ASICs
@handle_exceptions
def test_get_xcd_counter(self):
    """Print the value returned by ``amdsmi_get_gpu_xcd_counter`` per handle.

    Asserts that between 1 and 32 processor handles are present, then
    prints each handle's BDF followed by its XCD counter value.
    """
    self.setUp()
    handles = amdsmi.amdsmi_get_processor_handles()
    handle_count = len(handles)
    self.assertGreaterEqual(handle_count, 1)
    self.assertLessEqual(handle_count, 32)
    for index, handle in enumerate(handles):
        device_bdf = amdsmi.amdsmi_get_gpu_device_bdf(handle)
        print("\n\n###Test Processor {}, bdf: {}".format(index, device_bdf))
        print("\n###Test amdsmi_get_gpu_xcd_counter \n")
        counter_value = amdsmi.amdsmi_get_gpu_xcd_counter(handle)
        print(" xcd_counter['counter'] is: {}".format(counter_value))
    print()
    self.tearDown()
# amdsmi_get_gpu_bad_page_info is not supported in Navi2x, Navi3x
@handle_exceptions
def test_bad_page_info(self):
@@ -863,6 +940,44 @@ class TestAmdSmiPythonInterface(unittest.TestCase):
accelerator_partition = amdsmi.amdsmi_get_gpu_accelerator_partition_profile(processors[i])
print(" Current partition id: {}".format(
accelerator_partition['partition_id']))
print(" Profile_type: {}".format(
accelerator_partition['partition_profile']['profile_type']))
print(" profile_index: {}".format(
accelerator_partition['partition_profile']['profile_index']))
print(" memory_caps: {}".format(
accelerator_partition['partition_profile']['memory_caps']))
print(" num_resources: {}".format(
accelerator_partition['partition_profile']['num_resources']))
print()
self.tearDown()
# Requires sudo (to see full resource/config detail).
# Should only be supported on MI300+ ASICs
@handle_exceptions
def test_accelerator_partition_profile_config(self):
    """Print the accelerator partition profile config of every handle.

    For each processor handle this calls
    ``amdsmi_get_gpu_accelerator_partition_profile_config`` and prints the
    top-level counts, then every profile, then the first
    ``num_resources`` entries of that profile's ``resources`` list.
    """
    self.setUp()
    handles = amdsmi.amdsmi_get_processor_handles()
    handle_count = len(handles)
    self.assertGreaterEqual(handle_count, 1)
    self.assertLessEqual(handle_count, 32)
    for index, handle in enumerate(handles):
        device_bdf = amdsmi.amdsmi_get_gpu_device_bdf(handle)
        print("\n\n###Test Processor {}, bdf: {}".format(index, device_bdf))
        print("\n###Test amdsmi_get_gpu_accelerator_partition_profile_config \n")
        config = amdsmi.amdsmi_get_gpu_accelerator_partition_profile_config(handle)
        print(" num_profiles: {}".format(config['num_profiles']))
        print(" num_resource_profiles: {}".format(config['num_resource_profiles']))
        print(" default_profile_index: {}".format(config['default_profile_index']))
        for profile in config['profiles']:
            print("\t\t profile_type: {}".format(profile['profile_type']))
            print("\t\t num_partitions: {}".format(profile['num_partitions']))
            print("\t\t profile_index: {}".format(profile['profile_index']))
            print("\t\t num_resources: {}".format(profile['num_resources']))
            # Index by position so only the first num_resources entries are
            # read, exactly as reported by the profile itself.
            for slot in range(profile['num_resources']):
                resource = profile['resources'][slot]
                print("\t\t\t profile_index: {}".format(resource['profile_index']))
                print("\t\t\t resource_type: {}".format(resource['resource_type']))
                print("\t\t\t partition_resource: {}".format(resource['partition_resource']))
                print("\t\t\t num_partitions_share_resource: {}".format(
                    resource['num_partitions_share_resource']))
    print()
    self.tearDown()