[SWDEV-493274/SWDEV-514998] Add AMD SMI partition tests + Add Guest amd-smi static --partition (#127)
* [SWDEV-493274/SWDEV-514998] Add AMD SMI partition tests + Add Guest amd-smi static --partition
Changes:
- Added amd-smi static --partition for guest systems
- Added C++ tests for memory and compute (accelerator) partitions
- Added Python tests for amdsmi_get_gpu_vram_info(),
amdsmi_get_gpu_accelerator_partition_profile_config()
- Updated Python tests for
amdsmi_get_gpu_accelerator_partition_profile()
Now includes more profile and resource detail
- Added amdsmi_get_gpu_xcd_counter();
Tests provided for both C++/Python APIs
- Added AmdSmiVramType & AmdSmiVramVendor: they were missing from the
Python interface and are required by the new Python tests.
Change-Id: Ib6549d8ccc5fb68726f38745b87c78f890186022
Signed-off-by: Charis Poag <Charis.Poag@amd.com>
Αυτή η υποβολή περιλαμβάνεται σε:
@@ -387,6 +387,8 @@ class AMDSMICommands():
|
||||
args.cache = cache
|
||||
if process_isolation:
|
||||
args.process_isolation = process_isolation
|
||||
if partition:
|
||||
args.partition = partition
|
||||
if clock:
|
||||
args.clock = clock
|
||||
# args.clock defaults to False, so if it was overwritten with an empty list, that indicates it was given as an argument but with no values
|
||||
@@ -396,24 +398,22 @@ class AMDSMICommands():
|
||||
# Store args that are applicable to the current platform
|
||||
current_platform_args = ["asic", "bus", "vbios", "driver", "ras",
|
||||
"vram", "cache", "board", "process_isolation",
|
||||
"clock"]
|
||||
"clock", "partition"]
|
||||
current_platform_values = [args.asic, args.bus, args.vbios, args.driver, args.ras,
|
||||
args.vram, args.cache, args.board, args.process_isolation,
|
||||
args.clock]
|
||||
args.clock, args.partition]
|
||||
|
||||
self.helpers.check_required_groups()
|
||||
|
||||
if self.helpers.is_linux() and self.helpers.is_baremetal():
|
||||
if partition:
|
||||
args.partition = partition
|
||||
if limit:
|
||||
args.limit = limit
|
||||
if soc_pstate:
|
||||
args.soc_pstate = soc_pstate
|
||||
if xgmi_plpd:
|
||||
args.xgmi_plpd = xgmi_plpd
|
||||
current_platform_args += ["ras", "limit", "partition", "soc_pstate", "xgmi_plpd"]
|
||||
current_platform_values += [args.ras, args.limit, args.partition, args.soc_pstate, args.xgmi_plpd]
|
||||
current_platform_args += ["ras", "limit", "soc_pstate", "xgmi_plpd"]
|
||||
current_platform_values += [args.ras, args.limit, args.soc_pstate, args.xgmi_plpd]
|
||||
|
||||
if self.helpers.is_linux() and not self.helpers.is_virtual_os():
|
||||
if numa:
|
||||
@@ -4240,7 +4240,7 @@ class AMDSMICommands():
|
||||
if args.compute_partition in accelerator_profiles['profile_types']:
|
||||
compute_partition = amdsmi_interface.AmdSmiComputePartitionType[args.compute_partition]
|
||||
index = accelerator_profiles['profile_types'].index(args.compute_partition)
|
||||
attempted_to_set = f"Attempted to set accelerator partition to {args.compute_partition} (profile #{accelerator_profiles['profile_indices'][int(index)]} on {gpu_string}"
|
||||
attempted_to_set = f"Attempted to set accelerator partition to {args.compute_partition} (profile #{accelerator_profiles['profile_indices'][int(index)]}) on {gpu_string}"
|
||||
amdsmi_interface.amdsmi_set_gpu_compute_partition(args.gpu, compute_partition)
|
||||
self.logger.store_output(args.gpu, 'accelerator_partition', f"Successfully set accelerator partition to {args.compute_partition} (profile #{accelerator_profiles['profile_indices'][int(index)]})")
|
||||
elif args.compute_partition in accelerator_profiles['profile_indices']:
|
||||
@@ -4294,7 +4294,7 @@ class AMDSMICommands():
|
||||
|
||||
threads = []
|
||||
k140secs = 140
|
||||
string_out = f"Updating memory partition for gpu {gpu_id}"
|
||||
string_out = f"Updating memory partition for GPU: {gpu_id}"
|
||||
timesToRetryRestartErr = 1
|
||||
|
||||
self.helpers.increment_set_count()
|
||||
@@ -4305,9 +4305,9 @@ class AMDSMICommands():
|
||||
while timesToRetryRestartErr >= 0:
|
||||
timesToRetryRestartErr -= 1
|
||||
try:
|
||||
if showProgressBar: # only show reload warning on 1st set
|
||||
if showProgressBar: # we want to overwrite the previous progress bar
|
||||
t1 = multiprocessing.Process(target=self.helpers.showProgressbar,
|
||||
args=(string_out, k140secs,))
|
||||
args=(string_out, k140secs, True,))
|
||||
threads.append(t1)
|
||||
t1.start()
|
||||
memory_partition = amdsmi_interface.AmdSmiMemoryPartitionType[args.memory_partition]
|
||||
@@ -4342,7 +4342,7 @@ class AMDSMICommands():
|
||||
return
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_AMDGPU_RESTART_ERR:
|
||||
# Try again on a failure -> work around for not being able to close libdrm
|
||||
string_out = f"Trying again - Updating memory partition for gpu {gpu_id}"
|
||||
string_out = f"Trying again - Updating memory partition for GPU: {gpu_id} "
|
||||
for thread in threads:
|
||||
thread.terminate()
|
||||
thread.join()
|
||||
|
||||
@@ -969,11 +969,15 @@ class AMDSMIHelpers():
|
||||
continue
|
||||
return pci_devices
|
||||
|
||||
def progressbar(self, it, prefix="", size=60, out=sys.stdout):
|
||||
def progressbar(self, it, prefix="", size=60, out=sys.stdout, add_newline=False):
|
||||
count = len(it)
|
||||
if (add_newline):
|
||||
print("{}\n".format(prefix),end='\r', file=out, flush=False)
|
||||
else:
|
||||
print("{}".format(prefix),end='\r', file=out, flush=False)
|
||||
def show(j):
|
||||
x = int(size*j/count)
|
||||
print("{}[{}{}] {}/{} secs remain".format(prefix, u"█"*x, "."*(size-x), j, count),
|
||||
print("[{}{}] {}/{} secs remain".format(u"█"*x, "."*(size-x), j, count),
|
||||
end='\r', file=out, flush=True)
|
||||
show(0)
|
||||
for i, item in enumerate(it):
|
||||
@@ -981,10 +985,10 @@ class AMDSMIHelpers():
|
||||
show(i+1)
|
||||
print("\n\n", end='\r', flush=True, file=out)
|
||||
|
||||
def showProgressbar(self, title="", timeInSeconds=13):
|
||||
def showProgressbar(self, title="", timeInSeconds=13, add_newline=False):
|
||||
if title != "":
|
||||
title += ": "
|
||||
for i in self.progressbar(range(timeInSeconds), title, 40):
|
||||
title += " "
|
||||
for i in self.progressbar(range(timeInSeconds), title, 40, add_newline=add_newline):
|
||||
time.sleep(1)
|
||||
|
||||
def check_required_groups(self):
|
||||
|
||||
@@ -695,10 +695,10 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
static_parser.add_argument('-R', '--process-isolation', action='store_true', required=False, help=process_isolation_help)
|
||||
static_parser.add_argument('-r', '--ras', action='store_true', required=False, help=ras_help)
|
||||
static_parser.add_argument('-C', '--clock', action='store', default=False, nargs='*', type=str, required=False, help=clock_help)
|
||||
static_parser.add_argument('-p', '--partition', action='store_true', required=False, help=partition_help)
|
||||
|
||||
# Options to display on Hypervisors and Baremetal
|
||||
if self.helpers.is_hypervisor() or self.helpers.is_baremetal():
|
||||
static_parser.add_argument('-p', '--partition', action='store_true', required=False, help=partition_help)
|
||||
static_parser.add_argument('-l', '--limit', action='store_true', required=False, help=limit_help)
|
||||
static_parser.add_argument('-P', '--soc-pstate', action='store_true', required=False, help=soc_pstate_help)
|
||||
static_parser.add_argument('-x', '--xgmi-plpd', action='store_true', required=False, help=xgmi_plpd_help)
|
||||
|
||||
@@ -23,17 +23,18 @@
|
||||
#include <pwd.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include <bitset>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <inttypes.h>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
|
||||
#include "amd_smi/amdsmi.h"
|
||||
#include "amd_smi/impl/amd_smi_utils.h"
|
||||
|
||||
|
||||
#define CHK_AMDSMI_RET(RET) \
|
||||
@@ -201,8 +202,62 @@ std::string print_unsigned_int(T value) {
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
static const std::string
|
||||
computePartitionString(amdsmi_compute_partition_type_t computeParitionType) {
|
||||
switch (computeParitionType) {
|
||||
case AMDSMI_COMPUTE_PARTITION_SPX:
|
||||
return "SPX";
|
||||
case AMDSMI_COMPUTE_PARTITION_DPX:
|
||||
return "DPX";
|
||||
case AMDSMI_COMPUTE_PARTITION_TPX:
|
||||
return "TPX";
|
||||
case AMDSMI_COMPUTE_PARTITION_QPX:
|
||||
return "QPX";
|
||||
case AMDSMI_COMPUTE_PARTITION_CPX:
|
||||
return "CPX";
|
||||
default:
|
||||
return "N/A";
|
||||
}
|
||||
}
|
||||
|
||||
// Lookup table from a compute partition name to its enum value; the keys
// are the same strings computePartitionString() produces.
// "N/A" maps to AMDSMI_COMPUTE_PARTITION_INVALID.
// NOTE: callers in this file use .at(), which throws std::out_of_range for
// a name that is not listed here.
static const std::map<std::string, amdsmi_compute_partition_type_t>
mapStringToSMIComputePartitionTypes {
  {"SPX", AMDSMI_COMPUTE_PARTITION_SPX},
  {"DPX", AMDSMI_COMPUTE_PARTITION_DPX},
  {"TPX", AMDSMI_COMPUTE_PARTITION_TPX},
  {"QPX", AMDSMI_COMPUTE_PARTITION_QPX},
  {"CPX", AMDSMI_COMPUTE_PARTITION_CPX},
  {"N/A", AMDSMI_COMPUTE_PARTITION_INVALID}
};
|
||||
|
||||
static const std::string
|
||||
memoryPartitionString(amdsmi_memory_partition_type_t memoryParitionType) {
|
||||
switch (memoryParitionType) {
|
||||
case AMDSMI_MEMORY_PARTITION_NPS1:
|
||||
return "NPS1";
|
||||
case AMDSMI_MEMORY_PARTITION_NPS2:
|
||||
return "NPS2";
|
||||
case AMDSMI_MEMORY_PARTITION_NPS4:
|
||||
return "NPS4";
|
||||
case AMDSMI_MEMORY_PARTITION_NPS8:
|
||||
return "NPS8";
|
||||
default:
|
||||
return "N/A";
|
||||
}
|
||||
}
|
||||
|
||||
// Lookup table from a memory partition name to its enum value; the keys
// are the same strings memoryPartitionString() produces.
// "N/A" maps to AMDSMI_MEMORY_PARTITION_UNKNOWN.
// NOTE: callers in this file use .at(), which throws std::out_of_range for
// a name that is not listed here.
static const std::map<std::string, amdsmi_memory_partition_type_t>
mapStringToSMIMemoryPartitionTypes {
  {"NPS1", AMDSMI_MEMORY_PARTITION_NPS1},
  {"NPS2", AMDSMI_MEMORY_PARTITION_NPS2},
  {"NPS4", AMDSMI_MEMORY_PARTITION_NPS4},
  {"NPS8", AMDSMI_MEMORY_PARTITION_NPS8},
  {"N/A", AMDSMI_MEMORY_PARTITION_UNKNOWN}
};
|
||||
|
||||
int main() {
|
||||
amdsmi_status_t ret;
|
||||
amdsmi_status_t ret, ret_set;
|
||||
const char *err_str;
|
||||
|
||||
// Init amdsmi for sockets and devices.
|
||||
// Here we are only interested in AMD_GPUS.
|
||||
@@ -248,6 +303,20 @@ int main() {
|
||||
|
||||
// For each device of the socket, get name and temperature.
|
||||
for (uint32_t j = 0; j < device_count; j++) {
|
||||
uint32_t device_cnt = 0;
|
||||
ret = smi_amdgpu_get_device_count(&device_cnt);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << "Device Count: " << device_cnt << std::endl;
|
||||
|
||||
// Get device index
|
||||
uint32_t device_index = 0;
|
||||
ret = smi_amdgpu_get_device_index(processor_handles[j], &device_index);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << "Device Index: " << device_index << std::endl;
|
||||
|
||||
std::vector<amdsmi_processor_handle> p_handles(device_cnt);
|
||||
ret = smi_amdgpu_get_processor_handle_by_index(device_index, &p_handles[j]);
|
||||
|
||||
// Get device type. Since the amdsmi is initialized with
|
||||
// AMD_SMI_INIT_AMD_GPUS, the processor_type must be AMDSMI_PROCESSOR_TYPE_AMD_GPU.
|
||||
processor_type_t processor_type = {};
|
||||
@@ -286,6 +355,173 @@ int main() {
|
||||
printf("\tAsic serial: 0x%s\n", asic_info.asic_serial);
|
||||
printf("\tNum of Computes: %d\n\n", asic_info.num_of_compute_units);
|
||||
|
||||
bool is_power_management_enabled = false;
|
||||
ret = amdsmi_is_gpu_power_management_enabled(processor_handles[j],
|
||||
&is_power_management_enabled);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_is_gpu_power_management_enabled:\n");
|
||||
printf("\tPower Management Enabled: %s\n\n",
|
||||
(is_power_management_enabled ? "TRUE" : "FALSE"));
|
||||
|
||||
std::cout << " **Version 1: Accelerator/Compute Partition API Examples**\n";
|
||||
char original_compute_partition[AMDSMI_MAX_STRING_LENGTH];
|
||||
ret = amdsmi_get_gpu_compute_partition(processor_handles[j], original_compute_partition,
|
||||
static_cast<uint32_t>(AMDSMI_MAX_STRING_LENGTH));
|
||||
|
||||
amdsmi_status_code_to_string(ret, &err_str);
|
||||
if (ret == AMDSMI_STATUS_SUCCESS) {
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << " Output of amdsmi_get_gpu_compute_partition:\n";
|
||||
std::cout << "\tamdsmi_get_gpu_compute_partition(" << j << ", "
|
||||
<< mapStringToSMIComputePartitionTypes.at(original_compute_partition) << "): "
|
||||
<< err_str << "\n\n";
|
||||
std::cout << "\tCompute Partition (original): "
|
||||
<< original_compute_partition << "\n\n";
|
||||
} else {
|
||||
std::cout << "\tamdsmi_get_gpu_compute_partition(" << j << ", "
|
||||
<< computePartitionString(AMDSMI_COMPUTE_PARTITION_INVALID) << "): "
|
||||
<< err_str << "\n\n";
|
||||
}
|
||||
|
||||
for (int partition = static_cast<int>(AMDSMI_COMPUTE_PARTITION_SPX);
|
||||
partition <= static_cast<int>(AMDSMI_COMPUTE_PARTITION_CPX);
|
||||
partition++) {
|
||||
amdsmi_compute_partition_type_t updatePartition
|
||||
= static_cast<amdsmi_compute_partition_type_t>(partition);
|
||||
ret_set = amdsmi_set_gpu_compute_partition(processor_handles[j],
|
||||
updatePartition);
|
||||
amdsmi_status_code_to_string(ret_set, &err_str);
|
||||
if (ret_set == AMDSMI_STATUS_SUCCESS) {
|
||||
CHK_AMDSMI_RET(ret_set)
|
||||
}
|
||||
std::cout << "\tamdsmi_set_gpu_compute_partition(" << j << ", "
|
||||
<< computePartitionString(updatePartition) << "): "
|
||||
<< err_str << "\n\n";
|
||||
|
||||
// Get the current compute partition
|
||||
char current_compute_partition[AMDSMI_MAX_STRING_LENGTH];
|
||||
ret = amdsmi_get_gpu_compute_partition(processor_handles[j],
|
||||
current_compute_partition,
|
||||
static_cast<uint32_t>(AMDSMI_MAX_STRING_LENGTH));
|
||||
amdsmi_status_code_to_string(ret, &err_str);
|
||||
if (ret == AMDSMI_STATUS_SUCCESS) {
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << " Output of amdsmi_get_gpu_compute_partition:\n";
|
||||
std::cout << "\tamdsmi_get_gpu_compute_partition(" << j << ", "
|
||||
<< computePartitionString(updatePartition) << "): "
|
||||
<< err_str << "\n\n";
|
||||
std::cout << "\tCompute Partition (current): "
|
||||
<< current_compute_partition << "\n\n";
|
||||
} else {
|
||||
std::cout << "\tamdsmi_get_gpu_compute_partition(" << j << ", "
|
||||
<< computePartitionString(AMDSMI_COMPUTE_PARTITION_INVALID) << "): "
|
||||
<< err_str << "\n\n";
|
||||
}
|
||||
}
|
||||
// return to original compute partition
|
||||
amdsmi_compute_partition_type_t original_compute_partition_type;
|
||||
if (ret == AMDSMI_STATUS_SUCCESS) {
|
||||
original_compute_partition_type
|
||||
= mapStringToSMIComputePartitionTypes.at(original_compute_partition);
|
||||
} else {
|
||||
original_compute_partition_type = AMDSMI_COMPUTE_PARTITION_INVALID;
|
||||
}
|
||||
std::cout << " Returning to original compute partition ("
|
||||
<< computePartitionString(original_compute_partition_type) << ")\n";
|
||||
auto ret_set = amdsmi_set_gpu_compute_partition(processor_handles[j],
|
||||
original_compute_partition_type);
|
||||
amdsmi_status_code_to_string(ret_set, &err_str);
|
||||
if (ret_set == AMDSMI_STATUS_SUCCESS) {
|
||||
CHK_AMDSMI_RET(ret_set)
|
||||
}
|
||||
std::cout << "\tamdsmi_set_gpu_compute_partition(" << j << ", "
|
||||
<< computePartitionString(original_compute_partition_type) << "): "
|
||||
<< err_str << "\n\n";
|
||||
|
||||
std::cout << " **Version 1: Memory Partition API Examples**\n";
|
||||
char original_memory_partition[AMDSMI_MAX_STRING_LENGTH];
|
||||
ret = amdsmi_get_gpu_memory_partition(processor_handles[j], original_memory_partition,
|
||||
static_cast<uint32_t>(AMDSMI_MAX_STRING_LENGTH));
|
||||
amdsmi_status_code_to_string(ret, &err_str);
|
||||
if (ret == AMDSMI_STATUS_SUCCESS) {
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << " Output of amdsmi_get_gpu_memory_partition:\n";
|
||||
std::cout << "\tamdsmi_get_gpu_memory_partition(" << j << ", "
|
||||
<< mapStringToSMIMemoryPartitionTypes.at(original_memory_partition) << "): "
|
||||
<< err_str << "\n\n";
|
||||
std::cout << "\tMemory Partition (original): "
|
||||
<< original_memory_partition << "\n\n";
|
||||
} else {
|
||||
std::cout << "\tamdsmi_get_gpu_memory_partition(" << j << ", "
|
||||
<< memoryPartitionString(AMDSMI_MEMORY_PARTITION_UNKNOWN) << "): "
|
||||
<< err_str << "\n\n";
|
||||
}
|
||||
|
||||
for (int partition = static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS1);
|
||||
partition <= static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS8);
|
||||
partition++) {
|
||||
if (partition != static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS1)
|
||||
&& partition != static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS2)
|
||||
&& partition != static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS4)
|
||||
&& partition != static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS8)) {
|
||||
continue;
|
||||
}
|
||||
amdsmi_memory_partition_type_t updatePartition
|
||||
= static_cast<amdsmi_memory_partition_type_t>(partition);
|
||||
auto ret_set = amdsmi_set_gpu_memory_partition(processor_handles[j],
|
||||
updatePartition);
|
||||
amdsmi_status_code_to_string(ret_set, &err_str);
|
||||
if (ret_set == AMDSMI_STATUS_SUCCESS) {
|
||||
CHK_AMDSMI_RET(ret_set)
|
||||
std::cout << " Output of amdsmi_set_gpu_memory_partition:\n";
|
||||
}
|
||||
std::cout << "\tamdsmi_set_gpu_memory_partition(" << j << ", "
|
||||
<< memoryPartitionString(updatePartition) << "): "
|
||||
<< err_str << "\n\n";
|
||||
|
||||
// Get the current memory partition
|
||||
char current_memory_partition[AMDSMI_MAX_STRING_LENGTH];
|
||||
ret = amdsmi_get_gpu_memory_partition(processor_handles[j],
|
||||
current_memory_partition,
|
||||
static_cast<uint32_t>(AMDSMI_MAX_STRING_LENGTH));
|
||||
|
||||
amdsmi_status_code_to_string(ret, &err_str);
|
||||
if (ret == AMDSMI_STATUS_SUCCESS) {
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << "\tamdsmi_get_gpu_memory_partition(" << j << ", "
|
||||
<< memoryPartitionString(updatePartition) << "): "
|
||||
<< err_str << "\n\n";
|
||||
std::cout << "\tMemory Partition (current): "
|
||||
<< current_memory_partition << "\n\n";
|
||||
} else {
|
||||
std::cout << "\tamdsmi_get_gpu_memory_partition(" << j << ", "
|
||||
<< memoryPartitionString(AMDSMI_MEMORY_PARTITION_UNKNOWN) << "): "
|
||||
<< err_str << "\n\n";
|
||||
}
|
||||
}
|
||||
// return to original memory partition
|
||||
amdsmi_memory_partition_type_t original_memory_partition_type;
|
||||
if (ret == AMDSMI_STATUS_SUCCESS) {
|
||||
original_memory_partition_type
|
||||
= mapStringToSMIMemoryPartitionTypes.at(original_memory_partition);
|
||||
} else {
|
||||
original_memory_partition_type = AMDSMI_MEMORY_PARTITION_UNKNOWN;
|
||||
}
|
||||
std::cout << " Returning to original memory partition ("
|
||||
<< memoryPartitionString(original_memory_partition_type)
|
||||
<< ")\n";
|
||||
ret_set = amdsmi_set_gpu_memory_partition(processor_handles[j],
|
||||
original_memory_partition_type);
|
||||
amdsmi_status_code_to_string(ret_set, &err_str);
|
||||
if (ret_set == AMDSMI_STATUS_SUCCESS) {
|
||||
CHK_AMDSMI_RET(ret_set)
|
||||
}
|
||||
std::cout << "\tamdsmi_set_gpu_compute_partition(" << j << ", "
|
||||
<< memoryPartitionString(original_memory_partition_type) << "): "
|
||||
<< err_str << "\n\n";
|
||||
|
||||
// TODO(amdsmi_team): Add V2 partition APIs
|
||||
|
||||
// Get VRAM info
|
||||
amdsmi_vram_info_t vram_info = {};
|
||||
ret = amdsmi_get_gpu_vram_info(processor_handles[j], &vram_info);
|
||||
@@ -478,7 +714,7 @@ int main() {
|
||||
block = (amdsmi_gpu_block_t)(block * 2)) {
|
||||
ret = amdsmi_get_gpu_ras_block_features_enabled(processor_handles[j], block,
|
||||
&state);
|
||||
if (ret != AMDSMI_STATUS_API_FAILED) {
|
||||
if (ret != AMDSMI_STATUS_API_FAILED && ret != AMDSMI_STATUS_NOT_SUPPORTED) {
|
||||
CHK_AMDSMI_RET(ret)
|
||||
}
|
||||
|
||||
@@ -520,7 +756,9 @@ int main() {
|
||||
// Get ECC error counts
|
||||
amdsmi_error_count_t err_cnt_info = {};
|
||||
ret = amdsmi_get_gpu_total_ecc_count(processor_handles[j], &err_cnt_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
if (ret != AMDSMI_STATUS_NOT_SUPPORTED) {
|
||||
CHK_AMDSMI_RET(ret)
|
||||
}
|
||||
printf(" Output of amdsmi_get_gpu_total_ecc_count:\n");
|
||||
printf("\tCorrectable errors: %lu\n", err_cnt_info.correctable_count);
|
||||
printf("\tUncorrectable errors: %lu\n\n",
|
||||
@@ -530,7 +768,7 @@ int main() {
|
||||
ret = amdsmi_get_gpu_process_list(processor_handles[j], &num_process, nullptr);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
if (!num_process) {
|
||||
printf("No processes found.\n");
|
||||
printf("amdsmi_get_gpu_process_list(): No processes found.\n\n");
|
||||
} else {
|
||||
std::cout << "Processes found: " << num_process << "\n";
|
||||
amdsmi_proc_info_t process_info_list[num_process];
|
||||
|
||||
@@ -407,7 +407,7 @@ typedef enum {
|
||||
//!< work together with shared memory
|
||||
AMDSMI_COMPUTE_PARTITION_QPX, //!< Quad GPU mode (QPX)- Quarter XCCs
|
||||
//!< work together with shared memory
|
||||
AMDSMI_COMPUTE_PARTITION_CPX, //!< Core mode (CPX)- Per-chip XCC with
|
||||
AMDSMI_COMPUTE_PARTITION_CPX //!< Core mode (CPX)- Per-chip XCC with
|
||||
//!< shared memory
|
||||
} amdsmi_compute_partition_type_t;
|
||||
|
||||
@@ -5847,6 +5847,25 @@ amdsmi_get_power_cap_info(amdsmi_processor_handle processor_handle, uint32_t sen
|
||||
*/
|
||||
amdsmi_status_t amdsmi_get_pcie_info(amdsmi_processor_handle processor_handle, amdsmi_pcie_info_t *info);
|
||||
|
||||
/**
|
||||
* @brief Returns the 'xcd_counter' from the GPU metrics associated with the device
|
||||
*
|
||||
* @ingroup tagAsicBoardInfo
|
||||
*
|
||||
* @platform{gpu_bm_linux} @platform{guest_1vf} @platform{guest_mvf}
|
||||
*
|
||||
* @param[in] processor_handle Device which to query
|
||||
*
|
||||
* @param[inout] xcd_count a pointer to uint16_t to which the device gpu
|
||||
* metric unit will be stored. Must be allocated by user.
|
||||
*
|
||||
* @retval ::AMDSMI_STATUS_SUCCESS is returned upon successful call.
|
||||
* ::AMDSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit
|
||||
* does not exist for the given device.
|
||||
*/
|
||||
amdsmi_status_t amdsmi_get_gpu_xcd_counter(amdsmi_processor_handle processor_handle,
|
||||
uint16_t *xcd_count);
|
||||
|
||||
/** @} End tagAsicBoardInfo */
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
@@ -27,7 +27,6 @@
|
||||
#include "amd_smi/impl/amd_smi_processor.h"
|
||||
#include "amd_smi/impl/amd_smi_drm.h"
|
||||
#include "shared_mutex.h" // NOLINT
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
|
||||
@@ -26,10 +26,10 @@
|
||||
#include <limits>
|
||||
#include <type_traits>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "amd_smi/amdsmi.h"
|
||||
#include "amd_smi/impl/amd_smi_gpu_device.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
|
||||
|
||||
#define SMIGPUDEVICE_MUTEX(MUTEX) \
|
||||
@@ -55,6 +55,63 @@ amdsmi_status_t smi_amdgpu_is_gpu_power_management_enabled(amd::smi::AMDSmiGPUDe
|
||||
std::string smi_split_string(std::string str, char delim);
|
||||
std::string smi_amdgpu_get_status_string(amdsmi_status_t ret, bool fullStatus);
|
||||
|
||||
/**
|
||||
* @brief Get the device index given the processor handle.
|
||||
*
|
||||
* @details Given a processor handle @p processor_handle
|
||||
* and a pointer to a uint32_t @p device_index, the matching device index is written to that pointer.
|
||||
*
|
||||
* @param[in] processor_handle Device which to query
|
||||
*
|
||||
* @param[inout] device_index a pointer to uint32_t to which the matching device
|
||||
* index will be stored
|
||||
*
|
||||
* @retval ::AMDSMI_STATUS_SUCCESS is returned upon successful call.
|
||||
* ::AMDSMI_STATUS_INVAL is returned if user provides a null pointer
|
||||
* for device_index.
|
||||
* ::AMDSMI_STATUS_API_FAILED is returned if the corresponding device
|
||||
* index for the processor handle cannot be found.
|
||||
*/
|
||||
amdsmi_status_t smi_amdgpu_get_device_index(amdsmi_processor_handle processor_handle,
|
||||
uint32_t* device_index);
|
||||
|
||||
/**
|
||||
* @brief Get total number of devices
|
||||
*
|
||||
* @details Given a pointer to a uint32_t @p total_num_devices, the total number of devices is written to it.
|
||||
*
|
||||
* @param[inout] total_num_devices a pointer to uint32_t to which the total number
|
||||
* of devices will be stored
|
||||
*
|
||||
* @retval ::AMDSMI_STATUS_SUCCESS is returned upon successful call.
|
||||
* ::AMDSMI_STATUS_INVAL is returned if user provides a null pointer
|
||||
* for total_num_devices.
|
||||
*/
|
||||
amdsmi_status_t smi_amdgpu_get_device_count(uint32_t *total_num_devices);
|
||||
|
||||
/**
|
||||
* @brief Get the processor handle given the device index.
|
||||
*
|
||||
* @details Given a uint32_t @p device_index and a pointer to
|
||||
* a processor handle @p processor_handle, the device index will be used to
|
||||
* find the processor handle of the device and store it in the provided pointer
|
||||
*
|
||||
* @param[in] device_index a uint32_t value used to find the corresponding
|
||||
* processor handle
|
||||
*
|
||||
* @param[inout] processor_handle a pointer to amdsmi_processor_handle
|
||||
* which the corresponding processor_handle will be stored
|
||||
*
|
||||
* @retval ::AMDSMI_STATUS_SUCCESS is returned upon successful call.
|
||||
* ::AMDSMI_STATUS_INVAL is returned if user provides a null pointer
|
||||
* for processor_handle.
|
||||
* ::AMDSMI_STATUS_API_FAILED is returned if the device_index cannot
|
||||
* be found.
|
||||
*/
|
||||
amdsmi_status_t smi_amdgpu_get_processor_handle_by_index(
|
||||
uint32_t device_index,
|
||||
amdsmi_processor_handle *processor_handle);
|
||||
|
||||
|
||||
template<typename>
|
||||
constexpr bool is_dependent_false_v = false;
|
||||
@@ -72,8 +129,7 @@ constexpr T get_std_num_limit()
|
||||
{
|
||||
if constexpr (is_supported_type_v<T>) {
|
||||
return std::numeric_limits<T>::max();
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
return std::numeric_limits<T>::min();
|
||||
static_assert(is_dependent_false_v<T>, "Error: Type not supported...");
|
||||
}
|
||||
@@ -98,12 +154,11 @@ constexpr T translate_umax_or_assign_value(U source_value, V target_value)
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
static_assert(is_dependent_false_v<T>, "Error: Type not supported...");
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
#endif //
|
||||
#endif // AMD_SMI_INCLUDE_AMD_SMI_UTILS_H_
|
||||
|
||||
@@ -94,6 +94,7 @@ from .amdsmi_interface import amdsmi_get_gpu_kfd_info
|
||||
from .amdsmi_interface import amdsmi_get_power_cap_info
|
||||
from .amdsmi_interface import amdsmi_get_gpu_vram_info
|
||||
from .amdsmi_interface import amdsmi_get_gpu_cache_info
|
||||
from .amdsmi_interface import amdsmi_get_gpu_xcd_counter
|
||||
|
||||
# # Microcode and VBIOS Information
|
||||
from .amdsmi_interface import amdsmi_get_gpu_vbios_info
|
||||
@@ -272,6 +273,8 @@ from .amdsmi_interface import AmdSmiLinkType
|
||||
from .amdsmi_interface import AmdSmiUtilizationCounterType
|
||||
from .amdsmi_interface import AmdSmiProcessorType
|
||||
from .amdsmi_interface import AmdSmiVirtualizationMode
|
||||
from .amdsmi_interface import AmdSmiVramType
|
||||
from .amdsmi_interface import AmdSmiVramVendor
|
||||
|
||||
# Exceptions
|
||||
from .amdsmi_exception import AmdSmiLibraryException
|
||||
|
||||
@@ -460,6 +460,36 @@ class AmdSmiVirtualizationMode(IntEnum):
|
||||
GUEST = amdsmi_wrapper.AMDSMI_VIRTUALIZATION_MODE_GUEST
|
||||
PASSTHROUGH = amdsmi_wrapper.AMDSMI_VIRTUALIZATION_MODE_PASSTHROUGH
|
||||
|
||||
class AmdSmiVramType(IntEnum):
    """VRAM type values, one member per ``AMDSMI_VRAM_TYPE_*`` constant of ``amdsmi_wrapper``."""

    UNKNOWN = amdsmi_wrapper.AMDSMI_VRAM_TYPE_UNKNOWN
    # HBM family
    HBM = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM
    HBM2 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM2
    HBM2E = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM2E
    HBM3 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM3
    # DDR family
    DDR2 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_DDR2
    DDR3 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_DDR3
    DDR4 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_DDR4
    # GDDR family
    GDDR1 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR1
    GDDR2 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR2
    GDDR3 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR3
    GDDR4 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR4
    GDDR5 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR5
    GDDR6 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR6
    GDDR7 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR7
    # NOTE(review): presumably the upper-bound sentinel of the C enum; if it
    # shares a value with another member, IntEnum makes it an alias -- confirm.
    MAX = amdsmi_wrapper.AMDSMI_VRAM_TYPE__MAX
|
||||
|
||||
class AmdSmiVramVendor(IntEnum):
    """VRAM vendor values, one member per ``AMDSMI_VRAM_VENDOR_*`` constant of ``amdsmi_wrapper``."""

    SAMSUNG = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_SAMSUNG
    INFINEON = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_INFINEON
    ELPIDA = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_ELPIDA
    ETRON = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_ETRON
    NANYA = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_NANYA
    HYNIX = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_HYNIX
    MOSEL = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_MOSEL
    WINBOND = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_WINBOND
    ESMT = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_ESMT
    MICRON = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_MICRON
    # Fallback member for the wrapper's UNKNOWN vendor constant.
    UNKNOWN = amdsmi_wrapper.AMDSMI_VRAM_VENDOR_UNKNOWN
|
||||
|
||||
class AmdSmiEventReader:
|
||||
def __init__(
|
||||
@@ -2525,6 +2555,18 @@ def amdsmi_get_pcie_info(
|
||||
|
||||
return pcie_info_dict
|
||||
|
||||
def amdsmi_get_gpu_xcd_counter(processor_handle: amdsmi_wrapper.amdsmi_processor_handle) -> int:
    """Return the XCD counter the library reports for a GPU.

    Parameters:
        processor_handle: handle of the device to query.

    Returns:
        The ``uint16`` value written by the library call, as a plain ``int``
        (not a dictionary).

    Raises:
        AmdSmiParameterException: if ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    NOTE(review): ``_check_res`` presumably raises when the library returns a
    non-success status -- confirm against its definition.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(processor_handle, amdsmi_wrapper.amdsmi_processor_handle)

    # Output slot the C API fills in; passed by reference below.
    xcd_counter = ctypes.c_uint16()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_xcd_counter(
            processor_handle, ctypes.byref(xcd_counter)
        )
    )

    return xcd_counter.value
|
||||
|
||||
def amdsmi_get_processor_handle_from_bdf(bdf):
|
||||
bdf = _parse_bdf(bdf)
|
||||
@@ -2958,15 +3000,11 @@ def amdsmi_get_gpu_accelerator_partition_profile(
|
||||
|
||||
length = profile.num_partitions
|
||||
partition_ids = []
|
||||
for i in range(profile.num_partitions):
|
||||
partition_ids.append(partition_id_list[i])
|
||||
|
||||
last_element = 0
|
||||
if length > 0:
|
||||
last_element = length - 1
|
||||
if ((partition_ids[last_element] == 0)
|
||||
and not((profile_type_ret == str("SPX")) or (profile_type_ret == str("N/A")))):
|
||||
partition_ids = "N/A"
|
||||
|
||||
#partition_id[0] will contain the partition id of each device
|
||||
#BM/Guest will include this logic. Host will only display primary partition ids.
|
||||
kPOSITION_OF_PARTITION_ID = 0
|
||||
partition_ids.append(partition_id_list[kPOSITION_OF_PARTITION_ID])
|
||||
|
||||
mem_caps_list = []
|
||||
if profile.memory_caps.nps_flags.nps1_cap == 1:
|
||||
|
||||
@@ -2603,6 +2603,9 @@ amdsmi_get_power_cap_info.argtypes = [amdsmi_processor_handle, uint32_t, ctypes.
|
||||
amdsmi_get_pcie_info = _libraries['libamd_smi.so'].amdsmi_get_pcie_info
|
||||
amdsmi_get_pcie_info.restype = amdsmi_status_t
|
||||
amdsmi_get_pcie_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_pcie_info_t)]
|
||||
amdsmi_get_gpu_xcd_counter = _libraries['libamd_smi.so'].amdsmi_get_gpu_xcd_counter
|
||||
amdsmi_get_gpu_xcd_counter.restype = amdsmi_status_t
|
||||
amdsmi_get_gpu_xcd_counter.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_uint16)]
|
||||
amdsmi_get_fw_info = _libraries['libamd_smi.so'].amdsmi_get_fw_info
|
||||
amdsmi_get_fw_info.restype = amdsmi_status_t
|
||||
amdsmi_get_fw_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_fw_info_t)]
|
||||
@@ -3050,7 +3053,7 @@ __all__ = \
|
||||
'amdsmi_get_gpu_virtualization_mode',
|
||||
'amdsmi_get_gpu_volt_metric', 'amdsmi_get_gpu_vram_info',
|
||||
'amdsmi_get_gpu_vram_usage', 'amdsmi_get_gpu_vram_vendor',
|
||||
'amdsmi_get_gpu_xgmi_link_status',
|
||||
'amdsmi_get_gpu_xcd_counter', 'amdsmi_get_gpu_xgmi_link_status',
|
||||
'amdsmi_get_hsmp_metrics_table',
|
||||
'amdsmi_get_hsmp_metrics_table_version', 'amdsmi_get_lib_version',
|
||||
'amdsmi_get_link_metrics', 'amdsmi_get_link_topology_nearest',
|
||||
|
||||
@@ -200,6 +200,14 @@ class Device {
|
||||
public:
|
||||
explicit Device(std::string path, RocmSMI_env_vars const *e);
|
||||
~Device(void);
|
||||
typedef struct {
|
||||
uint32_t card_index;
|
||||
uint32_t drm_render_minor;
|
||||
uint64_t bdfid;
|
||||
uint64_t kfd_gpu_id;
|
||||
uint32_t partition_id;
|
||||
uint32_t smi_device_id;
|
||||
} rsmi_device_identifiers_t;
|
||||
|
||||
void set_monitor(std::shared_ptr<Monitor> m) {monitor_ = m;}
|
||||
std::string path(void) const {return path_;}
|
||||
@@ -266,6 +274,8 @@ class Device {
|
||||
void set_smi_device_id(uint32_t device_id) { m_device_id = device_id; }
|
||||
void set_smi_partition_id(uint32_t partition_id) { m_partition_id = partition_id; }
|
||||
static const char* get_type_string(DevInfoTypes type);
|
||||
rsmi_status_t get_smi_device_identifiers(uint32_t device_id,
|
||||
rsmi_device_identifiers_t *device_identifiers);
|
||||
|
||||
private:
|
||||
std::shared_ptr<Monitor> monitor_;
|
||||
|
||||
@@ -6577,6 +6577,16 @@ rsmi_dev_partition_id_get(uint32_t dv_ind, uint32_t *partition_id) {
|
||||
if (ret == RSMI_STATUS_SUCCESS) {
|
||||
*partition_id = static_cast<uint32_t>((pci_id >> 28) & 0xf);
|
||||
}
|
||||
std::ostringstream bdf_sstream;
|
||||
bdf_sstream << std::hex << std::setfill('0') << std::setw(4)
|
||||
<< ((pci_id >> 32) & 0xFFFFFFFF) << ":";
|
||||
bdf_sstream << std::hex << std::setfill('0') << std::setw(2) << ((pci_id >> 8) & 0xFF) << ":";
|
||||
bdf_sstream << std::hex << std::setfill('0') << std::setw(2) << ((pci_id >> 3) & 0xF8) << ".";
|
||||
bdf_sstream << std::hex << std::setfill('0') << +(pci_id & 0x7);
|
||||
bdf_sstream << "\nPartition ID ((pci_id >> 28) & 0xf): " << std::dec
|
||||
<< static_cast<int>((pci_id >> 28) & 0xf);
|
||||
bdf_sstream << "\nPartition ID (pci_id & 0x7): " << std::dec << static_cast<int>(pci_id & 0x7);
|
||||
// std::cout << __PRETTY_FUNCTION__ << " BDF: " << bdf_sstream.str() << std::endl;
|
||||
|
||||
/**
|
||||
* Fall back is required due to driver changes within KFD.
|
||||
@@ -6603,9 +6613,11 @@ rsmi_dev_partition_id_get(uint32_t dv_ind, uint32_t *partition_id) {
|
||||
<< " | Success"
|
||||
<< " | Device #: " << dv_ind
|
||||
<< " | Type: partition_id"
|
||||
<< " | Data: " << *partition_id
|
||||
<< " | Data: " << static_cast<int>(*partition_id)
|
||||
<< " | Returning = "
|
||||
<< getRSMIStatusString(RSMI_STATUS_SUCCESS) << " |";
|
||||
<< getRSMIStatusString(RSMI_STATUS_SUCCESS) << " |"
|
||||
<< "\n BDF: " << bdf_sstream.str() << std::endl;
|
||||
// std::cout << ss.str() << std::endl;
|
||||
LOG_INFO(ss);
|
||||
return ret;
|
||||
CATCH
|
||||
|
||||
@@ -1020,7 +1020,7 @@ int Device::readDevInfoLine(DevInfoTypes type, std::string *line) {
|
||||
<< get_type_string(type) << "), returning *line = "
|
||||
<< *line;
|
||||
LOG_INFO(ss);
|
||||
|
||||
fs.close();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1103,6 +1103,7 @@ int Device::readDevInfoMultiLineStr(DevInfoTypes type,
|
||||
while (std::getline(fs, line)) {
|
||||
retVec->push_back(line);
|
||||
}
|
||||
fs.close();
|
||||
|
||||
if (retVec->empty()) {
|
||||
ss << "Read devInfoMultiLineStr for DevInfoType ("
|
||||
@@ -1771,6 +1772,38 @@ std::string Device::readBootPartitionState<rsmi_memory_partition_type_t>(
|
||||
return boot_state;
|
||||
}
|
||||
|
||||
/**
 * Fill @p device_identifiers with the identifiers of the SMI device whose
 * position in RocmSMI's device list equals @p device_id.
 *
 * @param device_id           zero-based index into RocmSMI::devices()
 * @param device_identifiers  output struct; written only on success
 * @return RSMI_STATUS_INVALID_ARGS   if @p device_identifiers is null,
 *         RSMI_STATUS_NOT_SUPPORTED  if @p device_id is not a valid index,
 *         RSMI_STATUS_SUCCESS        otherwise.
 */
rsmi_status_t Device::get_smi_device_identifiers(uint32_t device_id,
                          rsmi_device_identifiers_t *device_identifiers) {
  if (device_identifiers == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
  // Bind by reference: a single entry is read, so copying the whole device
  // list is unnecessary work.
  const auto& devices = smi.devices();

  // A direct bounds check replaces a scan over every index that only ever
  // matched i == device_id.
  if (device_id >= devices.size()) {
    return RSMI_STATUS_NOT_SUPPORTED;
  }

  const auto& dev = devices[device_id];
  rsmi_device_identifiers_t smi_device;
  smi_device.card_index = dev->index();
  smi_device.drm_render_minor = dev->drm_render_minor();
  smi_device.bdfid = dev->bdfid();
  smi_device.kfd_gpu_id = dev->kfd_gpu_id();
  smi_device.partition_id = dev->m_partition_id;
  smi_device.smi_device_id = device_id;
  *device_identifiers = smi_device;

  return RSMI_STATUS_SUCCESS;
}
|
||||
|
||||
|
||||
#undef RET_IF_NONZERO
|
||||
} // namespace smi
|
||||
|
||||
@@ -3387,7 +3387,7 @@ AMGpuMetricsPublicLatestTupl_t GpuMetricsBase_v13_t::copy_internal_to_external_m
|
||||
metrics_public_init.vcn_activity[0] = metrics_public_init.average_mm_activity;
|
||||
}
|
||||
// average_mm_activity needs to not be UINT16_MAX and
|
||||
// metrics_public_init.xcp_stats->vcn_busy[0] should also be UIN16_MAX
|
||||
// metrics_public_init.xcp_stats->vcn_busy[0] should also be UINT16_MAX
|
||||
if (metrics_public_init.average_mm_activity != UINT16_MAX
|
||||
&& metrics_public_init.xcp_stats->vcn_busy[0] == UINT16_MAX) {
|
||||
metrics_public_init.xcp_stats->vcn_busy[0] = metrics_public_init.average_mm_activity;
|
||||
|
||||
+424
-193
Το diff αρχείου καταστέλλεται επειδή είναι πολύ μεγάλο
Φόρτωση Διαφορών
@@ -38,24 +38,23 @@
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
|
||||
void closedir(DIR* /*ptr*/) {}
|
||||
|
||||
std::string AMDSmiDrm::find_file_in_folder(const std::string& folder,
|
||||
const std::string& regex) {
|
||||
std::string file_name;
|
||||
using dir_ptr = std::unique_ptr<DIR, decltype(&closedir)>;
|
||||
|
||||
struct dirent *dir = nullptr;
|
||||
DIR *drm_dir;
|
||||
struct dirent *dir;
|
||||
std::regex file_regex(regex);
|
||||
auto drm_dir = dir_ptr(opendir(folder.c_str()), &closedir);
|
||||
drm_dir = opendir(folder.c_str());
|
||||
if (drm_dir == nullptr) return file_name;
|
||||
std::cmatch m;
|
||||
while ((dir = readdir(drm_dir.get())) != NULL) {
|
||||
if (std::regex_search(dir->d_name, m, file_regex)) {
|
||||
file_name = dir->d_name;
|
||||
break;
|
||||
}
|
||||
while ((dir = readdir(drm_dir)) != nullptr) {
|
||||
if (std::regex_search(dir->d_name, m, file_regex)) {
|
||||
file_name = dir->d_name;
|
||||
break;
|
||||
}
|
||||
}
|
||||
closedir(drm_dir);
|
||||
return file_name;
|
||||
}
|
||||
|
||||
@@ -197,9 +196,9 @@ amdsmi_status_t AMDSmiDrm::cleanup() {
|
||||
close(drm_fds_[i]);
|
||||
}
|
||||
|
||||
drm_fds_.clear();
|
||||
drm_paths_.clear();
|
||||
drm_bdfs_.clear();
|
||||
if (!drm_fds_.empty()) {drm_fds_.clear();}
|
||||
if (!drm_paths_.empty()) {drm_paths_.clear();}
|
||||
if (!drm_bdfs_.empty()) {drm_bdfs_.clear();}
|
||||
lib_loader_.unload();
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -306,9 +305,15 @@ amdsmi_status_t AMDSmiDrm::get_drm_fd_by_index(uint32_t gpu_index, uint32_t *fd_
|
||||
}
|
||||
|
||||
amdsmi_status_t AMDSmiDrm::get_bdf_by_index(uint32_t gpu_index, amdsmi_bdf_t *bdf_info) const {
|
||||
if (gpu_index + 1 > drm_bdfs_.size()) return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
*bdf_info = drm_bdfs_[gpu_index];
|
||||
std::ostringstream ss;
|
||||
if (gpu_index + 1 > drm_bdfs_.size()) {
|
||||
ss << __PRETTY_FUNCTION__ << " | gpu_index = " << gpu_index
|
||||
<< "; \nReturning = AMDSMI_STATUS_NOT_SUPPORTED";
|
||||
LOG_INFO(ss);
|
||||
// std::cout << ss.str() << std::endl;
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
*bdf_info = drm_bdfs_[gpu_index];
|
||||
ss << __PRETTY_FUNCTION__ << " | gpu_index = " << gpu_index
|
||||
<< "; \nreceived bdf: Domain = " << bdf_info->domain_number
|
||||
<< "; \nBus# = " << bdf_info->bus_number
|
||||
|
||||
@@ -20,12 +20,6 @@
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "amd_smi/impl/amd_smi_gpu_device.h"
|
||||
#include "amd_smi/impl/amd_smi_common.h"
|
||||
#include "amd_smi/impl/fdinfo.h"
|
||||
#include "rocm_smi/rocm_smi_kfd.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
@@ -33,6 +27,14 @@
|
||||
#include <dirent.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "amd_smi/impl/amd_smi_gpu_device.h"
|
||||
#include "amd_smi/impl/amd_smi_common.h"
|
||||
#include "amd_smi/impl/amd_smi_utils.h"
|
||||
#include "amd_smi/impl/fdinfo.h"
|
||||
#include "rocm_smi/rocm_smi_kfd.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
|
||||
@@ -61,11 +63,32 @@ amdsmi_status_t AMDSmiGPUDevice::get_drm_data() {
|
||||
uint32_t fd = 0;
|
||||
std::string path;
|
||||
amdsmi_bdf_t bdf;
|
||||
std::ostringstream ss;
|
||||
ret = drm_.get_drm_fd_by_index(gpu_id_, &fd);
|
||||
ss << __PRETTY_FUNCTION__ << " | gpu_id_: " << gpu_id_
|
||||
<< "; fd: " << fd
|
||||
<< "; drm_.get_drm_fd_by_index(gpu_id_, &fd): "
|
||||
<< smi_amdgpu_get_status_string(ret, false) << std::endl;
|
||||
// std::cout << ss.str();
|
||||
LOG_DEBUG(ss);
|
||||
if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
ret = drm_.get_drm_path_by_index(gpu_id_, &path);
|
||||
ss << __PRETTY_FUNCTION__ << " | gpu_id_: " << gpu_id_
|
||||
<< "; path: " << path
|
||||
<< "; drm_.get_drm_fd_by_index(gpu_id_, &path): "
|
||||
<< smi_amdgpu_get_status_string(ret, false) << std::endl;
|
||||
// std::cout << ss.str();
|
||||
LOG_DEBUG(ss);
|
||||
if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
ret = drm_.get_bdf_by_index(gpu_id_, &bdf);
|
||||
ss << __PRETTY_FUNCTION__ << " | gpu_id_: " << gpu_id_
|
||||
<< "; domain: " << bdf.domain_number
|
||||
<< "; bus: " << bdf.bus_number
|
||||
<< "; device: " << bdf.device_number
|
||||
<< "; drm_.get_drm_fd_by_index(gpu_id_, &bdf): "
|
||||
<< smi_amdgpu_get_status_string(ret, false) << std::endl;
|
||||
// std::cout << ss.str();
|
||||
LOG_DEBUG(ss);
|
||||
if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
|
||||
bdf_ = bdf, path_ = path, fd_ = fd;
|
||||
|
||||
@@ -46,7 +46,7 @@ amdsmi_status_t AMDSmiLibraryLoader::load(const char* filename) {
|
||||
if (!libHandler_) {
|
||||
char* error = dlerror();
|
||||
std::cerr << "Fail to open " << filename <<": " << error
|
||||
<< std::endl;
|
||||
<< std::endl;
|
||||
return AMDSMI_STATUS_FAIL_LOAD_MODULE;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,13 +22,13 @@
|
||||
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include "amd_smi/impl/amd_smi_system.h"
|
||||
#include "amd_smi/impl/amd_smi_gpu_device.h"
|
||||
#include "amd_smi/impl/amd_smi_common.h"
|
||||
#include "amd_smi/impl/amd_smi_utils.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi/rocm_smi_main.h"
|
||||
#include <fstream>
|
||||
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
@@ -111,7 +111,6 @@ amdsmi_status_t AMDSmiSystem::init(uint64_t flags) {
|
||||
}
|
||||
#endif
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
|
||||
}
|
||||
|
||||
#ifdef ENABLE_ESMI_LIB
|
||||
@@ -160,6 +159,7 @@ amdsmi_status_t AMDSmiSystem::populate_amd_cpus() {
|
||||
#endif
|
||||
|
||||
amdsmi_status_t AMDSmiSystem::populate_amd_gpu_devices() {
|
||||
AMDSmiSystem::cleanup();
|
||||
// init rsmi
|
||||
rsmi_driver_state_t state;
|
||||
rsmi_status_t ret = rsmi_init(0);
|
||||
@@ -262,18 +262,15 @@ amdsmi_status_t AMDSmiSystem::cleanup() {
|
||||
}
|
||||
#endif
|
||||
if (init_flag_ & AMDSMI_INIT_AMD_GPUS) {
|
||||
for (uint32_t i = 0; i < sockets_.size(); i++) {
|
||||
delete sockets_[i];
|
||||
}
|
||||
processors_.clear();
|
||||
sockets_.clear();
|
||||
// we do not need to delete the sockets/processors, clear takes care of this
|
||||
if (!processors_.empty()) {processors_.clear();}
|
||||
if (!sockets_.empty()) {sockets_.clear();}
|
||||
init_flag_ &= ~AMDSMI_INIT_AMD_GPUS;
|
||||
amd::smi::AMDSmiSystem::getInstance().clean_up_drm();
|
||||
rsmi_status_t ret = rsmi_shut_down();
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
return amd::smi::rsmi_to_amdsmi_status(ret);
|
||||
}
|
||||
|
||||
drm_.cleanup();
|
||||
}
|
||||
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
|
||||
+267
-117
@@ -20,7 +20,9 @@
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <amdgpu.h>
|
||||
#include <limits.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <libdrm/amdgpu.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdint.h>
|
||||
@@ -34,6 +36,7 @@
|
||||
#include <xf86drmMode.h>
|
||||
#include <dirent.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <fstream>
|
||||
@@ -42,13 +45,13 @@
|
||||
#include <cstdio>
|
||||
#include <sstream>
|
||||
#include <iterator>
|
||||
#include <sys/ioctl.h>
|
||||
#include <algorithm>
|
||||
#include <limits.h>
|
||||
|
||||
#include "amd_smi/impl/amd_smi_utils.h"
|
||||
#include "amd_smi/impl/amd_smi_system.h"
|
||||
#include "shared_mutex.h" // NOLINT
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
|
||||
std::string leftTrim(const std::string &s) {
|
||||
if (!s.empty()) {
|
||||
@@ -94,15 +97,33 @@ std::string removeString(const std::string origStr,
|
||||
return modifiedStr;
|
||||
}
|
||||
|
||||
void openFileAndModifyBuffer(std::string path, char *buff, size_t sizeOfBuff) {
|
||||
/**
 * Zero the first @p len bytes of @p buffer, then copy as much of
 * @p newString as fits while always leaving room for the NUL terminator.
 *
 * @param buffer     destination character buffer of at least @p len bytes
 * @param len        capacity of @p buffer in bytes, terminator included
 * @param newString  text to store; truncated to len - 1 characters
 *
 * A null @p buffer or a zero @p len is a no-op: there is no byte that could
 * legally be written, not even the terminator.
 */
static void clearCharBufferAndReinitialize(char buffer[], uint32_t len, std::string newString) {
  // Without this guard, len == 0 makes (len - 1) wrap to UINT32_MAX, so the
  // copy length becomes the full string length and both the memcpy and the
  // terminator write land outside the buffer.
  if (buffer == nullptr || len == 0) {
    return;
  }

  std::fill(buffer, buffer + len, 0);

  // Reserve the last byte for the terminator.
  const size_t copy_len = std::min(static_cast<size_t>(len) - 1, newString.length());
  if (copy_len > 0) {
    std::memcpy(buffer, newString.c_str(), copy_len);
  }
  buffer[copy_len] = '\0';
}
|
||||
|
||||
int openFileAndModifyBuffer(std::string path, char *buff, size_t sizeOfBuff,
|
||||
bool trim_whitespace = true) {
|
||||
bool errorDiscovered = false;
|
||||
std::ifstream file(path, std::ifstream::in);
|
||||
std::string contents = {std::istreambuf_iterator<char>{file}, std::istreambuf_iterator<char>{}};
|
||||
memset(buff, 0, sizeof(char) * sizeOfBuff);
|
||||
clearCharBufferAndReinitialize(buff, sizeOfBuff, contents);
|
||||
if (!file.is_open()) {
|
||||
errorDiscovered = true;
|
||||
} else {
|
||||
contents = trim(contents);
|
||||
if (trim_whitespace) {
|
||||
contents = amd::smi::trimAllWhiteSpace(contents);
|
||||
}
|
||||
// remove all new lines
|
||||
contents.erase(std::remove(contents.begin(), contents.end(), '\n'), contents.cend());
|
||||
}
|
||||
|
||||
file.close();
|
||||
@@ -110,6 +131,9 @@ void openFileAndModifyBuffer(std::string path, char *buff, size_t sizeOfBuff) {
|
||||
&& !contents.empty()) {
|
||||
std::strncpy(buff, contents.c_str(), sizeOfBuff-1);
|
||||
buff[sizeOfBuff-1] = '\0';
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -143,9 +167,6 @@ static bool isAMDGPU(std::string dev_path) {
|
||||
|
||||
amdsmi_status_t smi_amdgpu_find_hwmon_dir(amd::smi::AMDSmiGPUDevice *device, std::string* full_path)
|
||||
{
|
||||
if (!device->check_if_drm_is_supported()) {
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
if (full_path == nullptr) {
|
||||
return AMDSMI_STATUS_API_FAILED;
|
||||
}
|
||||
@@ -181,9 +202,6 @@ amdsmi_status_t smi_amdgpu_find_hwmon_dir(amd::smi::AMDSmiGPUDevice *device, std
|
||||
|
||||
|
||||
amdsmi_status_t smi_amdgpu_get_board_info(amd::smi::AMDSmiGPUDevice* device, amdsmi_board_info_t *info) {
|
||||
if (!device->check_if_drm_is_supported()) {
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
SMIGPUDEVICE_MUTEX(device->get_mutex())
|
||||
std::string model_number_path = "/sys/class/drm/" + device->get_gpu_path() + std::string("/device/product_number");
|
||||
std::string product_serial_path = "/sys/class/drm/" + device->get_gpu_path() + std::string("/device/serial_number");
|
||||
@@ -191,25 +209,34 @@ amdsmi_status_t smi_amdgpu_get_board_info(amd::smi::AMDSmiGPUDevice* device, amd
|
||||
std::string manufacturer_name_path = "/sys/class/drm/" + device->get_gpu_path() + std::string("/device/manufacturer");
|
||||
std::string product_name_path = "/sys/class/drm/" + device->get_gpu_path() + std::string("/device/product_name");
|
||||
|
||||
openFileAndModifyBuffer(model_number_path, info->model_number, AMDSMI_MAX_STRING_LENGTH);
|
||||
openFileAndModifyBuffer(product_serial_path, info->product_serial, AMDSMI_MAX_STRING_LENGTH);
|
||||
openFileAndModifyBuffer(fru_id_path, info->fru_id, AMDSMI_MAX_STRING_LENGTH);
|
||||
openFileAndModifyBuffer(manufacturer_name_path, info->manufacturer_name, AMDSMI_MAX_STRING_LENGTH);
|
||||
openFileAndModifyBuffer(product_name_path, info->product_name, AMDSMI_MAX_STRING_LENGTH);
|
||||
auto ret_mod = openFileAndModifyBuffer(model_number_path, info->model_number,
|
||||
AMDSMI_MAX_STRING_LENGTH);
|
||||
auto ret_ser = openFileAndModifyBuffer(product_serial_path, info->product_serial,
|
||||
AMDSMI_MAX_STRING_LENGTH);
|
||||
auto ret_fru = openFileAndModifyBuffer(fru_id_path, info->fru_id, AMDSMI_MAX_STRING_LENGTH);
|
||||
auto ret_man = openFileAndModifyBuffer(manufacturer_name_path, info->manufacturer_name,
|
||||
AMDSMI_MAX_STRING_LENGTH);
|
||||
auto ret_prod = openFileAndModifyBuffer(product_name_path, info->product_name,
|
||||
AMDSMI_MAX_STRING_LENGTH, false);
|
||||
|
||||
std::ostringstream ss;
|
||||
ss << __PRETTY_FUNCTION__ << "[Before correction] "
|
||||
<< "Returning status = AMDSMI_STATUS_SUCCESS"
|
||||
<< " | model_number_path = |" << model_number_path << "|\n"
|
||||
<< "; info->model_number: |" << info->model_number << "|\n"
|
||||
<< "; ret_mod = " << ret_mod << "|\n"
|
||||
<< "\n product_serial_path = |" << product_serial_path << "|\n"
|
||||
<< "; info->product_serial: |" << info->product_serial << "|\n"
|
||||
<< "; ret_ser = " << ret_ser << "|\n"
|
||||
<< "\n fru_id_path = |" << fru_id_path << "|\n"
|
||||
<< "; info->fru_id: |" << info->fru_id << "|\n"
|
||||
<< "; ret_fru = " << ret_fru << "|\n"
|
||||
<< "\n manufacturer_name_path = |" << manufacturer_name_path << "|\n"
|
||||
<< "; info->manufacturer_name: |" << info->manufacturer_name << "|\n"
|
||||
<< "; ret_man = " << ret_man << "|\n"
|
||||
<< "\n product_name_path = |" << product_name_path << "|\n"
|
||||
<< "; info->product_name: |" << info->product_name << "|";
|
||||
<< "; info->product_name: |" << info->product_name << "|"
|
||||
<< "; ret_prod = " << ret_prod << "|\n";
|
||||
LOG_INFO(ss);
|
||||
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
@@ -217,9 +244,6 @@ amdsmi_status_t smi_amdgpu_get_board_info(amd::smi::AMDSmiGPUDevice* device, amd
|
||||
|
||||
amdsmi_status_t smi_amdgpu_get_power_cap(amd::smi::AMDSmiGPUDevice* device, int *cap)
|
||||
{
|
||||
if (!device->check_if_drm_is_supported()) {
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
constexpr int DATA_SIZE = 16;
|
||||
char val[DATA_SIZE];
|
||||
std::string fullpath;
|
||||
@@ -251,9 +275,6 @@ amdsmi_status_t smi_amdgpu_get_power_cap(amd::smi::AMDSmiGPUDevice* device, int
|
||||
amdsmi_status_t smi_amdgpu_get_ranges(amd::smi::AMDSmiGPUDevice* device, amdsmi_clk_type_t domain,
|
||||
int *max_freq, int *min_freq, int *num_dpm, int *sleep_state_freq)
|
||||
{
|
||||
if (!device->check_if_drm_is_supported()) {
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
SMIGPUDEVICE_MUTEX(device->get_mutex())
|
||||
std::string fullpath = "/sys/class/drm/" + device->get_gpu_path() + "/device";
|
||||
|
||||
@@ -289,7 +310,7 @@ amdsmi_status_t smi_amdgpu_get_ranges(amd::smi::AMDSmiGPUDevice* device, amdsmi_
|
||||
std::ifstream ranges(fullpath.c_str());
|
||||
|
||||
if (ranges.fail()) {
|
||||
return AMDSMI_STATUS_API_FAILED;
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
unsigned int max, min, dpm, sleep_freq;
|
||||
@@ -339,16 +360,13 @@ amdsmi_status_t smi_amdgpu_get_ranges(amd::smi::AMDSmiGPUDevice* device, amdsmi_
|
||||
}
|
||||
|
||||
amdsmi_status_t smi_amdgpu_get_enabled_blocks(amd::smi::AMDSmiGPUDevice* device, uint64_t *enabled_blocks) {
|
||||
if (!device->check_if_drm_is_supported()) {
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
SMIGPUDEVICE_MUTEX(device->get_mutex())
|
||||
std::string fullpath = "/sys/class/drm/" + device->get_gpu_path() + "/device/ras/features";
|
||||
std::string fullpath = "/sys/class/drm/" + device->get_gpu_path() + "/device/ras/features";
|
||||
std::ifstream f(fullpath.c_str());
|
||||
std::string tmp_str;
|
||||
|
||||
if (f.fail()) {
|
||||
return AMDSMI_STATUS_API_FAILED;
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
std::string line;
|
||||
@@ -372,9 +390,6 @@ amdsmi_status_t smi_amdgpu_get_enabled_blocks(amd::smi::AMDSmiGPUDevice* device,
|
||||
|
||||
amdsmi_status_t smi_amdgpu_get_bad_page_info(amd::smi::AMDSmiGPUDevice* device,
|
||||
uint32_t *num_pages, amdsmi_retired_page_record_t *info) {
|
||||
if (!device->check_if_drm_is_supported()) {
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
SMIGPUDEVICE_MUTEX(device->get_mutex())
|
||||
std::string line;
|
||||
std::vector<std::string> badPagesVec;
|
||||
@@ -449,9 +464,6 @@ static uint32_t GetDeviceIndex(const std::string s) {
|
||||
|
||||
amdsmi_status_t smi_amdgpu_get_bad_page_threshold(amd::smi::AMDSmiGPUDevice* device,
|
||||
uint32_t *threshold) {
|
||||
if (!device->check_if_drm_is_supported()) {
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
SMIGPUDEVICE_MUTEX(device->get_mutex())
|
||||
|
||||
//TODO: Accessing the node requires root privileges, and its interface may need to be exposed in another path
|
||||
@@ -475,9 +487,6 @@ amdsmi_status_t smi_amdgpu_get_bad_page_threshold(amd::smi::AMDSmiGPUDevice* dev
|
||||
}
|
||||
|
||||
amdsmi_status_t smi_amdgpu_validate_ras_eeprom(amd::smi::AMDSmiGPUDevice* device) {
|
||||
if (!device->check_if_drm_is_supported()) {
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
SMIGPUDEVICE_MUTEX(device->get_mutex())
|
||||
|
||||
//uint32_t index = GetDeviceIndex(device->get_gpu_path());
|
||||
@@ -487,9 +496,6 @@ amdsmi_status_t smi_amdgpu_validate_ras_eeprom(amd::smi::AMDSmiGPUDevice* device
|
||||
}
|
||||
|
||||
amdsmi_status_t smi_amdgpu_get_ecc_error_count(amd::smi::AMDSmiGPUDevice* device, amdsmi_error_count_t *err_cnt) {
|
||||
if (!device->check_if_drm_is_supported()) {
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
SMIGPUDEVICE_MUTEX(device->get_mutex())
|
||||
char str[10];
|
||||
|
||||
@@ -511,81 +517,26 @@ amdsmi_status_t smi_amdgpu_get_ecc_error_count(amd::smi::AMDSmiGPUDevice* device
|
||||
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
amdsmi_status_t smi_amdgpu_get_driver_version(amd::smi::AMDSmiGPUDevice* device, int *length, char *version) {
|
||||
if (!device->check_if_drm_is_supported()) {
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
SMIGPUDEVICE_MUTEX(device->get_mutex())
|
||||
amdsmi_status_t status = AMDSMI_STATUS_SUCCESS;
|
||||
FILE *fp;
|
||||
char *tmp, *ptr, *token;
|
||||
char *ver = NULL;
|
||||
int i = 0;
|
||||
amdsmi_status_t status = AMDSMI_STATUS_SUCCESS;
|
||||
size_t len;
|
||||
|
||||
if (length)
|
||||
len = *length < AMDSMI_MAX_DRIVER_VERSION_LENGTH ? *length : AMDSMI_MAX_DRIVER_VERSION_LENGTH;
|
||||
else
|
||||
len = AMDSMI_MAX_DRIVER_VERSION_LENGTH;
|
||||
|
||||
std::string path = "/sys/module/amdgpu/version";
|
||||
|
||||
fp = fopen(path.c_str(), "r");
|
||||
if (fp == nullptr){
|
||||
fp = fopen("/proc/version", "r");
|
||||
if (fp == nullptr) {
|
||||
status = AMDSMI_STATUS_IO;
|
||||
return status;
|
||||
}
|
||||
|
||||
len = 0;
|
||||
if (getline(&ver, &len, fp) <= 0) {
|
||||
status = AMDSMI_STATUS_IO;
|
||||
fclose(fp);
|
||||
free(ver);
|
||||
return status;
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
|
||||
ptr = ver;
|
||||
token = strtok_r(ptr, " ", &tmp);
|
||||
|
||||
if (!token) {
|
||||
free(ver);
|
||||
status = AMDSMI_STATUS_IO;
|
||||
return status;
|
||||
}
|
||||
for (i = 0; i < 2; i++) {
|
||||
ptr = strtok_r(NULL, " ", &tmp);
|
||||
if (!ptr)
|
||||
break;
|
||||
}
|
||||
if (i != 2 || !ptr) {
|
||||
free(ver);
|
||||
status = AMDSMI_STATUS_IO;
|
||||
return status;
|
||||
}
|
||||
if (length)
|
||||
len = *length < AMDSMI_MAX_DRIVER_VERSION_LENGTH ? *length :
|
||||
AMDSMI_MAX_DRIVER_VERSION_LENGTH;
|
||||
else
|
||||
len = AMDSMI_MAX_DRIVER_VERSION_LENGTH;
|
||||
|
||||
strncpy(version, ptr, len);
|
||||
free(ver);
|
||||
if (*length <= 0 || version == nullptr) {
|
||||
return AMDSMI_STATUS_INVAL;
|
||||
} else {
|
||||
if ((len = getline(&version, &len, fp)) <= 0)
|
||||
status = AMDSMI_STATUS_IO;
|
||||
|
||||
fclose(fp);
|
||||
if (length) {
|
||||
*length = version[len-1] == '\n' ? static_cast<int>(len - 1) : static_cast<int>(len);
|
||||
}
|
||||
version[len-1] = version[len-1] == '\n' ? '\0' : version[len-1];
|
||||
len = static_cast<size_t>(*length);
|
||||
}
|
||||
|
||||
std::string empty = "";
|
||||
std::strncpy(version, empty.c_str(), len-1);
|
||||
openFileAndModifyBuffer("/sys/module/amdgpu/version",
|
||||
version, static_cast<size_t>(len));
|
||||
if (version[0] == '\0') {
|
||||
openFileAndModifyBuffer("/proc/version", version, static_cast<size_t>(len));
|
||||
if (version[0] == '\0') {
|
||||
return AMDSMI_STATUS_IO;
|
||||
}
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -621,17 +572,37 @@ amdsmi_status_t smi_amdgpu_get_market_name_from_dev_id(amd::smi::AMDSmiGPUDevice
|
||||
return AMDSMI_STATUS_ARG_PTR_NULL;
|
||||
}
|
||||
|
||||
std::ostringstream ss;
|
||||
// requires libdrm being active
|
||||
if (!device->check_if_drm_is_supported()) {
|
||||
ss << __PRETTY_FUNCTION__ << " | DRM is not supported";
|
||||
LOG_ERROR(ss);
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
uint32_t major_version, minor_version;
|
||||
amdgpu_device_handle device_handle = nullptr;
|
||||
std::string render_name = device->get_gpu_path();
|
||||
int fd = -1;
|
||||
std::string path = "/dev/dri/" + render_name;
|
||||
|
||||
uint32_t gpu_fd = device->get_gpu_fd();
|
||||
if (render_name != "") {
|
||||
fd = open(path.c_str(), O_RDWR | O_CLOEXEC);
|
||||
} else {
|
||||
market_name[0] = '\0';
|
||||
close(fd);
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
ss << __PRETTY_FUNCTION__ << " | Render Name: "
|
||||
<< render_name << "; path: " << path << "; fd: " << fd;
|
||||
LOG_DEBUG(ss);
|
||||
|
||||
int ret = amdgpu_device_initialize(gpu_fd, &major_version, &minor_version, &device_handle);
|
||||
int ret = amdgpu_device_initialize(fd, &major_version, &minor_version, &device_handle);
|
||||
if (ret != 0) {
|
||||
std::string empty = "";
|
||||
std::strncpy(market_name, empty.c_str(), AMDSMI_256_LENGTH - 1);
|
||||
amdgpu_device_deinitialize(device_handle);
|
||||
close(fd);
|
||||
return AMDSMI_STATUS_DRM_ERROR;
|
||||
}
|
||||
|
||||
@@ -641,19 +612,17 @@ amdsmi_status_t smi_amdgpu_get_market_name_from_dev_id(amd::smi::AMDSmiGPUDevice
|
||||
std::strncpy(market_name, name, AMDSMI_256_LENGTH - 1);
|
||||
market_name[AMDSMI_256_LENGTH - 1] = '\0';
|
||||
amdgpu_device_deinitialize(device_handle);
|
||||
close(fd);
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
amdgpu_device_deinitialize(device_handle);
|
||||
close(fd);
|
||||
return AMDSMI_STATUS_DRM_ERROR;
|
||||
}
|
||||
|
||||
amdsmi_status_t smi_amdgpu_is_gpu_power_management_enabled(amd::smi::AMDSmiGPUDevice* device,
|
||||
bool *enabled) {
|
||||
if (!device->check_if_drm_is_supported()) {
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
if (enabled == nullptr) {
|
||||
return AMDSMI_STATUS_API_FAILED;
|
||||
}
|
||||
@@ -713,3 +682,184 @@ std::string smi_amdgpu_get_status_string(amdsmi_status_t ret, bool fullStatus =
|
||||
return std::string(err_str);
|
||||
}
|
||||
|
||||
// TODO(amdsmi_team): Do we want to include these functions in header?
|
||||
/**
 * Find the flat, zero-based index of @p processor_handle, counting processors
 * across all sockets in the order the socket/processor enumeration APIs
 * return them.
 *
 * @param processor_handle  handle to look up
 * @param device_index      output; set to UINT32_MAX until a match is found
 * @return AMDSMI_STATUS_INVAL       if @p device_index is null,
 *         the failing status        if the socket handles cannot be queried,
 *         AMDSMI_STATUS_SUCCESS     when the handle is found,
 *         AMDSMI_STATUS_API_FAILED  when no processor matches.
 */
amdsmi_status_t smi_amdgpu_get_device_index(amdsmi_processor_handle processor_handle,
                                            uint32_t *device_index) {
  uint32_t socket_count = 0;
  std::vector<amdsmi_socket_handle> sockets;
  std::ostringstream ss;

  if (device_index == nullptr) {
    return AMDSMI_STATUS_INVAL;
  }
  *device_index = std::numeric_limits<uint32_t>::max();  // set to max value for invalid readings

  auto ret = amdsmi_get_socket_handles(&socket_count, nullptr);
  if (ret != AMDSMI_STATUS_SUCCESS) {
    return ret;
  }
  // allocate memory
  sockets.resize(socket_count);
  // data() is valid on an empty vector; &sockets[0] is not.
  ret = amdsmi_get_socket_handles(&socket_count, sockets.data());
  if (ret != AMDSMI_STATUS_SUCCESS) {
    return ret;
  }

  uint32_t current_device_index = 0;
  for (uint32_t i = 0; i < socket_count; i++) {
    // Get Socket info. Zero-initialised so the log below never streams
    // indeterminate bytes when the query fails.
    char socket_info[128] = {};
    ret = amdsmi_get_socket_info(sockets[i], 128, socket_info);
    ss << __PRETTY_FUNCTION__ << " | Socket " << socket_info << "\n";
    LOG_DEBUG(ss);

    // Get the device count available for the socket.
    uint32_t device_count = 0;
    ret = amdsmi_get_processor_handles(sockets[i], &device_count, nullptr);

    // Allocate the memory for the device handlers on the socket
    std::vector<amdsmi_processor_handle> processor_handles(device_count);
    // Get all devices of the socket (data() stays valid when the count is 0).
    ret = amdsmi_get_processor_handles(sockets[i], &device_count, processor_handles.data());
    ss << __PRETTY_FUNCTION__ << " | Processor Count: " << device_count << "\n";
    LOG_DEBUG(ss);

    for (uint32_t j = 0; j < device_count; j++) {
      if (processor_handles[j] == processor_handle) {
        *device_index = current_device_index;
        ss << __PRETTY_FUNCTION__ << " | AMDSMI_STATUS_SUCCESS "
           << "Returning device_index: " << *device_index << "\nSocket #: " << i
           << "; Device #: " << j << "; current_device_index #: " << current_device_index
           << "\n";
        // std::cout << ss.str();
        LOG_DEBUG(ss);
        return AMDSMI_STATUS_SUCCESS;
      }
      current_device_index++;
    }
  }
  ss << __PRETTY_FUNCTION__ << " | AMDSMI_STATUS_API_FAILED "
     << "Returning device_index: " << *device_index << "\n";
  LOG_DEBUG(ss);
  return AMDSMI_STATUS_API_FAILED;
}
|
||||
|
||||
// TODO(amdsmi_team): Do we want to include these functions in header?
|
||||
/**
 * Count the processors reported across all sockets.
 *
 * @param total_num_devices  output; set to UINT32_MAX until the count has
 *                           been computed
 * @return AMDSMI_STATUS_INVAL    if @p total_num_devices is null,
 *         the failing status     if the socket handles cannot be queried,
 *         AMDSMI_STATUS_SUCCESS  otherwise.
 */
amdsmi_status_t smi_amdgpu_get_device_count(uint32_t *total_num_devices) {
  uint32_t socket_count = 0;
  std::vector<amdsmi_socket_handle> sockets;
  std::ostringstream ss;

  if (total_num_devices == nullptr) {
    return AMDSMI_STATUS_INVAL;
  }
  // set to max value for invalid readings
  *total_num_devices = std::numeric_limits<uint32_t>::max();

  auto ret = amdsmi_get_socket_handles(&socket_count, nullptr);
  if (ret != AMDSMI_STATUS_SUCCESS) {
    return ret;
  }
  // allocate memory
  sockets.resize(socket_count);
  // data() is valid on an empty vector; &sockets[0] is not.
  ret = amdsmi_get_socket_handles(&socket_count, sockets.data());
  if (ret != AMDSMI_STATUS_SUCCESS) {
    return ret;
  }

  uint32_t device_num = 0;
  for (uint32_t i = 0; i < socket_count; i++) {
    // Get Socket info. Zero-initialised so the log below never streams
    // indeterminate bytes when the query fails.
    char socket_info[128] = {};
    ret = amdsmi_get_socket_info(sockets[i], 128, socket_info);
    ss << __PRETTY_FUNCTION__ << " | Socket " << socket_info << "\n";
    LOG_DEBUG(ss);

    // Get the processor count available for the socket.
    uint32_t processor_count = 0;
    ret = amdsmi_get_processor_handles(sockets[i], &processor_count, nullptr);

    // Allocate the memory for the device handlers on the socket
    std::vector<amdsmi_processor_handle> processor_handles(processor_count);
    // Get all devices of the socket (data() stays valid when the count is 0).
    ret = amdsmi_get_processor_handles(sockets[i], &processor_count, processor_handles.data());
    ss << __PRETTY_FUNCTION__ << " | Processor Count: " << processor_count << "\n";
    LOG_DEBUG(ss);

    // Every processor on the socket counts once.
    device_num += processor_count;
  }
  *total_num_devices = device_num;
  // The value logged here is the total count, so label it as such.
  ss << __PRETTY_FUNCTION__ << " | AMDSMI_STATUS_SUCCESS "
     << "Returning total_num_devices: " << *total_num_devices << "\n";
  LOG_DEBUG(ss);
  return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
// TODO(amdsmi_team): Do we want to include these functions in header?
|
||||
// Resolves a flat device index into a processor handle.
//
// Devices are numbered by walking the sockets in the order returned by
// amdsmi_get_socket_handles() and, within each socket, the processors in the
// order returned by amdsmi_get_processor_handles().
//
// @param device_index      Zero-based flat index of the wanted device.
// @param processor_handle  Out-parameter; written only when a match is found.
// @return AMDSMI_STATUS_INVAL if processor_handle is null; the status
//         returned by amdsmi_get_socket_handles() if socket enumeration
//         fails; AMDSMI_STATUS_SUCCESS when the index was found;
//         AMDSMI_STATUS_API_FAILED when no device has that index.
//
// A socket whose processor query fails is logged and skipped, so a handle
// that was never filled in by the library is never returned as a success.
amdsmi_status_t smi_amdgpu_get_processor_handle_by_index(
    uint32_t device_index,
    amdsmi_processor_handle *processor_handle) {
  if (processor_handle == nullptr) {
    return AMDSMI_STATUS_INVAL;
  }

  std::ostringstream ss;
  uint32_t socket_count = 0;
  auto ret = amdsmi_get_socket_handles(&socket_count, nullptr);
  if (ret != AMDSMI_STATUS_SUCCESS) {
    return ret;
  }

  // Only ask for the handles when there is storage to receive them; taking
  // the address of element 0 of an empty vector is undefined behaviour.
  std::vector<amdsmi_socket_handle> sockets(socket_count);
  if (!sockets.empty()) {
    ret = amdsmi_get_socket_handles(&socket_count, sockets.data());
    if (ret != AMDSMI_STATUS_SUCCESS) {
      return ret;
    }
  }

  uint32_t current_device_index = 0;
  for (uint32_t i = 0; i < socket_count && i < sockets.size(); i++) {
    // Zero-initialised so the log never streams an unterminated buffer.
    char socket_info[128] = {};
    ret = amdsmi_get_socket_info(sockets[i], sizeof(socket_info), socket_info);
    ss << __PRETTY_FUNCTION__ << " | Socket "
       << (ret == AMDSMI_STATUS_SUCCESS ? socket_info : "N/A") << "\n";
    LOG_DEBUG(ss);

    // Get the device count available for the socket.
    uint32_t device_count = 0;
    ret = amdsmi_get_processor_handles(sockets[i], &device_count, nullptr);
    if (ret != AMDSMI_STATUS_SUCCESS) {
      ss << __PRETTY_FUNCTION__ << " | Could not query processor count for socket #: "
         << i << "; skipping socket\n";
      LOG_DEBUG(ss);
      continue;
    }
    if (device_count == 0) {
      ss << __PRETTY_FUNCTION__ << " | Processor Count: " << device_count << "\n";
      LOG_DEBUG(ss);
      continue;
    }

    // Get all devices of the socket
    std::vector<amdsmi_processor_handle> processor_handles(device_count);
    ret = amdsmi_get_processor_handles(sockets[i], &device_count,
                                       processor_handles.data());
    if (ret != AMDSMI_STATUS_SUCCESS) {
      ss << __PRETTY_FUNCTION__ << " | Could not get processor handles for socket #: "
         << i << "; skipping socket\n";
      LOG_DEBUG(ss);
      continue;
    }
    ss << __PRETTY_FUNCTION__ << " | Processor Count: " << device_count << "\n";
    LOG_DEBUG(ss);

    // The second bound keeps the walk inside the allocated storage even if
    // the library reports a different count on the second call.
    for (uint32_t j = 0; j < device_count && j < processor_handles.size(); j++) {
      if (current_device_index == device_index) {
        *processor_handle = processor_handles[j];
        ss << __PRETTY_FUNCTION__ << " | AMDSMI_STATUS_SUCCESS"
           << "\nReturning processor_handle for device_index: " << device_index
           << "\nSocket #: " << i << "; Device #: " << j
           << "; current_device_index #: " << current_device_index
           << "; processor_handle: " << *processor_handle
           << "; processor_handles[j]: " << processor_handles[j]
           << "\n";
        LOG_DEBUG(ss);
        return AMDSMI_STATUS_SUCCESS;
      }
      current_device_index++;
    }
  }

  ss << __PRETTY_FUNCTION__ << " | AMDSMI_STATUS_API_FAILED "
     << "Could not find matching processor_handle for device_index: " << device_index << "\n";
  LOG_DEBUG(ss);
  return AMDSMI_STATUS_API_FAILED;
}
|
||||
|
||||
Εκτελέσιμο αρχείο
+1089
Το diff αρχείου καταστέλλεται επειδή είναι πολύ μεγάλο
Φόρτωση Διαφορών
Εκτελέσιμο αρχείο
+51
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_COMPUTEPARTITION_READ_WRITE_H_
|
||||
#define TESTS_AMD_SMI_TEST_FUNCTIONAL_COMPUTEPARTITION_READ_WRITE_H_
|
||||
|
||||
#include "../test_base.h"
|
||||
|
||||
// Test case declaration built on TestBase.
// NOTE(review): judging by the name it covers compute-partition read/write;
// the implementation lives outside this header - confirm there.
class TestComputePartitionReadWrite : public TestBase {
 public:
  // @Brief: Construct the TestComputePartitionReadWrite test case
  TestComputePartitionReadWrite();

  // @Brief: Destroy the TestComputePartitionReadWrite test case
  virtual ~TestComputePartitionReadWrite();

  // @Brief: Prepare the environment before the measurement starts
  virtual void SetUp();

  // @Brief: Execute the core of the measurement
  virtual void Run();

  // @Brief: Clean up and retrieve the resources
  virtual void Close();

  // @Brief: Print the results of the test
  virtual void DisplayResults() const;

  // @Brief: Print a description of what this test does
  virtual void DisplayTestInfo();
};
|
||||
|
||||
#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_COMPUTEPARTITION_READ_WRITE_H_
|
||||
@@ -35,6 +35,7 @@
|
||||
#include "gpu_metrics_read.h"
|
||||
#include "../test_common.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
#include "amd_smi/impl/amd_smi_utils.h"
|
||||
|
||||
|
||||
TestGpuMetricsRead::TestGpuMetricsRead() : TestBase() {
|
||||
@@ -101,6 +102,15 @@ void TestGpuMetricsRead::Run(void) {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
auto temp_xcd_counter_value = uint16_t(0);
|
||||
auto ret_xcd = amdsmi_get_gpu_xcd_counter(processor_handles_[i], &temp_xcd_counter_value);
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t\t** amdsmi_get_gpu_xcd_counter(): "
|
||||
<< smi_amdgpu_get_status_string(ret_xcd, false)
|
||||
<< "\n\t\t** XCD Counter Value: "
|
||||
<< temp_xcd_counter_value
|
||||
<< "\n";
|
||||
}
|
||||
CHK_ERR_ASRT(err);
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "METRIC TABLE HEADER:\n";
|
||||
@@ -380,13 +390,5 @@ void TestGpuMetricsRead::Run(void) {
|
||||
amdsmi_status_code_to_string(err, &status_string);
|
||||
std::cout << "\t\t** amdsmi_get_gpu_metrics_info(nullptr check): " << status_string << "\n";
|
||||
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
|
||||
|
||||
|
||||
// TODO(AMD_SMI_team): add xcd_counter_get for amd smi
|
||||
// auto temp_xcd_counter_value = uint16_t(0);
|
||||
// err = rsmi_dev_metrics_xcd_counter_get(i, &temp_xcd_counter_value);
|
||||
// if (err != RSMI_STATUS_NOT_SUPPORTED) {
|
||||
// CHK_ERR_ASRT(err);
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,11 +22,11 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <map>
|
||||
#include "amd_smi/amdsmi.h"
|
||||
#include "id_info_read.h"
|
||||
#include "../test_common.h"
|
||||
@@ -63,6 +63,15 @@ void TestIdInfoRead::Close() {
|
||||
|
||||
static const uint32_t kBufferLen = 80;
|
||||
|
||||
static const std::map< amdsmi_virtualization_mode_t, std::string>
|
||||
virtualization_mode_map = {
|
||||
{AMDSMI_VIRTUALIZATION_MODE_UNKNOWN, "UNKNOWN"},
|
||||
{AMDSMI_VIRTUALIZATION_MODE_BAREMETAL, "BAREMETAL"},
|
||||
{ AMDSMI_VIRTUALIZATION_MODE_HOST, "HOST"},
|
||||
{ AMDSMI_VIRTUALIZATION_MODE_GUEST, "GUEST"},
|
||||
{AMDSMI_VIRTUALIZATION_MODE_PASSTHROUGH, "PASSTHROUGH"}
|
||||
};
|
||||
|
||||
void TestIdInfoRead::Run(void) {
|
||||
amdsmi_status_t err;
|
||||
uint16_t id;
|
||||
@@ -227,5 +236,20 @@ void TestIdInfoRead::Run(void) {
|
||||
// Verify api support checking functionality is working
|
||||
err = amdsmi_get_gpu_bdf_id(processor_handles_[i], nullptr);
|
||||
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
err = amdsmi_get_gpu_virtualization_mode(processor_handles_[i], nullptr);
|
||||
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
|
||||
amdsmi_virtualization_mode_t vmode;
|
||||
err = amdsmi_get_gpu_virtualization_mode(processor_handles_[i], &vmode);
|
||||
ASSERT_EQ(err, AMDSMI_STATUS_SUCCESS);
|
||||
IF_VERB(STANDARD) {
|
||||
auto it = virtualization_mode_map.find(vmode);
|
||||
if (it != virtualization_mode_map.end()) {
|
||||
std::cout << "\t**Virtualization Mode: " << it->second << std::endl;
|
||||
} else {
|
||||
std::cout << "\t**Virtualization Mode: MAP TYPE UNKNOWN?" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Εκτελέσιμο αρχείο
+744
@@ -0,0 +1,744 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <limits>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "../test_base.h"
|
||||
#include "../test_common.h"
|
||||
#include "amd_smi/amdsmi.h"
|
||||
#include "amd_smi/impl/amd_smi_utils.h"
|
||||
#include "memorypartition_read_write.h"
|
||||
|
||||
// Partition-count limits per accelerator partition profile. The test compares
// each reported partition id against the limit of the active profile
// (strictly below it for SPX/DPX/TPX/QPX, equal to the "unsupported" value
// otherwise).
constexpr uint32_t MAX_UNSUPPORTED_PARTITIONS = 0;
constexpr uint32_t MAX_SPX_PARTITIONS = 1;  // Single GPU node
constexpr uint32_t MAX_DPX_PARTITIONS = 2;
constexpr uint32_t MAX_TPX_PARTITIONS = 3;
constexpr uint32_t MAX_QPX_PARTITIONS = 4;
|
||||
|
||||
// Registers the title and description reported for this test case. The
// TestBase sub-object is default-constructed implicitly.
TestMemoryPartitionReadWrite::TestMemoryPartitionReadWrite() {
  set_title("AMDSMI Memory Partition Read Test");
  set_description(
      "The memory partition tests verifies that the memory "
      "partition settings can be read and updated properly.");
}
|
||||
|
||||
// Nothing beyond the base class needs releasing.
TestMemoryPartitionReadWrite::~TestMemoryPartitionReadWrite() = default;
|
||||
|
||||
// Delegates all preparation to TestBase; this test adds no setup of its own.
void TestMemoryPartitionReadWrite::SetUp() {
  TestBase::SetUp();
}
|
||||
|
||||
// Prints the test information through the TestBase implementation.
void TestMemoryPartitionReadWrite::DisplayTestInfo() {
  TestBase::DisplayTestInfo();
}
|
||||
|
||||
// Prints the results through the TestBase implementation.
void TestMemoryPartitionReadWrite::DisplayResults() const {
  TestBase::DisplayResults();
}
|
||||
|
||||
// Tears the test down via TestBase::Close().
void TestMemoryPartitionReadWrite::Close() {
  // Per the original note: TestBase::Close() closes the handles opened by the
  // test utility calls and calls amdsmi_shut_down(), so it has to come after
  // any other cleanup.
  TestBase::Close();
}
|
||||
|
||||
static const std::string
|
||||
memoryPartitionString(amdsmi_memory_partition_type_t memoryPartitionType) {
|
||||
switch (memoryPartitionType) {
|
||||
case AMDSMI_MEMORY_PARTITION_NPS1:
|
||||
return "NPS1";
|
||||
case AMDSMI_MEMORY_PARTITION_NPS2:
|
||||
return "NPS2";
|
||||
case AMDSMI_MEMORY_PARTITION_NPS4:
|
||||
return "NPS4";
|
||||
case AMDSMI_MEMORY_PARTITION_NPS8:
|
||||
return "NPS8";
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
static const std::map<std::string, amdsmi_memory_partition_type_t>
|
||||
mapStringToRSMIMemoryPartitionTypes {
|
||||
{"NPS1", AMDSMI_MEMORY_PARTITION_NPS1},
|
||||
{"NPS2", AMDSMI_MEMORY_PARTITION_NPS2},
|
||||
{"NPS4", AMDSMI_MEMORY_PARTITION_NPS4},
|
||||
{"NPS8", AMDSMI_MEMORY_PARTITION_NPS8}
|
||||
};
|
||||
|
||||
void TestMemoryPartitionReadWrite::Run(void) {
|
||||
amdsmi_status_t ret, err, ret_set;
|
||||
constexpr uint32_t k255Len = 255;
|
||||
constexpr uint32_t k0Len = 0;
|
||||
char orig_memory_partition[k255Len];
|
||||
char current_memory_partition[k255Len];
|
||||
orig_memory_partition[0] = '\0';
|
||||
current_memory_partition[0] = '\0';
|
||||
amdsmi_memory_partition_config_t current_memory_config;
|
||||
const uint32_t kMAX_UINT32 = std::numeric_limits<uint32_t>::max();
|
||||
std::map<uint32_t, AcceleratorProfileConfig> orig_dev_config; // index, ProfileConfig
|
||||
|
||||
TestBase::Run();
|
||||
if (setup_failed_) {
|
||||
std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
bool isVerbose = (this->verbosity() &&
|
||||
this->verbosity() >= (this->TestBase::VERBOSE_STANDARD)) ? true: false;
|
||||
|
||||
// Save original memory partition settings (see orig_dev_config ^)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**=========================================================\n";
|
||||
std::cout << "\t**Save Original Compute Partition Settings ================\n";
|
||||
std::cout << "\t**=========================================================\n";
|
||||
}
|
||||
auto initial_num_devices = num_monitor_devs();
|
||||
for (uint32_t dv_ind = 0; dv_ind < initial_num_devices; ++dv_ind) {
|
||||
if (dv_ind != 0) {
|
||||
std::cout << "\n";
|
||||
}
|
||||
PrintDeviceHeader(processor_handles_[dv_ind]);
|
||||
amdsmi_accelerator_partition_profile_t profile = {};
|
||||
uint32_t partition_id[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
||||
ret = amdsmi_get_gpu_accelerator_partition_profile(processor_handles_[dv_ind],
|
||||
&profile, &partition_id[0]);
|
||||
std::string nps_caps_str = "";
|
||||
if ((profile.memory_caps.nps_flags.nps1_cap == 0
|
||||
&& profile.memory_caps.nps_flags.nps2_cap == 0
|
||||
&& profile.memory_caps.nps_flags.nps4_cap == 0
|
||||
&& profile.memory_caps.nps_flags.nps8_cap == 0)) {
|
||||
nps_caps_str = "N/A";
|
||||
} else {
|
||||
nps_caps_str.clear();
|
||||
if (profile.memory_caps.nps_flags.nps1_cap) {
|
||||
(nps_caps_str.empty()) ? nps_caps_str += "NPS1" : nps_caps_str += ", NPS1";
|
||||
}
|
||||
if (profile.memory_caps.nps_flags.nps2_cap) {
|
||||
(nps_caps_str.empty()) ? nps_caps_str += "NPS2" : nps_caps_str += ", NPS2";
|
||||
}
|
||||
if (profile.memory_caps.nps_flags.nps4_cap) {
|
||||
(nps_caps_str.empty()) ? nps_caps_str += "NPS4" : nps_caps_str += ", NPS4";
|
||||
}
|
||||
if (profile.memory_caps.nps_flags.nps8_cap) {
|
||||
(nps_caps_str.empty()) ? nps_caps_str += "NPS8" : nps_caps_str += ", NPS8";
|
||||
}
|
||||
}
|
||||
|
||||
std::string profile_type_str = "N/A";
|
||||
if (profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_SPX) {
|
||||
profile_type_str = "SPX";
|
||||
} else if (profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_DPX) {
|
||||
profile_type_str = "DPX";
|
||||
} else if (profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_TPX) {
|
||||
profile_type_str = "TPX";
|
||||
} else if (profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_QPX) {
|
||||
profile_type_str = "QPX";
|
||||
} else if (profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_CPX) {
|
||||
profile_type_str = "CPX";
|
||||
}
|
||||
|
||||
std::string partition_id_str = "";
|
||||
for (int i = 0; i < 8; i++) {
|
||||
partition_id_str += std::to_string(partition_id[i]);
|
||||
if (i < 7) {
|
||||
partition_id_str += ", ";
|
||||
}
|
||||
|
||||
switch (profile.profile_type) {
|
||||
case AMDSMI_ACCELERATOR_PARTITION_SPX:
|
||||
EXPECT_LT(partition_id[i], MAX_SPX_PARTITIONS);
|
||||
break;
|
||||
case AMDSMI_ACCELERATOR_PARTITION_DPX:
|
||||
EXPECT_LT(partition_id[i], MAX_DPX_PARTITIONS);
|
||||
break;
|
||||
case AMDSMI_ACCELERATOR_PARTITION_TPX:
|
||||
EXPECT_LT(partition_id[i], MAX_TPX_PARTITIONS);
|
||||
break;
|
||||
case AMDSMI_ACCELERATOR_PARTITION_QPX:
|
||||
EXPECT_LT(partition_id[i], MAX_QPX_PARTITIONS);
|
||||
break;
|
||||
case AMDSMI_ACCELERATOR_PARTITION_CPX: {
|
||||
uint16_t num_xcd;
|
||||
uint32_t max_xcps = 0;
|
||||
ret = amdsmi_get_gpu_xcd_counter(processor_handles_[dv_ind], &num_xcd);
|
||||
if (ret == AMDSMI_STATUS_SUCCESS) {
|
||||
max_xcps = static_cast<uint32_t>(num_xcd);
|
||||
}
|
||||
EXPECT_LT(partition_id[i], max_xcps);
|
||||
break;
|
||||
}
|
||||
case AMDSMI_ACCELERATOR_PARTITION_INVALID:
|
||||
EXPECT_EQ(partition_id[i], MAX_UNSUPPORTED_PARTITIONS);
|
||||
break;
|
||||
default:
|
||||
EXPECT_EQ(partition_id[i], MAX_UNSUPPORTED_PARTITIONS);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**amdsmi_get_gpu_accelerator_partition_profile(processor_handles_["
|
||||
<< dv_ind << "], &profile, &partition_id[0]):\n"
|
||||
<< "\t\t" << smi_amdgpu_get_status_string(ret, false)
|
||||
<< "\n\t**Current profile.profile_type: "
|
||||
<< profile_type_str
|
||||
<< "\n\t**profile.num_partitions: "
|
||||
<< (profile.num_partitions == kMAX_UINT32
|
||||
? "N/A" : std::to_string(profile.num_partitions))
|
||||
<< "\n\t**profile.memory_caps: "
|
||||
<< nps_caps_str
|
||||
<< "\n\t**profile.profile_index: "
|
||||
<< (profile.profile_index == kMAX_UINT32
|
||||
? "N/A" : std::to_string(profile.profile_index))
|
||||
<< "\n\t**profile.num_resources: "
|
||||
<< profile.num_resources
|
||||
<< "\n\t**partition_id: "
|
||||
<< partition_id_str
|
||||
<< std::endl;
|
||||
}
|
||||
EXPECT_TRUE(ret == AMDSMI_STATUS_SUCCESS
|
||||
|| ret == AMDSMI_STATUS_NOT_SUPPORTED);
|
||||
amdsmi_accelerator_partition_profile_config_t profile_config = {};
|
||||
ret = amdsmi_get_gpu_accelerator_partition_profile_config(processor_handles_[dv_ind],
|
||||
&profile_config);
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**amdsmi_get_gpu_accelerator_partition_profile_config(processor_handles_["
|
||||
<< dv_ind << "], &profile_config):\n"
|
||||
<< "\t\t" << smi_amdgpu_get_status_string(ret, false)
|
||||
<< "\n\t**profile_config.num_profiles: "
|
||||
<< profile_config.num_profiles
|
||||
<< "\n\t**profile_config.num_resource_profiles: "
|
||||
<< profile_config.num_resource_profiles
|
||||
<< std::endl;
|
||||
}
|
||||
AcceleratorProfileConfig original_profile_config =
|
||||
getAvailableProfileConfigs(dv_ind, profile, profile_config, isVerbose);
|
||||
orig_dev_config[dv_ind] = original_profile_config;
|
||||
// waitForUserInput(); // watch for any errors
|
||||
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**=========================================================\n";
|
||||
std::cout << "\t**Checking valid profile Sets =============================\n";
|
||||
std::cout << "\t**=========================================================\n";
|
||||
}
|
||||
int resource_index = 0;
|
||||
for (uint32_t i = 0; i < profile_config.num_profiles; i++) {
|
||||
auto current_profile = profile_config.profiles[i];
|
||||
std::string profile_type_str = "N/A";
|
||||
if (current_profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_SPX) {
|
||||
profile_type_str = "SPX";
|
||||
} else if (current_profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_DPX) {
|
||||
profile_type_str = "DPX";
|
||||
} else if (current_profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_TPX) {
|
||||
profile_type_str = "TPX";
|
||||
} else if (current_profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_QPX) {
|
||||
profile_type_str = "QPX";
|
||||
} else if (current_profile.profile_type == AMDSMI_ACCELERATOR_PARTITION_CPX) {
|
||||
profile_type_str = "CPX";
|
||||
}
|
||||
|
||||
std::string nps_caps_str = "";
|
||||
if ((current_profile.memory_caps.nps_flags.nps1_cap == 0
|
||||
&& current_profile.memory_caps.nps_flags.nps2_cap == 0
|
||||
&& current_profile.memory_caps.nps_flags.nps4_cap == 0
|
||||
&& current_profile.memory_caps.nps_flags.nps8_cap == 0)) {
|
||||
nps_caps_str = "N/A";
|
||||
} else {
|
||||
nps_caps_str.clear();
|
||||
if (current_profile.memory_caps.nps_flags.nps1_cap) {
|
||||
(nps_caps_str.empty()) ? nps_caps_str += "NPS1" : nps_caps_str += ", NPS1";
|
||||
}
|
||||
if (current_profile.memory_caps.nps_flags.nps2_cap) {
|
||||
(nps_caps_str.empty()) ? nps_caps_str += "NPS2" : nps_caps_str += ", NPS2";
|
||||
}
|
||||
if (current_profile.memory_caps.nps_flags.nps4_cap) {
|
||||
(nps_caps_str.empty()) ? nps_caps_str += "NPS4" : nps_caps_str += ", NPS4";
|
||||
}
|
||||
if (current_profile.memory_caps.nps_flags.nps8_cap) {
|
||||
(nps_caps_str.empty()) ? nps_caps_str += "NPS8" : nps_caps_str += ", NPS8";
|
||||
}
|
||||
}
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**profile_config.profiles[" << i << "]:\n"
|
||||
<< "\t\tprofile_type: " << profile_type_str
|
||||
<< "\n\t\tnum_partitions: " << current_profile.num_partitions
|
||||
<< "\n\t\tmemory_caps: " << nps_caps_str
|
||||
<< "\n\t\tcurrent_profile.num_resources: " << current_profile.num_resources
|
||||
<< std::endl;
|
||||
}
|
||||
for (auto j = 0; j < current_profile.num_resources; j++) {
|
||||
auto rp = profile_config.resource_profiles[resource_index];
|
||||
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\n\t\t\tprofile_index: " << current_profile.profile_index
|
||||
<< "\n\t\t\tresource_index: " << resource_index
|
||||
<< "\n\t\t\tprofile_config.resource_profiles[" << resource_index
|
||||
<< "].resource_type: "
|
||||
<< getResourceType(rp.resource_type)
|
||||
<< "\n\t\t\tprofile_config.resource_profiles[" << resource_index
|
||||
<< "].partition_resource: "
|
||||
<< rp.partition_resource
|
||||
<< "\n\t\t\tprofile_config.resource_profiles[" << resource_index
|
||||
<< "].num_partitions_share_resource: "
|
||||
<< rp.num_partitions_share_resource
|
||||
<< std::endl;
|
||||
}
|
||||
resource_index++;
|
||||
}
|
||||
}
|
||||
EXPECT_TRUE(ret == AMDSMI_STATUS_SUCCESS
|
||||
|| ret == AMDSMI_STATUS_NOT_SUPPORTED);
|
||||
if (ret == AMDSMI_STATUS_NOT_SUPPORTED) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << "amdsmi_get_gpu_accelerator_partition_profile_config: "
|
||||
<< "Not supported on this machine" << std::endl;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Run memory partition tests
|
||||
uint32_t current_num_devices = 0;
|
||||
smi_amdgpu_get_device_count(¤t_num_devices);
|
||||
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Total Num Devices: " << current_num_devices << std::endl;
|
||||
}
|
||||
// Leaving for debug purposes - uncomment to test a specific number of devices
|
||||
// uint32_t num_devices_to_test = promptNumDevicesToTest(current_num_devices);
|
||||
uint32_t num_devices_to_test = current_num_devices;
|
||||
for (uint32_t dv_ind = 0; dv_ind < num_devices_to_test; ++dv_ind) {
|
||||
bool wasSetSuccess = false;
|
||||
if (dv_ind != 0) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
PrintDeviceHeader(processor_handles_[dv_ind]);
|
||||
|
||||
// Standard checks to see if API is supported, before running full tests
|
||||
ret = amdsmi_get_gpu_memory_partition(
|
||||
processor_handles_[dv_ind], orig_memory_partition, k255Len);
|
||||
if (ret == AMDSMI_STATUS_NOT_SUPPORTED) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << ": "
|
||||
<< "Not supported on this machine" << std::endl;
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
CHK_ERR_ASRT(ret)
|
||||
}
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << std::endl << "\t**Current Memory Partition: "
|
||||
<< orig_memory_partition << std::endl;
|
||||
}
|
||||
|
||||
if ((orig_memory_partition == nullptr) ||
|
||||
(orig_memory_partition[0] == '\0')) {
|
||||
std::cout << "***System memory partition value is not defined or received"
|
||||
" unexpected data. Skip memory partition test." << std::endl;
|
||||
continue;
|
||||
}
|
||||
ASSERT_TRUE(ret == AMDSMI_STATUS_SUCCESS);
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
constexpr uint32_t k2Len = 2;
|
||||
char smallBuffer[k2Len];
|
||||
err = amdsmi_get_gpu_memory_partition(processor_handles_[dv_ind], smallBuffer, k2Len);
|
||||
uint32_t size = static_cast<uint32_t>(sizeof(smallBuffer)/sizeof(*smallBuffer));
|
||||
ASSERT_EQ(err, AMDSMI_STATUS_INSUFFICIENT_SIZE);
|
||||
ASSERT_EQ(k2Len, size);
|
||||
if (err == AMDSMI_STATUS_INSUFFICIENT_SIZE) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed AMDSMI_STATUS_INSUFFICIENT_SIZE was returned "
|
||||
<< "and size is 2, as requested." << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
err = amdsmi_get_gpu_memory_partition(processor_handles_[dv_ind], nullptr, k255Len);
|
||||
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
|
||||
|
||||
if (err == AMDSMI_STATUS_INVAL) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**amdsmi_get_gpu_memory_partition(processor_handles_[" << dv_ind << "], "
|
||||
<< "nullptr, 255): "
|
||||
<< "Confirmed AMDSMI_STATUS_INVAL was returned."
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
err = amdsmi_get_gpu_memory_partition_config(processor_handles_[dv_ind], nullptr);
|
||||
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
|
||||
|
||||
if (err == AMDSMI_STATUS_INVAL) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**amdsmi_get_gpu_memory_partition(processor_handles_[" << dv_ind
|
||||
<< "], nullptr): Confirmed AMDSMI_STATUS_INVAL was returned."
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
err = amdsmi_get_gpu_memory_partition(processor_handles_[dv_ind], orig_memory_partition, k0Len);
|
||||
ASSERT_TRUE(err == AMDSMI_STATUS_INVAL);
|
||||
if (err == AMDSMI_STATUS_INVAL) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**amdsmi_get_gpu_memory_partition(processor_handles_[" << dv_ind << "], "
|
||||
<< "orig_memory_partition, 0): "
|
||||
<< "Confirmed AMDSMI_STATUS_INVAL was returned."
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
amdsmi_memory_partition_config_t* null_memory_partition_config = nullptr;
|
||||
err = amdsmi_get_gpu_memory_partition_config(processor_handles_[dv_ind],
|
||||
null_memory_partition_config);
|
||||
ASSERT_TRUE((err == AMDSMI_STATUS_INVAL) ||
|
||||
(err == AMDSMI_STATUS_NOT_SUPPORTED));
|
||||
if (err == AMDSMI_STATUS_INVAL) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "amdsmi_get_gpu_memory_partition_config(processor_handles_[" << dv_ind << "], "
|
||||
<< "nullptr): "
|
||||
<< "Confirmed AMDSMI_STATUS_INVAL was returned."
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************/
|
||||
/* amdsmi_set_gpu_memory_partition(...) */
|
||||
/****************************************/
|
||||
// Verify api support checking functionality is working
|
||||
amdsmi_memory_partition_type_t null_memory_partition = {};
|
||||
err = amdsmi_set_gpu_memory_partition_mode(processor_handles_[dv_ind], null_memory_partition);
|
||||
std::cout << "\t**amdsmi_set_gpu_memory_partition(amdsmi_set_gpu_memory_partition_mode"
|
||||
<< "(processor_handles_[" << dv_ind << "], nullptr): "
|
||||
<< smi_amdgpu_get_status_string(err, false) << "\n";
|
||||
// Note: new_memory_partition is not set
|
||||
ASSERT_TRUE(err == AMDSMI_STATUS_INVAL);
|
||||
if (err == AMDSMI_STATUS_INVAL) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed AMDSMI_STATUS_INVAL was returned."
|
||||
<< std::endl;
|
||||
}
|
||||
} else if (err == AMDSMI_STATUS_NOT_SUPPORTED) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << ": "
|
||||
<< "amdsmi_set_gpu_memory_partition_mode not supported on this "
|
||||
<< "device\n\t (if amdsmi_get_gpu_memory_partition works, "
|
||||
<< "then likely need to set in bios)"
|
||||
<< std::endl;
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
DISPLAY_AMDSMI_ERR(err)
|
||||
}
|
||||
ASSERT_FALSE(err == AMDSMI_STATUS_NO_PERM);
|
||||
|
||||
// Verify api support checking functionality is working
|
||||
amdsmi_memory_partition_type_t new_memory_partition = AMDSMI_MEMORY_PARTITION_UNKNOWN;
|
||||
err = amdsmi_set_gpu_memory_partition_mode(processor_handles_[dv_ind], new_memory_partition);
|
||||
ASSERT_TRUE((err == AMDSMI_STATUS_INVAL) ||
|
||||
(err == AMDSMI_STATUS_NOT_SUPPORTED) ||
|
||||
(err == AMDSMI_STATUS_NO_PERM));
|
||||
if (err == AMDSMI_STATUS_INVAL) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed AMDSMI_STATUS_INVAL was returned."
|
||||
<< std::endl;
|
||||
} else if (err == AMDSMI_STATUS_NO_PERM) {
|
||||
DISPLAY_AMDSMI_ERR(err)
|
||||
// tests should not continue if err is a permission issue
|
||||
ASSERT_FALSE(err == AMDSMI_STATUS_NO_PERM);
|
||||
} else {
|
||||
DISPLAY_AMDSMI_ERR(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Re-run original get, so we can reset to later
|
||||
ret = amdsmi_get_gpu_memory_partition(processor_handles_[dv_ind],
|
||||
orig_memory_partition, k255Len);
|
||||
ASSERT_EQ(AMDSMI_STATUS_SUCCESS, ret);
|
||||
|
||||
for (int partition = static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS1);
|
||||
partition <= static_cast<int>(AMDSMI_MEMORY_PARTITION_NPS8);
|
||||
partition++) {
|
||||
ret_set = AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
wasSetSuccess = false;
|
||||
new_memory_partition = static_cast<amdsmi_memory_partition_type_t>(partition);
|
||||
if (new_memory_partition != AMDSMI_MEMORY_PARTITION_NPS1
|
||||
&& new_memory_partition != AMDSMI_MEMORY_PARTITION_NPS2
|
||||
&& new_memory_partition != AMDSMI_MEMORY_PARTITION_NPS4
|
||||
&& new_memory_partition != AMDSMI_MEMORY_PARTITION_NPS8) {
|
||||
continue; // skip unknown partition, this is already tested above ^
|
||||
}
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << std::endl;
|
||||
std::cout << "\t**"
|
||||
<< "======== TEST AMDSMI_MEMORY_PARTITION_"
|
||||
<< memoryPartitionString(new_memory_partition)
|
||||
<< " ===============" << std::endl;
|
||||
}
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Attempting to set memory partition to: "
|
||||
<< memoryPartitionString(new_memory_partition) << std::endl;
|
||||
}
|
||||
|
||||
auto ret_caps = amdsmi_get_gpu_memory_partition_config(processor_handles_[dv_ind],
|
||||
&current_memory_config);
|
||||
std::string memory_caps_str = "N/A";
|
||||
if (ret_caps == AMDSMI_STATUS_SUCCESS) {
|
||||
memory_caps_str.clear();
|
||||
if (current_memory_config.partition_caps.nps_flags.nps1_cap) {
|
||||
memory_caps_str += (memory_caps_str.empty() ? "NPS1" : ", NPS1");
|
||||
}
|
||||
if (current_memory_config.partition_caps.nps_flags.nps2_cap) {
|
||||
memory_caps_str += (memory_caps_str.empty() ? "NPS2" : ", NPS2");
|
||||
}
|
||||
if (current_memory_config.partition_caps.nps_flags.nps4_cap) {
|
||||
memory_caps_str += (memory_caps_str.empty() ? "NPS4" : ", NPS4");
|
||||
}
|
||||
if (current_memory_config.partition_caps.nps_flags.nps8_cap) {
|
||||
memory_caps_str += (memory_caps_str.empty() ? "NPS8" : ", NPS8");
|
||||
}
|
||||
}
|
||||
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "amdsmi_get_gpu_memory_partition_config(processor_handles_[" << dv_ind
|
||||
<< "], current_memory_config): "
|
||||
<< smi_amdgpu_get_status_string(ret_caps, false) << std::endl;
|
||||
std::cout << "\t**" << "Available Memory Partition Capabilities: "
|
||||
<< memory_caps_str << "\n"
|
||||
<< "\t**" << "current_memory_partition_mode: "
|
||||
<< memoryPartitionString(current_memory_config.mp_mode) << "\n"
|
||||
<< "\t**" << "num_numa_ranges: "
|
||||
<< current_memory_config.num_numa_ranges
|
||||
<< std::endl;
|
||||
}
|
||||
ASSERT_TRUE((ret_caps == AMDSMI_STATUS_NOT_SUPPORTED) ||
|
||||
(ret_caps == AMDSMI_STATUS_SUCCESS));
|
||||
|
||||
ret_set = amdsmi_set_gpu_memory_partition_mode(processor_handles_[dv_ind],
|
||||
new_memory_partition);
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << "amdsmi_set_gpu_memory_partition_mode(processor_handles_["
|
||||
<< dv_ind << "], " << memoryPartitionString(new_memory_partition) << "): "
|
||||
<< smi_amdgpu_get_status_string(ret_set, false) << "\n";
|
||||
}
|
||||
if (ret_set == AMDSMI_STATUS_NOT_SUPPORTED) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << ": "
|
||||
<< "Not supported on this machine" << std::endl;
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
ASSERT_TRUE((ret_set == AMDSMI_STATUS_SUCCESS)
|
||||
|| (ret_set == AMDSMI_STATUS_BUSY)
|
||||
|| (ret_set == AMDSMI_STATUS_AMDGPU_RESTART_ERR)
|
||||
|| (ret_set == AMDSMI_STATUS_INVAL)
|
||||
|| (ret_set == AMDSMI_STATUS_NOT_SUPPORTED));
|
||||
}
|
||||
|
||||
if (ret_set == AMDSMI_STATUS_SUCCESS) { // do not continue trying to reset
|
||||
wasSetSuccess = true;
|
||||
}
|
||||
|
||||
ret = amdsmi_get_gpu_memory_partition_config(processor_handles_[dv_ind],
|
||||
&current_memory_config);
|
||||
if (ret == AMDSMI_STATUS_NOT_SUPPORTED) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << "amdsmi_get_gpu_memory_partition_config(): "
|
||||
<< "Not supported on this machine" << std::endl;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
CHK_ERR_ASRT(ret)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Current memory partition: "
|
||||
<< memoryPartitionString(current_memory_config.mp_mode)
|
||||
<< std::endl;
|
||||
}
|
||||
if (wasSetSuccess) {
|
||||
ASSERT_EQ(AMDSMI_STATUS_SUCCESS, ret_set);
|
||||
ASSERT_STREQ(memoryPartitionString(new_memory_partition).c_str(),
|
||||
memoryPartitionString(current_memory_config.mp_mode).c_str());
|
||||
CHK_ERR_ASRT(ret_set)
|
||||
} else {
|
||||
ASSERT_NE(AMDSMI_STATUS_SUCCESS, ret_set);
|
||||
ASSERT_STRNE(memoryPartitionString(new_memory_partition).c_str(),
|
||||
memoryPartitionString(current_memory_config.mp_mode).c_str());
|
||||
}
|
||||
} // END MEMORY PARTITION FOR LOOP
|
||||
|
||||
/* TEST RETURN TO ORIGINAL MEMORY PARTITION SETTING */
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << std::endl;
|
||||
std::cout << "\t**"
|
||||
<< "=========== TEST RETURN TO ORIGINAL MEMORY PARTITION "
|
||||
<< "SETTING (" << orig_memory_partition
|
||||
<< ") ========" << std::endl;
|
||||
}
|
||||
|
||||
ret = amdsmi_get_gpu_memory_partition_config(processor_handles_[dv_ind],
|
||||
&current_memory_config);
|
||||
CHK_ERR_ASRT(ret)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "amdsmi_get_gpu_memory_partition_config(processor_handles_[" << dv_ind
|
||||
<< "], current_memory_config): "
|
||||
<< smi_amdgpu_get_status_string(ret, false) << std::endl;
|
||||
std::cout << "\t**"
|
||||
<< "Current memory partition: "
|
||||
<< memoryPartitionString(current_memory_config.mp_mode)
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
new_memory_partition
|
||||
= mapStringToRSMIMemoryPartitionTypes.at(orig_memory_partition);
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << "Returning memory partition to: "
|
||||
<< memoryPartitionString(new_memory_partition) << std::endl;
|
||||
}
|
||||
ret = amdsmi_set_gpu_memory_partition(processor_handles_[dv_ind], new_memory_partition);
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "amdsmi_set_gpu_memory_partition(processor_handles_[" << dv_ind
|
||||
<< "], " << orig_memory_partition << "): "
|
||||
<< smi_amdgpu_get_status_string(ret, false) << std::endl;
|
||||
}
|
||||
CHK_ERR_ASRT(ret)
|
||||
ret = amdsmi_get_gpu_memory_partition(processor_handles_[dv_ind],
|
||||
current_memory_partition, k255Len);
|
||||
CHK_ERR_ASRT(ret)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**" << "Attempted to set memory partition: "
|
||||
<< memoryPartitionString(new_memory_partition) << std::endl
|
||||
<< "\t**" << "Current memory partition: "
|
||||
<< current_memory_partition
|
||||
<< std::endl;
|
||||
}
|
||||
ASSERT_EQ(AMDSMI_STATUS_SUCCESS, ret);
|
||||
ASSERT_STREQ(orig_memory_partition, current_memory_partition);
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**"
|
||||
<< "Confirmed prior memory partition (" << orig_memory_partition
|
||||
<< ") is equal to current memory partition ("
|
||||
<< current_memory_partition << ")" << std::endl;
|
||||
}
|
||||
} // END DEVICE FOR LOOP
|
||||
|
||||
// Restore original compute partition settings (see orig_dev_config ^)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**=========================================================\n";
|
||||
std::cout << "\t**Restore Original Compute Partition Settings =============\n";
|
||||
std::cout << "\t**=========================================================\n";
|
||||
}
|
||||
initial_num_devices = num_monitor_devs();
|
||||
for (uint32_t dv_ind = 0; dv_ind < initial_num_devices; ++dv_ind) {
|
||||
if (dv_ind != 0) {
|
||||
std::cout << "\n";
|
||||
}
|
||||
PrintDeviceHeader(processor_handles_[dv_ind]);
|
||||
|
||||
AcceleratorProfileConfig original_profile_config = orig_dev_config[dv_ind];
|
||||
|
||||
// Return to original profile
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Device Index: " << dv_ind << std::endl
|
||||
<< "\t**======== Return to original AMDSMI_ACCELERATOR_PARTITION_"
|
||||
<< original_profile_config.original_profile_type_str
|
||||
<< " (profile_index: "
|
||||
<< (original_profile_config.original_profile_index == kMAX_UINT32
|
||||
? "N/A" : std::to_string(original_profile_config.original_profile_index))
|
||||
<< ")"
|
||||
<< " ===============" << std::endl;
|
||||
}
|
||||
auto ret_set = amdsmi_set_gpu_accelerator_partition_profile(
|
||||
processor_handles_[dv_ind],
|
||||
original_profile_config.original_profile_index);
|
||||
EXPECT_TRUE((ret_set == AMDSMI_STATUS_SETTING_UNAVAILABLE)
|
||||
|| (ret_set== AMDSMI_STATUS_NO_PERM)
|
||||
|| (ret_set == AMDSMI_STATUS_SUCCESS)
|
||||
|| ret_set == AMDSMI_STATUS_BUSY
|
||||
|| ret_set == AMDSMI_STATUS_NOT_SUPPORTED);
|
||||
amdsmi_accelerator_partition_profile_t profile = {};
|
||||
uint32_t partition_id[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
||||
auto ret_get = amdsmi_get_gpu_accelerator_partition_profile(processor_handles_[dv_ind],
|
||||
&profile, &partition_id[0]);
|
||||
if (ret_get == AMDSMI_STATUS_SUCCESS && ret_set == AMDSMI_STATUS_SUCCESS) {
|
||||
std::string profile_type_str = partition_types_map.at(profile.profile_type);
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**amdsmi_set_gpu_accelerator_partition_profile(processor_handles_["
|
||||
<< dv_ind << "],"
|
||||
<< "\n\t\t" << original_profile_config.original_profile_index
|
||||
<< " (AMDSMI_ACCELERATOR_PARTITION_"
|
||||
<< original_profile_config.original_profile_type_str
|
||||
<< "): "
|
||||
<< "\n\t\t" << smi_amdgpu_get_status_string(ret_set, false)
|
||||
<< "\n\t**amdsmi_get_gpu_accelerator_partition_profile(processor_handles_["
|
||||
<< dv_ind << "], &profile, &partition_id[0]):\n"
|
||||
<< "\t\t" << smi_amdgpu_get_status_string(ret_get, false)
|
||||
<< "\n\t**Current profile.profile_type: "
|
||||
<< profile_type_str
|
||||
<< "\n\t**profile.num_partitions: "
|
||||
<< (profile.num_partitions == kMAX_UINT32
|
||||
? "N/A" : std::to_string(profile.num_partitions))
|
||||
<< "\n\t**profile.profile_index: "
|
||||
<< (profile.profile_index == kMAX_UINT32
|
||||
? "N/A" : std::to_string(profile.profile_index))
|
||||
<< std::endl;
|
||||
}
|
||||
EXPECT_STREQ(partition_types_map.at(profile.profile_type).c_str(),
|
||||
original_profile_config.original_profile_type_str.c_str());
|
||||
EXPECT_EQ(profile.profile_type, original_profile_config.original_profile_type);
|
||||
EXPECT_EQ(profile.profile_index, original_profile_config.original_profile_index);
|
||||
} else {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Could not change or read profiles. "
|
||||
<< "Skipping return to original profile on this device."
|
||||
<< "\n\t**amdsmi_set_gpu_accelerator_partition_profile(): "
|
||||
<< smi_amdgpu_get_status_string(ret_set, false)
|
||||
<< "\n\t**amdsmi_get_gpu_accelerator_partition_profile(): "
|
||||
<< smi_amdgpu_get_status_string(ret_get, false)
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Εκτελέσιμο αρχείο
+51
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_MEMORYPARTITION_READ_WRITE_H_
|
||||
#define TESTS_AMD_SMI_TEST_FUNCTIONAL_MEMORYPARTITION_READ_WRITE_H_
|
||||
|
||||
#include "../test_base.h"
|
||||
|
||||
// Test case derived from TestBase; judging by its name it exercises reading
// and writing the memory partition setting (the definitions are not part of
// this header).
class TestMemoryPartitionReadWrite : public TestBase {
 public:
  // Constructs the test case.
  TestMemoryPartitionReadWrite();

  // Destroys the test case.
  virtual ~TestMemoryPartitionReadWrite();

  // Prepares the environment before the measurement.
  virtual void SetUp();

  // Performs the core measurement.
  virtual void Run();

  // Cleans up and retrieves the resources.
  virtual void Close();

  // Shows the results.
  virtual void DisplayResults() const;

  // Shows information about what this test does.
  virtual void DisplayTestInfo();
};
|
||||
|
||||
#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_MEMORYPARTITION_READ_WRITE_H_
|
||||
@@ -64,6 +64,8 @@
|
||||
#include "functional/version_read.h"
|
||||
#include "functional/mutual_exclusion.h"
|
||||
#include "functional/init_shutdown_refcount.h"
|
||||
#include "functional/memorypartition_read_write.h"
|
||||
#include "functional/computepartition_read_write.h"
|
||||
|
||||
static AMDSMITstGlobals *sRSMIGlvalues = nullptr;
|
||||
|
||||
@@ -250,8 +252,17 @@ TEST(amdsmitstReadOnly, TestMutualExclusion) {
|
||||
RunCustomTestEpilog(&tst);
|
||||
}
|
||||
*/
|
||||
// TODO: add TestComputePartitionReadWrite
|
||||
// TODO: add TestMemoryPartitionReadWrite
|
||||
|
||||
// Runs TestComputePartitionReadWrite through the generic test driver as part
// of the amdsmitstReadWrite suite.
TEST(amdsmitstReadWrite, TestComputePartitionReadWrite) {
  TestComputePartitionReadWrite compute_partition_test;
  RunGenericTest(&compute_partition_test);
}
|
||||
|
||||
// Runs TestMemoryPartitionReadWrite through the generic test driver as part
// of the amdsmitstReadWrite suite.
TEST(amdsmitstReadWrite, TestMemoryPartitionReadWrite) {
  TestMemoryPartitionReadWrite memory_partition_test;
  RunGenericTest(&memory_partition_test);
}
|
||||
|
||||
TEST(amdsmitstReadWrite, TestEvtNotifReadWrite) {
|
||||
TestEvtNotifReadWrite tst;
|
||||
RunGenericTest(&tst);
|
||||
|
||||
@@ -20,12 +20,14 @@
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
|
||||
#include "amd_smi/amdsmi.h"
|
||||
#include "amd_smi/impl/amd_smi_utils.h"
|
||||
#include "test_base.h"
|
||||
#include "test_common.h"
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
static const int kOutputLineLength = 80;
|
||||
static const char kLabelDelimiter[] = "####";
|
||||
@@ -136,8 +138,21 @@ void TestBase::SetUp(uint64_t init_flags) {
|
||||
void TestBase::PrintDeviceHeader(amdsmi_processor_handle dv_ind) {
|
||||
amdsmi_status_t err;
|
||||
uint16_t val_ui16;
|
||||
uint32_t val_ui32;
|
||||
amdsmi_asic_info_t info;
|
||||
|
||||
err = smi_amdgpu_get_device_count(&val_ui32);
|
||||
CHK_ERR_ASRT(err)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Total Devices: " << val_ui32 << std::endl;
|
||||
}
|
||||
|
||||
err = smi_amdgpu_get_device_index(dv_ind, &val_ui32);
|
||||
CHK_ERR_ASRT(err)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**AMD SMI Device index: " << val_ui32 << std::endl;
|
||||
}
|
||||
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Device handle: " << dv_ind << std::endl;
|
||||
}
|
||||
@@ -168,6 +183,15 @@ void TestBase::PrintDeviceHeader(amdsmi_processor_handle dv_ind) {
|
||||
}
|
||||
}
|
||||
|
||||
amdsmi_asic_info_t asic_info;
|
||||
err = amdsmi_get_gpu_asic_info(dv_ind, &asic_info);
|
||||
CHK_ERR_ASRT(err)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Market name: " << asic_info.market_name << std::endl;
|
||||
std::cout << "\t**ASIC serial: 0x" << std::hex << asic_info.asic_serial << std::endl;
|
||||
std::cout << "\t**Target GFX Version: gfx" << asic_info.target_graphics_version << std::endl;
|
||||
}
|
||||
|
||||
err = amdsmi_get_gpu_subsystem_id(dv_ind, &val_ui16);
|
||||
CHK_ERR_ASRT(err)
|
||||
IF_VERB(STANDARD) {
|
||||
@@ -234,3 +258,137 @@ void TestBase::set_description(std::string d) {
|
||||
}
|
||||
}
|
||||
|
||||
// Builds an AcceleratorProfileConfig snapshot for one device.
//
// device_index    - only used to label the verbose output.
// current_profile - supplies the "original" profile type and index.
// config          - supplies the number of profiles and, for each of the
//                   first config.num_profiles entries of config.profiles,
//                   its type and index.
// isVerbose       - when true, a summary of the snapshot is written to
//                   std::cout.
//
// Returns the populated AcceleratorProfileConfig.
TestBase::AcceleratorProfileConfig TestBase::getAvailableProfileConfigs(
    uint32_t device_index,
    amdsmi_accelerator_partition_profile_t current_profile,
    amdsmi_accelerator_partition_profile_config_t config,
    bool isVerbose) {
  // Name lookup that degrades to "N/A" for a partition type missing from the
  // table instead of throwing std::out_of_range the way map::at() does.
  const auto lookup =
      [](const std::map<amdsmi_accelerator_partition_type_t, std::string> &names,
         amdsmi_accelerator_partition_type_t type) -> std::string {
    const auto it = names.find(type);
    return it == names.end() ? std::string("N/A") : it->second;
  };

  AcceleratorProfileConfig profile_config = {};
  profile_config.number_of_profiles = config.num_profiles;
  profile_config.original_profile_type = current_profile.profile_type;
  profile_config.original_profile_index = current_profile.profile_index;
  profile_config.original_profile_type_str =
      lookup(partition_types_map, current_profile.profile_type);

  profile_config.available_profiles.reserve(config.num_profiles);
  profile_config.available_profile_str.reserve(config.num_profiles);
  profile_config.available_profile_indices.reserve(config.num_profiles);
  for (uint32_t i = 0; i < config.num_profiles; i++) {
    profile_config.available_profiles.push_back(config.profiles[i].profile_type);
    profile_config.available_profile_str.push_back(
        lookup(partition_types_map, config.profiles[i].profile_type));
    profile_config.available_profile_indices.push_back(config.profiles[i].profile_index);
  }

  // Everything below only produces console output.
  if (!isVerbose) {
    return profile_config;
  }

  // An index equal to the maximum uint32_t value is displayed as "N/A".
  const uint32_t kMAX_UINT32 = std::numeric_limits<uint32_t>::max();
  std::cout << "\t**[Device #" << device_index << "] Profile Configs: ";
  std::cout << "\n\t\t**Original Profile Index: "
            << (profile_config.original_profile_index == kMAX_UINT32 ?
                "N/A" : std::to_string(profile_config.original_profile_index))
            << "\n\t\t**Original Profile Type: "
            << profile_config.original_profile_type_str
            << "\n\t\t**Original profile: " << profile_config.original_profile_type
            << " (" << lookup(accelerator_types_map, profile_config.original_profile_type) << ")"
            << "\n\t\t**Number of Profiles: " << profile_config.number_of_profiles
            << "\n\t\t**Available_profiles: ";

  // One entry per profile: ", " between entries, newline after the last one.
  std::string available_profiles_str;
  for (uint32_t j = 0; j < profile_config.number_of_profiles; j++) {
    available_profiles_str += "\n\t\t\tProfile[profile_index: "
        + std::to_string(profile_config.available_profile_indices[j])
        + "]: " + profile_config.available_profile_str[j];
    available_profiles_str += (j + 1 >= profile_config.number_of_profiles) ? "\n" : ", ";
  }
  if (available_profiles_str.empty()) {
    available_profiles_str = "N/A\n";  // no profiles reported
  }
  std::cout << available_profiles_str;
  return profile_config;
}
|
||||
|
||||
// Blocks until the user confirms on standard input.
//
// The prompt is printed before every character read and each character is
// echoed back. Despite the prompt wording, only a newline ends the wait;
// end-of-file on std::cin ends it as well.
void TestBase::waitForUserInput() {
  bool keep_waiting = true;
  while (keep_waiting) {
    std::cout << "\n\t**Press any key to continue**" << std::endl;
    const int key = std::cin.get();
    if (key == EOF) {
      std::cout << "EOF detected. Exiting." << std::endl;
      keep_waiting = false;
    } else {
      const char typed = static_cast<char>(key);
      std::cout << "User entered: " << typed << std::endl;
      keep_waiting = (typed != '\n');
    }
  }
}
|
||||
|
||||
// Asks on standard output how many devices to test and reads the answer from
// standard input, one character at a time, up to the first newline.
//
// current_num_devices - upper bound (inclusive) for an acceptable answer.
//
// Returns the number entered. Returns 0 when end-of-file is reached, when no
// digit was entered, or when the number is larger than current_num_devices.
// Non-digit characters are reported and otherwise ignored.
uint32_t TestBase::promptNumDevicesToTest(uint32_t current_num_devices) {
  std::cout << "**How many devices would you like to test? (0 to skip): ";
  std::string devices_to_test;
  for (;;) {
    const int input = std::cin.get();
    if (input == EOF) {
      std::cout << "EOF detected. Exiting." << std::endl;
      return 0;
    }
    const char input_char = static_cast<char>(input);
    if (input_char == '\n') {
      break;
    }
    if (input_char >= '0' && input_char <= '9') {
      devices_to_test += input_char;
    } else {
      std::cout << "Invalid input. Please enter a number between 0 and "
                << current_num_devices << std::endl;
    }
  }

  // A bare Enter (or only non-digits) leaves nothing to parse; std::stoi
  // would throw std::invalid_argument on the empty string.
  if (devices_to_test.empty()) {
    std::cout << "Invalid input. Please enter a number between 0 and "
              << current_num_devices << std::endl;
    return 0;
  }

  // Accumulate in 64 bits and stop as soon as the bound is exceeded. The
  // running value never exceeds a uint32_t before the next multiply, so it
  // cannot overflow however many digits were typed.
  uint64_t requested = 0;
  for (const char digit : devices_to_test) {
    requested = requested * 10 + static_cast<uint64_t>(digit - '0');
    if (requested > current_num_devices) {
      std::cout << "Invalid input. Please enter a number between 0 and "
                << current_num_devices << std::endl;
      return 0;
    }
  }
  return static_cast<uint32_t>(requested);
}
|
||||
|
||||
// Translates an accelerator partition resource type into a short label.
// Values without a dedicated case yield "N/A".
std::string TestBase::getResourceType(amdsmi_accelerator_partition_resource_type_t resource_type) {
  switch (resource_type) {
    case AMDSMI_ACCELERATOR_XCC:
      return "XCC";
    case AMDSMI_ACCELERATOR_ENCODER:
      return "ENCODER";
    case AMDSMI_ACCELERATOR_DECODER:
      return "DECODER";
    case AMDSMI_ACCELERATOR_DMA:
      return "DMA";
    case AMDSMI_ACCELERATOR_JPEG:
      return "JPEG";
    case AMDSMI_ACCELERATOR_MAX:
      return "MAX";
    default:
      return "N/A";
  }
}
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "amd_smi/amdsmi.h"
|
||||
|
||||
// The max devices can be monitored
|
||||
@@ -98,6 +99,46 @@ class TestBase {
|
||||
return num_iterations_;
|
||||
}
|
||||
|
||||
// Short display label for each accelerator partition type.
// AMDSMI_ACCELERATOR_PARTITION_INVALID is shown as "N/A".
const std::map<amdsmi_accelerator_partition_type_t, std::string> partition_types_map = {
  { AMDSMI_ACCELERATOR_PARTITION_INVALID, "N/A" },
  { AMDSMI_ACCELERATOR_PARTITION_SPX, "SPX" },
  { AMDSMI_ACCELERATOR_PARTITION_DPX, "DPX" },
  { AMDSMI_ACCELERATOR_PARTITION_TPX, "TPX" },
  { AMDSMI_ACCELERATOR_PARTITION_QPX, "QPX" },
  { AMDSMI_ACCELERATOR_PARTITION_CPX, "CPX" },
  { AMDSMI_ACCELERATOR_PARTITION_MAX, "MAX" },
};
|
||||
|
||||
// Full enumerator name, as text, for each accelerator partition type.
const std::map<amdsmi_accelerator_partition_type_t, std::string> accelerator_types_map = {
  { AMDSMI_ACCELERATOR_PARTITION_INVALID, "AMDSMI_ACCELERATOR_PARTITION_INVALID" },
  { AMDSMI_ACCELERATOR_PARTITION_SPX, "AMDSMI_ACCELERATOR_PARTITION_SPX" },
  { AMDSMI_ACCELERATOR_PARTITION_DPX, "AMDSMI_ACCELERATOR_PARTITION_DPX" },
  { AMDSMI_ACCELERATOR_PARTITION_TPX, "AMDSMI_ACCELERATOR_PARTITION_TPX" },
  { AMDSMI_ACCELERATOR_PARTITION_QPX, "AMDSMI_ACCELERATOR_PARTITION_QPX" },
  { AMDSMI_ACCELERATOR_PARTITION_CPX, "AMDSMI_ACCELERATOR_PARTITION_CPX" },
  { AMDSMI_ACCELERATOR_PARTITION_MAX, "AMDSMI_ACCELERATOR_PARTITION_MAX" },
};
|
||||
|
||||
// Snapshot of a device's accelerator partition profile together with the
// profiles reported as available; filled in by getAvailableProfileConfigs().
struct AcceleratorProfileConfig {
  // Profile type the device had when the snapshot was taken.
  amdsmi_accelerator_partition_type_t original_profile_type;
  // Display label for original_profile_type.
  std::string original_profile_type_str;
  // Profile index the device had when the snapshot was taken.
  uint32_t original_profile_index;
  // Number of profiles reported in the profile config.
  uint32_t number_of_profiles;
  // The three vectors below hold one entry per reported profile, in order:
  // its type, the display label of that type, and its profile index.
  std::vector<amdsmi_accelerator_partition_type_t> available_profiles;
  std::vector<std::string> available_profile_str;
  std::vector<uint32_t> available_profile_indices;
};
|
||||
|
||||
// Builds an AcceleratorProfileConfig from current_profile and config.
// When isVerbose is true a summary, labelled with device_index, is printed
// to std::cout.
AcceleratorProfileConfig getAvailableProfileConfigs(uint32_t device_index,
                            amdsmi_accelerator_partition_profile_t current_profile,
                            amdsmi_accelerator_partition_profile_config_t config,
                            bool isVerbose);
// Blocks until a newline or end-of-file is read from std::cin.
void waitForUserInput();

// Prompts for a device count on std::cout and reads it from std::cin.
// Returns 0 on end-of-file or when the value exceeds current_num_devices.
uint32_t promptNumDevicesToTest(uint32_t current_num_devices);

// Returns a short label for resource_type ("N/A" for unhandled values).
std::string getResourceType(amdsmi_accelerator_partition_resource_type_t resource_type);
|
||||
|
||||
protected:
|
||||
void MakeHeaderStr(const char *inStr, std::string *outStr) const;
|
||||
void PrintDeviceHeader(amdsmi_processor_handle dv_ind);
|
||||
|
||||
@@ -121,6 +121,83 @@ class TestAmdSmiPythonInterface(unittest.TestCase):
|
||||
print()
|
||||
self.tearDown()
|
||||
|
||||
# amdsmi_get_vram_info should be supported on all ASICs
|
||||
@handle_exceptions
def test_get_vram_info(self):
    """Print the VRAM info reported by amdsmi_get_gpu_vram_info for every GPU.

    Requires between 1 and 32 processor handles. The type and vendor are
    shown by name; a value missing from the lookup tables is printed as-is
    instead of raising KeyError.
    """
    self.setUp()
    processors = amdsmi.amdsmi_get_processor_handles()
    self.assertGreaterEqual(len(processors), 1)
    self.assertLessEqual(len(processors), 32)

    # The tables do not depend on the processor, so build them once. Naming
    # every member here also fails fast if one is missing from the bindings.
    vram_types = {
        amdsmi.AmdSmiVramType.UNKNOWN: "UNKNOWN",
        amdsmi.AmdSmiVramType.HBM: "HBM",
        amdsmi.AmdSmiVramType.HBM2: "HBM2",
        amdsmi.AmdSmiVramType.HBM2E: "HBM2E",
        amdsmi.AmdSmiVramType.HBM3: "HBM3",
        amdsmi.AmdSmiVramType.DDR2: "DDR2",
        amdsmi.AmdSmiVramType.DDR3: "DDR3",
        amdsmi.AmdSmiVramType.DDR4: "DDR4",
        amdsmi.AmdSmiVramType.GDDR1: "GDDR1",
        amdsmi.AmdSmiVramType.GDDR2: "GDDR2",
        amdsmi.AmdSmiVramType.GDDR3: "GDDR3",
        amdsmi.AmdSmiVramType.GDDR4: "GDDR4",
        amdsmi.AmdSmiVramType.GDDR5: "GDDR5",
        amdsmi.AmdSmiVramType.GDDR6: "GDDR6",
        amdsmi.AmdSmiVramType.GDDR7: "GDDR7",
        amdsmi.AmdSmiVramType.MAX: "MAX",
    }
    vram_vendors = {
        amdsmi.AmdSmiVramVendor.SAMSUNG: "SAMSUNG",
        amdsmi.AmdSmiVramVendor.INFINEON: "INFINEON",
        amdsmi.AmdSmiVramVendor.ELPIDA: "ELPIDA",
        amdsmi.AmdSmiVramVendor.ETRON: "ETRON",
        amdsmi.AmdSmiVramVendor.NANYA: "NANYA",
        amdsmi.AmdSmiVramVendor.HYNIX: "HYNIX",
        amdsmi.AmdSmiVramVendor.MOSEL: "MOSEL",
        amdsmi.AmdSmiVramVendor.WINBOND: "WINBOND",
        amdsmi.AmdSmiVramVendor.ESMT: "ESMT",
        amdsmi.AmdSmiVramVendor.MICRON: "MICRON",
        amdsmi.AmdSmiVramVendor.UNKNOWN: "UNKNOWN",
    }

    for i, processor in enumerate(processors):
        bdf = amdsmi.amdsmi_get_gpu_device_bdf(processor)
        print("\n\n###Test Processor {}, bdf: {}".format(i, bdf))
        print("\n###Test amdsmi_get_gpu_vram_info \n")

        vram_info = amdsmi.amdsmi_get_gpu_vram_info(processor)
        vram_type = vram_info['vram_type']
        vram_vendor = vram_info['vram_vendor']
        print(" vram_info['vram_type'] is: {}".format(
            vram_types.get(vram_type, vram_type)))
        print(" vram_info['vram_vendor'] is: {}".format(
            vram_vendors.get(vram_vendor, vram_vendor)))
        print(" vram_info['vram_size'] is: {} MB".format(
            vram_info['vram_size']))
        print(" vram_info['vram_bit_width'] is: {}".format(
            vram_info['vram_bit_width']))
        print(" vram_info['vram_max_bandwidth'] is: {} GB/s".format(
            vram_info['vram_max_bandwidth']))
    print()
    self.tearDown()
|
||||
|
||||
# amdsmi_get_gpu_xcd_counter should be supported on all ASICs
|
||||
@handle_exceptions
def test_get_xcd_counter(self):
    """Print the value returned by amdsmi_get_gpu_xcd_counter for every GPU.

    Requires between 1 and 32 processor handles.
    """
    self.setUp()
    handles = amdsmi.amdsmi_get_processor_handles()
    handle_count = len(handles)
    self.assertGreaterEqual(handle_count, 1)
    self.assertLessEqual(handle_count, 32)
    for index, handle in enumerate(handles):
        bdf = amdsmi.amdsmi_get_gpu_device_bdf(handle)
        print("\n\n###Test Processor {}, bdf: {}".format(index, bdf))
        print("\n###Test amdsmi_get_gpu_xcd_counter \n")

        counter = amdsmi.amdsmi_get_gpu_xcd_counter(handle)
        print(" xcd_counter['counter'] is: {}".format(counter))
    print()
    self.tearDown()
|
||||
|
||||
# amdsmi_get_gpu_bad_page_info is not supported in Navi2x, Navi3x
|
||||
@handle_exceptions
|
||||
def test_bad_page_info(self):
|
||||
@@ -863,6 +940,44 @@ class TestAmdSmiPythonInterface(unittest.TestCase):
|
||||
accelerator_partition = amdsmi.amdsmi_get_gpu_accelerator_partition_profile(processors[i])
|
||||
print(" Current partition id: {}".format(
|
||||
accelerator_partition['partition_id']))
|
||||
print(" Profile_type: {}".format(
|
||||
accelerator_partition['partition_profile']['profile_type']))
|
||||
print(" profile_index: {}".format(
|
||||
accelerator_partition['partition_profile']['profile_index']))
|
||||
print(" memory_caps: {}".format(
|
||||
accelerator_partition['partition_profile']['memory_caps']))
|
||||
print(" num_resources: {}".format(
|
||||
accelerator_partition['partition_profile']['num_resources']))
|
||||
print()
|
||||
self.tearDown()
|
||||
|
||||
# Requires sudo (to see full resource/config detail).
|
||||
# Should only be supported on MI300+ ASICs
|
||||
@handle_exceptions
def test_accelerator_partition_profile_config(self):
    """Print the accelerator partition profile config of every GPU.

    Requires between 1 and 32 processor handles. For each one the summary
    fields are printed, then every profile, then the first
    ``num_resources`` resources of that profile.
    """
    self.setUp()
    handles = amdsmi.amdsmi_get_processor_handles()
    handle_count = len(handles)
    self.assertGreaterEqual(handle_count, 1)
    self.assertLessEqual(handle_count, 32)
    for index, handle in enumerate(handles):
        bdf = amdsmi.amdsmi_get_gpu_device_bdf(handle)
        print("\n\n###Test Processor {}, bdf: {}".format(index, bdf))
        print("\n###Test amdsmi_get_gpu_accelerator_partition_profile_config \n")
        cfg = amdsmi.amdsmi_get_gpu_accelerator_partition_profile_config(handle)
        print(" num_profiles: {}".format(cfg['num_profiles']))
        print(" num_resource_profiles: {}".format(cfg['num_resource_profiles']))
        print(" default_profile_index: {}".format(cfg['default_profile_index']))
        for profile in cfg['profiles']:
            print("\t\t profile_type: {}".format(profile['profile_type']))
            print("\t\t num_partitions: {}".format(profile['num_partitions']))
            print("\t\t profile_index: {}".format(profile['profile_index']))
            print("\t\t num_resources: {}".format(profile['num_resources']))
            # Index explicitly so only the first num_resources entries are read.
            for slot in range(profile['num_resources']):
                resource = profile['resources'][slot]
                print("\t\t\t profile_index: {}".format(resource['profile_index']))
                print("\t\t\t resource_type: {}".format(resource['resource_type']))
                print("\t\t\t partition_resource: {}".format(resource['partition_resource']))
                print("\t\t\t num_partitions_share_resource: {}".format(
                    resource['num_partitions_share_resource']))
    print()
    self.tearDown()
|
||||
|
||||
|
||||
Αναφορά σε νέο ζήτημα
Block a user