[SWDEV-499029] Fix unable to change memory partition modes
Changes:
* [API] Removed checking board name, fixes for other MI ASICs
* [API] Fixed unable to restart AMD GPU, libdrm blocked
doing this operation
* [API] Added ability to unload/reload libdrm
from within AMD SMI APIs
* [CLI] Increased progress bar to change memory partition modes
to 140 seconds, since driver reload is variable per system
Change-Id: I52f227f2ab850c4a6332ff3ecdc899903b1080f1
Signed-off-by: Charis Poag <Charis.Poag@amd.com>
Этот коммит содержится в:
@@ -3978,18 +3978,20 @@ class AMDSMICommands():
|
||||
raise ValueError(f"Unable to set compute partition to {args.compute_partition} on {gpu_string}") from e
|
||||
self.logger.store_output(args.gpu, 'computepartition', f"Successfully set compute partition to {args.compute_partition}")
|
||||
if args.memory_partition:
|
||||
lock = multiprocessing.Lock()
|
||||
lock.acquire()
|
||||
####################################################################
|
||||
# Get current and available memory partition modes #
|
||||
# Info used if AMDSMI_STATUS_INVAL is caught & to set progress bar #
|
||||
####################################################################
|
||||
try:
|
||||
memory_partition = amdsmi_interface.amdsmi_get_gpu_memory_partition(gpu) # this info likely actually comes from different apis than used here
|
||||
memory_partition = amdsmi_interface.amdsmi_get_gpu_memory_partition(args.gpu) # this info likely actually comes from different apis than used here
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
memory_partition = "N/A"
|
||||
logging.debug("Failed to get current memory partition for GPU %s | %s", gpu_id, e.get_error_info())
|
||||
try:
|
||||
mem_caps_str = "N/A"
|
||||
partition_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile(gpu)
|
||||
partition_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile(args.gpu)
|
||||
temp_mem_caps = partition_dict['partition_profile']['memory_caps']
|
||||
mem_caps = temp_mem_caps.nps_cap_mask
|
||||
if temp_mem_caps.amdsmi_nps_flags_t == None:
|
||||
@@ -4032,48 +4034,92 @@ class AMDSMICommands():
|
||||
# 2) Requested mode is a valid mode to set
|
||||
# 3) Current is not already the requested mode
|
||||
# otherwise function will return fast
|
||||
else:
|
||||
showProgressBar = False
|
||||
|
||||
threads = []
|
||||
kTimeWait = 40
|
||||
k140secs = 140
|
||||
string_out = f"Updating memory partition for gpu {gpu_id}"
|
||||
timesToRetryRestartErr = 1
|
||||
|
||||
self.helpers.increment_set_count()
|
||||
set_count = self.helpers.get_set_count()
|
||||
if set_count == 1: # only show reload warning on 1st set
|
||||
self.helpers.confirm_changing_memory_partition_gpu_reload_warning()
|
||||
memory_partition = amdsmi_interface.AmdSmiMemoryPartitionType[args.memory_partition]
|
||||
try:
|
||||
if set_count == 1 and showProgressBar: # only show reload warning on 1st set
|
||||
string_out = f"Updating memory partition for gpu {gpu_id}"
|
||||
t1 = multiprocessing.Process(target=self.helpers.showProgressbar,
|
||||
args=(string_out, kTimeWait,))
|
||||
threads.append(t1)
|
||||
t1.start()
|
||||
amdsmi_interface.amdsmi_set_gpu_memory_partition(args.gpu, memory_partition)
|
||||
for thread in threads:
|
||||
thread.terminate()
|
||||
thread.join()
|
||||
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
f = open(os.devnull, 'w') #redirect to /dev/null (crossplatform)
|
||||
print("\n\n", end='\r', flush=True, file=f)
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
thread.terminate()
|
||||
while timesToRetryRestartErr >= 0:
|
||||
timesToRetryRestartErr -= 1
|
||||
try:
|
||||
if showProgressBar: # only show reload warning on 1st set
|
||||
t1 = multiprocessing.Process(target=self.helpers.showProgressbar,
|
||||
args=(string_out, k140secs,))
|
||||
threads.append(t1)
|
||||
t1.start()
|
||||
memory_partition = amdsmi_interface.AmdSmiMemoryPartitionType[args.memory_partition]
|
||||
amdsmi_interface.amdsmi_set_gpu_memory_partition(args.gpu, memory_partition)
|
||||
for thread in threads:
|
||||
thread.terminate()
|
||||
print("")
|
||||
break # successful case
|
||||
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_INVAL:
|
||||
print(f"[amdsmi_wrapper.AMDSMI_STATUS_INVAL] Unable to set memory partition to {args.memory_partition} on {gpu_string}")
|
||||
print(f"Valid Memory partition Modes: {mem_caps_str}\n")
|
||||
# fall through for value error
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
f = open(os.devnull, 'w') #redirect to /dev/null (crossplatform)
|
||||
print("\n\n", end='\r', flush=True, file=f)
|
||||
for thread in threads:
|
||||
thread.terminate()
|
||||
|
||||
f = open(os.devnull, 'w') #redirect to /dev/null (crossplatform)
|
||||
print("\n\n", end='\r', flush=True, file=f)
|
||||
raise ValueError(f"Unable to set memory partition to {args.memory_partition} on {gpu_string}") from e
|
||||
except Exception as e:
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
thread.terminate()
|
||||
raise ValueError(f"Generic error found | Unable to set memory partition to {args.memory_partition} on {gpu_string}") from e
|
||||
self.logger.store_output(args.gpu, 'memorypartition', f"Successfully set memory partition to {args.memory_partition}")
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_INVAL:
|
||||
out = f"[AMDSMI_STATUS_INVAL] Unable to set memory partition to {args.memory_partition} on {gpu_string}"
|
||||
print(f"Valid Memory partition Modes: {mem_caps_str}\n")
|
||||
self.logger.store_output(args.gpu, 'memory_partition', out)
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_ouput()
|
||||
lock.release()
|
||||
return
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_SUPPORTED:
|
||||
out = f"[AMDSMI_STATUS_NOT_SUPPORTED] Device does not support setting memory partition to {args.memory_partition} on {gpu_string}"
|
||||
self.logger.store_output(args.gpu, 'memory_partition', out)
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_ouput()
|
||||
lock.release()
|
||||
return
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_AMDGPU_RESTART_ERR:
|
||||
# Try again on a failure -> work around for not being able to close libdrm
|
||||
string_out = f"Trying again - Updating memory partition for gpu {gpu_id}"
|
||||
for thread in threads:
|
||||
thread.terminate()
|
||||
thread.join()
|
||||
if timesToRetryRestartErr < 0:
|
||||
out = f"[AMDSMI_STATUS_AMDGPU_RESTART_ERR] Could not successfully restart driver after applying {args.memory_partition} on {gpu_string}"
|
||||
self.logger.store_output(args.gpu, 'memory_partition', out)
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_ouput()
|
||||
return
|
||||
continue
|
||||
|
||||
f = open(os.devnull, 'w') #redirect to /dev/null (crossplatform)
|
||||
print("\n\n", end='\r', flush=True, file=f)
|
||||
out = f"Unable to set memory partition to {args.memory_partition} on {gpu_string}"
|
||||
print(out)
|
||||
self.logger.store_output(args.gpu, 'memorypartition', out)
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_ouput()
|
||||
lock.release()
|
||||
return
|
||||
except Exception as e:
|
||||
for thread in threads:
|
||||
thread.terminate()
|
||||
out = f"Generic error found | Unable to set memory partition to {args.memory_partition} on {gpu_string}"
|
||||
print(out)
|
||||
lock.release()
|
||||
raise ValueError(f"Generic error found | Unable to set memory partition to {args.memory_partition} on {gpu_string}") from e
|
||||
self.logger.store_output(args.gpu, 'memory_partition', f"Successfully set memory partition to {args.memory_partition}")
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_ouput()
|
||||
lock.release()
|
||||
return
|
||||
if isinstance(args.power_cap, int):
|
||||
try:
|
||||
power_cap_info = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu)
|
||||
|
||||
@@ -755,7 +755,7 @@ class AMDSMIHelpers():
|
||||
****** WARNING ******\n
|
||||
Setting Dynamic Memory (NPS) partition modes require users to quit all GPU workloads.
|
||||
AMD SMI will then attempt to change memory (NPS) partition mode.
|
||||
Upon a successful set, AMD SMI will then initiate an action to restart amdgpu driver.
|
||||
Upon a successful set, AMD SMI will then initiate an action to restart AMD GPU driver.
|
||||
This action will change all GPU's in the hive to the requested memory (NPS) partition mode.
|
||||
|
||||
Please use this utility with caution.
|
||||
|
||||
@@ -49,6 +49,7 @@ class AMDSmiLibraryLoader {
|
||||
private:
|
||||
void* libHandler_;
|
||||
std::mutex library_mutex_;
|
||||
bool library_loaded_ = false;
|
||||
};
|
||||
|
||||
template<typename T> amdsmi_status_t AMDSmiLibraryLoader::load_symbol(
|
||||
|
||||
@@ -57,6 +57,11 @@ class AMDSmiSystem {
|
||||
amdsmi_status_t get_cpu_family(uint32_t *cpu_family);
|
||||
|
||||
amdsmi_status_t get_cpu_model(uint32_t *cpu_model);
|
||||
|
||||
amdsmi_status_t clean_up_drm() { return drm_.cleanup();}
|
||||
|
||||
amdsmi_status_t init_drm() { return drm_.init();}
|
||||
|
||||
private:
|
||||
AMDSmiSystem() : init_flag_(AMDSMI_INIT_AMD_GPUS) {}
|
||||
|
||||
|
||||
@@ -5703,6 +5703,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind,
|
||||
LOG_TRACE(ss);
|
||||
REQUIRE_ROOT_ACCESS
|
||||
DEVICE_MUTEX
|
||||
const int k1000_MS_WAIT = 1000;
|
||||
const uint32_t kMaxBoardLength = 128;
|
||||
bool isCorrectDevice = false;
|
||||
char boardName[kMaxBoardLength];
|
||||
@@ -5716,32 +5717,6 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind,
|
||||
char current_memory_mode[kMaxCurrentMemoryMode];
|
||||
current_memory_mode[0] = '\0';
|
||||
|
||||
// rsmi_dev_memory_partition_set is only available for for discrete variant,
|
||||
// others are required to update through bios settings
|
||||
rsmi_dev_name_get(dv_ind, boardName, static_cast<size_t>(kMaxBoardLength));
|
||||
std::string myBoardName = boardName;
|
||||
if (!myBoardName.empty()) {
|
||||
std::transform(myBoardName.begin(), myBoardName.end(), myBoardName.begin(),
|
||||
::tolower);
|
||||
if (myBoardName.find("mi") != std::string::npos &&
|
||||
myBoardName.find("00x") != std::string::npos) {
|
||||
isCorrectDevice = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!isCorrectDevice) {
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | ======= end ======= "
|
||||
<< " | Fail "
|
||||
<< " | Device #: " << dv_ind
|
||||
<< " | Type: "
|
||||
<< amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition)
|
||||
<< " | Cause: device board name does not support this action"
|
||||
<< " | Returning = "
|
||||
<< getRSMIStatusString(RSMI_STATUS_NOT_SUPPORTED, false);
|
||||
LOG_ERROR(ss);
|
||||
return RSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
// Is the current mode already what user requested?
|
||||
switch (memory_partition) {
|
||||
@@ -5877,6 +5852,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind,
|
||||
<< " | Returning = "
|
||||
<< getRSMIStatusString(restartRet, false);
|
||||
LOG_TRACE(ss);
|
||||
|
||||
if (restartRet != RSMI_STATUS_SUCCESS) {
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | ======= end ======= "
|
||||
@@ -5894,10 +5870,10 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind,
|
||||
std::string current_memory_mode_str = "unknown";
|
||||
rsmi_status_t can_read_sysfs_again = RSMI_STATUS_AMDGPU_RESTART_ERR;
|
||||
int maxWaitSeconds = 10;
|
||||
const int k1000_MS_WAIT = 1000;
|
||||
// wait until we can read SYSFS again
|
||||
if (restartRet == RSMI_STATUS_SUCCESS) {
|
||||
while (current_memory_mode_str != user_requested_memory_partition) {
|
||||
while ((current_memory_mode_str != user_requested_memory_partition)
|
||||
&& maxWaitSeconds > 0) {
|
||||
maxWaitSeconds -= 1;
|
||||
can_read_sysfs_again =
|
||||
rsmi_dev_memory_partition_get(dv_ind, current_memory_mode, kMaxCurrentMemoryMode);
|
||||
@@ -5913,6 +5889,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind,
|
||||
<< " | Data (user requested mode): " << user_requested_memory_partition
|
||||
<< " | Current Memory Partition Mode: " << current_memory_mode_str
|
||||
<< " | Available Memory Partition Modes: " << memory_capabilities_str
|
||||
<< " | maxWaitSeconds: " << maxWaitSeconds
|
||||
<< " | total wait time (sec): " << (10 - maxWaitSeconds)
|
||||
<< " | Returning = "
|
||||
<< getRSMIStatusString(can_read_sysfs_again, false);
|
||||
|
||||
@@ -1477,38 +1477,56 @@ rsmi_status_t Device::restartAMDGpuDriver(void) {
|
||||
bool restartInProgress = true;
|
||||
bool isRestartInProgress = true;
|
||||
bool isAMDGPUModuleLive = false;
|
||||
bool restartGDM = false;
|
||||
std::string captureRestartErr;
|
||||
const int kTimeToWaitForDriverMSec = 1000;
|
||||
|
||||
// sudo systemctl is-active gdm
|
||||
// we do not care about the success of checking if gdm is active
|
||||
std::tie(success, out) = executeCommand("systemctl is-active gdm");
|
||||
(out == "active") ? (restartSuccessful &= success) :
|
||||
(restartSuccessful = true);
|
||||
std::tie(success, out) = executeCommand("systemctl is-active gdm", true);
|
||||
(out == "active") ? (restartGDM = true) : (restartGDM = false);
|
||||
ss << __PRETTY_FUNCTION__ << " | systemctl is-active gdm: out = "
|
||||
<< out << "; success = " << (success ? "True" : "False");
|
||||
LOG_INFO(ss);
|
||||
|
||||
// if gdm is active -> sudo systemctl stop gdm
|
||||
// TODO: are are there other display manager's we need to take into account?
|
||||
// Search GNOME_Display_Manager on wikipedia
|
||||
if (success && (out == "active")) {
|
||||
// TODO(AMD_SMI_team): are are there other display manager's we need to take into account?
|
||||
// see https://help.gnome.org/admin/gdm/stable/overview.html.en_GB
|
||||
if (success && (out == "active") && (restartGDM)) {
|
||||
wasGdmServiceActive = true;
|
||||
std::tie(success, out) = executeCommand("systemctl stop gdm&", false);
|
||||
restartSuccessful &= success;
|
||||
std::tie(success, out) = executeCommand("systemctl stop gdm&", true);
|
||||
ss << __PRETTY_FUNCTION__ << " | systemctl stop gdm&: out = "
|
||||
<< out << "; success = " << (success ? "True" : "False");
|
||||
LOG_INFO(ss);
|
||||
} else {
|
||||
success = true; // ignore failures to restart gdm
|
||||
}
|
||||
|
||||
ss << __PRETTY_FUNCTION__ << " | B4 modprobing anything!!! out = "
|
||||
<< out << "; success = " << (success ? "True" : "False")
|
||||
<< "; restartSuccessful = " << (restartSuccessful ? "True" : "False")
|
||||
<< "; captureRestartErr = " << captureRestartErr;
|
||||
LOG_INFO(ss);
|
||||
|
||||
// sudo modprobe -r amdgpu
|
||||
// sudo modprobe amdgpu
|
||||
std::tie(success, out) =
|
||||
executeCommand("modprobe -r amdgpu && modprobe amdgpu&", true);
|
||||
std::tie(success, out) = executeCommand(
|
||||
"modprobe -r -v amdgpu >/dev/null 2>&1 && modprobe -v amdgpu >/dev/null 2>&1", true);
|
||||
restartSuccessful &= success;
|
||||
captureRestartErr = out;
|
||||
|
||||
if (success) {
|
||||
restartSuccessful = false;
|
||||
}
|
||||
ss << __PRETTY_FUNCTION__ << " | modprobe -r -v amdgpu && modprobe -v amdgpu: out = "
|
||||
<< out << "; success = " << (success ? "True" : "False")
|
||||
<< "; restartSuccessful = " << (restartSuccessful ? "True" : "False")
|
||||
<< "; captureRestartErr = " << captureRestartErr;
|
||||
LOG_INFO(ss);
|
||||
|
||||
// if gdm was active -> sudo systemctl start gdm
|
||||
if (wasGdmServiceActive) {
|
||||
std::tie(success, out) = executeCommand("systemctl start gdm&", false);
|
||||
restartSuccessful &= success;
|
||||
// We don't care if successful or not, just try to restart as a courtesy
|
||||
if (wasGdmServiceActive && restartGDM) {
|
||||
std::tie(success, out) = executeCommand("systemctl start gdm&", true);
|
||||
ss << __PRETTY_FUNCTION__ << " | systemctl start gdm&: out = "
|
||||
<< out << "; success = " << (success ? "True" : "False");
|
||||
LOG_INFO(ss);
|
||||
}
|
||||
|
||||
// Return early if there was an issue restarting amdgpu
|
||||
@@ -1522,7 +1540,6 @@ rsmi_status_t Device::restartAMDGpuDriver(void) {
|
||||
// wait for amdgpu module to come back up
|
||||
rsmi_status_t status = Device::isRestartInProgress(&isRestartInProgress,
|
||||
&isAMDGPUModuleLive);
|
||||
const int kTimeToWaitForDriverMSec = 1000;
|
||||
int maxLoops = 10; // wait a max of 10 sec
|
||||
while (status != RSMI_STATUS_SUCCESS) {
|
||||
maxLoops -= 1;
|
||||
@@ -1553,7 +1570,7 @@ rsmi_status_t Device::isRestartInProgress(bool *isRestartInProgress,
|
||||
// wait for amdgpu module to come back up
|
||||
std::tie(success, out) = executeCommand("cat /sys/module/amdgpu/initstate", true);
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | success = " << success
|
||||
<< " | success = " << (success ? "True" : "False")
|
||||
<< " | out = " << out;
|
||||
LOG_DEBUG(ss);
|
||||
if ((success == true) && (!out.empty())) {
|
||||
@@ -1564,6 +1581,11 @@ rsmi_status_t Device::isRestartInProgress(bool *isRestartInProgress,
|
||||
}
|
||||
*isRestartInProgress = deviceRestartInProgress;
|
||||
*isAMDGPUModuleLive = isSystemAMDGPUModuleLive;
|
||||
ss << __PRETTY_FUNCTION__
|
||||
<< " | *isRestartInProgress = " << (*isRestartInProgress ? "True":"False")
|
||||
<< " | *isAMDGPUModuleLive = " << (*isAMDGPUModuleLive ? "True":"False")
|
||||
<< " | out = " << out;
|
||||
LOG_DEBUG(ss);
|
||||
|
||||
return ((*isAMDGPUModuleLive && !*isRestartInProgress) ? RSMI_STATUS_SUCCESS :
|
||||
RSMI_STATUS_AMDGPU_RESTART_ERR);
|
||||
|
||||
@@ -53,6 +53,8 @@
|
||||
#include "amd_smi/impl/amd_smi_processor.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
|
||||
// a global instance of std::mutex to protect data passed during threads
|
||||
std::mutex myMutex;
|
||||
static bool initialized_lib = false;
|
||||
|
||||
#define SIZE 10
|
||||
@@ -1426,8 +1428,63 @@ amdsmi_status_t
|
||||
amdsmi_set_gpu_memory_partition(amdsmi_processor_handle processor_handle,
|
||||
amdsmi_memory_partition_type_t memory_partition) {
|
||||
AMDSMI_CHECK_INIT();
|
||||
return rsmi_wrapper(rsmi_dev_memory_partition_set, processor_handle,
|
||||
std::ostringstream ss;
|
||||
std::lock_guard<std::mutex> g(myMutex);
|
||||
// open libdrm connections prevents the ability to unload driver
|
||||
amd::smi::AMDSmiSystem::getInstance().clean_up_drm();
|
||||
ss << __PRETTY_FUNCTION__ << " | \n"
|
||||
<< "***********************************\n"
|
||||
<< "* Cleaned up - clean_up_drm() *\n"
|
||||
<< "***********************************\n";
|
||||
LOG_INFO(ss);
|
||||
amdsmi_status_t ret = rsmi_wrapper(rsmi_dev_memory_partition_set, processor_handle,
|
||||
static_cast<rsmi_memory_partition_type_t>(memory_partition));
|
||||
if (ret == AMDSMI_STATUS_SUCCESS) {
|
||||
const uint32_t k256 = 256;
|
||||
char current_partition[k256];
|
||||
std::string current_partition_str = "UNKNOWN";
|
||||
std::string req_user_partition;
|
||||
amdsmi_status_t ret_get = rsmi_wrapper(rsmi_dev_memory_partition_get, processor_handle,
|
||||
current_partition, k256);
|
||||
if (ret_get == AMDSMI_STATUS_SUCCESS) {
|
||||
current_partition_str.clear();
|
||||
current_partition_str = current_partition;
|
||||
}
|
||||
switch (memory_partition) {
|
||||
case AMDSMI_MEMORY_PARTITION_NPS1:
|
||||
req_user_partition = "NPS1";
|
||||
break;
|
||||
case AMDSMI_MEMORY_PARTITION_NPS2:
|
||||
req_user_partition = "NPS2";
|
||||
break;
|
||||
case AMDSMI_MEMORY_PARTITION_NPS4:
|
||||
req_user_partition = "NPS4";
|
||||
break;
|
||||
case AMDSMI_MEMORY_PARTITION_NPS8:
|
||||
req_user_partition = "NPS8";
|
||||
break;
|
||||
default:
|
||||
req_user_partition = "UNKNOWN";
|
||||
break;
|
||||
}
|
||||
if (req_user_partition == current_partition_str) {
|
||||
amd::smi::AMDSmiSystem::getInstance().init_drm();
|
||||
ss << __PRETTY_FUNCTION__ << " | \n"
|
||||
<< "***********************************\n"
|
||||
<< "* Initialized libdrm - init_drm() *\n"
|
||||
<< "***********************************\n";
|
||||
LOG_INFO(ss);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(amdsmi_team): issue completely closing -> reopening libdrm on 1st try (workaround above)
|
||||
// amd::smi::AMDSmiSystem::getInstance().init_drm();
|
||||
// ss << __PRETTY_FUNCTION__ << " | \n"
|
||||
// << "***********************************\n"
|
||||
// << "* Initialized libdrm - init_drm() *\n"
|
||||
// << "***********************************\n";
|
||||
// LOG_INFO(ss);
|
||||
return ret;
|
||||
}
|
||||
|
||||
amdsmi_status_t
|
||||
|
||||
@@ -33,18 +33,24 @@ amdsmi_status_t AMDSmiLibraryLoader::load(const char* filename) {
|
||||
if (filename == nullptr) {
|
||||
return AMDSMI_STATUS_FAIL_LOAD_MODULE;
|
||||
}
|
||||
if (libHandler_) {
|
||||
if (libHandler_ || library_loaded_) {
|
||||
unload();
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> guard(library_mutex_);
|
||||
libHandler_ = dlopen(filename, RTLD_LAZY);
|
||||
if (!libHandler_) {
|
||||
char* error = dlerror();
|
||||
std::cerr << "Fail to open " << filename <<": " << error
|
||||
<< std::endl;
|
||||
return AMDSMI_STATUS_FAIL_LOAD_MODULE;
|
||||
// check if already loaded, return success if it is
|
||||
// dlopen(filename, RTLD_NOLOAD) == null only IFF library is not loaded
|
||||
void* isLibOpen = dlopen(filename, RTLD_NOLOAD);
|
||||
if (isLibOpen == nullptr) {
|
||||
libHandler_ = dlopen(filename, RTLD_LAZY);
|
||||
if (!libHandler_) {
|
||||
char* error = dlerror();
|
||||
std::cerr << "Fail to open " << filename <<": " << error
|
||||
<< std::endl;
|
||||
return AMDSMI_STATUS_FAIL_LOAD_MODULE;
|
||||
}
|
||||
}
|
||||
library_loaded_ = true;
|
||||
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -54,6 +60,7 @@ amdsmi_status_t AMDSmiLibraryLoader::unload() {
|
||||
if (libHandler_) {
|
||||
dlclose(libHandler_);
|
||||
libHandler_ = nullptr;
|
||||
library_loaded_ = false;
|
||||
}
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user