fix: prevent double-free crash during process exit in amd-smi (#2213)
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
883fdfb820
Коммит
7b00d3a89b
@@ -31,8 +31,6 @@
|
||||
} \
|
||||
} // namespace ::tim::cereal
|
||||
|
||||
#include "common/defines.h"
|
||||
|
||||
#if !defined(ROCPROFSYS_USE_ROCM)
|
||||
# define ROCPROFSYS_USE_ROCM 0
|
||||
#endif
|
||||
@@ -43,6 +41,7 @@
|
||||
|
||||
#include <timemory/manager.hpp>
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <string>
|
||||
|
||||
#include "core/agent_manager.hpp"
|
||||
@@ -90,6 +89,17 @@ _amdsmi_is_initialized()
|
||||
return initialized;
|
||||
}
|
||||
|
||||
/// Pins the AMD SMI / ROCm SMI shared libraries in memory so they are not
/// unloaded for the remainder of the process.
///
/// Each library is re-opened with `RTLD_NOLOAD | RTLD_NODELETE`:
///   - `RTLD_NOLOAD` never loads anything new; `dlopen` simply returns null when
///     the library is not already resident, so this is a no-op in that case.
///   - `RTLD_NODELETE` promotes the flags of an already-resident library so that a
///     later `dlclose()` does not unload it.
///
/// The pinning is performed at most once per process; subsequent calls return
/// immediately.
void
prevent_amdsmi_library_unload()
{
    // A function-local static is initialized exactly once, and the language
    // guarantees this even if several threads reach this point concurrently,
    // which a plain `static bool` check-then-set does not.
    static const bool _pinned = []() {
        constexpr int _flags = RTLD_NOW | RTLD_NOLOAD | RTLD_NODELETE;

        // The handles are deliberately discarded and never dlclose()'d: a null
        // result only means the library was not loaded (nothing to pin), and a
        // non-null result should keep its reference for the process lifetime.
        static_cast<void>(dlopen("libamd_smi.so", _flags));
        static_cast<void>(dlopen("librocm_smi64.so", _flags));
        return true;
    }();
    static_cast<void>(_pinned);
}
|
||||
|
||||
bool
|
||||
amdsmi_init()
|
||||
{
|
||||
@@ -100,6 +110,8 @@ amdsmi_init()
|
||||
ROCPROFSYS_AMD_SMI_CALL(::amdsmi_init(AMDSMI_INIT_AMD_GPUS));
|
||||
get_processor_handles();
|
||||
_amdsmi_is_initialized() = true; // Mark as initialized
|
||||
|
||||
prevent_amdsmi_library_unload();
|
||||
} catch(std::exception& _e)
|
||||
{
|
||||
ROCPROFSYS_BASIC_VERBOSE(1, "Exception thrown initializing amd-smi: %s\n",
|
||||
|
||||
@@ -1265,10 +1265,7 @@ shutdown()
|
||||
|
||||
try
|
||||
{
|
||||
if(data::shutdown())
|
||||
{
|
||||
ROCPROFSYS_AMD_SMI_CALL(amdsmi_shut_down());
|
||||
}
|
||||
data::shutdown();
|
||||
} catch(std::runtime_error& _e)
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(0, "Exception thrown when shutting down amd-smi: %s\n",
|
||||
|
||||
+35
-1
@@ -25,7 +25,6 @@
|
||||
#include "core/config.hpp"
|
||||
#include "core/debug.hpp"
|
||||
#include "core/state.hpp"
|
||||
#include "core/timemory.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
|
||||
#include <timemory/backends/threading.hpp>
|
||||
@@ -33,7 +32,9 @@
|
||||
#include <timemory/utility/types.hpp>
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <unistd.h>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
@@ -105,6 +106,39 @@ void
|
||||
exit_gotcha::operator()(const gotcha_data& _data, exit_func_t _func, int _ec) const
{
    // Record that an exit was requested, whether the wrapped function's tool id
    // contains "quick", and the exit code that was passed in.
    const bool _is_quick = _data.tool_id.find("quick") != std::string::npos;
    _exit_info           = { true, _is_quick, _ec };

    // Without AMD SMI in use, take the regular wrapped-exit path.
    if(!config::get_use_amd_smi())
    {
        invoke_exit_gotcha(_data, _func, _ec);
        return;
    }

    // AMD SMI path: finalize here (if still needed) and then terminate with
    // _exit(), which skips atexit handlers and static destructors.
    threading::clear_callbacks();

    const bool _needs_finalize =
        !is_child_process() && get_state() < ::rocprofsys::State::Finalized;
    if(_needs_finalize)
    {
        // Use the settings-aware logger only once settings are configured.
        if(!config::settings_are_configured())
        {
            ROCPROFSYS_BASIC_VERBOSE(0, "finalizing %s before calling %s(%i)...\n",
                                     get_exe_name().c_str(), _data.tool_id.c_str(),
                                     _ec);
        }
        else
        {
            ROCPROFSYS_VERBOSE(0, "finalizing %s before calling %s(%i)...\n",
                               get_exe_name().c_str(), _data.tool_id.c_str(), _ec);
        }

        rocprofsys_finalize();
    }

    if(config::settings_are_configured())
    {
        ROCPROFSYS_VERBOSE(
            0, "calling _exit(%i) in %s to avoid AMD SMI cleanup issues...\n", _ec,
            get_exe_name().c_str());
    }

    // _exit() does not flush stdio buffers, so flush every open output stream
    // first to avoid losing pending output.
    std::fflush(nullptr);
    _exit(_ec);
}
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user