- Remove tooling initialization from rocprofiler_configure:
when rocprofiler_configure is called from __hip_module_ctor
(which in turn is called as a global constructor when loading shared
libraries or before main in a HIP program), initializing tooling
in it can cause problems because it is too early to do some of the tasks
that it involves (e.g. opening shared libraries, creating threads).
Instead, we rely on rocprofsys_main to initialize tooling later.

- Skip rocprofiler_configure if ROCPROFSYS_PRELOAD is not set since
preload is required for tooling (such as perfetto, which is used by
the rocprofiler callbacks) to be initialized.

- Revert RCCL initialization changes: These are no longer needed since rocprofsys_init_tooling_hidden will not
be called from rocprofiler_configure

- Force rocprofiler_configure in rocprofsys_init_tooling_hidden if it hasn't
already been called through the __hip_module_ctor global constructor.
Этот коммит содержится в:
Sohaib Nadeem
2025-04-21 17:04:24 -04:00
коммит произвёл GitHub
родитель f03dce5fa6
Коммит 0e535daa93
4 изменённых файлов: 30 добавлений и 41 удалений
+1 -1
Просмотреть файл
@@ -122,7 +122,7 @@ rocprofsys_init_library(void)
extern "C" void
rocprofsys_init_tooling(void)
{
rocprofsys_init_tooling_hidden(true);
rocprofsys_init_tooling_hidden();
}
extern "C" void
+1 -1
Просмотреть файл
@@ -95,7 +95,7 @@ extern "C"
// these are the real implementations for internal calling convention
void rocprofsys_init_library_hidden(void) ROCPROFSYS_HIDDEN_API;
bool rocprofsys_init_tooling_hidden(bool postinit = false) ROCPROFSYS_HIDDEN_API;
bool rocprofsys_init_tooling_hidden(void) ROCPROFSYS_HIDDEN_API;
void rocprofsys_init_hidden(const char*, bool, const char*) ROCPROFSYS_HIDDEN_API;
void rocprofsys_finalize_hidden(void) ROCPROFSYS_HIDDEN_API;
void rocprofsys_reset_preload_hidden(void) ROCPROFSYS_HIDDEN_API;
+21 -34
Просмотреть файл
@@ -404,42 +404,16 @@ rocprofsys_init_library_hidden()
ROCPROFSYS_CONDITIONAL_BASIC_PRINT_F(_debug_init, "\n");
}
// Initialize RCCL if:
// - postinit=true - so the code doesn't hang at the initialization stage
// - get_state() >= State::Init - so the code doesn't throw an exception
// - rccl_initialized=false - so we don't try to initialize RCCL twice
// - get_use_rcclp()=true - only if the environment is configured to use RCCL
static void
rccl_setup(bool postinit)
{
// Flag used to avoid initializing RCCL twice
static bool rccl_initialized = false;
if(postinit && (get_state() >= State::Init) && !rccl_initialized && get_use_rcclp())
{
ROCPROFSYS_VERBOSE_F(1, "Setting up RCCLP...\n");
rcclp::setup();
rccl_initialized = true;
}
}
static void
rocprofsys_init_library_hidden_with_rccl(bool postinit)
{
rocprofsys_init_library_hidden();
rccl_setup(postinit);
}
//======================================================================================//
extern "C" bool
rocprofsys_init_tooling_hidden(bool postinit)
rocprofsys_init_tooling_hidden()
{
if(get_env("ROCPROFSYS_MONOCHROME", false, false)) tim::log::monochrome() = true;
if(!tim::get_env("ROCPROFSYS_INIT_TOOLING", true))
{
rocprofsys_init_library_hidden_with_rccl(postinit);
rocprofsys_init_library_hidden();
return false;
}
@@ -456,11 +430,7 @@ rocprofsys_init_tooling_hidden(bool postinit)
ROCPROFSYS_CONDITIONAL_BASIC_PRINT_F(_debug_init, "State is %s...\n",
std::to_string(get_state()).c_str());
if(get_state() != State::PreInit || get_state() == State::Init || _once)
{
rccl_setup(postinit);
return false;
}
if(get_state() != State::PreInit || get_state() == State::Init || _once) return false;
_once = true;
ROCPROFSYS_SCOPED_THREAD_STATE(ThreadState::Internal);
@@ -481,7 +451,7 @@ rocprofsys_init_tooling_hidden(bool postinit)
ROCPROFSYS_CONDITIONAL_BASIC_PRINT_F(_debug_init,
"Calling rocprofsys_init_library()...\n");
rocprofsys_init_library_hidden_with_rccl(postinit);
rocprofsys_init_library_hidden();
ROCPROFSYS_DEBUG_F("\n");
@@ -579,6 +549,23 @@ rocprofsys_init_tooling_hidden(bool postinit)
ompt::setup();
}
#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0
// Force rocprofiler_configure if it hasn't been called through __hip_module_ctor.
// rocprofiler_configure needs to be called before rcclp::setup to decide
// whether we want to use gotcha wrappers for rccl or rocprofiler-based tracing.
if(get_use_rocm())
{
ROCPROFSYS_VERBOSE_F(1, "Setting up ROCm...\n");
rocprofiler_sdk::setup();
}
#endif
if(get_use_rcclp())
{
ROCPROFSYS_VERBOSE_F(1, "Setting up RCCLP...\n");
rcclp::setup();
}
if(get_use_perfetto())
{
ROCPROFSYS_VERBOSE_F(1, "Starting Perfetto...\n");
+7 -5
Просмотреть файл
@@ -1301,12 +1301,14 @@ rocprofiler_configure(uint32_t version, const char* runtime_version, uint32_t pr
_first = false;
}
if(!tim::get_env("ROCPROFSYS_INIT_TOOLING", true)) return nullptr;
if(!tim::settings::enabled()) return nullptr;
// If either ROCPROFSYS_PRELOAD or ROCPROFSYS_INIT_TOOLING evaluates to false,
// the tooling will not be initialized, so we cannot enable
// profiling with rocprofiler.
if(!tim::get_env("ROCPROFSYS_PRELOAD", true) ||
!tim::get_env("ROCPROFSYS_INIT_TOOLING", true))
return nullptr;
if(!rocprofsys::config::settings_are_configured() &&
rocprofsys::get_state() < rocprofsys::State::Active)
rocprofsys_init_tooling_hidden();
if(!tim::settings::enabled()) return nullptr;
if(!rocprofsys::config::get_use_rocm())
{