diff --git a/runtime/hsa-runtime/hsacore.so.def b/runtime/hsa-runtime/hsacore.so.def
index c415bb811a..ea90b3363e 100644
--- a/runtime/hsa-runtime/hsacore.so.def
+++ b/runtime/hsa-runtime/hsacore.so.def
@@ -220,6 +220,7 @@ global:
     hsa_amd_queue_set_priority;
     hsa_amd_register_deallocation_callback;
     hsa_amd_deregister_deallocation_callback;
+    _amdgpu_r_debug;
 
 local:
     *;
diff --git a/runtime/hsa-runtime/loader/executable.cpp b/runtime/hsa-runtime/loader/executable.cpp
index 0ec868cbc9..5d65a32b97 100644
--- a/runtime/hsa-runtime/loader/executable.cpp
+++ b/runtime/hsa-runtime/loader/executable.cpp
@@ -62,18 +62,17 @@
 using namespace amd::hsa;
 using namespace amd::hsa::common;
 
-#if defined __clang__
-#define NONOPTIMIZE __attribute__((noinline, optnone))
-#else
-#define NONOPTIMIZE __attribute__((noinline, optimize(0)))
-#endif
-
-NONOPTIMIZE static void _loader_debug_state() {};
-r_debug _amdgpu_r_debug __attribute__((visibility("default"))) = {1,
-                                                                  nullptr,
-                                                                  reinterpret_cast<uintptr_t>(&_loader_debug_state),
-                                                                  r_debug::RT_CONSISTENT,
-                                                                  0};
+// Having a side effect prevents call site optimization that allows removal of a noinline function call
+// with no side effect.
+__attribute__((noinline)) static void _loader_debug_state() {
+  static volatile int function_needs_a_side_effect = 0;
+  function_needs_a_side_effect ^= 1;
+}
+HSA_API r_debug _amdgpu_r_debug = {1,
+                                   nullptr,
+                                   reinterpret_cast<uintptr_t>(&_loader_debug_state),
+                                   r_debug::RT_CONSISTENT,
+                                   0};
 static link_map* r_debug_tail = nullptr;
 
 namespace amd {