From 3e9aca0f3472802957a34d096c7233b673ce0f55 Mon Sep 17 00:00:00 2001 From: Sean Keely Date: Wed, 15 Jan 2020 20:55:54 -0600 Subject: [PATCH] Support stripped binaries and remove unneeded attributes. Attribute optimize(0) doesn't appear to be helpful. This prevents optimization in the function but not at call sites to the function. The function may still be inlined since it has no side effect (in some cases that we currently don't support). Having a side effect prevents a call site optimization that allows removal of a noinline function call with no side effect. Call site optimization should only happen (in GCC at least) when using whole program optimization so this may be stronger than we strictly need. Also added _amdgpu_r_debug to the exported symbol list (global) and switched to the standard macro for an exported symbol (HSA_API). Without being in the global list the debugger will not find this symbol if the binary has been stripped. Change-Id: Ieb00175ccc55fda4491deee44711cd55b3f24aeb --- runtime/hsa-runtime/hsacore.so.def | 1 + runtime/hsa-runtime/loader/executable.cpp | 23 +++++++++++------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/runtime/hsa-runtime/hsacore.so.def b/runtime/hsa-runtime/hsacore.so.def index c415bb811a..ea90b3363e 100644 --- a/runtime/hsa-runtime/hsacore.so.def +++ b/runtime/hsa-runtime/hsacore.so.def @@ -220,6 +220,7 @@ global: hsa_amd_queue_set_priority; hsa_amd_register_deallocation_callback; hsa_amd_deregister_deallocation_callback; + _amdgpu_r_debug; local: *; diff --git a/runtime/hsa-runtime/loader/executable.cpp b/runtime/hsa-runtime/loader/executable.cpp index 0ec868cbc9..5d65a32b97 100644 --- a/runtime/hsa-runtime/loader/executable.cpp +++ b/runtime/hsa-runtime/loader/executable.cpp @@ -62,18 +62,17 @@ using namespace amd::hsa; using namespace amd::hsa::common; -#if defined __clang__ -#define NONOPTIMIZE __attribute__((noinline, optnone)) -#else -#define NONOPTIMIZE __attribute__((noinline, 
optimize(0))) -#endif - -NONOPTIMIZE static void _loader_debug_state() {}; -r_debug _amdgpu_r_debug __attribute__((visibility("default"))) = {1, - nullptr, - reinterpret_cast(&_loader_debug_state), - r_debug::RT_CONSISTENT, - 0}; +// Having a side effect prevents call site optimization that allows removal of a noinline function call +// with no side effect. +__attribute__((noinline)) static void _loader_debug_state() { + static volatile int function_needs_a_side_effect = 0; + function_needs_a_side_effect ^= 1; +} +HSA_API r_debug _amdgpu_r_debug = {1, + nullptr, + reinterpret_cast(&_loader_debug_state), + r_debug::RT_CONSISTENT, + 0}; static link_map* r_debug_tail = nullptr; namespace amd {