Do not inline the body of __assert_fail()

The device-side assertion calls printf to write out a message. In the
device compiler, printf is expanded into a series of hostcalls that
transmit the printf payload to the host. This expansion increases the
size of the calling kernel, which has a performance impact even when
the assertion never fires. The solution is to ensure that the assert()
implementation is not inlined into the kernel.

Change-Id: Ia3a075461a755cf007218f262b0863e1926c76aa


[ROCm/clr commit: 160dfb5a1f]
This commit is contained in:
Sameer Sahasrabuddhe
2020-10-05 11:55:42 +05:30
Committed by Sameer Sahasrabuddhe
Parent cd76837c2e
Commit 4a1c0fca7f
@@ -1218,25 +1218,23 @@ void abort() {
#elif defined(__clang__) && defined(__HIP__)
#pragma push_macro("__DEVICE__")
#define __DEVICE__ extern "C" __device__ __attribute__((always_inline)) \
__attribute__((weak))
__DEVICE__
inline
// The noinline attribute helps encapsulate the printf expansion,
// which otherwise has a performance impact just by increasing the
// size of the calling function. Additionally, the weak attribute
// allows the function to exist as a global although its definition is
// included in every compilation unit.
extern "C" __device__ __attribute__((noinline)) __attribute__((weak))
void __assert_fail(const char * __assertion,
const char *__file,
unsigned int __line,
const char *__function)
const char *__file,
unsigned int __line,
const char *__function)
{
printf("%s:%u: %s: Device-side assertion `%s' failed.\n", __file, __line,
__function, __assertion);
// Ignore all the args for now.
__builtin_trap();
}
__DEVICE__
inline
extern "C" __device__ __attribute__((noinline)) __attribute__((weak))
void __assertfail(const char * __assertion,
const char *__file,
unsigned int __line,
@@ -1351,8 +1349,6 @@ unsigned __smid(void)
return (se_id << HW_ID_CU_ID_SIZE) + cu_id;
}
#pragma push_macro("__DEVICE__")
// Macro to replace extern __shared__ declarations
// to local variable definitions
#define HIP_DYNAMIC_SHARED(type, var) \