Do not inline the body of __assert_fail()
The device-side assertion calls printf to write out a message. In the
device compiler, printf is expanded into a series of hostcalls that
transmit the printf payload to the host. When inlined, this expansion
increases the size of the calling kernel, which hurts performance. The
solution is to ensure that the assert() implementation is not inlined
into the kernel.
Change-Id: Ia3a075461a755cf007218f262b0863e1926c76aa
[ROCm/clr commit: 160dfb5a1f]
This commit is contained in:
کامیت شده توسط
Sameer Sahasrabuddhe
والد
cd76837c2e
کامیت
4a1c0fca7f
@@ -1218,25 +1218,23 @@ void abort() {
|
||||
|
||||
#elif defined(__clang__) && defined(__HIP__)
|
||||
|
||||
#pragma push_macro("__DEVICE__")
|
||||
#define __DEVICE__ extern "C" __device__ __attribute__((always_inline)) \
|
||||
__attribute__((weak))
|
||||
|
||||
__DEVICE__
|
||||
inline
|
||||
// The noinline attribute helps encapsulate the printf expansion,
|
||||
// which otherwise has a performance impact just by increasing the
|
||||
// size of the calling function. Additionally, the weak attribute
|
||||
// allows the function to exist as a global although its definition is
|
||||
// included in every compilation unit.
|
||||
extern "C" __device__ __attribute__((noinline)) __attribute__((weak))
|
||||
void __assert_fail(const char * __assertion,
|
||||
const char *__file,
|
||||
unsigned int __line,
|
||||
const char *__function)
|
||||
const char *__file,
|
||||
unsigned int __line,
|
||||
const char *__function)
|
||||
{
|
||||
printf("%s:%u: %s: Device-side assertion `%s' failed.\n", __file, __line,
|
||||
__function, __assertion);
|
||||
// Ignore all the args for now.
|
||||
__builtin_trap();
|
||||
}
|
||||
|
||||
__DEVICE__
|
||||
inline
|
||||
extern "C" __device__ __attribute__((noinline)) __attribute__((weak))
|
||||
void __assertfail(const char * __assertion,
|
||||
const char *__file,
|
||||
unsigned int __line,
|
||||
@@ -1351,8 +1349,6 @@ unsigned __smid(void)
|
||||
return (se_id << HW_ID_CU_ID_SIZE) + cu_id;
|
||||
}
|
||||
|
||||
#pragma push_macro("__DEVICE__")
|
||||
|
||||
// Macro to replace extern __shared__ declarations
|
||||
// to local variable definitions
|
||||
#define HIP_DYNAMIC_SHARED(type, var) \
|
||||
|
||||
مرجع در شماره جدید
Block a user