From 4a1c0fca7fbfb5cd6de2d5f38e6ea7e924016663 Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe Date: Mon, 5 Oct 2020 11:55:42 +0530 Subject: [PATCH] Do not inline the body of __assert_fail() The device side assertion calls printf to write out a message. In the device compiler, printf is expanded into a series of hostcalls that transmit the printf payload to the host. This expansion increases the length of the kernel, resulting in sub-optimal compilation. The solution is to ensure that the assert() implementation is not inlined into the kernel. Change-Id: Ia3a075461a755cf007218f262b0863e1926c76aa [ROCm/clr commit: 160dfb5a1f6741c92baf63843bef3e0e42dc8b98] --- .../include/hip/hcc_detail/device_functions.h | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/projects/clr/hipamd/include/hip/hcc_detail/device_functions.h b/projects/clr/hipamd/include/hip/hcc_detail/device_functions.h index 822c626fa2..e2ffacca2d 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/device_functions.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/device_functions.h @@ -1218,25 +1218,23 @@ void abort() { #elif defined(__clang__) && defined(__HIP__) -#pragma push_macro("__DEVICE__") -#define __DEVICE__ extern "C" __device__ __attribute__((always_inline)) \ - __attribute__((weak)) - -__DEVICE__ -inline +// The noinline attribute helps encapsulate the printf expansion, +// which otherwise has a performance impact just by increasing the +// size of the calling function. Additionally, the weak attribute +// allows the function to exist as a global although its definition is +// included in every compilation unit. +extern "C" __device__ __attribute__((noinline)) __attribute__((weak)) void __assert_fail(const char * __assertion, - const char *__file, - unsigned int __line, - const char *__function) + const char *__file, + unsigned int __line, + const char *__function) { printf("%s:%u: %s: Device-side assertion `%s' failed.\n", __file, __line, __function, __assertion); - // Ignore all the args for now. __builtin_trap(); } -__DEVICE__ -inline +extern "C" __device__ __attribute__((noinline)) __attribute__((weak)) void __assertfail(const char * __assertion, const char *__file, unsigned int __line, @@ -1351,8 +1349,6 @@ unsigned __smid(void) return (se_id << HW_ID_CU_ID_SIZE) + cu_id; } -#pragma push_macro("__DEVICE__") - // Macro to replace extern __shared__ declarations // to local variable definitions #define HIP_DYNAMIC_SHARED(type, var) \