diff --git a/hipamd/include/hip/amd_detail/amd_device_functions.h b/hipamd/include/hip/amd_detail/amd_device_functions.h index cd62c6626c..2a5cf48a03 100644 --- a/hipamd/include/hip/amd_detail/amd_device_functions.h +++ b/hipamd/include/hip/amd_detail/amd_device_functions.h @@ -25,12 +25,13 @@ THE SOFTWARE. #if !defined(__HIPCC_RTC__) #include +#include +#include #include "host_defines.h" #include "math_fwd.h" #include #include #include -#include #endif // !defined(__HIPCC_RTC__) #if defined(__clang__) && defined(__HIP__) @@ -789,77 +790,6 @@ static void __threadfence_system() { __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, ""); } - -// abort -__device__ -inline -__attribute__((weak)) -void abort() { - return __builtin_trap(); -} - -// The noinline attribute helps encapsulate the printf expansion, -// which otherwise has a performance impact just by increasing the -// size of the calling function. Additionally, the weak attribute -// allows the function to exist as a global although its definition is -// included in every compilation unit. -#if defined(_WIN32) || defined(_WIN64) -extern "C" __device__ __attribute__((noinline)) __attribute__((weak)) -void _wassert(const wchar_t *_msg, const wchar_t *_file, unsigned _line) { - // FIXME: Need `wchar_t` support to generate assertion message. - __builtin_trap(); -} -#else /* defined(_WIN32) || defined(_WIN64) */ -extern "C" __device__ __attribute__((noinline)) __attribute__((weak)) -void __assert_fail(const char *assertion, - const char *file, - unsigned int line, - const char *function) -{ - const char fmt[] = "%s:%u: %s: Device-side assertion `%s' failed.\n"; - - // strlen is not available as a built-in yet, so we create our own - // loop in a macro. With a string literal argument, the compiler - // usually manages to replace the loop with a constant. - // - // The macro does not check for null pointer, since all the string - // arguments are defined to be constant literals when called from - // the assert() macro. - // - // NOTE: The loop below includes the null terminator in the length - // as required by append_string_n(). -#define __hip_get_string_length(LEN, STR) \ - do { \ - const char *tmp = STR; \ - while (*tmp++); \ - LEN = tmp - STR; \ - } while (0) - - auto msg = __ockl_fprintf_stderr_begin(); - int len = 0; - __hip_get_string_length(len, fmt); - msg = __ockl_fprintf_append_string_n(msg, fmt, len, 0); - __hip_get_string_length(len, file); - msg = __ockl_fprintf_append_string_n(msg, file, len, 0); - msg = __ockl_fprintf_append_args(msg, 1, line, 0, 0, 0, 0, 0, 0, 0); - __hip_get_string_length(len, function); - msg = __ockl_fprintf_append_string_n(msg, function, len, 0); - __hip_get_string_length(len, assertion); - __ockl_fprintf_append_string_n(msg, assertion, len, /* is_last = */ 1); - -#undef __hip_get_string_length - - __builtin_trap(); -} - -extern "C" __device__ __attribute__((noinline)) __attribute__((weak)) -void __assertfail() -{ - // ignore all the args for now. - __builtin_trap(); -} -#endif /* defined(_WIN32) || defined(_WIN64) */ - __device__ inline static void __work_group_barrier(__cl_mem_fence_flags flags) { if (flags) { __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup"); diff --git a/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h b/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h index eeb67bd075..15775ca1d7 100644 --- a/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h +++ b/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h @@ -37,19 +37,6 @@ THE SOFTWARE. #include #endif -#define __hip_abort() \ - { abort(); } -#if defined(NDEBUG) -#define __hip_assert(COND) -#else -#define __hip_assert(COND) \ - { \ - if (!COND) { \ - __hip_abort(); \ - } \ - } -#endif - namespace cooperative_groups { /** @brief The base type of all cooperative group types @@ -227,7 +214,7 @@ class thread_block : public thread_group { const bool pow2 = ((tile_size & (tile_size - 1)) == 0); // Invalid tile size, assert if (!tile_size || (tile_size > __AMDGCN_WAVEFRONT_SIZE) || !pow2) { - __hip_assert(false && "invalid tile size") + __hip_assert(false && "invalid tile size"); } thread_group tiledGroup = thread_group(internal::cg_tiled_group, tile_size); @@ -282,7 +269,7 @@ class tiled_group : public thread_group { const bool pow2 = ((tile_size & (tile_size - 1)) == 0); if (!tile_size || (tile_size > __AMDGCN_WAVEFRONT_SIZE) || !pow2) { - __hip_assert(false && "invalid tile size") + __hip_assert(false && "invalid tile size"); } if (size() <= tile_size) { @@ -508,7 +495,7 @@ __CG_QUALIFIER__ uint32_t thread_group::thread_rank() const { return (static_cast(this)->thread_rank()); } default: { - __hip_assert(false && "invalid cooperative group type") + __hip_assert(false && "invalid cooperative group type"); return -1; } } @@ -536,7 +523,7 @@ __CG_QUALIFIER__ bool thread_group::is_valid() const { return (static_cast(this)->is_valid()); } default: { - __hip_assert(false && "invalid cooperative group type") + __hip_assert(false && "invalid cooperative group type"); return false; } } @@ -569,7 +556,7 @@ __CG_QUALIFIER__ void thread_group::sync() const { break; } default: { - __hip_assert(false && "invalid cooperative group type") + __hip_assert(false && "invalid cooperative group type"); } } } diff --git a/hipamd/include/hip/amd_detail/hip_assert.h b/hipamd/include/hip/amd_detail/hip_assert.h new file mode 100644 index 0000000000..7d634eae0d --- /dev/null +++ b/hipamd/include/hip/amd_detail/hip_assert.h @@ -0,0 +1,101 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +// abort +extern "C" __device__ inline __attribute__((weak)) +void abort() { + __builtin_trap(); +} + +// The noinline attribute helps encapsulate the printf expansion, +// which otherwise has a performance impact just by increasing the +// size of the calling function. Additionally, the weak attribute +// allows the function to exist as a global although its definition is +// included in every compilation unit. +#if defined(_WIN32) || defined(_WIN64) +extern "C" __device__ __attribute__((noinline)) __attribute__((weak)) +void _wassert(const wchar_t *_msg, const wchar_t *_file, unsigned _line) { + // FIXME: Need `wchar_t` support to generate assertion message. + __builtin_trap(); +} +#else /* defined(_WIN32) || defined(_WIN64) */ +extern "C" __device__ __attribute__((noinline)) __attribute__((weak)) +void __assert_fail(const char *assertion, + const char *file, + unsigned int line, + const char *function) +{ + const char fmt[] = "%s:%u: %s: Device-side assertion `%s' failed.\n"; + + // strlen is not available as a built-in yet, so we create our own + // loop in a macro. With a string literal argument, the compiler + // usually manages to replace the loop with a constant. + // + // The macro does not check for null pointer, since all the string + // arguments are defined to be constant literals when called from + // the assert() macro. + // + // NOTE: The loop below includes the null terminator in the length + // as required by append_string_n(). +#define __hip_get_string_length(LEN, STR) \ + do { \ + const char *tmp = STR; \ + while (*tmp++); \ + LEN = tmp - STR; \ + } while (0) + + auto msg = __ockl_fprintf_stderr_begin(); + int len = 0; + __hip_get_string_length(len, fmt); + msg = __ockl_fprintf_append_string_n(msg, fmt, len, 0); + __hip_get_string_length(len, file); + msg = __ockl_fprintf_append_string_n(msg, file, len, 0); + msg = __ockl_fprintf_append_args(msg, 1, line, 0, 0, 0, 0, 0, 0, 0); + __hip_get_string_length(len, function); + msg = __ockl_fprintf_append_string_n(msg, function, len, 0); + __hip_get_string_length(len, assertion); + __ockl_fprintf_append_string_n(msg, assertion, len, /* is_last = */ 1); + +#undef __hip_get_string_length + + __builtin_trap(); +} + +extern "C" __device__ __attribute__((noinline)) __attribute__((weak)) +void __assertfail() +{ + // ignore all the args for now. + __builtin_trap(); +} +#endif /* defined(_WIN32) || defined(_WIN64) */ + +#if defined(NDEBUG) +#define __hip_assert(COND) +#else +#define __hip_assert(COND) \ + do { \ + if (!(COND)) \ + __builtin_trap(); \ + } while (0) +#endif diff --git a/hipamd/src/hiprtc/CMakeLists.txt b/hipamd/src/hiprtc/CMakeLists.txt index 1c4e2a1723..20ee8833fd 100644 --- a/hipamd/src/hiprtc/CMakeLists.txt +++ b/hipamd/src/hiprtc/CMakeLists.txt @@ -132,6 +132,8 @@ set(HIPRTC_GEN_OBJ "${HIPRTC_GEN_DIR}/hipRTC_header${CMAKE_CXX_OUTPUT_EXTENSION} # list of headers which needs to be appended to the hiprtc preprocessed file set(HIPRTC_HEADERS ${HIP_COMMON_INCLUDE_DIR}/hip/hip_common.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/device_library_decls.h +${PROJECT_SOURCE_DIR}/include/hip/amd_detail/hip_assert.h ${HIP_COMMON_INCLUDE_DIR}/hip/library_types.h ${HIP_COMMON_INCLUDE_DIR}/hip/driver_types.h ${HIP_COMMON_INCLUDE_DIR}/hip/surface_types.h @@ -144,7 +146,6 @@ ${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_surface_functions.h ${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_complex.h ${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_hip_math_constants.h ${PROJECT_SOURCE_DIR}/include/hip/amd_detail/math_fwd.h -${PROJECT_SOURCE_DIR}/include/hip/amd_detail/device_library_decls.h ${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_device_functions.h ${PROJECT_SOURCE_DIR}/include/hip/amd_detail/amd_warp_functions.h ${PROJECT_SOURCE_DIR}/include/hip/amd_detail/hip_cooperative_groups_helper.h