diff --git a/projects/hip/include/hip/hcc_detail/grid_launch_GGL.hpp b/projects/hip/include/hip/hcc_detail/grid_launch_GGL.hpp index 8f1abbb70b..2dd9a95bc6 100644 --- a/projects/hip/include/hip/hcc_detail/grid_launch_GGL.hpp +++ b/projects/hip/include/hip/hcc_detail/grid_launch_GGL.hpp @@ -21,6 +21,7 @@ THE SOFTWARE. */ #pragma once + #if GENERIC_GRID_LAUNCH == 1 #include "concepts.hpp" @@ -71,7 +72,7 @@ namespace hip_impl template using is_new_grid_launch_t = typename std::conditional< - std::is_callable{}, + is_callable{}, New_grid_launch_tag, Old_grid_launch_tag>::type; } @@ -118,6 +119,7 @@ namespace hip_impl // TODO: these are workarounds, they should be removed. hc::accelerator_view lock_stream_hip_(hipStream_t&, void*&); + void print_prelaunch_trace_(const char*, dim3, dim3, int, hipStream_t); void unlock_stream_hip_( hipStream_t, void*, const char*, hc::accelerator_view*); @@ -137,7 +139,13 @@ namespace hip_impl void* lck_stream = nullptr; auto acc_v = lock_stream_hip_(stream, lck_stream); auto stream_guard = make_RAII_guard( - [](){ /* perhaps use a slimmed down ihipPrintKernelLaunch here */ }, + std::bind( + print_prelaunch_trace_, + kernel_name, + num_blocks, + dim_blocks, + group_mem_bytes, + stream), std::bind( unlock_stream_hip_, stream, lck_stream, kernel_name, &acc_v)); @@ -841,16 +849,15 @@ namespace hip_impl group_mem_bytes,\ stream,\ ...)\ - do {\ - hipLaunchKernelGGL(\ - kernel_name,\ - num_blocks,\ - dim_blocks,\ - group_mem_bytes,\ - stream,\ - hipLaunchParm{},\ - ##__VA_ARGS__);\ - } while(0) - + do {\ + hipLaunchKernelGGL(\ + kernel_name,\ + num_blocks,\ + dim_blocks,\ + group_mem_bytes,\ + stream,\ + hipLaunchParm{},\ + ##__VA_ARGS__);\ + } while(0) } #endif //GENERIC_GRID_LAUNCH diff --git a/projects/hip/include/hip/hcc_detail/helpers.hpp b/projects/hip/include/hip/hcc_detail/helpers.hpp index e5a84a4678..611929766b 100644 --- a/projects/hip/include/hip/hcc_detail/helpers.hpp +++ b/projects/hip/include/hip/hcc_detail/helpers.hpp @@ -21,6 +21,7 @@ THE SOFTWARE. */ #pragma once +#include "concepts.hpp" #include // For std::conditional, std::decay, std::enable_if, // std::false_type, std result_of and std::true_type. @@ -29,9 +30,6 @@ THE SOFTWARE. namespace std { // TODO: these should be removed as soon as possible. #if (__cplusplus < 201406L) - template - using void_t = void; - #if (__cplusplus < 201402L) template using enable_if_t = typename enable_if::type; @@ -43,88 +41,80 @@ namespace std using result_of_t = typename result_of::type; template using remove_reference_t = typename remove_reference::type; - template< - FunctionalProcedure F, - unsigned int n = 0u, - typename = void> - struct is_callable_impl : is_callable_impl {}; - - // Pointer to member function, call through non-pointer. - template - struct is_callable_impl< - F(C, Ts...), - 0u, - void_t().*declval())(declval()...))> - > : true_type { - }; - - // Pointer to member function, call through pointer. - template - struct is_callable_impl< - F(C, Ts...), - 1u, - void_t()).*declval())(declval()...))> - > : std::true_type { - }; - - // Pointer to member data, call through non-pointer, no args. - template - struct is_callable_impl< - F(C), - 2u, - void_t().*declval())> - > : true_type { - }; - - // Pointer to member data, call through pointer, no args. - template - struct is_callable_impl< - F(C), - 3u, - void_t().*declval())> - > : true_type { - }; - - // General call, n args. - template - struct is_callable_impl< - F(Ts...), - 4u, - void_t()(declval()...))> - > : true_type { - }; - - // Not callable. - template - struct is_callable_impl : false_type {}; - - template - struct is_callable : is_callable_impl {}; - #else - template - struct is_callable_impl : false_type {}; - - template - struct is_callable_impl< - F(Ts...), - void_t>> : true_type {}; - - template - struct is_callable : is_callable_impl {}; #endif - template - struct disjunction : false_type {}; - template - struct disjunction : B1 {}; - template - struct disjunction - : conditional_t> - {}; #endif } -namespace hip_impl // Only for documentation, macros ignore namespaces. +namespace hip_impl { + template + using void_t_ = void; + + #if (__cplusplus < 201402L) + template< + FunctionalProcedure F, + unsigned int n = 0u, + typename = void> + struct is_callable_impl : is_callable_impl {}; + + // Pointer to member function, call through non-pointer. + template + struct is_callable_impl< + F(C, Ts...), + 0u, + void_t_().*std::declval())( + std::declval()...))> + > : std::true_type {}; + + // Pointer to member function, call through pointer. + template + struct is_callable_impl< + F(C, Ts...), + 1u, + void_t_()).*std::declval())( + std::declval()...))> + > : std::true_type {}; + + // Pointer to member data, call through non-pointer, no args. + template + struct is_callable_impl< + F(C), + 2u, + void_t_().*std::declval())> + > : std::true_type {}; + + // Pointer to member data, call through pointer, no args. + template + struct is_callable_impl< + F(C), + 3u, + void_t_().*std::declval())> + > : std::true_type {}; + + // General call, n args. + template + struct is_callable_impl< + F(Ts...), + 4u, + void_t_()(std::declval()...))> + > : std::true_type {}; + + // Not callable. + template + struct is_callable_impl : std::false_type {}; + + template + struct is_callable : is_callable_impl {}; + #else + template + struct is_callable_impl : std::false_type {}; + + template + struct is_callable_impl< + F(Ts...), + void_t_>> : std::true_type {}; + #endif + #define count_macro_args_impl_hip_(\ _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15,\ _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29,\ diff --git a/projects/hip/src/grid_launch.cpp b/projects/hip/src/grid_launch.cpp index 7739995600..cac01df7dc 100644 --- a/projects/hip/src/grid_launch.cpp +++ b/projects/hip/src/grid_launch.cpp @@ -27,6 +27,9 @@ THE SOFTWARE. #include "hc.hpp" #include "trace_helper.h" +#include +#include + namespace hip_impl { hc::accelerator_view lock_stream_hip_( @@ -42,6 +45,39 @@ namespace hip_impl return (*static_cast(locked_stream))->_av; } + void print_prelaunch_trace_( + const char* kernel_name, + dim3 num_blocks, + dim3 dim_blocks, + int group_mem_bytes, + hipStream_t stream) + { + if ((HIP_TRACE_API & (1 << TRACE_CMD)) || + HIP_PROFILE_API || + (COMPILE_HIP_DB && HIP_TRACE_API)) { + std::stringstream os; + os << tls_tidInfo.tid() << "." << tls_tidInfo.apiSeqNum() + << " hipLaunchKernel '" << kernel_name << "'" + << " gridDim:" << num_blocks + << " groupDim:" << dim_blocks + << " sharedMem:+" << group_mem_bytes + << " " << *stream; + + if (HIP_PROFILE_API == 0x1) { + std::string shortAtpString("hipLaunchKernel:"); + shortAtpString += kernel_name; + MARKER_BEGIN(shortAtpString.c_str(), "HIP"); + } else if (HIP_PROFILE_API == 0x2) { + MARKER_BEGIN(os.str().c_str(), "HIP"); + } + + if (COMPILE_HIP_DB && HIP_TRACE_API) { + std::cerr << API_COLOR << os.str() << API_COLOR_END + << std::endl; + } + } + } + void unlock_stream_hip_( hipStream_t stream, void* locked_stream,