diff --git a/include/hip/hcc_detail/grid_launch_GGL.hpp b/include/hip/hcc_detail/grid_launch_GGL.hpp index 2dd9a95bc6..8e3dab8482 100644 --- a/include/hip/hcc_detail/grid_launch_GGL.hpp +++ b/include/hip/hcc_detail/grid_launch_GGL.hpp @@ -89,8 +89,7 @@ namespace hip_impl dim3 dim_blocks, int group_mem_bytes, const hc::accelerator_view& acc_v, - K k, - Ts&&... args) + K k) { const auto d = hc::extent<3>{ num_blocks.z * dim_blocks.z, @@ -102,16 +101,11 @@ namespace hip_impl group_mem_bytes); try { - hc::parallel_for_each( - acc_v, - d, - [=](const hc::tiled_index<3>& idx) [[hc]] { - k(args...); - }); + hc::parallel_for_each(acc_v, d, k); } catch (std::exception& ex) { - std::cerr << "Failed in " << __FUNCTION__ << ", with exception: " - << ex.what() << std::endl; + std::cerr << "Failed in " << __func__ << ", with exception: " + << ex.what() << std::endl; throw; } } @@ -133,8 +127,7 @@ namespace hip_impl int group_mem_bytes, hipStream_t stream, const char* kernel_name, - K k, - Ts&&... args) + K k) { void* lck_stream = nullptr; auto acc_v = lock_stream_hip_(stream, lck_stream); @@ -156,12 +149,11 @@ namespace hip_impl std::move(dim_blocks), group_mem_bytes, acc_v, - std::move(k), - std::forward(args)...); + std::move(k)); } catch (std::exception& ex) { - std::cerr << "Failed in " << __FUNCTION__ << ", with exception: " - << ex.what() << std::endl; + std::cerr << "Failed in " << __func__ << ", with exception: " + << ex.what() << std::endl; throw; } } @@ -175,8 +167,7 @@ namespace hip_impl dim3 dim_blocks, int group_mem_bytes, hipStream_t stream, - K k, - Ts&&... args) + K k) { grid_launch_hip_impl_( New_grid_launch_tag{}, @@ -184,9 +175,7 @@ namespace hip_impl std::move(dim_blocks), group_mem_bytes, std::move(stream), - std::move(k), - hipLaunchParm{}, - std::forward(args)...); + std::move(k)); } template @@ -199,8 +188,7 @@ namespace hip_impl int group_mem_bytes, hipStream_t stream, const char* kernel_name, - K k, - Ts&&... args) + K k) { grid_launch_hip_impl_( New_grid_launch_tag{}, @@ -209,9 +197,7 @@ namespace hip_impl group_mem_bytes, std::move(stream), kernel_name, - std::move(k), - hipLaunchParm{}, - std::forward(args)...); + std::move(k)); } template @@ -223,8 +209,7 @@ namespace hip_impl int group_mem_bytes, hipStream_t stream, const char* kernel_name, - K k, - Ts&& ... args) + K k) { grid_launch_hip_impl_( is_new_grid_launch_t{}, @@ -233,8 +218,7 @@ namespace hip_impl group_mem_bytes, std::move(stream), kernel_name, - std::move(k), - std::forward(args)...); + std::move(k)); } template @@ -245,8 +229,7 @@ namespace hip_impl dim3 dim_blocks, int group_mem_bytes, hipStream_t stream, - K k, - Ts&& ... args) + K k) { grid_launch_hip_impl_( is_new_grid_launch_t{}, @@ -254,610 +237,649 @@ namespace hip_impl std::move(dim_blocks), group_mem_bytes, std::move(stream), - std::move(k), - std::forward(args)...); + std::move(k)); } - namespace - { - template - constexpr - inline - T&& forward_(std::remove_reference_t& x) [[hc]] - { - return static_cast(x); - } + // TODO: these are temporary and purposefully noisy and disruptive. + #define make_kernel_name_hip(k, n)\ + HIP_kernel_functor_name_begin ## _ ## k ## _ ## \ + HIP_kernel_functor_name_end ## _ ## n - template - struct Forwarder { - template - void operator()(Ts&&...args) const [[hc]] - { - k(forward_(args)...); - } - }; - } + #define make_kernel_functor_hip_27(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9,\ + p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, p22, p23,\ + p24)\ + struct make_kernel_name_hip(function_name, 25) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + std::decay_t _p10_;\ + std::decay_t _p11_;\ + std::decay_t _p12_;\ + std::decay_t _p13_;\ + std::decay_t _p14_;\ + std::decay_t _p15_;\ + std::decay_t _p16_;\ + std::decay_t _p17_;\ + std::decay_t _p18_;\ + std::decay_t _p19_;\ + std::decay_t _p20_;\ + std::decay_t _p21_;\ + std::decay_t _p22_;\ + std::decay_t _p23_;\ + std::decay_t _p24_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ + _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_,\ + _p18_, _p19_, _p20_, _p21_, _p22_, _p23_, _p24_);\ + }\ + } + #define make_kernel_functor_hip_26(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9,\ + p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, p22, p23)\ + struct make_kernel_name_hip(function_name, 24) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + std::decay_t _p10_;\ + std::decay_t _p11_;\ + std::decay_t _p12_;\ + std::decay_t _p13_;\ + std::decay_t _p14_;\ + std::decay_t _p15_;\ + std::decay_t _p16_;\ + std::decay_t _p17_;\ + std::decay_t _p18_;\ + std::decay_t _p19_;\ + std::decay_t _p20_;\ + std::decay_t _p21_;\ + std::decay_t _p22_;\ + std::decay_t _p23_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ + _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_,\ + _p18_, _p19_, _p20_, _p21_, _p22_, _p23_);\ + }\ + } + #define make_kernel_functor_hip_25(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9,\ + p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21, p22)\ + struct make_kernel_name_hip(function_name, 23) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + std::decay_t _p10_;\ + std::decay_t _p11_;\ + std::decay_t _p12_;\ + std::decay_t _p13_;\ + std::decay_t _p14_;\ + std::decay_t _p15_;\ + std::decay_t _p16_;\ + std::decay_t _p17_;\ + std::decay_t _p18_;\ + std::decay_t _p19_;\ + std::decay_t _p20_;\ + std::decay_t _p21_;\ + std::decay_t _p22_;\ + __attribute__((used, flatten))\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ + _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_,\ + _p18_, _p19_, _p20_, _p21_, _p22_);\ + }\ + } + #define make_kernel_functor_hip_24(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9,\ + p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20, p21)\ + struct make_kernel_name_hip(function_name, 22) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + std::decay_t _p10_;\ + std::decay_t _p11_;\ + std::decay_t _p12_;\ + std::decay_t _p13_;\ + std::decay_t _p14_;\ + std::decay_t _p15_;\ + std::decay_t _p16_;\ + std::decay_t _p17_;\ + std::decay_t _p18_;\ + std::decay_t _p19_;\ + std::decay_t _p20_;\ + std::decay_t _p21_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ + _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_,\ + _p18_, _p19_, _p20_, _p21_);\ + }\ + } + #define make_kernel_functor_hip_23(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9,\ + p10, p11, p12, p13, p14, p15, p16, p17, p18, p19, p20)\ + struct make_kernel_name_hip(function_name, 21) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + std::decay_t _p10_;\ + std::decay_t _p11_;\ + std::decay_t _p12_;\ + std::decay_t _p13_;\ + std::decay_t _p14_;\ + std::decay_t _p15_;\ + std::decay_t _p16_;\ + std::decay_t _p17_;\ + std::decay_t _p18_;\ + std::decay_t _p19_;\ + std::decay_t _p20_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ + _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_,\ + _p18_, _p19_, _p20_);\ + }\ + } + #define make_kernel_functor_hip_22(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9,\ + p10, p11, p12, p13, p14, p15, p16, p17, p18, p19)\ + struct make_kernel_name_hip(function_name, 20) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + std::decay_t _p10_;\ + std::decay_t _p11_;\ + std::decay_t _p12_;\ + std::decay_t _p13_;\ + std::decay_t _p14_;\ + std::decay_t _p15_;\ + std::decay_t _p16_;\ + std::decay_t _p17_;\ + std::decay_t _p18_;\ + std::decay_t _p19_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ + _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_,\ + _p18_, _p19_);\ + }\ + } + #define make_kernel_functor_hip_21(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9,\ + p10, p11, p12, p13, p14, p15, p16, p17, p18)\ + struct make_kernel_name_hip(function_name, 19) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + std::decay_t _p10_;\ + std::decay_t _p11_;\ + std::decay_t _p12_;\ + std::decay_t _p13_;\ + std::decay_t _p14_;\ + std::decay_t _p15_;\ + std::decay_t _p16_;\ + std::decay_t _p17_;\ + std::decay_t _p18_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ + _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_,\ + _p18_);\ + }\ + } + #define make_kernel_functor_hip_20(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9,\ + p10, p11, p12, p13, p14, p15, p16, p17)\ + struct make_kernel_name_hip(function_name, 18) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + std::decay_t _p10_;\ + std::decay_t _p11_;\ + std::decay_t _p12_;\ + std::decay_t _p13_;\ + std::decay_t _p14_;\ + std::decay_t _p15_;\ + std::decay_t _p16_;\ + std::decay_t _p17_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ + _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_);\ + }\ + } + #define make_kernel_functor_hip_19(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9,\ + p10, p11, p12, p13, p14, p15, p16)\ + struct make_kernel_name_hip(function_name, 17) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + std::decay_t _p10_;\ + std::decay_t _p11_;\ + std::decay_t _p12_;\ + std::decay_t _p13_;\ + std::decay_t _p14_;\ + std::decay_t _p15_;\ + std::decay_t _p16_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ + _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_);\ + }\ + } + #define make_kernel_functor_hip_18(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9,\ + p10, p11, p12, p13, p14, p15)\ + struct make_kernel_name_hip(function_name, 16) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + std::decay_t _p10_;\ + std::decay_t _p11_;\ + std::decay_t _p12_;\ + std::decay_t _p13_;\ + std::decay_t _p14_;\ + std::decay_t _p15_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ + _p10_, _p11_, _p12_, _p13_, _p14_, _p15_);\ + }\ + } + #define make_kernel_functor_hip_17(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9,\ + p10, p11, p12, p13, p14)\ + struct make_kernel_name_hip(function_name, 15) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + std::decay_t _p10_;\ + std::decay_t _p11_;\ + std::decay_t _p12_;\ + std::decay_t _p13_;\ + std::decay_t _p14_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ + _p10_, _p11_, _p12_, _p13_, _p14_);\ + }\ + } + #define make_kernel_functor_hip_16(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9,\ + p10, p11, p12, p13)\ + struct make_kernel_name_hip(function_name, 14) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + std::decay_t _p10_;\ + std::decay_t _p11_;\ + std::decay_t _p12_;\ + std::decay_t _p13_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ + _p10_, _p11_, _p12_, _p13_);\ + }\ + } + #define make_kernel_functor_hip_15(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9,\ + p10, p11, p12)\ + struct make_kernel_name_hip(function_name, 13) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + std::decay_t _p10_;\ + std::decay_t _p11_;\ + std::decay_t _p12_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ + _p10_, _p11_, _p12_);\ + }\ + } + #define make_kernel_functor_hip_14(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9,\ + p10, p11)\ + struct make_kernel_name_hip(function_name, 12) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + std::decay_t _p10_;\ + std::decay_t _p11_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ + _p10_, _p11_);\ + }\ + } + #define make_kernel_functor_hip_13(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10)\ + struct make_kernel_name_hip(function_name, 11) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + std::decay_t _p10_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ + _p10_);\ + }\ + } + #define make_kernel_functor_hip_12(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9)\ + struct make_kernel_name_hip(function_name, 10) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + std::decay_t _p9_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_,\ + _p9_);\ + }\ + } + #define make_kernel_functor_hip_11(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8)\ + struct make_kernel_name_hip(function_name, 9) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + std::decay_t _p8_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(\ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_);\ + }\ + } + #define make_kernel_functor_hip_10(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6, p7)\ + struct make_kernel_name_hip(function_name, 8) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + std::decay_t _p7_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_);\ + }\ + } + #define make_kernel_functor_hip_9(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5, p6)\ + struct make_kernel_name_hip(function_name, 7) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + std::decay_t _p6_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_);\ + }\ + } + #define make_kernel_functor_hip_8(\ + function_name, kernel_name, p0, p1, p2, p3, p4, p5)\ + struct make_kernel_name_hip(function_name, 6) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + std::decay_t _p5_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_);\ + }\ + } + #define make_kernel_functor_hip_7(\ + function_name, kernel_name, p0, p1, p2, p3, p4)\ + struct make_kernel_name_hip(function_name, 5) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + std::decay_t _p4_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_);\ + }\ + } + #define make_kernel_functor_hip_6(function_name, kernel_name, p0, p1, p2, p3)\ + struct make_kernel_name_hip(function_name, 4) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + std::decay_t _p3_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(_p0_, _p1_, _p2_, _p3_);\ + }\ + } + #define make_kernel_functor_hip_5(function_name, kernel_name, p0, p1, p2)\ + struct make_kernel_name_hip(function_name, 3) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + std::decay_t _p2_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(_p0_, _p1_, _p2_);\ + }\ + } + #define make_kernel_functor_hip_4(function_name, kernel_name, p0, p1)\ + struct make_kernel_name_hip(function_name, 2) {\ + std::decay_t _p0_;\ + std::decay_t _p1_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(_p0_, _p1_);\ + }\ + } + #define fofo(f, n) kernel_prefix_hip ## f ## kernel_suffix_hip ## n + #define make_kernel_functor_hip_3(function_name, kernel_name, p0)\ + struct make_kernel_name_hip(function_name, 1) {\ + std::decay_t _p0_;\ + void operator()(const hc::tiled_index<3>&) const [[hc]]\ + {\ + kernel_name(_p0_);\ + }\ + } + #define make_kernel_functor_hip_2(function_name, kernel_name)\ + struct make_kernel_name_hip(function_name, 0) {\ + void operator()(const hc::tiled_index<3>&) [[hc]]\ + {\ + return kernel_name(hipLaunchParm{});\ + }\ + } + #define make_kernel_functor_hip_1(...) + #define make_kernel_functor_hip_0(...) + #define make_kernel_functor_hip_(...)\ + overload_macro_hip_(make_kernel_functor_hip_, __VA_ARGS__) - template - requires(Domain == {Ts...}) - inline - void grid_launch( - New_grid_launch_tag, - dim3 num_blocks, - dim3 dim_blocks, - int group_mem_bytes, - hipStream_t stream, - Ts&&... args) - { - grid_launch_hip_impl_( - New_grid_launch_tag{}, - std::move(num_blocks), - std::move(dim_blocks), - group_mem_bytes, - std::move(stream), - Forwarder{}, - std::forward(args)...); - } - template - requires(Domain == {Ts...}) - inline - void grid_launch( - Old_grid_launch_tag, - dim3 num_blocks, - dim3 dim_blocks, - int group_mem_bytes, - hipStream_t stream, - Ts&&... args) - { - grid_launch_hip_( - New_grid_launch_tag{}, - std::move(num_blocks), - std::move(dim_blocks), - group_mem_bytes, - std::move(stream), - hipLaunchParm{}, - std::forward(args)...); - } - - template - requires(Domain == {Ts...}) - inline - std::enable_if_t::value> grid_launch_hip_( - dim3 num_blocks, - dim3 dim_blocks, - int group_mem_bytes, - hipStream_t stream, - Ts&&... args) - { - grid_launch_hip_( - is_new_grid_launch_t{}, - std::move(num_blocks), - std::move(dim_blocks), - group_mem_bytes, - std::move(stream), - std::forward(args)...); - } - - // TODO: these are temporary, they need to be completely removed once we - // enable C++14 support and can have proper generic, variadic lambdas. - #define make_kernel_lambda_hip_26(\ - kernel_name,\ - p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,\ - p16, p17, p18, p19, p20, p21, p22, p23, p24)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_,\ - const std::decay_t& _p10_,\ - const std::decay_t& _p11_,\ - const std::decay_t& _p12_,\ - const std::decay_t& _p13_,\ - const std::decay_t& _p14_,\ - const std::decay_t& _p15_,\ - const std::decay_t& _p16_,\ - const std::decay_t& _p17_,\ - const std::decay_t& _p18_,\ - const std::decay_t& _p19_,\ - const std::decay_t& _p20_,\ - const std::decay_t& _p21_,\ - const std::decay_t& _p22_,\ - const std::decay_t& _p23_,\ - const std::decay_t& _p24_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_,\ - _p9_, _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_,\ - _p18_, _p19_, _p20_, _p21_, _p22_, _p23_, _p24_);\ - } - #define make_kernel_lambda_hip_25(\ - kernel_name,\ - p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,\ - p16, p17, p18, p19, p20, p21, p22, p23)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_,\ - const std::decay_t& _p10_,\ - const std::decay_t& _p11_,\ - const std::decay_t& _p12_,\ - const std::decay_t& _p13_,\ - const std::decay_t& _p14_,\ - const std::decay_t& _p15_,\ - const std::decay_t& _p16_,\ - const std::decay_t& _p17_,\ - const std::decay_t& _p18_,\ - const std::decay_t& _p19_,\ - const std::decay_t& _p20_,\ - const std::decay_t& _p21_,\ - const std::decay_t& _p22_,\ - const std::decay_t& _p23_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_,\ - _p9_, _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_,\ - _p18_, _p19_, _p20_, _p21_, _p22_, _p23_);\ - } - #define make_kernel_lambda_hip_24(\ - kernel_name,\ - p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,\ - p16, p17, p18, p19, p20, p21, p22)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_,\ - const std::decay_t& _p10_,\ - const std::decay_t& _p11_,\ - const std::decay_t& _p12_,\ - const std::decay_t& _p13_,\ - const std::decay_t& _p14_,\ - const std::decay_t& _p15_,\ - const std::decay_t& _p16_,\ - const std::decay_t& _p17_,\ - const std::decay_t& _p18_,\ - const std::decay_t& _p19_,\ - const std::decay_t& _p20_,\ - const std::decay_t& _p21_,\ - const std::decay_t& _p22_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_,\ - _p9_, _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_,\ - _p18_, _p19_, _p20_, _p21_, _p22_);\ - } - #define make_kernel_lambda_hip_23(\ - kernel_name,\ - p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,\ - p16, p17, p18, p19, p20, p21)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_,\ - const std::decay_t& _p10_,\ - const std::decay_t& _p11_,\ - const std::decay_t& _p12_,\ - const std::decay_t& _p13_,\ - const std::decay_t& _p14_,\ - const std::decay_t& _p15_,\ - const std::decay_t& _p16_,\ - const std::decay_t& _p17_,\ - const std::decay_t& _p18_,\ - const std::decay_t& _p19_,\ - const std::decay_t& _p20_,\ - const std::decay_t& _p21_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_,\ - _p9_, _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_,\ - _p18_, _p19_, _p20_, _p21_);\ - } - #define make_kernel_lambda_hip_22(\ - kernel_name,\ - p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,\ - p16, p17, p18, p19, p20)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_,\ - const std::decay_t& _p10_,\ - const std::decay_t& _p11_,\ - const std::decay_t& _p12_,\ - const std::decay_t& _p13_,\ - const std::decay_t& _p14_,\ - const std::decay_t& _p15_,\ - const std::decay_t& _p16_,\ - const std::decay_t& _p17_,\ - const std::decay_t& _p18_,\ - const std::decay_t& _p19_,\ - const std::decay_t& _p20_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_,\ - _p9_, _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_,\ - _p18_, _p19_, _p20_);\ - } - #define make_kernel_lambda_hip_21(\ - kernel_name,\ - p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,\ - p16, p17, p18, p19)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_,\ - const std::decay_t& _p10_,\ - const std::decay_t& _p11_,\ - const std::decay_t& _p12_,\ - const std::decay_t& _p13_,\ - const std::decay_t& _p14_,\ - const std::decay_t& _p15_,\ - const std::decay_t& _p16_,\ - const std::decay_t& _p17_,\ - const std::decay_t& _p18_,\ - const std::decay_t& _p19_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_,\ - _p9_, _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_,\ - _p18_, _p19_);\ - } - #define make_kernel_lambda_hip_20(\ - kernel_name,\ - p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,\ - p16, p17, p18)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_,\ - const std::decay_t& _p10_,\ - const std::decay_t& _p11_,\ - const std::decay_t& _p12_,\ - const std::decay_t& _p13_,\ - const std::decay_t& _p14_,\ - const std::decay_t& _p15_,\ - const std::decay_t& _p16_,\ - const std::decay_t& _p17_,\ - const std::decay_t& _p18_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_,\ - _p9_, _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_,\ - _p18_);\ - } - #define make_kernel_lambda_hip_19(\ - kernel_name,\ - p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,\ - p16, p17)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_,\ - const std::decay_t& _p10_,\ - const std::decay_t& _p11_,\ - const std::decay_t& _p12_,\ - const std::decay_t& _p13_,\ - const std::decay_t& _p14_,\ - const std::decay_t& _p15_,\ - const std::decay_t& _p16_,\ - const std::decay_t& _p17_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_,\ - _p9_, _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_);\ - } - #define make_kernel_lambda_hip_18(\ - kernel_name,\ - p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15,\ - p16)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_,\ - const std::decay_t& _p10_,\ - const std::decay_t& _p11_,\ - const std::decay_t& _p12_,\ - const std::decay_t& _p13_,\ - const std::decay_t& _p14_,\ - const std::decay_t& _p15_,\ - const std::decay_t& _p16_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_,\ - _p9_, _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_);\ - } - #define make_kernel_lambda_hip_17(\ - kernel_name,\ - p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_,\ - const std::decay_t& _p10_,\ - const std::decay_t& _p11_,\ - const std::decay_t& _p12_,\ - const std::decay_t& _p13_,\ - const std::decay_t& _p14_,\ - const std::decay_t& _p15_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_,\ - _p9_, _p10_, _p11_, _p12_, _p13_, _p14_, _p15_);\ - } - #define make_kernel_lambda_hip_16(\ - kernel_name,\ - p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_,\ - const std::decay_t& _p10_,\ - const std::decay_t& _p11_,\ - const std::decay_t& _p12_,\ - const std::decay_t& _p13_,\ - const std::decay_t& _p14_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_,\ - _p9_, _p10_, _p11_, _p12_, _p13_, _p14_);\ - } - #define make_kernel_lambda_hip_15(\ - kernel_name,\ - p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_,\ - const std::decay_t& _p10_,\ - const std::decay_t& _p11_,\ - const std::decay_t& _p12_,\ - const std::decay_t& _p13_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_,\ - _p9_, _p10_, _p11_, _p12_, _p13_);\ - } - #define make_kernel_lambda_hip_14(\ - kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_,\ - const std::decay_t& _p10_,\ - const std::decay_t& _p11_,\ - const std::decay_t& _p12_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_,\ - _p9_, _p10_, _p11_, _p12_);\ - } - #define make_kernel_lambda_hip_13(\ - kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_,\ - const std::decay_t& _p10_,\ - const std::decay_t& _p11_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_,\ - _p9_, _p10_, _p11_);\ - } - #define make_kernel_lambda_hip_12(\ - kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_,\ - const std::decay_t& _p10_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_,\ - _p10_);\ - } - #define make_kernel_lambda_hip_11(\ - kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_,\ - const std::decay_t& _p9_) [[hc]] {\ - kernel_name(\ - _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_);\ - } - #define make_kernel_lambda_hip_10(\ - kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_,\ - const std::decay_t& _p8_) [[hc]] {\ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_);\ - } - #define make_kernel_lambda_hip_9(\ - kernel_name, p0, p1, p2, p3, p4, p5, p6, p7)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_,\ - const std::decay_t& _p7_) [[hc]] {\ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_);\ - } - #define make_kernel_lambda_hip_8(kernel_name, p0, p1, p2, p3, p4, p5, p6)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_,\ - const std::decay_t& _p6_) [[hc]] {\ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_);\ - } - #define make_kernel_lambda_hip_7(kernel_name, p0, p1, p2, p3, p4, p5)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_,\ - const std::decay_t& _p5_) [[hc]] {\ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_);\ - } - #define make_kernel_lambda_hip_6(kernel_name, p0, p1, p2, p3, p4)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_,\ - const std::decay_t& _p4_) [[hc]] {\ - kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_);\ - } - #define make_kernel_lambda_hip_5(kernel_name, p0, p1, p2, p3)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_,\ - const std::decay_t& _p3_) [[hc]] {\ - kernel_name(_p0_, _p1_, _p2_, _p3_);\ - } - #define make_kernel_lambda_hip_4(kernel_name, p0, p1, p2)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_,\ - const std::decay_t& _p2_) [[hc]] {\ - kernel_name(_p0_, _p1_, _p2_);\ - } - #define make_kernel_lambda_hip_3(kernel_name, p0, p1)\ - [](const std::decay_t& _p0_,\ - const std::decay_t& _p1_) [[hc]] {\ - kernel_name(_p0_, _p1_);\ - } - #define make_kernel_lambda_hip_2(kernel_name, p0)\ - [](const std::decay_t& _p0_) [[hc]] {\ - kernel_name(_p0_);\ - } - #define make_kernel_lambda_hip_1(kernel_name)\ - []() [[hc]] { return kernel_name(hipLaunchParm{}); } - - #define make_kernel_lambda_hip_(...)\ - overload_macro_hip_(make_kernel_lambda_hip_, __VA_ARGS__) + #define hipLaunchNamedKernelGGL(\ + function_name,\ + kernel_name,\ + num_blocks,\ + dim_blocks,\ + group_mem_bytes,\ + stream,\ + ...)\ + do {\ + make_kernel_functor_hip_(function_name, kernel_name, __VA_ARGS__)\ + hip_kernel_functor_impl_{__VA_ARGS__};\ + hip_impl::grid_launch_hip_(\ + num_blocks,\ + dim_blocks,\ + group_mem_bytes,\ + stream,\ + #kernel_name,\ + hip_kernel_functor_impl_);\ + } while(0) #define hipLaunchKernelGGL(\ - kernel_name,\ - num_blocks,\ - dim_blocks,\ - group_mem_bytes,\ - stream,\ - ...)\ - do {\ - hip_impl::grid_launch_hip_(\ - num_blocks,\ - dim_blocks,\ - group_mem_bytes,\ - stream,\ - #kernel_name,\ - make_kernel_lambda_hip_(kernel_name, __VA_ARGS__),\ - ##__VA_ARGS__);\ - } while(0) + kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, ...)\ + do {\ + hipLaunchNamedKernelGGL(\ + unnamed,\ + kernel_name,\ + num_blocks,\ + dim_blocks,\ + group_mem_bytes,\ + stream,\ + ##__VA_ARGS__);\ + } while (0) #define hipLaunchKernel(\ - kernel_name,\ - num_blocks,\ - dim_blocks,\ - group_mem_bytes,\ - stream,\ - ...)\ - do {\ - hipLaunchKernelGGL(\ - kernel_name,\ - num_blocks,\ - dim_blocks,\ - group_mem_bytes,\ - stream,\ - hipLaunchParm{},\ - ##__VA_ARGS__);\ - } while(0) + kernel_name, num_blocks, dim_blocks, group_mem_bytes, stream, ...)\ + do {\ + hipLaunchKernelGGL(\ + kernel_name,\ + num_blocks,\ + dim_blocks,\ + group_mem_bytes,\ + stream,\ + hipLaunchParm{},\ + ##__VA_ARGS__);\ + } while(0) } #endif //GENERIC_GRID_LAUNCH diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index 4d8876d8f4..129020d9cd 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -149,8 +149,15 @@ extern int HIP_TRACE_API; #endif /* Device feature flags */ -//TODO-HCC this is currently ignored by HCC target of HIP -#define __launch_bounds__(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) +#define launch_bounds_impl0(requiredMaxThreadsPerBlock)\ + __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock))) +#define launch_bounds_impl1(\ + requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor)\ + __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock),\ + amdgpu_waves_per_eu(minBlocksPerMultiprocessor))) +#define select_impl_(_1, _2, impl_, ...) impl_ +#define __launch_bounds__(...) select_impl_(\ + __VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__) // Detect if we are compiling C++ mode or C mode #if defined(__cplusplus) diff --git a/include/hip/hcc_detail/host_defines.h b/include/hip/hcc_detail/host_defines.h index 5864cfa0e7..140cbb0678 100644 --- a/include/hip/hcc_detail/host_defines.h +++ b/include/hip/hcc_detail/host_defines.h @@ -48,7 +48,7 @@ THE SOFTWARE. #define __global__ __attribute__((hc_grid_launch)) __attribute__((used)) #else //#warning "GGL global define reached" -#define __global__ __attribute__((hc, weak)) +#define __global__ __attribute__((annotate("hip__global__"), hc, used)) #endif //GENERIC_GRID_LAUNCH #define __noinline__ __attribute__((noinline))