From 38feccc1d2fca4522578039fea7ff3e4201f5418 Mon Sep 17 00:00:00 2001 From: "Sun, Peng" Date: Tue, 21 Mar 2017 12:26:57 -0500 Subject: [PATCH] Update GGL implementation to extended overload set for make_lambda_wrapper Change-Id: I949f113671ddf155db8689e8a7f23d415839a7b5 [ROCm/hip commit: ec04521617e7d0ee17c69f019edd80f4c737dbd7] --- .../include/hip/hcc_detail/grid_launch_v2.hpp | 332 ++++++++++++++++-- .../hip/include/hip/hcc_detail/helpers.hpp | 9 +- .../hip/include/hip/hcc_detail/hip_runtime.h | 2 +- 3 files changed, 310 insertions(+), 33 deletions(-) diff --git a/projects/hip/include/hip/hcc_detail/grid_launch_v2.hpp b/projects/hip/include/hip/hcc_detail/grid_launch_v2.hpp index 9ce0722496..b1134ee9cc 100644 --- a/projects/hip/include/hip/hcc_detail/grid_launch_v2.hpp +++ b/projects/hip/include/hip/hcc_detail/grid_launch_v2.hpp @@ -235,7 +235,248 @@ namespace glo_tests }; } - #define make_lambda_wrapper9(kernel_name, p0, p1, p2, p3, p4, p5, p6, p7) \ + // TODO: these are temporary, they need to be uglified and them completely + // removed once we enable C++14 support and can have proper generic, + // variadic lambdas. + #define make_lambda_wrapper21( \ + kernel_name, \ + p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, \ + p16, p17, p18, p19) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_, \ + std::decay_t _p6_, \ + std::decay_t _p7_, \ + std::decay_t _p8_, \ + std::decay_t _p9_, \ + std::decay_t _p10_, \ + std::decay_t _p11_, \ + std::decay_t _p12_, \ + std::decay_t _p13_, \ + std::decay_t _p14_, \ + std::decay_t _p15_, \ + std::decay_t _p16_, \ + std::decay_t _p17_, \ + std::decay_t _p18_, \ + std::decay_t _p19_) [[hc]] { \ + kernel_name( \ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, \ + _p9_, _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, \ + _p18_, _p19_); \ + } + #define make_lambda_wrapper20( \ + kernel_name, \ + p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, \ + p16, p17, p18) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_, \ + std::decay_t _p6_, \ + std::decay_t _p7_, \ + std::decay_t _p8_, \ + std::decay_t _p9_, \ + std::decay_t _p10_, \ + std::decay_t _p11_, \ + std::decay_t _p12_, \ + std::decay_t _p13_, \ + std::decay_t _p14_, \ + std::decay_t _p15_, \ + std::decay_t _p16_, \ + std::decay_t _p17_, \ + std::decay_t _p18_) [[hc]] { \ + kernel_name( \ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, \ + _p9_, _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_, \ + _p18_); \ + } + #define make_lambda_wrapper19( \ + kernel_name, \ + p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, \ + p16, p17) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_, \ + std::decay_t _p6_, \ + std::decay_t _p7_, \ + std::decay_t _p8_, \ + std::decay_t _p9_, \ + std::decay_t _p10_, \ + std::decay_t _p11_, \ + std::decay_t _p12_, \ + std::decay_t _p13_, \ + std::decay_t _p14_, \ + std::decay_t _p15_, \ + std::decay_t _p16_, \ + std::decay_t _p17_) [[hc]] { \ + kernel_name( \ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, \ + _p9_, _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_, _p17_); \ + } + #define make_lambda_wrapper18( \ + kernel_name, \ + p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, \ + p16) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_, \ + std::decay_t _p6_, \ + std::decay_t _p7_, \ + std::decay_t _p8_, \ + std::decay_t _p9_, \ + std::decay_t _p10_, \ + std::decay_t _p11_, \ + std::decay_t _p12_, \ + std::decay_t _p13_, \ + std::decay_t _p14_, \ + std::decay_t _p15_, \ + std::decay_t _p16_) [[hc]] { \ + kernel_name( \ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, \ + _p9_, _p10_, _p11_, _p12_, _p13_, _p14_, _p15_, _p16_); \ + } + #define make_lambda_wrapper17( \ + kernel_name, \ + p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_, \ + std::decay_t _p6_, \ + std::decay_t _p7_, \ + std::decay_t _p8_, \ + std::decay_t _p9_, \ + std::decay_t _p10_, \ + std::decay_t _p11_, \ + std::decay_t _p12_, \ + std::decay_t _p13_, \ + std::decay_t _p14_, \ + std::decay_t _p15_) [[hc]] { \ + kernel_name( \ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, \ + _p9_, _p10_, _p11_, _p12_, _p13_, _p14_, _p15_); \ + } + #define make_lambda_wrapper16( \ + kernel_name, \ + p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_, \ + std::decay_t _p6_, \ + std::decay_t _p7_, \ + std::decay_t _p8_, \ + std::decay_t _p9_, \ + std::decay_t _p10_, \ + std::decay_t _p11_, \ + std::decay_t _p12_, \ + std::decay_t _p13_, \ + std::decay_t _p14_) [[hc]] { \ + kernel_name( \ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, \ + _p9_, _p10_, _p11_, _p12_, _p13_, _p14_); \ + } + #define make_lambda_wrapper15( \ + kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13)\ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_, \ + std::decay_t _p6_, \ + std::decay_t _p7_, \ + std::decay_t _p8_, \ + std::decay_t _p9_, \ + std::decay_t _p10_, \ + std::decay_t _p11_, \ + std::decay_t _p12_, \ + std::decay_t _p13_) [[hc]] { \ + kernel_name( \ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, \ + _p9_, _p10_, _p11_, _p12_, _p13_); \ + } + #define make_lambda_wrapper14( \ + kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_, \ + std::decay_t _p6_, \ + std::decay_t _p7_, \ + std::decay_t _p8_, \ + std::decay_t _p9_, \ + std::decay_t _p10_, \ + std::decay_t _p11_, \ + std::decay_t _p12_) [[hc]] { \ + kernel_name( \ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, \ + _p9_, _p10_, _p11_, _p12_); \ + } + #define make_lambda_wrapper13( \ + kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_, \ + std::decay_t _p6_, \ + std::decay_t _p7_, \ + std::decay_t _p8_, \ + std::decay_t _p9_, \ + std::decay_t _p10_, \ + std::decay_t _p11_) [[hc]] { \ + kernel_name( \ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, \ + _p9_, _p10_, _p11_); \ + } + #define make_lambda_wrapper12( \ + kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_, \ + std::decay_t _p6_, \ + std::decay_t _p7_, \ + std::decay_t _p8_, \ + std::decay_t _p9_, \ + std::decay_t _p10_) [[hc]] { \ + kernel_name( \ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_, \ + _p10_); \ + } + #define make_lambda_wrapper11( \ + kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9) \ []( \ std::decay_t _p0_, \ std::decay_t _p1_, \ @@ -244,57 +485,87 @@ namespace glo_tests std::decay_t _p4_, \ std::decay_t _p5_, \ std::decay_t _p6_, \ - std::decay_t _p7_) [[hc]] { \ + std::decay_t _p7_, \ + std::decay_t _p8_, \ + std::decay_t _p9_) [[hc]] { \ + kernel_name( \ + _p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_, _p9_); \ + } + #define make_lambda_wrapper10( \ + kernel_name, p0, p1, p2, p3, p4, p5, p6, p7, p8) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_, \ + std::decay_t _p6_, \ + std::decay_t _p7_, \ + std::decay_t _p8_) [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_, _p8_); \ + } + #define make_lambda_wrapper9(kernel_name, p0, p1, p2, p3, p4, p5, p6, p7) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_, \ + std::decay_t _p6_, \ + std::decay_t _p7_) [[hc]] { \ kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_); \ } #define make_lambda_wrapper8(kernel_name, p0, p1, p2, p3, p4, p5, p6) \ []( \ - std::decay_t _p0_, \ - std::decay_t _p1_, \ - std::decay_t _p2_, \ - std::decay_t _p3_, \ - std::decay_t _p4_, \ - std::decay_t _p5_, \ - std::decay_t _p6_) [[hc]] { \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_, \ + std::decay_t _p6_) [[hc]] { \ kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_); \ } #define make_lambda_wrapper7(kernel_name, p0, p1, p2, p3, p4, p5) \ []( \ - std::decay_t _p0_, \ - std::decay_t _p1_, \ - std::decay_t _p2_, \ - std::decay_t _p3_, \ - std::decay_t _p4_, \ - std::decay_t _p5_) [[hc]] { \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_) [[hc]] { \ kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_); \ } #define make_lambda_wrapper6(kernel_name, p0, p1, p2, p3, p4) \ []( \ - std::decay_t _p0_, \ - std::decay_t _p1_, \ - std::decay_t _p2_, \ - std::decay_t _p3_, \ - std::decay_t _p4_) [[hc]] { \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_) [[hc]] { \ kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_); \ } #define make_lambda_wrapper5(kernel_name, p0, p1, p2, p3) \ - [](std::decay_t _p0_, \ - std::decay_t _p1_, \ - std::decay_t _p2_, \ - std::decay_t _p3_) [[hc]] { \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_) [[hc]] { \ kernel_name(_p0_, _p1_, _p2_, _p3_); \ } #define make_lambda_wrapper4(kernel_name, p0, p1, p2) \ []( \ - std::decay_t _p0_, \ - std::decay_t _p1_, \ - std::decay_t _p2_) [[hc]] { \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_) [[hc]] { \ kernel_name(_p0_, _p1_, _p2_); \ } #define make_lambda_wrapper3(kernel_name, p0, p1) \ []( \ - std::decay_t _p0_, \ - std::decay_t _p1_) [[hc]] { \ + std::decay_t _p0_, \ + std::decay_t _p1_) [[hc]] { \ kernel_name(_p0_, _p1_); \ } #define make_lambda_wrapper2(kernel_name, p0) \ @@ -323,7 +594,8 @@ namespace glo_tests make_lambda_wrapper(kernel_name, __VA_ARGS__), \ ##__VA_ARGS__); \ } - #define hipLaunchKernel( \ + + #define hipLaunchKernelV2( \ kernel_name, \ num_blocks, \ dim_blocks, \ diff --git a/projects/hip/include/hip/hcc_detail/helpers.hpp b/projects/hip/include/hip/hcc_detail/helpers.hpp index ea9217977b..301d740066 100644 --- a/projects/hip/include/hip/hcc_detail/helpers.hpp +++ b/projects/hip/include/hip/hcc_detail/helpers.hpp @@ -128,9 +128,14 @@ namespace std namespace glo_tests // Only for documentation, macros ignore namespaces. { - #define count_macro_args_impl(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _n, ...) _n + #define count_macro_args_impl( \ + _0, _1, _2, _3, _4, _5, _6, _7, \ + _8, _9, _10, _11, _12, _13, _14, _15, \ + _16, _17, _18, _19, _20, _21, _n, ...) _n #define count_macro_args(...) \ - count_macro_args_impl(,##__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + count_macro_args_impl( \ + , ##__VA_ARGS__, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9,\ + 8, 7, 6, 5, 4, 3, 2, 1, 0) #define overloaded_macro_expand(macro, arg_cnt) macro##arg_cnt #define overload_macro_impl(macro, arg_cnt) \ diff --git a/projects/hip/include/hip/hcc_detail/hip_runtime.h b/projects/hip/include/hip/hcc_detail/hip_runtime.h index 870dcd0b34..590cc33bd1 100644 --- a/projects/hip/include/hip/hcc_detail/hip_runtime.h +++ b/projects/hip/include/hip/hcc_detail/hip_runtime.h @@ -416,7 +416,7 @@ static inline __device__ void* memset(void* ptr, int val, size_t size) #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) -#define HIP_KERNEL_NAME(...) __VA_ARGS__ +#define HIP_KERNEL_NAME(...) (__VA_ARGS__) #define HIP_SYMBOL(X) #X #if defined __HCC_CPP__