From cb31fbab620e401ae4876e5d686180679fed410f Mon Sep 17 00:00:00 2001 From: "Sun, Peng" Date: Mon, 20 Mar 2017 16:34:24 -0500 Subject: [PATCH] merge Alex' GGL fix for non-specialized kernel function launch Change-Id: Idbf7ca669c38ee5c0f654bcabdd1b498abb29f69 [ROCm/clr commit: e21d4f03f1c094699abc0b21cbbd6bd7bd2c5718] --- .../include/hip/hcc_detail/concepts.hpp | 22 ++ .../include/hip/hcc_detail/grid_launch_v2.hpp | 203 ++++++++++++++---- .../hipamd/include/hip/hcc_detail/helpers.hpp | 46 +++- 3 files changed, 227 insertions(+), 44 deletions(-) diff --git a/projects/clr/hipamd/include/hip/hcc_detail/concepts.hpp b/projects/clr/hipamd/include/hip/hcc_detail/concepts.hpp index 373ec15411..c746c1cfe6 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/concepts.hpp +++ b/projects/clr/hipamd/include/hip/hcc_detail/concepts.hpp @@ -1,3 +1,25 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + // // Created by alexv on 25/10/16. // diff --git a/projects/clr/hipamd/include/hip/hcc_detail/grid_launch_v2.hpp b/projects/clr/hipamd/include/hip/hcc_detail/grid_launch_v2.hpp index 9f7492b4d5..02f214bd67 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/grid_launch_v2.hpp +++ b/projects/clr/hipamd/include/hip/hcc_detail/grid_launch_v2.hpp @@ -1,3 +1,25 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + // // Created by alexv on 25/10/16. // @@ -9,14 +31,10 @@ #include "hc.hpp" #include "hcc_acc.h" -//#include -//#include - #include #include #include - namespace glo_tests { namespace @@ -112,6 +130,26 @@ namespace glo_tests std::forward(args)...); } + namespace + { + template + constexpr + inline + T&& forward(std::remove_reference_t& x) [[hc]] + { + return static_cast(x); + } + + template + struct Forwarder { + template + void operator()(Ts&&...args) const [[hc]] + { + k(forward(args)...); + } + }; + } + template requires(Domain == {Ts...}) static @@ -124,14 +162,13 @@ namespace glo_tests hipStream_t stream, Ts&&... args) { - grid_launch( + grid_launch_impl( + New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks), group_mem_bytes, std::move(stream), - [](decltype(std::decay_t(args))... f_args) [[hc]] { - k(f_args...); - }, + Forwarder{}, std::forward(args)...); } @@ -177,26 +214,116 @@ namespace glo_tests std::forward(args)...); } - template struct Wrapper; + namespace + { + template struct Wrapper; - template - struct Wrapper::value>> { - template - requires(Domain == {Ts...}) - void operator()(Ts&&... args) const - { - grid_launch_impl( - is_new_grid_launch_t{}, - std::forward(args)...); + template + struct Wrapper::value>> { + template + requires(Domain == {Ts...}) + void operator()(Ts&&... args) const + { + grid_launch(std::forward(args)...); + } + }; + + template + struct Wrapper::value>> { + template + void operator()(Ts&&...) const {} + }; + } + + #define make_lambda_wrapper9(kernel_name, p0, p1, p2, p3, p4, p5, p6, p7) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_, \ + std::decay_t _p6_, \ + std::decay_t _p7_) [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_, _p7_); \ } - }; + #define make_lambda_wrapper8(kernel_name, p0, p1, p2, p3, p4, p5, p6) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_, \ + std::decay_t _p6_) [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_, _p6_); \ + } + #define make_lambda_wrapper7(kernel_name, p0, p1, p2, p3, p4, p5) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_, \ + std::decay_t _p5_) [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_, _p5_); \ + } + #define make_lambda_wrapper6(kernel_name, p0, p1, p2, p3, p4) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_, \ + std::decay_t _p4_) [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_, _p4_); \ + } + #define make_lambda_wrapper5(kernel_name, p0, p1, p2, p3) \ + [](std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_, \ + std::decay_t _p3_) [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_, _p3_); \ + } + #define make_lambda_wrapper4(kernel_name, p0, p1, p2) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_, \ + std::decay_t _p2_) [[hc]] { \ + kernel_name(_p0_, _p1_, _p2_); \ + } + #define make_lambda_wrapper3(kernel_name, p0, p1) \ + []( \ + std::decay_t _p0_, \ + std::decay_t _p1_) [[hc]] { \ + kernel_name(_p0_, _p1_); \ + } + #define make_lambda_wrapper2(kernel_name, p0) \ + [](std::decay_t _p0_) [[hc]] { \ + kernel_name(_p0_); \ + } + #define make_lambda_wrapper1(kernel_name) \ + []() [[hc]] { kernel_name(lp); } - template - struct Wrapper::value>> { - template - void operator()(Ts&&...) const {} - }; -//#warning "GGL hipLaunchKernel defined" + #define make_lambda_wrapper(...) \ + overload_macro(make_lambda_wrapper, __VA_ARGS__) + + #define hipLaunchKernelV3( \ + kernel_name, \ + num_blocks, \ + dim_blocks, \ + group_mem_bytes, \ + stream, \ + ...) \ + { \ + glo_tests::grid_launch( \ + num_blocks, \ + dim_blocks, \ + group_mem_bytes, \ + stream, \ + make_lambda_wrapper(kernel_name, __VA_ARGS__), \ + ##__VA_ARGS__); \ + } +#warning "GGL hipLaunchKernel Reached" #define hipLaunchKernel( \ kernel_name, \ num_blocks, \ @@ -205,23 +332,13 @@ namespace glo_tests stream, \ ...) \ { \ - using F = decltype(kernel_name); \ - if (!std::is_function::value) { \ - glo_tests::Wrapper{}( \ - num_blocks, \ - dim_blocks, \ - group_mem_bytes, \ - stream, \ - kernel_name, \ - ##__VA_ARGS__); \ - } \ - else { \ - glo_tests::grid_launch( \ - num_blocks, \ - dim_blocks, \ - group_mem_bytes, \ - stream, \ - ##__VA_ARGS__); \ - } \ + hipLaunchKernelV3( \ + kernel_name, \ + num_blocks, \ + dim_blocks, \ + group_mem_bytes, \ + stream, \ + hipLaunchParm{}, \ + ##__VA_ARGS__); \ } } diff --git a/projects/clr/hipamd/include/hip/hcc_detail/helpers.hpp b/projects/clr/hipamd/include/hip/hcc_detail/helpers.hpp index ca3864911f..ea9217977b 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/helpers.hpp +++ b/projects/clr/hipamd/include/hip/hcc_detail/helpers.hpp @@ -1,3 +1,25 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + // // Created by alexv on 08/11/16. // @@ -22,7 +44,8 @@ namespace std using decay_t = typename decay::type; template using result_of_t = typename result_of::type; - + template + using remove_reference_t = typename remove_reference::type; template< FunctionalProcedure F, unsigned int n = 0u, @@ -92,5 +115,26 @@ namespace std template struct is_callable : is_callable_impl {}; #endif + template + struct disjunction : false_type {}; + template + struct disjunction : B1 {}; + template + struct disjunction + : conditional_t> + {}; #endif } + +namespace glo_tests // Only for documentation, macros ignore namespaces. +{ + #define count_macro_args_impl(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _n, ...) _n + #define count_macro_args(...) \ + count_macro_args_impl(,##__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + + #define overloaded_macro_expand(macro, arg_cnt) macro##arg_cnt + #define overload_macro_impl(macro, arg_cnt) \ + overloaded_macro_expand(macro, arg_cnt) + #define overload_macro(macro, ...) \ + overload_macro_impl(macro, count_macro_args(__VA_ARGS__)) (__VA_ARGS__) +}