diff --git a/projects/hip/CMakeLists.txt b/projects/hip/CMakeLists.txt index dcecaa8d2f..9d6bf26f56 100644 --- a/projects/hip/CMakeLists.txt +++ b/projects/hip/CMakeLists.txt @@ -179,6 +179,7 @@ if(HIP_PLATFORM STREQUAL "hcc") src/hip_peer.cpp src/hip_stream.cpp src/hip_module.cpp + src/grid_launch.cpp src/env.cpp) set(SOURCE_FILES_DEVICE diff --git a/projects/hip/include/hip/hcc_detail/concepts.hpp b/projects/hip/include/hip/hcc_detail/concepts.hpp index 6824ad9bdf..5c50f5d577 100644 --- a/projects/hip/include/hip/hcc_detail/concepts.hpp +++ b/projects/hip/include/hip/hcc_detail/concepts.hpp @@ -22,7 +22,7 @@ THE SOFTWARE. #pragma once -namespace glo_tests // Documentation only. +namespace hip_impl // Documentation only. { #define requires(...) diff --git a/projects/hip/include/hip/hcc_detail/grid_launch_v2.hpp b/projects/hip/include/hip/hcc_detail/grid_launch_GGL.hpp similarity index 86% rename from projects/hip/include/hip/hcc_detail/grid_launch_v2.hpp rename to projects/hip/include/hip/hcc_detail/grid_launch_GGL.hpp index 8b1eded2f3..e2965184f6 100644 --- a/projects/hip/include/hip/hcc_detail/grid_launch_v2.hpp +++ b/projects/hip/include/hip/hcc_detail/grid_launch_GGL.hpp @@ -27,7 +27,7 @@ THE SOFTWARE. #include "hc.hpp" #include "hip_hcc.h" - +#include "hip_runtime.h" #include #include #include @@ -38,13 +38,39 @@ namespace hip_impl { struct New_grid_launch_tag {}; struct Old_grid_launch_tag {}; - } - template - using is_new_grid_launch_t = typename std::conditional< - std::is_callable{}, - New_grid_launch_tag, - Old_grid_launch_tag>::type; + template + class RAII_guard { + D dtor_; + public: + RAII_guard() = default; + + RAII_guard(const C& ctor, D dtor) : dtor_{std::move(dtor)} + { + ctor(); + } + + RAII_guard(const RAII_guard&) = default; + RAII_guard(RAII_guard&&) = default; + + RAII_guard& operator=(const RAII_guard&) = default; + RAII_guard& operator=(RAII_guard&&) = default; + + ~RAII_guard() { dtor_(); } + }; + + template + RAII_guard make_RAII_guard(const C& ctor, D dtor) + { + return RAII_guard{ctor, std::move(dtor)}; + } + + template + using is_new_grid_launch_t = typename std::conditional< + std::is_callable{}, + New_grid_launch_tag, + Old_grid_launch_tag>::type; + } // TODO: - dispatch rank should be derived from the domain dimensions passed // in, and not always assumed to be 3; @@ -52,12 +78,12 @@ namespace hip_impl template requires(Domain == {Ts...}) inline - void grid_launch_impl( + void grid_launch_hip_impl_( New_grid_launch_tag, dim3 num_blocks, dim3 dim_blocks, int group_mem_bytes, - hipStream_t stream, + const hc::accelerator_view& acc_v, K k, Ts&&... args) { @@ -69,21 +95,69 @@ namespace hip_impl dim_blocks.y, dim_blocks.x, group_mem_bytes); - hc::accelerator_view* av = nullptr; - if (hipHccGetAcceleratorView(stream, &av) != HIP_SUCCESS) { - throw std::runtime_error{"Failed to retrieve accelerator_view!"}; + try { + hc::parallel_for_each( + acc_v, + d, + [=](const hc::tiled_index<3>& idx) [[hc]] { + k(args...); + }); } + catch (std::exception& ex) { + std::cerr << "Failed in " << __FUNCTION__ << ", with exception: " + << ex.what() << std::endl; + throw; + } + } - hc::parallel_for_each(*av, d, [=](const hc::tiled_index<3>& idx) [[hc]] { - k(args...); - }); + // TODO: these are workarounds, they should be removed. + + hc::accelerator_view lock_stream_hip_(hipStream_t&, void*&); + void unlock_stream_hip_( + hipStream_t, void*, const char*, hc::accelerator_view*); + + template + requires(Domain == {Ts...}) + inline + void grid_launch_hip_impl_( + New_grid_launch_tag, + dim3 num_blocks, + dim3 dim_blocks, + int group_mem_bytes, + hipStream_t stream, + const char* kernel_name, + K k, + Ts&&... args) + { + void* lck_stream = nullptr; + auto acc_v = lock_stream_hip_(stream, lck_stream); + auto stream_guard = make_RAII_guard( + [](){ /* perhaps use a slimmed down ihipPrintKernelLaunch here */ }, + std::bind( + unlock_stream_hip_, stream, lck_stream, kernel_name, &acc_v)); + + try { + grid_launch_hip_impl_( + New_grid_launch_tag{}, + std::move(num_blocks), + std::move(dim_blocks), + group_mem_bytes, + acc_v, + std::move(k), + std::forward(args)...); + } + catch (std::exception& ex) { + std::cerr << "Failed in " << __FUNCTION__ << ", with exception: " + << ex.what() << std::endl; + throw; + } } template requires(Domain == {hipLaunchParm, Ts...}) inline - void grid_launch_impl( + void grid_launch_hip_impl_( Old_grid_launch_tag, dim3 num_blocks, dim3 dim_blocks, @@ -92,7 +166,7 @@ namespace hip_impl K k, Ts&&... args) { - grid_launch_impl( + grid_launch_hip_impl_( New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks), @@ -103,10 +177,58 @@ namespace hip_impl std::forward(args)...); } + template + requires(Domain == {hipLaunchParm, Ts...}) + inline + void grid_launch_hip_impl_( + Old_grid_launch_tag, + dim3 num_blocks, + dim3 dim_blocks, + int group_mem_bytes, + hipStream_t stream, + const char* kernel_name, + K k, + Ts&&... args) + { + grid_launch_hip_impl_( + New_grid_launch_tag{}, + std::move(num_blocks), + std::move(dim_blocks), + group_mem_bytes, + std::move(stream), + kernel_name, + std::move(k), + hipLaunchParm{}, + std::forward(args)...); + } + template requires(Domain == {Ts...}) inline - std::enable_if_t::value> grid_launch( + std::enable_if_t::value> grid_launch_hip_( + dim3 num_blocks, + dim3 dim_blocks, + int group_mem_bytes, + hipStream_t stream, + const char* kernel_name, + K k, + Ts&& ... args) + { + grid_launch_hip_impl_( + is_new_grid_launch_t{}, + std::move(num_blocks), + std::move(dim_blocks), + group_mem_bytes, + std::move(stream), + kernel_name, + std::move(k), + std::forward(args)...); + } + + template + requires(Domain == {Ts...}) + inline + std::enable_if_t::value> grid_launch_hip_( dim3 num_blocks, dim3 dim_blocks, int group_mem_bytes, @@ -114,7 +236,7 @@ namespace hip_impl K k, Ts&& ... args) { - grid_launch_impl( + grid_launch_hip_impl_( is_new_grid_launch_t{}, std::move(num_blocks), std::move(dim_blocks), @@ -129,7 +251,7 @@ namespace hip_impl template constexpr inline - T&& forward(std::remove_reference_t& x) [[hc]] + T&& forward_(std::remove_reference_t& x) [[hc]] { return static_cast(x); } @@ -139,7 +261,7 @@ namespace hip_impl template void operator()(Ts&&...args) const [[hc]] { - k(forward(args)...); + k(forward_(args)...); } }; } @@ -155,7 +277,7 @@ namespace hip_impl hipStream_t stream, Ts&&... args) { - grid_launch_impl( + grid_launch_hip_impl_( New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks), @@ -176,7 +298,7 @@ namespace hip_impl hipStream_t stream, Ts&&... args) { - grid_launch( + grid_launch_hip_( New_grid_launch_tag{}, std::move(num_blocks), std::move(dim_blocks), @@ -189,14 +311,14 @@ namespace hip_impl template requires(Domain == {Ts...}) inline - std::enable_if_t::value> grid_launch( + std::enable_if_t::value> grid_launch_hip_( dim3 num_blocks, dim3 dim_blocks, int group_mem_bytes, hipStream_t stream, Ts&&... args) { - grid_launch( + grid_launch_hip_( is_new_grid_launch_t{}, std::move(num_blocks), std::move(dim_blocks), @@ -685,7 +807,7 @@ namespace hip_impl kernel_name(_p0_);\ } #define make_kernel_lambda_hip_1(kernel_name)\ - []() [[hc]] { kernel_name(lp); } + []() [[hc]] { return kernel_name(hipLaunchParm{}); } #define make_kernel_lambda_hip_(...)\ overload_macro_hip_(make_kernel_lambda_hip_, __VA_ARGS__) @@ -697,15 +819,16 @@ namespace hip_impl group_mem_bytes,\ stream,\ ...)\ - {\ - hip_impl::grid_launch(\ + do {\ + hip_impl::grid_launch_hip_(\ num_blocks,\ dim_blocks,\ group_mem_bytes,\ stream,\ + #kernel_name,\ make_kernel_lambda_hip_(kernel_name, __VA_ARGS__),\ ##__VA_ARGS__);\ - } + } while(0) #define hipLaunchKernel(\ kernel_name,\ diff --git a/projects/hip/include/hip/hcc_detail/helpers.hpp b/projects/hip/include/hip/hcc_detail/helpers.hpp index 5ab866dc66..e5a84a4678 100644 --- a/projects/hip/include/hip/hcc_detail/helpers.hpp +++ b/projects/hip/include/hip/hcc_detail/helpers.hpp @@ -123,7 +123,7 @@ namespace std #endif } -namespace // Only for documentation, macros ignore namespaces. +namespace hip_impl // Only for documentation, macros ignore namespaces. { #define count_macro_args_impl_hip_(\ _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15,\ diff --git a/projects/hip/include/hip/hcc_detail/hip_runtime.h b/projects/hip/include/hip/hcc_detail/hip_runtime.h index 9692b16a4c..e0375269e7 100644 --- a/projects/hip/include/hip/hcc_detail/hip_runtime.h +++ b/projects/hip/include/hip/hcc_detail/hip_runtime.h @@ -68,7 +68,7 @@ THE SOFTWARE. #else namespace hip_impl { - struct Empty_launch_parm{}; + struct Empty_launch_parm {}; } #define hipLaunchParm hip_impl::Empty_launch_parm #endif //GENERIC_GRID_LAUNCH @@ -81,7 +81,7 @@ namespace hip_impl #endif //HCC #if GENERIC_GRID_LAUNCH==1 && defined __HCC__ -#include "grid_launch_v2.hpp" +#include "grid_launch_GGL.hpp" #endif//GENERIC_GRID_LAUNCH extern int HIP_TRACE_API;