diff --git a/projects/clr/hipamd/include/hip/hcc_detail/concepts.hpp b/projects/clr/hipamd/include/hip/hcc_detail/concepts.hpp new file mode 100644 index 0000000000..373ec15411 --- /dev/null +++ b/projects/clr/hipamd/include/hip/hcc_detail/concepts.hpp @@ -0,0 +1,11 @@ +// +// Created by alexv on 25/10/16. +// +#pragma once + +namespace glo_tests // Documentation only. +{ + #define requires(...) + + #define FunctionalProcedure typename +} diff --git a/projects/clr/hipamd/include/hip/hcc_detail/grid_launch_v2.hpp b/projects/clr/hipamd/include/hip/hcc_detail/grid_launch_v2.hpp new file mode 100644 index 0000000000..ab11433a5b --- /dev/null +++ b/projects/clr/hipamd/include/hip/hcc_detail/grid_launch_v2.hpp @@ -0,0 +1,227 @@ +// +// Created by alexv on 25/10/16. +// +#pragma once + +#include "concepts.hpp" +#include "helpers.hpp" + +#include "hc.hpp" +#include "hcc_acc.h" + +//#include +//#include + +#include +#include +#include + + +namespace glo_tests +{ + namespace + { + struct New_grid_launch_tag {}; + struct Old_grid_launch_tag {}; + } + + template + using is_new_grid_launch_t = typename std::conditional< + std::is_callable{}, + New_grid_launch_tag, + Old_grid_launch_tag>::type; + + // TODO: - dispatch rank should be derived from the domain dimensions passed + // in, and not always assumed to be 3; + + template + requires(Domain == {Ts...}) + static + inline + void grid_launch_impl( + New_grid_launch_tag, + dim3 num_blocks, + dim3 dim_blocks, + int group_mem_bytes, + hipStream_t stream, + K k, + Ts&&... args) + { + const auto d = hc::extent<3>{ + num_blocks.z * dim_blocks.z, + num_blocks.y * dim_blocks.y, + num_blocks.x * dim_blocks.x}.tile_with_dynamic( + dim_blocks.z, + dim_blocks.y, + dim_blocks.x, + group_mem_bytes); + hc::accelerator_view* av = nullptr; + + if (hipHccGetAcceleratorView(stream, &av) != HIP_SUCCESS) { + throw std::runtime_error{"Failed to retrieve accelerator_view!"}; + } + + hc::parallel_for_each(*av, d, [=](hc::tiled_index<3> idx) [[hc]] { + k(args...); + }); + } + + template + requires(Domain == {hipLaunchParm, Ts...}) + static + inline + void grid_launch_impl( + Old_grid_launch_tag, + dim3 num_blocks, + dim3 dim_blocks, + int group_mem_bytes, + hipStream_t stream, + K k, + Ts&&... args) + { + grid_launch_impl( + New_grid_launch_tag{}, + std::move(num_blocks), + std::move(dim_blocks), + group_mem_bytes, + std::move(stream), + std::move(k), + hipLaunchParm{}, + std::forward(args)...); + } + + template + requires(Domain == {Ts...}) + static + inline + std::enable_if_t::value> grid_launch( + dim3 num_blocks, + dim3 dim_blocks, + int group_mem_bytes, + hipStream_t stream, + K k, + Ts&& ... args) + { + grid_launch_impl( + is_new_grid_launch_t{}, + std::move(num_blocks), + std::move(dim_blocks), + group_mem_bytes, + std::move(stream), + std::move(k), + std::forward(args)...); + } + + template + requires(Domain == {Ts...}) + static + inline + void grid_launch( + New_grid_launch_tag, + dim3 num_blocks, + dim3 dim_blocks, + int group_mem_bytes, + hipStream_t stream, + Ts&&... args) + { + grid_launch( + std::move(num_blocks), + std::move(dim_blocks), + group_mem_bytes, + std::move(stream), + [](decltype(std::decay_t(args))... f_args) [[hc]] { + k(f_args...); + }, + std::forward(args)...); + } + + template + requires(Domain == {Ts...}) + static + inline + void grid_launch( + Old_grid_launch_tag, + dim3 num_blocks, + dim3 dim_blocks, + int group_mem_bytes, + hipStream_t stream, + Ts&&... args) + { + grid_launch( + New_grid_launch_tag{}, + std::move(num_blocks), + std::move(dim_blocks), + group_mem_bytes, + std::move(stream), + hipLaunchParm{}, + std::forward(args)...); + } + + template + requires(Domain == {Ts...}) + static + inline + std::enable_if_t::value> grid_launch( + dim3 num_blocks, + dim3 dim_blocks, + int group_mem_bytes, + hipStream_t stream, + Ts&&... args) + { + grid_launch( + is_new_grid_launch_t{}, + std::move(num_blocks), + std::move(dim_blocks), + group_mem_bytes, + std::move(stream), + std::forward(args)...); + } + + template struct Wrapper; + + template + struct Wrapper::value>> { + template + requires(Domain == {Ts...}) + void operator()(Ts&&... args) const + { + grid_launch_impl( + is_new_grid_launch_t{}, + std::forward(args)...); + } + }; + + template + struct Wrapper::value>> { + template + void operator()(Ts&&...) const {} + }; +#warning "GGL hipLaunchKernel defined" + #define hipLaunchKernel( \ + kernel_name, \ + num_blocks, \ + dim_blocks, \ + group_mem_bytes, \ + stream, \ + ...) \ + { \ + using F = decltype(kernel_name); \ + if (!std::is_function::value) { \ + glo_tests::Wrapper{}( \ + num_blocks, \ + dim_blocks, \ + group_mem_bytes, \ + stream, \ + kernel_name, \ + ##__VA_ARGS__); \ + } \ + else { \ + glo_tests::grid_launch( \ + num_blocks, \ + dim_blocks, \ + group_mem_bytes, \ + stream, \ + ##__VA_ARGS__); \ + } \ + } +} diff --git a/projects/clr/hipamd/include/hip/hcc_detail/helpers.hpp b/projects/clr/hipamd/include/hip/hcc_detail/helpers.hpp new file mode 100644 index 0000000000..ca3864911f --- /dev/null +++ b/projects/clr/hipamd/include/hip/hcc_detail/helpers.hpp @@ -0,0 +1,96 @@ +// +// Created by alexv on 08/11/16. +// +#pragma once + +#include // For std::conditional, std::decay, std::enable_if, + // std::false_type, std result_of and std::true_type. +#include // For std::declval. + +namespace std +{ + #if (__cplusplus < 201406L) + template + using void_t = void; + + #if (__cplusplus < 201402L) + template + using enable_if_t = typename enable_if::type; + template + using conditional_t = typename conditional::type; + template + using decay_t = typename decay::type; + template + using result_of_t = typename result_of::type; + + template< + FunctionalProcedure F, + unsigned int n = 0u, + typename = void> + struct is_callable_impl : is_callable_impl {}; + + // Pointer to member function, call through non-pointer. + template + struct is_callable_impl< + F(C, Ts...), + 0u, + void_t().*declval())(declval()...))> + > : true_type { + }; + + // Pointer to member function, call through pointer. + template + struct is_callable_impl< + F(C, Ts...), + 1u, + void_t()).*declval())(declval()...))> + > : std::true_type { + }; + + // Pointer to member data, call through non-pointer, no args. + template + struct is_callable_impl< + F(C), + 2u, + void_t().*declval())> + > : true_type { + }; + + // Pointer to member data, call through pointer, no args. + template + struct is_callable_impl< + F(C), + 3u, + void_t().*declval())> + > : true_type { + }; + + // General call, n args. + template + struct is_callable_impl< + F(Ts...), + 4u, + void_t()(declval()...))> + > : true_type { + }; + + // Not callable. + template + struct is_callable_impl : false_type {}; + + template + struct is_callable : is_callable_impl {}; + #else + template + struct is_callable_impl : false_type {}; + + template + struct is_callable_impl< + F(Ts...), + void_t>> : true_type {}; + + template + struct is_callable : is_callable_impl {}; + #endif + #endif +} diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_ldg.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_ldg.h index 65292951f0..6bf7a618d0 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_ldg.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_ldg.h @@ -23,11 +23,11 @@ THE SOFTWARE. #ifndef HIP_LDG_H #define HIP_LDG_H -#if __HCC__ +#if defined __HCC__ #if __hcc_workweek__ >= 16164 #include "hip_vector_types.h" #include "host_defines.h" - +#warning "LDG header included" __device__ char __ldg(const char* ); __device__ char2 __ldg(const char2* ); diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h index af294cdb53..9d0a43b1df 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime.h @@ -32,7 +32,6 @@ THE SOFTWARE. //--- // Top part of file can be compiled with any compiler - //#include #if __cplusplus #include @@ -40,7 +39,8 @@ THE SOFTWARE. #include #include #include -#endif +#endif//__cplusplus + // Define NVCC_COMPAT for CUDA compatibility #define NVCC_COMPAT #define CUDA_SUCCESS hipSuccess @@ -58,20 +58,30 @@ THE SOFTWARE. //--- // Remainder of this file only compiles with HCC -#ifdef __HCC__ +#if defined __HCC__ #include - -#if defined (GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) -// Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0. +//TODO-HCC-GL - change this to typedef. +//typedef grid_launch_parm hipLaunchParm ; +struct EmptyLaunchParm{}; +#ifndef GENERIC_GRID_LAUNCH + #define hipLaunchParm grid_launch_parm #else + #define hipLaunchParm EmptyLaunchParm +#endif //GENERIC_GRID_LAUNCH + +#if defined (GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) || defined GENERIC_GRID_LAUNCH +#else // Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0. #error (HCC must support GRID_LAUNCH_20) -#endif +#endif //GRID_LAUNCH_VERSION + +#endif //HCC + +#if defined GENERIC_GRID_LAUNCH && defined __HCC__ +#include "grid_launch_v2.hpp" +#endif//GENERIC_GRID_LAUNCH extern int HIP_TRACE_API; -//TODO-HCC-GL - change this to typedef. -//typedef grid_launch_parm hipLaunchParm ; -#define hipLaunchParm grid_launch_parm #ifdef __cplusplus //#include #include @@ -266,7 +276,7 @@ __device__ float __shfl(float input, int lane, int width); __device__ float __shfl_up(float input, unsigned int lane_delta, int width); __device__ float __shfl_down(float input, unsigned int lane_delta, int width); __device__ float __shfl_xor(float input, int lane_mask, int width); -#endif +#endif //__cplusplus __device__ unsigned __hip_ds_bpermute(int index, unsigned src); __device__ float __hip_ds_bpermutef(int index, float src); @@ -278,7 +288,7 @@ __device__ float __hip_ds_swizzlef(float src, int pattern); __device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl); -#endif +#endif //__HIP_ARCH_GFX803__ == 1 __host__ __device__ int min(int arg1, int arg2); __host__ __device__ int max(int arg1, int arg2); @@ -409,14 +419,15 @@ static inline __device__ void* memset(void* ptr, int val, size_t size) #define HIP_KERNEL_NAME(...) __VA_ARGS__ #define HIP_SYMBOL(X) #X -#ifdef __HCC_CPP__ +#if defined __HCC_CPP__ extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr); extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr); extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr); extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr); extern void ihipPostLaunchKernel(const char *kernelName, hipStream_t stream, grid_launch_parm &lp); - +#ifndef GENERIC_GRID_LAUNCH +#warning "Original hipLaunchKernel defined" // Due to multiple overloaded versions of ihipPreLaunchKernel, the numBlocks3D and blockDim3D can be either size_t or dim3 types #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \ do {\ @@ -426,13 +437,13 @@ do {\ _kernelName (lp, ##__VA_ARGS__);\ ihipPostLaunchKernel(#_kernelName, trueStream, lp);\ } while(0) - +#endif //GENERIC_GRID_LAUNCH #elif defined (__HCC_C__) //TODO - develop C interface. -#endif +#endif //__HCC_CPP__ /** * extern __shared__ @@ -446,7 +457,6 @@ do {\ #define HIP_DYNAMIC_SHARED_ATTRIBUTE ADDRESS_SPACE_3 -#endif // __HCC__ /** @@ -470,4 +480,4 @@ do {\ -#endif +#endif//HIP_HCC_DETAIL_RUNTIME_H diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h index 0d3ecc6613..c769548214 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -27,7 +27,8 @@ THE SOFTWARE. * @file hcc_detail/hip_runtime_api.h * @brief Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language extensions (-hc mode) ; those functions in hip_runtime.h. */ - +// guard for grid_launch_v2 +#define GENERIC_GRID_LAUNCH #include #include #include diff --git a/projects/clr/hipamd/include/hip/hcc_detail/host_defines.h b/projects/clr/hipamd/include/hip/hcc_detail/host_defines.h index 93695a0038..d7128d6fab 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/host_defines.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/host_defines.h @@ -37,7 +37,14 @@ THE SOFTWARE. #define __host__ __attribute__((cpu)) #define __device__ __attribute__((hc)) +#warning "HOST DEFINE header included" +#ifndef GENERIC_GRID_LAUNCH +#warning "original global define reached" #define __global__ __attribute__((hc_grid_launch)) __attribute__((used)) +#else +#warning "GGL global define reached" +#define __global__ [[hc]] __attribute__((weak)) +#endif //GENERIC_GRID_LAUNCH #define __noinline__ __attribute__((noinline)) #define __forceinline__ __attribute__((always_inline)) diff --git a/projects/clr/hipamd/samples/0_Intro/square/square.hipref.cpp b/projects/clr/hipamd/samples/0_Intro/square/square.hipref.cpp index e694bfb8a4..118f8acf13 100644 --- a/projects/clr/hipamd/samples/0_Intro/square/square.hipref.cpp +++ b/projects/clr/hipamd/samples/0_Intro/square/square.hipref.cpp @@ -83,7 +83,7 @@ int main(int argc, char *argv[]) const unsigned threadsPerBlock = 256; printf ("info: launch 'vector_square' kernel\n"); - hipLaunchKernel(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N); + hipLaunchKernel(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, nullptr, C_d, A_d, N); printf ("info: copy Device2Host\n"); CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); diff --git a/projects/clr/hipamd/src/hip_hcc.cpp b/projects/clr/hipamd/src/hip_hcc.cpp index 760f46046a..4d922e65e7 100644 --- a/projects/clr/hipamd/src/hip_hcc.cpp +++ b/projects/clr/hipamd/src/hip_hcc.cpp @@ -1556,7 +1556,6 @@ void ihipPostLaunchKernel(const char *kernelName, hipStream_t stream, grid_launc MARKER_END(); } - //================================================================================================= // HIP API Implementation // diff --git a/projects/clr/hipamd/src/trace_helper.h b/projects/clr/hipamd/src/trace_helper.h index 3bf2857c3a..abff491916 100644 --- a/projects/clr/hipamd/src/trace_helper.h +++ b/projects/clr/hipamd/src/trace_helper.h @@ -28,7 +28,6 @@ THE SOFTWARE. #include #include #include - //--- // Helper functions to convert HIP function arguments into strings. // Handles POD data types as well as enumerations (ie hipMemcpyKind). @@ -71,7 +70,7 @@ inline std::string ToString(hipEvent_t v) ss << v; return ss.str(); }; - +#ifndef GENERIC_GRID_LAUNCH // hipStream_t template <> inline std::string ToString(hipStream_t v) @@ -85,6 +84,7 @@ inline std::string ToString(hipStream_t v) return ss.str(); }; +#endif //GENERIC_GRID_LAUNCH // hipMemcpyKind specialization template <>