Initial integration with Alex' Generic Grid Launch
Change-Id: I559afb80e9e39ec0d119bb3bf3b85ef9e448caf6
[ROCm/clr commit: ad882222b0]
이 커밋은 다음에 포함됨:
@@ -0,0 +1,11 @@
|
||||
//
|
||||
// Created by alexv on 25/10/16.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
namespace glo_tests // Documentation only.
|
||||
{
|
||||
#define requires(...)
|
||||
|
||||
#define FunctionalProcedure typename
|
||||
}
|
||||
@@ -0,0 +1,227 @@
|
||||
//
|
||||
// Created by alexv on 25/10/16.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "concepts.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
#include "hc.hpp"
|
||||
#include "hcc_acc.h"
|
||||
|
||||
//#include <hip/hcc.h>
|
||||
//#include <hip/hip_runtime.h>
|
||||
|
||||
#include <stdexcept>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
|
||||
namespace glo_tests
|
||||
{
|
||||
namespace
|
||||
{
|
||||
struct New_grid_launch_tag {};
|
||||
struct Old_grid_launch_tag {};
|
||||
}
|
||||
|
||||
template<FunctionalProcedure F, typename... Ts>
|
||||
using is_new_grid_launch_t = typename std::conditional<
|
||||
std::is_callable<F(Ts...)>{},
|
||||
New_grid_launch_tag,
|
||||
Old_grid_launch_tag>::type;
|
||||
|
||||
// TODO: - dispatch rank should be derived from the domain dimensions passed
|
||||
// in, and not always assumed to be 3;
|
||||
|
||||
template<FunctionalProcedure K, typename... Ts>
|
||||
requires(Domain<K> == {Ts...})
|
||||
static
|
||||
inline
|
||||
void grid_launch_impl(
|
||||
New_grid_launch_tag,
|
||||
dim3 num_blocks,
|
||||
dim3 dim_blocks,
|
||||
int group_mem_bytes,
|
||||
hipStream_t stream,
|
||||
K k,
|
||||
Ts&&... args)
|
||||
{
|
||||
const auto d = hc::extent<3>{
|
||||
num_blocks.z * dim_blocks.z,
|
||||
num_blocks.y * dim_blocks.y,
|
||||
num_blocks.x * dim_blocks.x}.tile_with_dynamic(
|
||||
dim_blocks.z,
|
||||
dim_blocks.y,
|
||||
dim_blocks.x,
|
||||
group_mem_bytes);
|
||||
hc::accelerator_view* av = nullptr;
|
||||
|
||||
if (hipHccGetAcceleratorView(stream, &av) != HIP_SUCCESS) {
|
||||
throw std::runtime_error{"Failed to retrieve accelerator_view!"};
|
||||
}
|
||||
|
||||
hc::parallel_for_each(*av, d, [=](hc::tiled_index<3> idx) [[hc]] {
|
||||
k(args...);
|
||||
});
|
||||
}
|
||||
|
||||
template<FunctionalProcedure K, typename... Ts>
|
||||
requires(Domain<K> == {hipLaunchParm, Ts...})
|
||||
static
|
||||
inline
|
||||
void grid_launch_impl(
|
||||
Old_grid_launch_tag,
|
||||
dim3 num_blocks,
|
||||
dim3 dim_blocks,
|
||||
int group_mem_bytes,
|
||||
hipStream_t stream,
|
||||
K k,
|
||||
Ts&&... args)
|
||||
{
|
||||
grid_launch_impl(
|
||||
New_grid_launch_tag{},
|
||||
std::move(num_blocks),
|
||||
std::move(dim_blocks),
|
||||
group_mem_bytes,
|
||||
std::move(stream),
|
||||
std::move(k),
|
||||
hipLaunchParm{},
|
||||
std::forward<Ts>(args)...);
|
||||
}
|
||||
|
||||
template<FunctionalProcedure K, typename... Ts>
|
||||
requires(Domain<K> == {Ts...})
|
||||
static
|
||||
inline
|
||||
std::enable_if_t<!std::is_function<K>::value> grid_launch(
|
||||
dim3 num_blocks,
|
||||
dim3 dim_blocks,
|
||||
int group_mem_bytes,
|
||||
hipStream_t stream,
|
||||
K k,
|
||||
Ts&& ... args)
|
||||
{
|
||||
grid_launch_impl(
|
||||
is_new_grid_launch_t<K, Ts...>{},
|
||||
std::move(num_blocks),
|
||||
std::move(dim_blocks),
|
||||
group_mem_bytes,
|
||||
std::move(stream),
|
||||
std::move(k),
|
||||
std::forward<Ts>(args)...);
|
||||
}
|
||||
|
||||
template<FunctionalProcedure K, K* k, typename... Ts>
|
||||
requires(Domain<K> == {Ts...})
|
||||
static
|
||||
inline
|
||||
void grid_launch(
|
||||
New_grid_launch_tag,
|
||||
dim3 num_blocks,
|
||||
dim3 dim_blocks,
|
||||
int group_mem_bytes,
|
||||
hipStream_t stream,
|
||||
Ts&&... args)
|
||||
{
|
||||
grid_launch(
|
||||
std::move(num_blocks),
|
||||
std::move(dim_blocks),
|
||||
group_mem_bytes,
|
||||
std::move(stream),
|
||||
[](decltype(std::decay_t<Ts>(args))... f_args) [[hc]] {
|
||||
k(f_args...);
|
||||
},
|
||||
std::forward<Ts>(args)...);
|
||||
}
|
||||
|
||||
template<FunctionalProcedure K, K* k, typename... Ts>
|
||||
requires(Domain<K> == {Ts...})
|
||||
static
|
||||
inline
|
||||
void grid_launch(
|
||||
Old_grid_launch_tag,
|
||||
dim3 num_blocks,
|
||||
dim3 dim_blocks,
|
||||
int group_mem_bytes,
|
||||
hipStream_t stream,
|
||||
Ts&&... args)
|
||||
{
|
||||
grid_launch<K, k>(
|
||||
New_grid_launch_tag{},
|
||||
std::move(num_blocks),
|
||||
std::move(dim_blocks),
|
||||
group_mem_bytes,
|
||||
std::move(stream),
|
||||
hipLaunchParm{},
|
||||
std::forward<Ts>(args)...);
|
||||
}
|
||||
|
||||
template<FunctionalProcedure K, K* k, typename... Ts>
|
||||
requires(Domain<K> == {Ts...})
|
||||
static
|
||||
inline
|
||||
std::enable_if_t<std::is_function<K>::value> grid_launch(
|
||||
dim3 num_blocks,
|
||||
dim3 dim_blocks,
|
||||
int group_mem_bytes,
|
||||
hipStream_t stream,
|
||||
Ts&&... args)
|
||||
{
|
||||
grid_launch<K, k>(
|
||||
is_new_grid_launch_t<K*, Ts...>{},
|
||||
std::move(num_blocks),
|
||||
std::move(dim_blocks),
|
||||
group_mem_bytes,
|
||||
std::move(stream),
|
||||
std::forward<Ts>(args)...);
|
||||
}
|
||||
|
||||
template<typename, typename = void> struct Wrapper;
|
||||
|
||||
template<FunctionalProcedure K>
|
||||
struct Wrapper<K, std::enable_if_t<!std::is_function<K>::value>> {
|
||||
template<typename... Ts>
|
||||
requires(Domain<K> == {Ts...})
|
||||
void operator()(Ts&&... args) const
|
||||
{
|
||||
grid_launch_impl(
|
||||
is_new_grid_launch_t<K, Ts...>{},
|
||||
std::forward<Ts>(args)...);
|
||||
}
|
||||
};
|
||||
|
||||
template<FunctionalProcedure K>
|
||||
struct Wrapper<K, std::enable_if_t<std::is_function<K>::value>> {
|
||||
template<typename... Ts>
|
||||
void operator()(Ts&&...) const {}
|
||||
};
|
||||
#warning "GGL hipLaunchKernel defined"
|
||||
#define hipLaunchKernel( \
|
||||
kernel_name, \
|
||||
num_blocks, \
|
||||
dim_blocks, \
|
||||
group_mem_bytes, \
|
||||
stream, \
|
||||
...) \
|
||||
{ \
|
||||
using F = decltype(kernel_name); \
|
||||
if (!std::is_function<F>::value) { \
|
||||
glo_tests::Wrapper<F>{}( \
|
||||
num_blocks, \
|
||||
dim_blocks, \
|
||||
group_mem_bytes, \
|
||||
stream, \
|
||||
kernel_name, \
|
||||
##__VA_ARGS__); \
|
||||
} \
|
||||
else { \
|
||||
glo_tests::grid_launch<F, &kernel_name>( \
|
||||
num_blocks, \
|
||||
dim_blocks, \
|
||||
group_mem_bytes, \
|
||||
stream, \
|
||||
##__VA_ARGS__); \
|
||||
} \
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,96 @@
|
||||
//
|
||||
// Created by alexv on 08/11/16.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include <type_traits> // For std::conditional, std::decay, std::enable_if,
|
||||
// std::false_type, std result_of and std::true_type.
|
||||
#include <utility> // For std::declval.
|
||||
|
||||
namespace std
|
||||
{
|
||||
#if (__cplusplus < 201406L)
|
||||
template<typename...>
|
||||
using void_t = void;
|
||||
|
||||
#if (__cplusplus < 201402L)
|
||||
template<bool cond, typename T = void>
|
||||
using enable_if_t = typename enable_if<cond, T>::type;
|
||||
template<bool cond, typename T, typename U>
|
||||
using conditional_t = typename conditional<cond, T, U>::type;
|
||||
template<typename T>
|
||||
using decay_t = typename decay<T>::type;
|
||||
template<FunctionalProcedure F, typename... Ts>
|
||||
using result_of_t = typename result_of<F(Ts...)>::type;
|
||||
|
||||
template<
|
||||
FunctionalProcedure F,
|
||||
unsigned int n = 0u,
|
||||
typename = void>
|
||||
struct is_callable_impl : is_callable_impl<F, n + 1u> {};
|
||||
|
||||
// Pointer to member function, call through non-pointer.
|
||||
template<FunctionalProcedure F, typename C, typename... Ts>
|
||||
struct is_callable_impl<
|
||||
F(C, Ts...),
|
||||
0u,
|
||||
void_t<decltype((declval<C>().*declval<F>())(declval<Ts>()...))>
|
||||
> : true_type {
|
||||
};
|
||||
|
||||
// Pointer to member function, call through pointer.
|
||||
template<FunctionalProcedure F, typename C, typename... Ts>
|
||||
struct is_callable_impl<
|
||||
F(C, Ts...),
|
||||
1u,
|
||||
void_t<decltype(((*declval<C>()).*declval<F>())(declval<Ts>()...))>
|
||||
> : std::true_type {
|
||||
};
|
||||
|
||||
// Pointer to member data, call through non-pointer, no args.
|
||||
template<FunctionalProcedure F, typename C>
|
||||
struct is_callable_impl<
|
||||
F(C),
|
||||
2u,
|
||||
void_t<decltype(declval<C>().*declval<F>())>
|
||||
> : true_type {
|
||||
};
|
||||
|
||||
// Pointer to member data, call through pointer, no args.
|
||||
template<FunctionalProcedure F, typename C>
|
||||
struct is_callable_impl<
|
||||
F(C),
|
||||
3u,
|
||||
void_t<decltype(*declval<C>().*declval<F>())>
|
||||
> : true_type {
|
||||
};
|
||||
|
||||
// General call, n args.
|
||||
template<FunctionalProcedure F, typename... Ts>
|
||||
struct is_callable_impl<
|
||||
F(Ts...),
|
||||
4u,
|
||||
void_t<decltype(declval<F>()(declval<Ts>()...))>
|
||||
> : true_type {
|
||||
};
|
||||
|
||||
// Not callable.
|
||||
template<FunctionalProcedure F>
|
||||
struct is_callable_impl<F, 5u> : false_type {};
|
||||
|
||||
template<typename Call>
|
||||
struct is_callable : is_callable_impl<Call> {};
|
||||
#else
|
||||
template<typename, typename = void>
|
||||
struct is_callable_impl : false_type {};
|
||||
|
||||
template<FunctionalProcedure F, typename... Ts>
|
||||
struct is_callable_impl<
|
||||
F(Ts...),
|
||||
void_t<result_of_t<F(Ts...)>>> : true_type {};
|
||||
|
||||
template<typename F>
|
||||
struct is_callable : is_callable_impl<F> {};
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
@@ -23,11 +23,11 @@ THE SOFTWARE.
|
||||
#ifndef HIP_LDG_H
|
||||
#define HIP_LDG_H
|
||||
|
||||
#if __HCC__
|
||||
#if defined __HCC__
|
||||
#if __hcc_workweek__ >= 16164
|
||||
#include "hip_vector_types.h"
|
||||
#include "host_defines.h"
|
||||
|
||||
#warning "LDG header included"
|
||||
|
||||
__device__ char __ldg(const char* );
|
||||
__device__ char2 __ldg(const char2* );
|
||||
|
||||
@@ -32,7 +32,6 @@ THE SOFTWARE.
|
||||
//---
|
||||
// Top part of file can be compiled with any compiler
|
||||
|
||||
|
||||
//#include <cstring>
|
||||
#if __cplusplus
|
||||
#include <cmath>
|
||||
@@ -40,7 +39,8 @@ THE SOFTWARE.
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <stddef.h>
|
||||
#endif
|
||||
#endif//__cplusplus
|
||||
|
||||
// Define NVCC_COMPAT for CUDA compatibility
|
||||
#define NVCC_COMPAT
|
||||
#define CUDA_SUCCESS hipSuccess
|
||||
@@ -58,20 +58,30 @@ THE SOFTWARE.
|
||||
|
||||
//---
|
||||
// Remainder of this file only compiles with HCC
|
||||
#ifdef __HCC__
|
||||
#if defined __HCC__
|
||||
#include <grid_launch.h>
|
||||
|
||||
#if defined (GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20)
|
||||
// Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0.
|
||||
//TODO-HCC-GL - change this to typedef.
|
||||
//typedef grid_launch_parm hipLaunchParm ;
|
||||
struct EmptyLaunchParm{};
|
||||
#ifndef GENERIC_GRID_LAUNCH
|
||||
#define hipLaunchParm grid_launch_parm
|
||||
#else
|
||||
#define hipLaunchParm EmptyLaunchParm
|
||||
#endif //GENERIC_GRID_LAUNCH
|
||||
|
||||
#if defined (GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) || defined GENERIC_GRID_LAUNCH
|
||||
#else // Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0.
|
||||
#error (HCC must support GRID_LAUNCH_20)
|
||||
#endif
|
||||
#endif //GRID_LAUNCH_VERSION
|
||||
|
||||
#endif //HCC
|
||||
|
||||
#if defined GENERIC_GRID_LAUNCH && defined __HCC__
|
||||
#include "grid_launch_v2.hpp"
|
||||
#endif//GENERIC_GRID_LAUNCH
|
||||
|
||||
extern int HIP_TRACE_API;
|
||||
|
||||
//TODO-HCC-GL - change this to typedef.
|
||||
//typedef grid_launch_parm hipLaunchParm ;
|
||||
#define hipLaunchParm grid_launch_parm
|
||||
#ifdef __cplusplus
|
||||
//#include <hip/hcc_detail/hip_texture.h>
|
||||
#include <hip/hcc_detail/hip_ldg.h>
|
||||
@@ -266,7 +276,7 @@ __device__ float __shfl(float input, int lane, int width);
|
||||
__device__ float __shfl_up(float input, unsigned int lane_delta, int width);
|
||||
__device__ float __shfl_down(float input, unsigned int lane_delta, int width);
|
||||
__device__ float __shfl_xor(float input, int lane_mask, int width);
|
||||
#endif
|
||||
#endif //__cplusplus
|
||||
|
||||
__device__ unsigned __hip_ds_bpermute(int index, unsigned src);
|
||||
__device__ float __hip_ds_bpermutef(int index, float src);
|
||||
@@ -278,7 +288,7 @@ __device__ float __hip_ds_swizzlef(float src, int pattern);
|
||||
|
||||
__device__ int __hip_move_dpp(int src, int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl);
|
||||
|
||||
#endif
|
||||
#endif //__HIP_ARCH_GFX803__ == 1
|
||||
|
||||
__host__ __device__ int min(int arg1, int arg2);
|
||||
__host__ __device__ int max(int arg1, int arg2);
|
||||
@@ -409,14 +419,15 @@ static inline __device__ void* memset(void* ptr, int val, size_t size)
|
||||
#define HIP_KERNEL_NAME(...) __VA_ARGS__
|
||||
#define HIP_SYMBOL(X) #X
|
||||
|
||||
#ifdef __HCC_CPP__
|
||||
#if defined __HCC_CPP__
|
||||
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr);
|
||||
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr);
|
||||
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr);
|
||||
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr);
|
||||
extern void ihipPostLaunchKernel(const char *kernelName, hipStream_t stream, grid_launch_parm &lp);
|
||||
|
||||
|
||||
#ifndef GENERIC_GRID_LAUNCH
|
||||
#warning "Original hipLaunchKernel defined"
|
||||
// Due to multiple overloaded versions of ihipPreLaunchKernel, the numBlocks3D and blockDim3D can be either size_t or dim3 types
|
||||
#define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
|
||||
do {\
|
||||
@@ -426,13 +437,13 @@ do {\
|
||||
_kernelName (lp, ##__VA_ARGS__);\
|
||||
ihipPostLaunchKernel(#_kernelName, trueStream, lp);\
|
||||
} while(0)
|
||||
|
||||
#endif //GENERIC_GRID_LAUNCH
|
||||
|
||||
#elif defined (__HCC_C__)
|
||||
|
||||
//TODO - develop C interface.
|
||||
|
||||
#endif
|
||||
#endif //__HCC_CPP__
|
||||
|
||||
/**
|
||||
* extern __shared__
|
||||
@@ -446,7 +457,6 @@ do {\
|
||||
|
||||
#define HIP_DYNAMIC_SHARED_ATTRIBUTE ADDRESS_SPACE_3
|
||||
|
||||
#endif // __HCC__
|
||||
|
||||
|
||||
/**
|
||||
@@ -470,4 +480,4 @@ do {\
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
#endif//HIP_HCC_DETAIL_RUNTIME_H
|
||||
|
||||
@@ -27,7 +27,8 @@ THE SOFTWARE.
|
||||
* @file hcc_detail/hip_runtime_api.h
|
||||
* @brief Contains C function APIs for HIP runtime. This file does not use any HCC builtin or special language extensions (-hc mode) ; those functions in hip_runtime.h.
|
||||
*/
|
||||
|
||||
// guard for grid_launch_v2
|
||||
#define GENERIC_GRID_LAUNCH
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <iostream>
|
||||
|
||||
@@ -37,7 +37,14 @@ THE SOFTWARE.
|
||||
#define __host__ __attribute__((cpu))
|
||||
#define __device__ __attribute__((hc))
|
||||
|
||||
#warning "HOST DEFINE header included"
|
||||
#ifndef GENERIC_GRID_LAUNCH
|
||||
#warning "original global define reached"
|
||||
#define __global__ __attribute__((hc_grid_launch)) __attribute__((used))
|
||||
#else
|
||||
#warning "GGL global define reached"
|
||||
#define __global__ [[hc]] __attribute__((weak))
|
||||
#endif //GENERIC_GRID_LAUNCH
|
||||
|
||||
#define __noinline__ __attribute__((noinline))
|
||||
#define __forceinline__ __attribute__((always_inline))
|
||||
|
||||
@@ -83,7 +83,7 @@ int main(int argc, char *argv[])
|
||||
const unsigned threadsPerBlock = 256;
|
||||
|
||||
printf ("info: launch 'vector_square' kernel\n");
|
||||
hipLaunchKernel(vector_square, dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
|
||||
hipLaunchKernel(vector_square<float>, dim3(blocks), dim3(threadsPerBlock), 0, nullptr, C_d, A_d, N);
|
||||
|
||||
printf ("info: copy Device2Host\n");
|
||||
CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -1556,7 +1556,6 @@ void ihipPostLaunchKernel(const char *kernelName, hipStream_t stream, grid_launc
|
||||
MARKER_END();
|
||||
}
|
||||
|
||||
|
||||
//=================================================================================================
|
||||
// HIP API Implementation
|
||||
//
|
||||
|
||||
@@ -28,7 +28,6 @@ THE SOFTWARE.
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
|
||||
//---
|
||||
// Helper functions to convert HIP function arguments into strings.
|
||||
// Handles POD data types as well as enumerations (ie hipMemcpyKind).
|
||||
@@ -71,7 +70,7 @@ inline std::string ToString(hipEvent_t v)
|
||||
ss << v;
|
||||
return ss.str();
|
||||
};
|
||||
|
||||
#ifndef GENERIC_GRID_LAUNCH
|
||||
// hipStream_t
|
||||
template <>
|
||||
inline std::string ToString(hipStream_t v)
|
||||
@@ -85,6 +84,7 @@ inline std::string ToString(hipStream_t v)
|
||||
|
||||
return ss.str();
|
||||
};
|
||||
#endif //GENERIC_GRID_LAUNCH
|
||||
|
||||
// hipMemcpyKind specialization
|
||||
template <>
|
||||
|
||||
새 이슈에서 참조
사용자 차단