diff --git a/docs/markdown/hip_kernel_language.md b/docs/markdown/hip_kernel_language.md
index e382ede4b3..23cde7fee6 100644
--- a/docs/markdown/hip_kernel_language.md
+++ b/docs/markdown/hip_kernel_language.md
@@ -299,6 +299,42 @@ Following is the list of supported single precision mathematical functions.
| float tanhf ( float x )
Calculate the hyperbolic tangent of the input argument. | ✓ | ✓ |
| float truncf ( float x )
Truncate input argument to the integral part. | ✓ | ✓ |
| float tgammaf ( float x )
Calculate the gamma function of the input argument. | ✓ | ✓ |
+| float erfcinvf ( float y )
Calculate the inverse complementary function of the input argument. | ✓ | ✓ |
+| float erfcxf ( float x )
Calculate the scaled complementary error function of the input argument. | ✓ | ✓ |
+| float erfinvf ( float y )
Calculate the inverse error function of the input argument. | ✓ | ✓ |
+| float fdividef ( float x, float y )
Divide two floating point values. | ✓ | ✓ |
+| float frexpf ( float x, int *nptr )
Extract mantissa and exponent of a floating-point value. | ✓ | ✓ |
+| float j0f ( float x )
Calculate the value of the Bessel function of the first kind of order 0 for the input argument. | ✓ | ✓ |
+| float j1f ( float x )
Calculate the value of the Bessel function of the first kind of order 1 for the input argument. | ✓ | ✓ |
+| float jnf ( int n, float x )
Calculate the value of the Bessel function of the first kind of order n for the input argument. | ✓ | ✓ |
+| float lgammaf ( float x )
Calculate the natural logarithm of the absolute value of the gamma function of the input argument. | ✓ | ✓ |
+| long long int llrintf ( float x )
Round input to nearest integer value. | ✓ | ✓ |
+| long long int llroundf ( float x )
Round to nearest integer value. | ✓ | ✓ |
+| long int lrintf ( float x )
Round input to nearest integer value. | ✓ | ✓ |
+| long int lroundf ( float x )
Round to nearest integer value. | ✓ | ✓ |
+| float modff ( float x, float *iptr )
Break down the input argument into fractional and integral parts. | ✓ | ✓ |
+| float nextafterf ( float x, float y )
Returns next representable single-precision floating-point value after argument. | ✓ | ✓ |
+| float norm3df ( float a, float b, float c )
Calculate the square root of the sum of squares of three coordinates of the argument. | ✓ | ✓ |
+| float norm4df ( float a, float b, float c, float d )
Calculate the square root of the sum of squares of four coordinates of the argument. | ✓ | ✓ |
+| float normcdff ( float y )
Calculate the standard normal cumulative distribution function. | ✓ | ✓ |
+| float normcdfinvf ( float y )
Calculate the inverse of the standard normal cumulative distribution function. | ✓ | ✓ |
+| float normf ( int dim, const float *a )
Calculate the square root of the sum of squares of any number of coordinates. | ✓ | ✓ |
+| float rcbrtf ( float x )
Calculate the reciprocal cube root function. | ✓ | ✓ |
+| float remquof ( float x, float y, int *quo )
Compute single-precision floating-point remainder and part of quotient. | ✓ | ✓ |
+| float rhypotf ( float x, float y )
Calculate one over the square root of the sum of squares of two arguments. | ✓ | ✓ |
+| float rintf ( float x )
Round input to nearest integer value in floating-point. | ✓ | ✓ |
+| float rnorm3df ( float a, float b, float c )
Calculate one over the square root of the sum of squares of three coordinates of the argument. | ✓ | ✓ |
+| float rnorm4df ( float a, float b, float c, float d )
Calculate one over the square root of the sum of squares of four coordinates of the argument. | ✓ | ✓ |
+| float rnormf ( int dim, const float *a )
Calculate the reciprocal of square root of the sum of squares of any number of coordinates. | ✓ | ✓ |
+| float scalblnf ( float x, long int n )
Scale floating-point input by integer power of two. | ✓ | ✓ |
+| void sincosf ( float x, float *sptr, float *cptr )
Calculate the sine and cosine of the first input argument. | ✓ | ✓ |
+| void sincospif ( float x, float *sptr, float *cptr )
Calculate the sine and cosine of the first input argument multiplied by PI. | ✓ | ✓ |
+| float y0f ( float x )
Calculate the value of the Bessel function of the second kind of order 0 for the input argument. | ✓ | ✓ |
+| float y1f ( float x )
Calculate the value of the Bessel function of the second kind of order 1 for the input argument. | ✓ | ✓ |
+| float ynf ( int n, float x )
Calculate the value of the Bessel function of the second kind of order n for the input argument. | ✓ | ✓ |
+
+
+
[1] __RETURN_TYPE is dependent on compiler. It is usually 'int' for C compilers and 'bool' for C++ compilers. [↩](#a1)
### Double Precision Mathematical Functions
@@ -360,6 +396,40 @@ Following is the list of supported double precision mathematical functions.
| double tanh ( double x )
Calculate the hyperbolic tangent of the input argument. | ✓ | ✓ |
| double tgamma ( double x )
Calculate the gamma function of the input argument. | ✓ | ✓ |
| double trunc ( double x )
Truncate input argument to the integral part. | ✓ | ✓ |
+| double erfcinv ( double y )
Calculate the inverse complementary function of the input argument. | ✓ | ✓ |
+| double erfcx ( double x )
Calculate the scaled complementary error function of the input argument. | ✓ | ✓ |
+| double erfinv ( double y )
Calculate the inverse error function of the input argument. | ✓ | ✓ |
+| double frexp ( float x, int *nptr )
Extract mantissa and exponent of a floating-point value. | ✓ | ✓ |
+| double j0 ( double x )
Calculate the value of the Bessel function of the first kind of order 0 for the input argument. | ✓ | ✓ |
+| double j1 ( double x )
Calculate the value of the Bessel function of the first kind of order 1 for the input argument. | ✓ | ✓ |
+| double jn ( int n, double x )
Calculate the value of the Bessel function of the first kind of order n for the input argument. | ✓ | ✓ |
+| double lgamma ( double x )
Calculate the natural logarithm of the absolute value of the gamma function of the input argument. | ✓ | ✓ |
+| long long int llrint ( double x )
Round input to nearest integer value. | ✓ | ✓ |
+| long long int llround ( double x )
Round to nearest integer value. | ✓ | ✓ |
+| long int lrint ( double x )
Round input to nearest integer value. | ✓ | ✓ |
+| long int lround ( double x )
Round to nearest integer value. | ✓ | ✓ |
+| double modf ( double x, double *iptr )
Break down the input argument into fractional and integral parts. | ✓ | ✓ |
+| double nextafter ( double x, double y )
Returns next representable single-precision floating-point value after argument. | ✓ | ✓ |
+| double norm3d ( double a, double b, double c )
Calculate the square root of the sum of squares of three coordinates of the argument. | ✓ | ✓ |
+| float norm4d ( double a, double b, double c, double d )
Calculate the square root of the sum of squares of four coordinates of the argument. | ✓ | ✓ |
+| double normcdf ( double y )
Calculate the standard normal cumulative distribution function. | ✓ | ✓ |
+| double normcdfinv ( double y )
Calculate the inverse of the standard normal cumulative distribution function. | ✓ | ✓ |
+| double rcbrt ( double x )
Calculate the reciprocal cube root function. | ✓ | ✓ |
+| double remquo ( double x, double y, int *quo )
Compute single-precision floating-point remainder and part of quotient. | ✓ | ✓ |
+| double rhypot ( double x, double y )
Calculate one over the square root of the sum of squares of two arguments. | ✓ | ✓ |
+| double rint ( double x )
Round input to nearest integer value in floating-point. | ✓ | ✓ |
+| double rnorm3d ( double a, double b, double c )
Calculate one over the square root of the sum of squares of three coordinates of the argument. | ✓ | ✓ |
+| double rnorm4d ( double a, double b, double c, double d )
Calculate one over the square root of the sum of squares of four coordinates of the argument. | ✓ | ✓ |
+| double rnorm ( int dim, const double *a )
Calculate the reciprocal of square root of the sum of squares of any number of coordinates. | ✓ | ✓ |
+| double scalbln ( double x, long int n )
Scale floating-point input by integer power of two. | ✓ | ✓ |
+| void sincos ( double x, double *sptr, double *cptr )
Calculate the sine and cosine of the first input argument. | ✓ | ✓ |
+| void sincospi ( double x, double *sptr, double *cptr )
Calculate the sine and cosine of the first input argument multiplied by PI. | ✓ | ✓ |
+| double y0f ( double x )
Calculate the value of the Bessel function of the second kind of order 0 for the input argument. | ✓ | ✓ |
+| double y1 ( double x )
Calculate the value of the Bessel function of the second kind of order 1 for the input argument. | ✓ | ✓ |
+| double yn ( int n, double x )
Calculate the value of the Bessel function of the second kind of order n for the input argument. | ✓ | ✓ |
+
+
+
[1] __RETURN_TYPE is dependent on compiler. It is usually 'int' for C compilers and 'bool' for C++ compilers. [↩](#a2)
### Integer Intrinsics
diff --git a/docs/markdown/hip_porting_guide.md b/docs/markdown/hip_porting_guide.md
index de5c590e12..6ae0a6d31e 100644
--- a/docs/markdown/hip_porting_guide.md
+++ b/docs/markdown/hip_porting_guide.md
@@ -464,37 +464,6 @@ hipcc-cmd: /opt/hcc/bin/hcc -hc -I/opt/hcc/include -stdlib=libc++ -I../../../..
If you pass a ".cu" file, hcc will attempt to compile it as a Cuda language file. You must tell hcc that its in fact a C++ file: use the "-x c++" option.
-#### grid_launch kernel dispatch - fallback
-HIP uses an hcc language feature called "grid_launch". The [[hc_grid_launch]] attribute that can be attached to a function definition, and the first parameter is of type grid_launch_parm.
-When a [[hc_grid_launch]] function is called, hcc runtime uses the grid_launch_parm to control the execution configuration of the kernel
-(including the grid and group dimensions, the queue, and dynamic group memory allocations). By default, the hipLaunchKernel macro creates a grid_launch_parm structure and launches a
-[[hc_grid_launch]] kernel. grid_launch is a relatively new addition to hcc so this section describes how to fall back to a traditional calling sequence which invokes a standard host function
-which calls a hc::parallel_for_each to launch the kernel.
-
-First, set DISABLE_GRID_LAUNCH:
-include/hip_common.h
-```
-// Set this define to disable GRID_LAUNCH
-#define DISABLE_GRID_LAUNCH
-```
-
-Inside any kernel use the KERNELBEGIN as the first line in the kernel function, and KERNELEND as the last line. For example:
-```
-__global__ void
-MyKernel(hipLaunchParm lp, float *C, const float *A, size_t N)
-{
- KERNELBEGIN; // Required if hc_grid_launch is disabled
-
- int tid = hipBlockIdx_x*MAX_THREADS_PER_BLOCK + hipThreadIdx_x;
-
- if (tid < N) {
- C[tid] = A[tid];
- }
-
- KERNELEND; // Required if hc_grid_launch is disabled
-}
-```
-
#### HIP Environment Variables
On the HCC path, HIP provides a number of environment variables that control the behavior of HIP. Some of these are useful for appliction development (for example HIP_VISIBLE_DEVICES, HIP_LAUNCH_BLOCKING),
diff --git a/include/hcc_detail/hip_runtime.h b/include/hcc_detail/hip_runtime.h
index 3d0bd7b1f2..21da60631a 100644
--- a/include/hcc_detail/hip_runtime.h
+++ b/include/hcc_detail/hip_runtime.h
@@ -38,6 +38,8 @@ THE SOFTWARE.
#include
+
+
#define CUDA_SUCCESS hipSuccess
#include
@@ -46,6 +48,16 @@ THE SOFTWARE.
// Remainder of this file only compiles with HCC
#ifdef __HCC__
#include
+
+#if defined (GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20)
+// Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0.
+#define USE_GRID_LAUNCH_20 1
+#else
+#define USE_GRID_LAUNCH_20 0
+#endif
+
+
+
extern int HIP_TRACE_API;
//TODO-HCC-GL - change this to typedef.
@@ -485,45 +497,23 @@ __device__ float __dsqrt_rz(double x);
* Kernel launching
*/
-// Choose correct polarity of xyz/zyx ordering:
-#if __hcc_workweek__ >= 16123
-#define hipThreadIdx_x (amp_get_local_id(0))
-#define hipThreadIdx_y (amp_get_local_id(1))
-#define hipThreadIdx_z (amp_get_local_id(2))
+#define hipThreadIdx_x (hc_get_workitem_id(0))
+#define hipThreadIdx_y (hc_get_workitem_id(1))
+#define hipThreadIdx_z (hc_get_workitem_id(2))
#define hipBlockIdx_x (hc_get_group_id(0))
#define hipBlockIdx_y (hc_get_group_id(1))
#define hipBlockIdx_z (hc_get_group_id(2))
-#define hipBlockDim_x (amp_get_local_size(0))
-#define hipBlockDim_y (amp_get_local_size(1))
-#define hipBlockDim_z (amp_get_local_size(2))
+#define hipBlockDim_x (hc_get_group_size(0))
+#define hipBlockDim_y (hc_get_group_size(1))
+#define hipBlockDim_z (hc_get_group_size(2))
#define hipGridDim_x (hc_get_num_groups(0))
#define hipGridDim_y (hc_get_num_groups(1))
#define hipGridDim_z (hc_get_num_groups(2))
-#else
-
-#define hipThreadIdx_x (amp_get_local_id(2))
-#define hipThreadIdx_y (amp_get_local_id(1))
-#define hipThreadIdx_z (amp_get_local_id(0))
-
-#define hipBlockIdx_x (hc_get_group_id(2))
-#define hipBlockIdx_y (hc_get_group_id(1))
-#define hipBlockIdx_z (hc_get_group_id(0))
-
-#define hipBlockDim_x (amp_get_local_size(2))
-#define hipBlockDim_y (amp_get_local_size(1))
-#define hipBlockDim_z (amp_get_local_size(0))
-
-#define hipGridDim_x (hc_get_num_groups(2))
-#define hipGridDim_y (hc_get_num_groups(1))
-#define hipGridDim_z (hc_get_num_groups(0))
-
-#endif // __hcc_workweek__ check
-
#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
#define HIP_KERNEL_NAME(...) __VA_ARGS__
@@ -539,7 +529,20 @@ void ihipPostLaunchKernel(hipStream_t stream, grid_launch_parm &lp);
#define KNRM "\x1B[0m"
#define KGRN "\x1B[32m"
-#if not defined(DISABLE_GRID_LAUNCH)
+#if USE_GRID_LAUNCH_20
+#define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
+do {\
+ grid_launch_parm lp;\
+ lp.dynamic_group_mem_bytes = _groupMemBytes; \
+ hipStream_t trueStream = (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp)); \
+ if (HIP_TRACE_API) {\
+ fprintf(stderr, KGRN "<grid_dim.x = grid.x;
+ lp->grid_dim.y = grid.y;
+ lp->grid_dim.z = grid.z;
+ lp->group_dim.x = block.x;
+ lp->group_dim.y = block.y;
+ lp->group_dim.z = block.z;
+ lp->barrier_bit = barrier_bit_queue_default;
+ lp->launch_fence = -1;
+#else
lp->gridDim.x = grid.x;
lp->gridDim.y = grid.y;
lp->gridDim.z = grid.z;
lp->groupDim.x = block.x;
lp->groupDim.y = block.y;
lp->groupDim.z = block.z;
+#endif
stream->lockopen_preKernelCommand();
// *av = &stream->_av;
lp->av = &stream->_av;
@@ -1111,12 +1122,23 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, gri
{
std::call_once(hip_initialized, ihipInit);
stream = ihipSyncAndResolveStream(stream);
+#if USE_GRID_LAUNCH_20
+ lp->grid_dim.x = grid;
+ lp->grid_dim.y = 1;
+ lp->grid_dim.z = 1;
+ lp->group_dim.x = block.x;
+ lp->group_dim.y = block.y;
+ lp->group_dim.z = block.z;
+ lp->barrier_bit = barrier_bit_queue_default;
+ lp->launch_fence = -1;
+#else
lp->gridDim.x = grid;
lp->gridDim.y = 1;
lp->gridDim.z = 1;
lp->groupDim.x = block.x;
lp->groupDim.y = block.y;
lp->groupDim.z = block.z;
+#endif
stream->lockopen_preKernelCommand();
// *av = &stream->_av;
lp->av = &stream->_av;
@@ -1130,12 +1152,23 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, gri
{
std::call_once(hip_initialized, ihipInit);
stream = ihipSyncAndResolveStream(stream);
+#if USE_GRID_LAUNCH_20
+ lp->grid_dim.x = grid.x;
+ lp->grid_dim.y = grid.y;
+ lp->grid_dim.z = grid.z;
+ lp->group_dim.x = block;
+ lp->group_dim.y = 1;
+ lp->group_dim.z = 1;
+ lp->barrier_bit = barrier_bit_queue_default;
+ lp->launch_fence = -1;
+#else
lp->gridDim.x = grid.x;
lp->gridDim.y = grid.y;
lp->gridDim.z = grid.z;
lp->groupDim.x = block;
lp->groupDim.y = 1;
lp->groupDim.z = 1;
+#endif
stream->lockopen_preKernelCommand();
// *av = &stream->_av;
lp->av = &stream->_av;
@@ -1149,12 +1182,23 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, g
{
std::call_once(hip_initialized, ihipInit);
stream = ihipSyncAndResolveStream(stream);
+#if USE_GRID_LAUNCH_20
+ lp->grid_dim.x = grid;
+ lp->grid_dim.y = 1;
+ lp->grid_dim.z = 1;
+ lp->group_dim.x = block;
+ lp->group_dim.y = 1;
+ lp->group_dim.z = 1;
+ lp->barrier_bit = barrier_bit_queue_default;
+ lp->launch_fence = -1;
+#else
lp->gridDim.x = grid;
lp->gridDim.y = 1;
lp->gridDim.z = 1;
lp->groupDim.x = block;
lp->groupDim.y = 1;
lp->groupDim.z = 1;
+#endif
stream->lockopen_preKernelCommand();
// *av = &stream->_av;
lp->av = &stream->_av;
diff --git a/tests/src/kernel/hipGridLaunch.cpp b/tests/src/kernel/hipGridLaunch.cpp
index f13781362e..b195a0171d 100644
--- a/tests/src/kernel/hipGridLaunch.cpp
+++ b/tests/src/kernel/hipGridLaunch.cpp
@@ -21,7 +21,6 @@ THE SOFTWARE.
*/
// Test the Grid_Launch syntax.
-#undef DISABLE_GRID_LAUNCH /* Tell hip_*.h to compile in GL mode */
#include "hip_runtime.h"
#include "test_common.h"