From 73f88cb3bc9abfcda7a1d446841c23f8b71a9ba0 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Tue, 21 Jun 2016 11:11:20 -0500 Subject: [PATCH 1/5] added device functions to docs Change-Id: I11ad1af86274620782986e227888b2d5f0544d8f --- docs/markdown/hip_kernel_language.md | 70 ++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/docs/markdown/hip_kernel_language.md b/docs/markdown/hip_kernel_language.md index e382ede4b3..23cde7fee6 100644 --- a/docs/markdown/hip_kernel_language.md +++ b/docs/markdown/hip_kernel_language.md @@ -299,6 +299,42 @@ Following is the list of supported single precision mathematical functions. | float tanhf ( float x )
Calculate the hyperbolic tangent of the input argument. | ✓ | ✓ | | float truncf ( float x )
Truncate input argument to the integral part. | ✓ | ✓ | | float tgammaf ( float x )
Calculate the gamma function of the input argument. | ✓ | ✓ | +| float erfcinvf ( float y )
Calculate the inverse complementary function of the input argument. | ✓ | ✓ | +| float erfcxf ( float x )
Calculate the scaled complementary error function of the input argument. | ✓ | ✓ | +| float erfinvf ( float y )
Calculate the inverse error function of the input argument. | ✓ | ✓ | +| float fdividef ( float x, float y )
Divide two floating point values. | ✓ | ✓ | +| float frexpf ( float x, int *nptr )
Extract mantissa and exponent of a floating-point value. | ✓ | ✓ | +| float j0f ( float x )
Calculate the value of the Bessel function of the first kind of order 0 for the input argument. | ✓ | ✓ | +| float j1f ( float x )
Calculate the value of the Bessel function of the first kind of order 1 for the input argument. | ✓ | ✓ | +| float jnf ( int n, float x )
Calculate the value of the Bessel function of the first kind of order n for the input argument. | ✓ | ✓ | +| float lgammaf ( float x )
Calculate the natural logarithm of the absolute value of the gamma function of the input argument. | ✓ | ✓ | +| long long int llrintf ( float x )
Round input to nearest integer value. | ✓ | ✓ | +| long long int llroundf ( float x )
Round to nearest integer value. | ✓ | ✓ | +| long int lrintf ( float x )
Round input to nearest integer value. | ✓ | ✓ | +| long int lroundf ( float x )
Round to nearest integer value. | ✓ | ✓ | +| float modff ( float x, float *iptr )
Break down the input argument into fractional and integral parts. | ✓ | ✓ | +| float nextafterf ( float x, float y )
Returns next representable single-precision floating-point value after argument. | ✓ | ✓ | +| float norm3df ( float a, float b, float c )
Calculate the square root of the sum of squares of three coordinates of the argument. | ✓ | ✓ | +| float norm4df ( float a, float b, float c, float d )
Calculate the square root of the sum of squares of four coordinates of the argument. | ✓ | ✓ | +| float normcdff ( float y )
Calculate the standard normal cumulative distribution function. | ✓ | ✓ | +| float normcdfinvf ( float y )
Calculate the inverse of the standard normal cumulative distribution function. | ✓ | ✓ | +| float normf ( int dim, const float *a )
Calculate the square root of the sum of squares of any number of coordinates. | ✓ | ✓ | +| float rcbrtf ( float x )
Calculate the reciprocal cube root function. | ✓ | ✓ | +| float remquof ( float x, float y, int *quo )
Compute single-precision floating-point remainder and part of quotient. | ✓ | ✓ | +| float rhypotf ( float x, float y )
Calculate one over the square root of the sum of squares of two arguments. | ✓ | ✓ | +| float rintf ( float x )
Round input to nearest integer value in floating-point. | ✓ | ✓ | +| float rnorm3df ( float a, float b, float c )
Calculate one over the square root of the sum of squares of three coordinates of the argument. | ✓ | ✓ | +| float rnorm4df ( float a, float b, float c, float d )
Calculate one over the square root of the sum of squares of four coordinates of the argument. | ✓ | ✓ | +| float rnormf ( int dim, const float *a )
Calculate the reciprocal of square root of the sum of squares of any number of coordinates. | ✓ | ✓ | +| float scalblnf ( float x, long int n )
Scale floating-point input by integer power of two. | ✓ | ✓ | +| void sincosf ( float x, float *sptr, float *cptr )
Calculate the sine and cosine of the first input argument. | ✓ | ✓ | +| void sincospif ( float x, float *sptr, float *cptr )
Calculate the sine and cosine of the first input argument multiplied by PI. | ✓ | ✓ | +| float y0f ( float x )
Calculate the value of the Bessel function of the second kind of order 0 for the input argument. | ✓ | ✓ | +| float y1f ( float x )
Calculate the value of the Bessel function of the second kind of order 1 for the input argument. | ✓ | ✓ | +| float ynf ( int n, float x )
Calculate the value of the Bessel function of the second kind of order n for the input argument. | ✓ | ✓ | + + + [1] __RETURN_TYPE is dependent on compiler. It is usually 'int' for C compilers and 'bool' for C++ compilers. [↩](#a1) ### Double Precision Mathematical Functions @@ -360,6 +396,40 @@ Following is the list of supported double precision mathematical functions. | double tanh ( double x )
Calculate the hyperbolic tangent of the input argument. | ✓ | ✓ | | double tgamma ( double x )
Calculate the gamma function of the input argument. | ✓ | ✓ | | double trunc ( double x )
Truncate input argument to the integral part. | ✓ | ✓ | +| double erfcinv ( double y )
Calculate the inverse complementary function of the input argument. | ✓ | ✓ | +| double erfcx ( double x )
Calculate the scaled complementary error function of the input argument. | ✓ | ✓ | +| double erfinv ( double y )
Calculate the inverse error function of the input argument. | ✓ | ✓ | +| double frexp ( float x, int *nptr )
Extract mantissa and exponent of a floating-point value. | ✓ | ✓ | +| double j0 ( double x )
Calculate the value of the Bessel function of the first kind of order 0 for the input argument. | ✓ | ✓ | +| double j1 ( double x )
Calculate the value of the Bessel function of the first kind of order 1 for the input argument. | ✓ | ✓ | +| double jn ( int n, double x )
Calculate the value of the Bessel function of the first kind of order n for the input argument. | ✓ | ✓ | +| double lgamma ( double x )
Calculate the natural logarithm of the absolute value of the gamma function of the input argument. | ✓ | ✓ | +| long long int llrint ( double x )
Round input to nearest integer value. | ✓ | ✓ | +| long long int llround ( double x )
Round to nearest integer value. | ✓ | ✓ | +| long int lrint ( double x )
Round input to nearest integer value. | ✓ | ✓ | +| long int lround ( double x )
Round to nearest integer value. | ✓ | ✓ | +| double modf ( double x, double *iptr )
Break down the input argument into fractional and integral parts. | ✓ | ✓ | +| double nextafter ( double x, double y )
Returns next representable single-precision floating-point value after argument. | ✓ | ✓ | +| double norm3d ( double a, double b, double c )
Calculate the square root of the sum of squares of three coordinates of the argument. | ✓ | ✓ | +| float norm4d ( double a, double b, double c, double d )
Calculate the square root of the sum of squares of four coordinates of the argument. | ✓ | ✓ | +| double normcdf ( double y )
Calculate the standard normal cumulative distribution function. | ✓ | ✓ | +| double normcdfinv ( double y )
Calculate the inverse of the standard normal cumulative distribution function. | ✓ | ✓ | +| double rcbrt ( double x )
Calculate the reciprocal cube root function. | ✓ | ✓ | +| double remquo ( double x, double y, int *quo )
Compute single-precision floating-point remainder and part of quotient. | ✓ | ✓ | +| double rhypot ( double x, double y )
Calculate one over the square root of the sum of squares of two arguments. | ✓ | ✓ | +| double rint ( double x )
Round input to nearest integer value in floating-point. | ✓ | ✓ | +| double rnorm3d ( double a, double b, double c )
Calculate one over the square root of the sum of squares of three coordinates of the argument. | ✓ | ✓ | +| double rnorm4d ( double a, double b, double c, double d )
Calculate one over the square root of the sum of squares of four coordinates of the argument. | ✓ | ✓ | +| double rnorm ( int dim, const double *a )
Calculate the reciprocal of square root of the sum of squares of any number of coordinates. | ✓ | ✓ | +| double scalbln ( double x, long int n )
Scale floating-point input by integer power of two. | ✓ | ✓ | +| void sincos ( double x, double *sptr, double *cptr )
Calculate the sine and cosine of the first input argument. | ✓ | ✓ | +| void sincospi ( double x, double *sptr, double *cptr )
Calculate the sine and cosine of the first input argument multiplied by PI. | ✓ | ✓ | +| double y0f ( double x )
Calculate the value of the Bessel function of the second kind of order 0 for the input argument. | ✓ | ✓ | +| double y1 ( double x )
Calculate the value of the Bessel function of the second kind of order 1 for the input argument. | ✓ | ✓ | +| double yn ( int n, double x )
Calculate the value of the Bessel function of the second kind of order n for the input argument. | ✓ | ✓ | + + + [1] __RETURN_TYPE is dependent on compiler. It is usually 'int' for C compilers and 'bool' for C++ compilers. [↩](#a2) ### Integer Intrinsics From 100a744d926ee6b0ace439c0aa07b2803d17ded4 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Mon, 20 Jun 2016 23:28:45 -0500 Subject: [PATCH 2/5] Switch to hc_* coordinate builtins (replace amp_) Change-Id: I0a8871f0c9f047eb45a7391fd032100af2bbd4e0 --- include/hcc_detail/hip_runtime.h | 34 ++++++-------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/include/hcc_detail/hip_runtime.h b/include/hcc_detail/hip_runtime.h index 3d0bd7b1f2..066db4bbec 100644 --- a/include/hcc_detail/hip_runtime.h +++ b/include/hcc_detail/hip_runtime.h @@ -485,45 +485,23 @@ __device__ float __dsqrt_rz(double x); * Kernel launching */ -// Choose correct polarity of xyz/zyx ordering: -#if __hcc_workweek__ >= 16123 -#define hipThreadIdx_x (amp_get_local_id(0)) -#define hipThreadIdx_y (amp_get_local_id(1)) -#define hipThreadIdx_z (amp_get_local_id(2)) +#define hipThreadIdx_x (hc_get_workitem_id(0)) +#define hipThreadIdx_y (hc_get_workitem_id(1)) +#define hipThreadIdx_z (hc_get_workitem_id(2)) #define hipBlockIdx_x (hc_get_group_id(0)) #define hipBlockIdx_y (hc_get_group_id(1)) #define hipBlockIdx_z (hc_get_group_id(2)) -#define hipBlockDim_x (amp_get_local_size(0)) -#define hipBlockDim_y (amp_get_local_size(1)) -#define hipBlockDim_z (amp_get_local_size(2)) +#define hipBlockDim_x (hc_get_group_size(0)) +#define hipBlockDim_y (hc_get_group_size(1)) +#define hipBlockDim_z (hc_get_group_size(2)) #define hipGridDim_x (hc_get_num_groups(0)) #define hipGridDim_y (hc_get_num_groups(1)) #define hipGridDim_z (hc_get_num_groups(2)) -#else - -#define hipThreadIdx_x (amp_get_local_id(2)) -#define hipThreadIdx_y (amp_get_local_id(1)) -#define hipThreadIdx_z (amp_get_local_id(0)) - -#define hipBlockIdx_x (hc_get_group_id(2)) -#define hipBlockIdx_y (hc_get_group_id(1)) -#define hipBlockIdx_z (hc_get_group_id(0)) - -#define hipBlockDim_x (amp_get_local_size(2)) -#define hipBlockDim_y (amp_get_local_size(1)) -#define hipBlockDim_z (amp_get_local_size(0)) - -#define hipGridDim_x (hc_get_num_groups(2)) -#define hipGridDim_y (hc_get_num_groups(1)) -#define hipGridDim_z (hc_get_num_groups(0)) - -#endif // __hcc_workweek__ check - #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE) #define HIP_KERNEL_NAME(...) __VA_ARGS__ From 9f29cc698914d2c7b4bebc6113180361dddf06f8 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Mon, 20 Jun 2016 23:46:42 -0500 Subject: [PATCH 3/5] Grid-launch updates to 2.0 and cleanup of old. _ Use fields from GRID_LAUNCH_20 structure (See USE_GRID_LAUNCH_20 define, currently set to 0) "1" will require HCC support. - Remove old DISABLE_GRID_LAUNCH support. Change-Id: I584ce648d217251789a6283cf27feb24cb7dc8d1 --- docs/markdown/hip_porting_guide.md | 31 -------------------- include/hcc_detail/hip_runtime.h | 46 +++++++++++++----------------- include/hcc_detail/host_defines.h | 4 --- include/hip_common.h | 3 -- src/hip_hcc.cpp | 44 ++++++++++++++++++++++++++++ tests/src/kernel/hipGridLaunch.cpp | 1 - 6 files changed, 64 insertions(+), 65 deletions(-) diff --git a/docs/markdown/hip_porting_guide.md b/docs/markdown/hip_porting_guide.md index de5c590e12..6ae0a6d31e 100644 --- a/docs/markdown/hip_porting_guide.md +++ b/docs/markdown/hip_porting_guide.md @@ -464,37 +464,6 @@ hipcc-cmd: /opt/hcc/bin/hcc -hc -I/opt/hcc/include -stdlib=libc++ -I../../../.. If you pass a ".cu" file, hcc will attempt to compile it as a Cuda language file. You must tell hcc that it’s in fact a C++ file: use the "-x c++" option. -#### grid_launch kernel dispatch - fallback -HIP uses an hcc language feature called "grid_launch". The [[hc_grid_launch]] attribute that can be attached to a function definition, and the first parameter is of type grid_launch_parm. -When a [[hc_grid_launch]] function is called, hcc runtime uses the grid_launch_parm to control the execution configuration of the kernel -(including the grid and group dimensions, the queue, and dynamic group memory allocations). By default, the hipLaunchKernel macro creates a grid_launch_parm structure and launches a -[[hc_grid_launch]] kernel. grid_launch is a relatively new addition to hcc so this section describes how to fall back to a traditional calling sequence which invokes a standard host function -which calls a hc::parallel_for_each to launch the kernel. - -First, set DISABLE_GRID_LAUNCH: -include/hip_common.h -``` -// Set this define to disable GRID_LAUNCH -#define DISABLE_GRID_LAUNCH -``` - -Inside any kernel use the KERNELBEGIN as the first line in the kernel function, and KERNELEND as the last line. For example: -``` -__global__ void -MyKernel(hipLaunchParm lp, float *C, const float *A, size_t N) -{ - KERNELBEGIN; // Required if hc_grid_launch is disabled - - int tid = hipBlockIdx_x*MAX_THREADS_PER_BLOCK + hipThreadIdx_x; - - if (tid < N) { - C[tid] = A[tid]; - } - - KERNELEND; // Required if hc_grid_launch is disabled -} -``` - #### HIP Environment Variables On the HCC path, HIP provides a number of environment variables that control the behavior of HIP. Some of these are useful for appliction development (for example HIP_VISIBLE_DEVICES, HIP_LAUNCH_BLOCKING), diff --git a/include/hcc_detail/hip_runtime.h b/include/hcc_detail/hip_runtime.h index 066db4bbec..8594954e75 100644 --- a/include/hcc_detail/hip_runtime.h +++ b/include/hcc_detail/hip_runtime.h @@ -38,6 +38,10 @@ THE SOFTWARE. #include +// Use field names for grid_launch 2.0 structure: +#define USE_GRID_LAUNCH_20 0 + + #define CUDA_SUCCESS hipSuccess #include @@ -517,7 +521,20 @@ void ihipPostLaunchKernel(hipStream_t stream, grid_launch_parm &lp); #define KNRM "\x1B[0m" #define KGRN "\x1B[32m" -#if not defined(DISABLE_GRID_LAUNCH) +#if USE_GRID_LAUNCH_20 +#define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \ +do {\ + grid_launch_parm lp;\ + lp.dynamic_group_mem_bytes = _groupMemBytes; \ + hipStream_t trueStream = (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp)); \ + if (HIP_TRACE_API) {\ + fprintf(stderr, KGRN "<grid_dim.x = grid.x; + lp->grid_dim.y = grid.y; + lp->grid_dim.z = grid.z; + lp->group_dim.x = block.x; + lp->group_dim.y = block.y; + lp->group_dim.z = block.z; + lp->barrier_bit = barrier_bit_queue_default; + lp->launch_fence = -1; +#else lp->gridDim.x = grid.x; lp->gridDim.y = grid.y; lp->gridDim.z = grid.z; lp->groupDim.x = block.x; lp->groupDim.y = block.y; lp->groupDim.z = block.z; +#endif stream->lockopen_preKernelCommand(); // *av = &stream->_av; lp->av = &stream->_av; @@ -1109,12 +1120,23 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, gri { std::call_once(hip_initialized, ihipInit); stream = ihipSyncAndResolveStream(stream); +#if USE_GRID_LAUNCH_20 + lp->grid_dim.x = grid; + lp->grid_dim.y = 1; + lp->grid_dim.z = 1; + lp->group_dim.x = block.x; + lp->group_dim.y = block.y; + lp->group_dim.z = block.z; + lp->barrier_bit = barrier_bit_queue_default; + lp->launch_fence = -1; +#else lp->gridDim.x = grid; lp->gridDim.y = 1; lp->gridDim.z = 1; lp->groupDim.x = block.x; lp->groupDim.y = block.y; lp->groupDim.z = block.z; +#endif stream->lockopen_preKernelCommand(); // *av = &stream->_av; lp->av = &stream->_av; @@ -1128,12 +1150,23 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, gri { std::call_once(hip_initialized, ihipInit); stream = ihipSyncAndResolveStream(stream); +#if USE_GRID_LAUNCH_20 + lp->grid_dim.x = grid.x; + lp->grid_dim.y = grid.y; + lp->grid_dim.z = grid.z; + lp->group_dim.x = block; + lp->group_dim.y = 1; + lp->group_dim.z = 1; + lp->barrier_bit = barrier_bit_queue_default; + lp->launch_fence = -1; +#else lp->gridDim.x = grid.x; lp->gridDim.y = grid.y; lp->gridDim.z = grid.z; lp->groupDim.x = block; lp->groupDim.y = 1; lp->groupDim.z = 1; +#endif stream->lockopen_preKernelCommand(); // *av = &stream->_av; lp->av = &stream->_av; @@ -1147,12 +1180,23 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, g { std::call_once(hip_initialized, ihipInit); stream = ihipSyncAndResolveStream(stream); +#if USE_GRID_LAUNCH_20 + lp->grid_dim.x = grid; + lp->grid_dim.y = 1; + lp->grid_dim.z = 1; + lp->group_dim.x = block; + lp->group_dim.y = 1; + lp->group_dim.z = 1; + lp->barrier_bit = barrier_bit_queue_default; + lp->launch_fence = -1; +#else lp->gridDim.x = grid; lp->gridDim.y = 1; lp->gridDim.z = 1; lp->groupDim.x = block; lp->groupDim.y = 1; lp->groupDim.z = 1; +#endif stream->lockopen_preKernelCommand(); // *av = &stream->_av; lp->av = &stream->_av; diff --git a/tests/src/kernel/hipGridLaunch.cpp b/tests/src/kernel/hipGridLaunch.cpp index f13781362e..b195a0171d 100644 --- a/tests/src/kernel/hipGridLaunch.cpp +++ b/tests/src/kernel/hipGridLaunch.cpp @@ -21,7 +21,6 @@ THE SOFTWARE. */ // Test the Grid_Launch syntax. -#undef DISABLE_GRID_LAUNCH /* Tell hip_*.h to compile in GL mode */ #include "hip_runtime.h" #include "test_common.h" From 702b9eff1e4af40169e85084e8516d22d0d3078f Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Tue, 21 Jun 2016 16:04:04 -0500 Subject: [PATCH 4/5] Use GRID_LAUNCH_VERSION define provided by HCC grid_launch.h Change-Id: I5a7b31f13c4055cdca3a8854728d59db7dc5a7bb --- include/hcc_detail/hip_runtime.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/include/hcc_detail/hip_runtime.h b/include/hcc_detail/hip_runtime.h index 8594954e75..21da60631a 100644 --- a/include/hcc_detail/hip_runtime.h +++ b/include/hcc_detail/hip_runtime.h @@ -38,8 +38,6 @@ THE SOFTWARE. #include -// Use field names for grid_launch 2.0 structure: -#define USE_GRID_LAUNCH_20 0 #define CUDA_SUCCESS hipSuccess @@ -50,6 +48,16 @@ THE SOFTWARE. // Remainder of this file only compiles with HCC #ifdef __HCC__ #include + +#if defined (GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20) +// Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0. +#define USE_GRID_LAUNCH_20 1 +#else +#define USE_GRID_LAUNCH_20 0 +#endif + + + extern int HIP_TRACE_API; //TODO-HCC-GL - change this to typedef. @@ -529,7 +537,7 @@ do {\ hipStream_t trueStream = (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp)); \ if (HIP_TRACE_API) {\ fprintf(stderr, KGRN "< Date: Fri, 24 Jun 2016 21:10:37 +0530 Subject: [PATCH 5/5] hip_samples package now contains uncompressed samples Change-Id: I69e773ebeff59733e016abd5a90bd1637798b1f5 --- packaging/hip_samples.txt | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/packaging/hip_samples.txt b/packaging/hip_samples.txt index 6adf9160bf..f289f2a8e5 100644 --- a/packaging/hip_samples.txt +++ b/packaging/hip_samples.txt @@ -1,17 +1,13 @@ cmake_minimum_required(VERSION 2.8.3) project(hip_samples) -add_custom_target(create_installer_script ALL - COMMAND tar cvzf ${PROJECT_BINARY_DIR}/samples.tgz --exclude='*.o' . - COMMAND @hip_SOURCE_DIR@/packaging/create_hip_samples_installer.sh ${PROJECT_BINARY_DIR}/samples.tgz ${PROJECT_BINARY_DIR}/unpack_hip_samples.sh - WORKING_DIRECTORY @hip_SOURCE_DIR@/samples) -install(PROGRAMS unpack_hip_samples.sh DESTINATION bin) +install(DIRECTORY @hip_SOURCE_DIR@/samples DESTINATION .) ############################# # Packaging steps ############################# set(CPACK_SET_DESTDIR TRUE) -set(CPACK_INSTALL_PREFIX "/opt/rocm") +set(CPACK_INSTALL_PREFIX "/opt/rocm/hip") set(CPACK_PACKAGE_NAME "hip_samples") set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "HIP: Heterogenous-computing Interface for Portability [SAMPLES]") set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.")