Merge branch 'amd-develop' into amd-master
Change-Id: Ibb383539352e3ad5bcdaaf22f88ec6954b2fb55e
This commit is contained in:
@@ -299,6 +299,42 @@ Following is the list of supported single precision mathematical functions.
|
||||
| float tanhf ( float x ) <br><sub>Calculate the hyperbolic tangent of the input argument.</sub> | ✓ | ✓ |
|
||||
| float truncf ( float x ) <br><sub>Truncate input argument to the integral part.</sub> | ✓ | ✓ |
|
||||
| float tgammaf ( float x ) <br><sub>Calculate the gamma function of the input argument.</sub> | ✓ | ✓ |
|
||||
| float erfcinvf ( float y ) <br><sub>Calculate the inverse complementary error function of the input argument.</sub> | ✓ | ✓ |
|
||||
| float erfcxf ( float x ) <br><sub>Calculate the scaled complementary error function of the input argument.</sub> | ✓ | ✓ |
|
||||
| float erfinvf ( float y ) <br><sub>Calculate the inverse error function of the input argument.</sub> | ✓ | ✓ |
|
||||
| float fdividef ( float x, float y ) <br><sub>Divide two floating point values.</sub> | ✓ | ✓ |
|
||||
| float frexpf ( float x, int *nptr ) <br><sub>Extract mantissa and exponent of a floating-point value.</sub> | ✓ | ✓ |
|
||||
| float j0f ( float x ) <br><sub>Calculate the value of the Bessel function of the first kind of order 0 for the input argument.</sub> | ✓ | ✓ |
|
||||
| float j1f ( float x ) <br><sub>Calculate the value of the Bessel function of the first kind of order 1 for the input argument.</sub> | ✓ | ✓ |
|
||||
| float jnf ( int n, float x ) <br><sub>Calculate the value of the Bessel function of the first kind of order n for the input argument.</sub> | ✓ | ✓ |
|
||||
| float lgammaf ( float x ) <br><sub>Calculate the natural logarithm of the absolute value of the gamma function of the input argument.</sub> | ✓ | ✓ |
|
||||
| long long int llrintf ( float x ) <br><sub>Round input to nearest integer value.</sub> | ✓ | ✓ |
|
||||
| long long int llroundf ( float x ) <br><sub>Round to nearest integer value.</sub> | ✓ | ✓ |
|
||||
| long int lrintf ( float x ) <br><sub>Round input to nearest integer value.</sub> | ✓ | ✓ |
|
||||
| long int lroundf ( float x ) <br><sub>Round to nearest integer value.</sub> | ✓ | ✓ |
|
||||
| float modff ( float x, float *iptr ) <br><sub>Break down the input argument into fractional and integral parts.</sub> | ✓ | ✓ |
|
||||
| float nextafterf ( float x, float y ) <br><sub>Returns next representable single-precision floating-point value after argument.</sub> | ✓ | ✓ |
|
||||
| float norm3df ( float a, float b, float c ) <br><sub>Calculate the square root of the sum of squares of three coordinates of the argument.</sub> | ✓ | ✓ |
|
||||
| float norm4df ( float a, float b, float c, float d ) <br><sub>Calculate the square root of the sum of squares of four coordinates of the argument.</sub> | ✓ | ✓ |
|
||||
| float normcdff ( float y ) <br><sub>Calculate the standard normal cumulative distribution function.</sub> | ✓ | ✓ |
|
||||
| float normcdfinvf ( float y ) <br><sub>Calculate the inverse of the standard normal cumulative distribution function.</sub> | ✓ | ✓ |
|
||||
| float normf ( int dim, const float *a ) <br><sub>Calculate the square root of the sum of squares of any number of coordinates.</sub> | ✓ | ✓ |
|
||||
| float rcbrtf ( float x ) <br><sub>Calculate the reciprocal cube root function.</sub> | ✓ | ✓ |
|
||||
| float remquof ( float x, float y, int *quo ) <br><sub>Compute single-precision floating-point remainder and part of quotient.</sub> | ✓ | ✓ |
|
||||
| float rhypotf ( float x, float y ) <br><sub>Calculate one over the square root of the sum of squares of two arguments.</sub> | ✓ | ✓ |
|
||||
| float rintf ( float x ) <br><sub>Round input to nearest integer value in floating-point.</sub> | ✓ | ✓ |
|
||||
| float rnorm3df ( float a, float b, float c ) <br><sub>Calculate one over the square root of the sum of squares of three coordinates of the argument.</sub> | ✓ | ✓ |
|
||||
| float rnorm4df ( float a, float b, float c, float d ) <br><sub>Calculate one over the square root of the sum of squares of four coordinates of the argument.</sub> | ✓ | ✓ |
|
||||
| float rnormf ( int dim, const float *a ) <br><sub>Calculate the reciprocal of square root of the sum of squares of any number of coordinates.</sub> | ✓ | ✓ |
|
||||
| float scalblnf ( float x, long int n ) <br><sub>Scale floating-point input by integer power of two.</sub> | ✓ | ✓ |
|
||||
| void sincosf ( float x, float *sptr, float *cptr ) <br><sub>Calculate the sine and cosine of the first input argument.</sub> | ✓ | ✓ |
|
||||
| void sincospif ( float x, float *sptr, float *cptr ) <br><sub>Calculate the sine and cosine of the first input argument multiplied by PI.</sub> | ✓ | ✓ |
|
||||
| float y0f ( float x ) <br><sub>Calculate the value of the Bessel function of the second kind of order 0 for the input argument.</sub> | ✓ | ✓ |
|
||||
| float y1f ( float x ) <br><sub>Calculate the value of the Bessel function of the second kind of order 1 for the input argument.</sub> | ✓ | ✓ |
|
||||
| float ynf ( int n, float x ) <br><sub>Calculate the value of the Bessel function of the second kind of order n for the input argument.</sub> | ✓ | ✓ |
|
||||
|
||||
|
||||
|
||||
<sub><b id="f1"><sup>[1]</sup></b> __RETURN_TYPE is dependent on compiler. It is usually 'int' for C compilers and 'bool' for C++ compilers.</sub> [↩](#a1)
|
||||
|
||||
### Double Precision Mathematical Functions
|
||||
@@ -360,6 +396,40 @@ Following is the list of supported double precision mathematical functions.
|
||||
| double tanh ( double x ) <br><sub>Calculate the hyperbolic tangent of the input argument.</sub> | ✓ | ✓ |
|
||||
| double tgamma ( double x ) <br><sub>Calculate the gamma function of the input argument.</sub> | ✓ | ✓ |
|
||||
| double trunc ( double x ) <br><sub>Truncate input argument to the integral part.</sub> | ✓ | ✓ |
|
||||
| double erfcinv ( double y ) <br><sub>Calculate the inverse complementary error function of the input argument.</sub> | ✓ | ✓ |
|
||||
| double erfcx ( double x ) <br><sub>Calculate the scaled complementary error function of the input argument.</sub> | ✓ | ✓ |
|
||||
| double erfinv ( double y ) <br><sub>Calculate the inverse error function of the input argument.</sub> | ✓ | ✓ |
|
||||
| double frexp ( double x, int *nptr ) <br><sub>Extract mantissa and exponent of a floating-point value.</sub> | ✓ | ✓ |
|
||||
| double j0 ( double x ) <br><sub>Calculate the value of the Bessel function of the first kind of order 0 for the input argument.</sub> | ✓ | ✓ |
|
||||
| double j1 ( double x ) <br><sub>Calculate the value of the Bessel function of the first kind of order 1 for the input argument.</sub> | ✓ | ✓ |
|
||||
| double jn ( int n, double x ) <br><sub>Calculate the value of the Bessel function of the first kind of order n for the input argument.</sub> | ✓ | ✓ |
|
||||
| double lgamma ( double x ) <br><sub>Calculate the natural logarithm of the absolute value of the gamma function of the input argument.</sub> | ✓ | ✓ |
|
||||
| long long int llrint ( double x ) <br><sub>Round input to nearest integer value.</sub> | ✓ | ✓ |
|
||||
| long long int llround ( double x ) <br><sub>Round to nearest integer value.</sub> | ✓ | ✓ |
|
||||
| long int lrint ( double x ) <br><sub>Round input to nearest integer value.</sub> | ✓ | ✓ |
|
||||
| long int lround ( double x ) <br><sub>Round to nearest integer value.</sub> | ✓ | ✓ |
|
||||
| double modf ( double x, double *iptr ) <br><sub>Break down the input argument into fractional and integral parts.</sub> | ✓ | ✓ |
|
||||
| double nextafter ( double x, double y ) <br><sub>Returns next representable double-precision floating-point value after argument.</sub> | ✓ | ✓ |
|
||||
| double norm3d ( double a, double b, double c ) <br><sub>Calculate the square root of the sum of squares of three coordinates of the argument.</sub> | ✓ | ✓ |
|
||||
| double norm4d ( double a, double b, double c, double d ) <br><sub>Calculate the square root of the sum of squares of four coordinates of the argument.</sub> | ✓ | ✓ |
|
||||
| double normcdf ( double y ) <br><sub>Calculate the standard normal cumulative distribution function.</sub> | ✓ | ✓ |
|
||||
| double normcdfinv ( double y ) <br><sub>Calculate the inverse of the standard normal cumulative distribution function.</sub> | ✓ | ✓ |
|
||||
| double rcbrt ( double x ) <br><sub>Calculate the reciprocal cube root function.</sub> | ✓ | ✓ |
|
||||
| double remquo ( double x, double y, int *quo ) <br><sub>Compute double-precision floating-point remainder and part of quotient.</sub> | ✓ | ✓ |
|
||||
| double rhypot ( double x, double y ) <br><sub>Calculate one over the square root of the sum of squares of two arguments.</sub> | ✓ | ✓ |
|
||||
| double rint ( double x ) <br><sub>Round input to nearest integer value in floating-point.</sub> | ✓ | ✓ |
|
||||
| double rnorm3d ( double a, double b, double c ) <br><sub>Calculate one over the square root of the sum of squares of three coordinates of the argument.</sub> | ✓ | ✓ |
|
||||
| double rnorm4d ( double a, double b, double c, double d ) <br><sub>Calculate one over the square root of the sum of squares of four coordinates of the argument.</sub> | ✓ | ✓ |
|
||||
| double rnorm ( int dim, const double *a ) <br><sub>Calculate the reciprocal of square root of the sum of squares of any number of coordinates.</sub> | ✓ | ✓ |
|
||||
| double scalbln ( double x, long int n ) <br><sub>Scale floating-point input by integer power of two.</sub> | ✓ | ✓ |
|
||||
| void sincos ( double x, double *sptr, double *cptr ) <br><sub>Calculate the sine and cosine of the first input argument.</sub> | ✓ | ✓ |
|
||||
| void sincospi ( double x, double *sptr, double *cptr ) <br><sub>Calculate the sine and cosine of the first input argument multiplied by PI.</sub> | ✓ | ✓ |
|
||||
| double y0 ( double x ) <br><sub>Calculate the value of the Bessel function of the second kind of order 0 for the input argument.</sub> | ✓ | ✓ |
|
||||
| double y1 ( double x ) <br><sub>Calculate the value of the Bessel function of the second kind of order 1 for the input argument.</sub> | ✓ | ✓ |
|
||||
| double yn ( int n, double x ) <br><sub>Calculate the value of the Bessel function of the second kind of order n for the input argument.</sub> | ✓ | ✓ |
|
||||
|
||||
|
||||
|
||||
<sub><b id="f2"><sup>[1]</sup></b> __RETURN_TYPE is dependent on compiler. It is usually 'int' for C compilers and 'bool' for C++ compilers.</sub> [↩](#a2)
|
||||
|
||||
### Integer Intrinsics
|
||||
|
||||
@@ -464,37 +464,6 @@ hipcc-cmd: /opt/hcc/bin/hcc -hc -I/opt/hcc/include -stdlib=libc++ -I../../../..
|
||||
If you pass a ".cu" file, hcc will attempt to compile it as a CUDA language file. You must tell hcc that it's in fact a C++ file: use the "-x c++" option.
|
||||
|
||||
|
||||
#### grid_launch kernel dispatch - fallback
|
||||
HIP uses an hcc language feature called "grid_launch". The [[hc_grid_launch]] attribute can be attached to a function definition whose first parameter is of type grid_launch_parm.
|
||||
When a [[hc_grid_launch]] function is called, hcc runtime uses the grid_launch_parm to control the execution configuration of the kernel
|
||||
(including the grid and group dimensions, the queue, and dynamic group memory allocations). By default, the hipLaunchKernel macro creates a grid_launch_parm structure and launches a
|
||||
[[hc_grid_launch]] kernel. grid_launch is a relatively new addition to hcc so this section describes how to fall back to a traditional calling sequence which invokes a standard host function
|
||||
which calls a hc::parallel_for_each to launch the kernel.
|
||||
|
||||
First, set DISABLE_GRID_LAUNCH:
|
||||
include/hip_common.h
|
||||
```
|
||||
// Set this define to disable GRID_LAUNCH
|
||||
#define DISABLE_GRID_LAUNCH
|
||||
```
|
||||
|
||||
Inside any kernel use the KERNELBEGIN as the first line in the kernel function, and KERNELEND as the last line. For example:
|
||||
```
|
||||
__global__ void
|
||||
MyKernel(hipLaunchParm lp, float *C, const float *A, size_t N)
|
||||
{
|
||||
KERNELBEGIN; // Required if hc_grid_launch is disabled
|
||||
|
||||
int tid = hipBlockIdx_x*MAX_THREADS_PER_BLOCK + hipThreadIdx_x;
|
||||
|
||||
if (tid < N) {
|
||||
C[tid] = A[tid];
|
||||
}
|
||||
|
||||
KERNELEND; // Required if hc_grid_launch is disabled
|
||||
}
|
||||
```
|
||||
|
||||
#### HIP Environment Variables
|
||||
|
||||
On the HCC path, HIP provides a number of environment variables that control the behavior of HIP. Some of these are useful for application development (for example HIP_VISIBLE_DEVICES, HIP_LAUNCH_BLOCKING),
|
||||
|
||||
@@ -38,6 +38,8 @@ THE SOFTWARE.
|
||||
#include <stddef.h>
|
||||
|
||||
|
||||
|
||||
|
||||
#define CUDA_SUCCESS hipSuccess
|
||||
|
||||
#include <hip/hip_runtime_api.h>
|
||||
@@ -46,6 +48,16 @@ THE SOFTWARE.
|
||||
// Remainder of this file only compiles with HCC
|
||||
#ifdef __HCC__
|
||||
#include <grid_launch.h>
|
||||
|
||||
#if defined (GRID_LAUNCH_VERSION) and (GRID_LAUNCH_VERSION >= 20)
|
||||
// Use field names for grid_launch 2.0 structure, if HCC supports GL 2.0.
|
||||
#define USE_GRID_LAUNCH_20 1
|
||||
#else
|
||||
#define USE_GRID_LAUNCH_20 0
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
extern int HIP_TRACE_API;
|
||||
|
||||
//TODO-HCC-GL - change this to typedef.
|
||||
@@ -485,45 +497,23 @@ __device__ float __dsqrt_rz(double x);
|
||||
* Kernel launching
|
||||
*/
|
||||
|
||||
// Choose correct polarity of xyz/zyx ordering:
|
||||
#if __hcc_workweek__ >= 16123
|
||||
|
||||
#define hipThreadIdx_x (amp_get_local_id(0))
|
||||
#define hipThreadIdx_y (amp_get_local_id(1))
|
||||
#define hipThreadIdx_z (amp_get_local_id(2))
|
||||
#define hipThreadIdx_x (hc_get_workitem_id(0))
|
||||
#define hipThreadIdx_y (hc_get_workitem_id(1))
|
||||
#define hipThreadIdx_z (hc_get_workitem_id(2))
|
||||
|
||||
#define hipBlockIdx_x (hc_get_group_id(0))
|
||||
#define hipBlockIdx_y (hc_get_group_id(1))
|
||||
#define hipBlockIdx_z (hc_get_group_id(2))
|
||||
|
||||
#define hipBlockDim_x (amp_get_local_size(0))
|
||||
#define hipBlockDim_y (amp_get_local_size(1))
|
||||
#define hipBlockDim_z (amp_get_local_size(2))
|
||||
#define hipBlockDim_x (hc_get_group_size(0))
|
||||
#define hipBlockDim_y (hc_get_group_size(1))
|
||||
#define hipBlockDim_z (hc_get_group_size(2))
|
||||
|
||||
#define hipGridDim_x (hc_get_num_groups(0))
|
||||
#define hipGridDim_y (hc_get_num_groups(1))
|
||||
#define hipGridDim_z (hc_get_num_groups(2))
|
||||
|
||||
#else
|
||||
|
||||
#define hipThreadIdx_x (amp_get_local_id(2))
|
||||
#define hipThreadIdx_y (amp_get_local_id(1))
|
||||
#define hipThreadIdx_z (amp_get_local_id(0))
|
||||
|
||||
#define hipBlockIdx_x (hc_get_group_id(2))
|
||||
#define hipBlockIdx_y (hc_get_group_id(1))
|
||||
#define hipBlockIdx_z (hc_get_group_id(0))
|
||||
|
||||
#define hipBlockDim_x (amp_get_local_size(2))
|
||||
#define hipBlockDim_y (amp_get_local_size(1))
|
||||
#define hipBlockDim_z (amp_get_local_size(0))
|
||||
|
||||
#define hipGridDim_x (hc_get_num_groups(2))
|
||||
#define hipGridDim_y (hc_get_num_groups(1))
|
||||
#define hipGridDim_z (hc_get_num_groups(0))
|
||||
|
||||
#endif // __hcc_workweek__ check
|
||||
|
||||
#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
|
||||
|
||||
#define HIP_KERNEL_NAME(...) __VA_ARGS__
|
||||
@@ -539,7 +529,20 @@ void ihipPostLaunchKernel(hipStream_t stream, grid_launch_parm &lp);
|
||||
#define KNRM "\x1B[0m"
|
||||
#define KGRN "\x1B[32m"
|
||||
|
||||
#if not defined(DISABLE_GRID_LAUNCH)
|
||||
#if USE_GRID_LAUNCH_20
|
||||
#define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
|
||||
do {\
|
||||
grid_launch_parm lp;\
|
||||
lp.dynamic_group_mem_bytes = _groupMemBytes; \
|
||||
hipStream_t trueStream = (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp)); \
|
||||
if (HIP_TRACE_API) {\
|
||||
fprintf(stderr, KGRN "<<hip-api: hipLaunchKernel '%s' gridDim:(%d,%d,%d) groupDim:(%d,%d,%d) groupMem:+%d stream=%p\n" KNRM, \
|
||||
#_kernelName, lp.grid_dim.x, lp.grid_dim.y, lp.grid_dim.z, lp.group_dim.x, lp.group_dim.y, lp.group_dim.z, lp.dynamic_group_mem_bytes, (void*)(_stream));\
|
||||
}\
|
||||
_kernelName (lp, ##__VA_ARGS__);\
|
||||
ihipPostLaunchKernel(trueStream, lp);\
|
||||
} while(0)
|
||||
#else
|
||||
#define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
|
||||
do {\
|
||||
grid_launch_parm lp;\
|
||||
@@ -553,32 +556,9 @@ do {\
|
||||
ihipPostLaunchKernel(trueStream, lp);\
|
||||
} while(0)
|
||||
|
||||
#else
|
||||
#warning(DISABLE_GRID_LAUNCH set)
|
||||
|
||||
#define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
|
||||
do {\
|
||||
grid_launch_parm lp;\
|
||||
lp.gridDim.x = _numBlocks3D.x * _blockDim3D.x;/*Convert from #blocks to #threads*/ \
|
||||
lp.gridDim.y = _numBlocks3D.y * _blockDim3D.y;/*Convert from #blocks to #threads*/ \
|
||||
lp.gridDim.z = _numBlocks3D.z * _blockDim3D.z;/*Convert from #blocks to #threads*/ \
|
||||
lp.groupDim.x = _blockDim3D.x; \
|
||||
lp.groupDim.y = _blockDim3D.y; \
|
||||
lp.groupDim.z = _blockDim3D.z; \
|
||||
lp.groupMemBytes = _groupMemBytes;\
|
||||
hc::completion_future cf;\
|
||||
lp.cf = &cf; \
|
||||
hipStream_t trueStream = (ihipPreLaunchKernel(_stream, &lp.av)); \
|
||||
if (HIP_TRACE_API) {\
|
||||
fprintf(stderr, "==hip-api: launch '%s' gridDim:[%d.%d.%d] groupDim:[%d.%d.%d] groupMem:+%d stream=%p\n", \
|
||||
#_kernelName, lp.gridDim.z, lp.gridDim.y, lp.gridDim.x, lp.groupDim.z, lp.groupDim.y, lp.groupDim.x, lp.groupMemBytes, (void*)(_stream));\
|
||||
}\
|
||||
_kernelName (lp, ##__VA_ARGS__);\
|
||||
ihipPostLaunchKernel(trueStream, cf);\
|
||||
} while(0)
|
||||
/*end hipLaunchKernel */
|
||||
#endif
|
||||
|
||||
|
||||
#elif defined (__HCC_C__)
|
||||
|
||||
//TODO - develop C interface.
|
||||
|
||||
@@ -35,11 +35,7 @@ THE SOFTWARE.
|
||||
#define __host__ __attribute__((cpu))
|
||||
#define __device__ __attribute__((hc))
|
||||
|
||||
#ifndef DISABLE_GRID_LAUNCH
|
||||
#define __global__ __attribute__((hc_grid_launch))
|
||||
#else
|
||||
#define __global__
|
||||
#endif
|
||||
|
||||
#define __noinline__ __attribute__((noinline))
|
||||
#define __forceinline__ __attribute__((always_inline))
|
||||
|
||||
@@ -22,9 +22,6 @@ THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
// Disable use of grid_launch feature in HCC compiler.
|
||||
//#define DISABLE_GRID_LAUNCH
|
||||
|
||||
// Common code included at start of every hip file.
|
||||
// Auto enable __HIP_PLATFORM_HCC__ if compiling with HCC
|
||||
// Other compiler (GCC,ICC,etc) need to set one of these macros explicitly
|
||||
|
||||
@@ -1,17 +1,13 @@
|
||||
cmake_minimum_required(VERSION 2.8.3)
|
||||
project(hip_samples)
|
||||
|
||||
add_custom_target(create_installer_script ALL
|
||||
COMMAND tar cvzf ${PROJECT_BINARY_DIR}/samples.tgz --exclude='*.o' .
|
||||
COMMAND @hip_SOURCE_DIR@/packaging/create_hip_samples_installer.sh ${PROJECT_BINARY_DIR}/samples.tgz ${PROJECT_BINARY_DIR}/unpack_hip_samples.sh
|
||||
WORKING_DIRECTORY @hip_SOURCE_DIR@/samples)
|
||||
install(PROGRAMS unpack_hip_samples.sh DESTINATION bin)
|
||||
install(DIRECTORY @hip_SOURCE_DIR@/samples DESTINATION .)
|
||||
|
||||
#############################
|
||||
# Packaging steps
|
||||
#############################
|
||||
set(CPACK_SET_DESTDIR TRUE)
|
||||
set(CPACK_INSTALL_PREFIX "/opt/rocm")
|
||||
set(CPACK_INSTALL_PREFIX "/opt/rocm/hip")
|
||||
set(CPACK_PACKAGE_NAME "hip_samples")
|
||||
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "HIP: Heterogenous-computing Interface for Portability [SAMPLES]")
|
||||
set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.")
|
||||
|
||||
@@ -1093,12 +1093,23 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_
|
||||
{
|
||||
std::call_once(hip_initialized, ihipInit);
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
#if USE_GRID_LAUNCH_20
|
||||
lp->grid_dim.x = grid.x;
|
||||
lp->grid_dim.y = grid.y;
|
||||
lp->grid_dim.z = grid.z;
|
||||
lp->group_dim.x = block.x;
|
||||
lp->group_dim.y = block.y;
|
||||
lp->group_dim.z = block.z;
|
||||
lp->barrier_bit = barrier_bit_queue_default;
|
||||
lp->launch_fence = -1;
|
||||
#else
|
||||
lp->gridDim.x = grid.x;
|
||||
lp->gridDim.y = grid.y;
|
||||
lp->gridDim.z = grid.z;
|
||||
lp->groupDim.x = block.x;
|
||||
lp->groupDim.y = block.y;
|
||||
lp->groupDim.z = block.z;
|
||||
#endif
|
||||
stream->lockopen_preKernelCommand();
|
||||
// *av = &stream->_av;
|
||||
lp->av = &stream->_av;
|
||||
@@ -1111,12 +1122,23 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, gri
|
||||
{
|
||||
std::call_once(hip_initialized, ihipInit);
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
#if USE_GRID_LAUNCH_20
|
||||
lp->grid_dim.x = grid;
|
||||
lp->grid_dim.y = 1;
|
||||
lp->grid_dim.z = 1;
|
||||
lp->group_dim.x = block.x;
|
||||
lp->group_dim.y = block.y;
|
||||
lp->group_dim.z = block.z;
|
||||
lp->barrier_bit = barrier_bit_queue_default;
|
||||
lp->launch_fence = -1;
|
||||
#else
|
||||
lp->gridDim.x = grid;
|
||||
lp->gridDim.y = 1;
|
||||
lp->gridDim.z = 1;
|
||||
lp->groupDim.x = block.x;
|
||||
lp->groupDim.y = block.y;
|
||||
lp->groupDim.z = block.z;
|
||||
#endif
|
||||
stream->lockopen_preKernelCommand();
|
||||
// *av = &stream->_av;
|
||||
lp->av = &stream->_av;
|
||||
@@ -1130,12 +1152,23 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, gri
|
||||
{
|
||||
std::call_once(hip_initialized, ihipInit);
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
#if USE_GRID_LAUNCH_20
|
||||
lp->grid_dim.x = grid.x;
|
||||
lp->grid_dim.y = grid.y;
|
||||
lp->grid_dim.z = grid.z;
|
||||
lp->group_dim.x = block;
|
||||
lp->group_dim.y = 1;
|
||||
lp->group_dim.z = 1;
|
||||
lp->barrier_bit = barrier_bit_queue_default;
|
||||
lp->launch_fence = -1;
|
||||
#else
|
||||
lp->gridDim.x = grid.x;
|
||||
lp->gridDim.y = grid.y;
|
||||
lp->gridDim.z = grid.z;
|
||||
lp->groupDim.x = block;
|
||||
lp->groupDim.y = 1;
|
||||
lp->groupDim.z = 1;
|
||||
#endif
|
||||
stream->lockopen_preKernelCommand();
|
||||
// *av = &stream->_av;
|
||||
lp->av = &stream->_av;
|
||||
@@ -1149,12 +1182,23 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, g
|
||||
{
|
||||
std::call_once(hip_initialized, ihipInit);
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
#if USE_GRID_LAUNCH_20
|
||||
lp->grid_dim.x = grid;
|
||||
lp->grid_dim.y = 1;
|
||||
lp->grid_dim.z = 1;
|
||||
lp->group_dim.x = block;
|
||||
lp->group_dim.y = 1;
|
||||
lp->group_dim.z = 1;
|
||||
lp->barrier_bit = barrier_bit_queue_default;
|
||||
lp->launch_fence = -1;
|
||||
#else
|
||||
lp->gridDim.x = grid;
|
||||
lp->gridDim.y = 1;
|
||||
lp->gridDim.z = 1;
|
||||
lp->groupDim.x = block;
|
||||
lp->groupDim.y = 1;
|
||||
lp->groupDim.z = 1;
|
||||
#endif
|
||||
stream->lockopen_preKernelCommand();
|
||||
// *av = &stream->_av;
|
||||
lp->av = &stream->_av;
|
||||
|
||||
@@ -21,7 +21,6 @@ THE SOFTWARE.
|
||||
*/
|
||||
// Test the Grid_Launch syntax.
|
||||
|
||||
#undef DISABLE_GRID_LAUNCH /* Tell hip_*.h to compile in GL mode */
|
||||
#include "hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
|
||||
|
||||
Fai riferimento in un nuovo problema
Block a user