HIP: Heterogeneous-computing Interface for Portability
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
Macros | Functions | Variables
hip_runtime.h File Reference

Contains definitions of APIs for the HIP runtime. More...

#include <cstring>
#include <cmath>
#include <string.h>
#include <stddef.h>
#include <hip_runtime_api.h>
#include <hc.hpp>
#include <grid_launch.h>
#include <hcc_detail/hip_texture.h>
#include <hcc_detail/host_defines.h>
#include <hc_math.hpp>

Go to the source code of this file.

Macros

#define CUDA_SUCCESS   hipSuccess
 
#define hipLaunchParm   grid_launch_parm
 
#define __launch_bounds__(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor)
 
#define clock_t   long long int
 
#define hipThreadIdx_x   (amp_get_local_id(2))
 
#define hipThreadIdx_y   (amp_get_local_id(1))
 
#define hipThreadIdx_z   (amp_get_local_id(0))
 
#define hipBlockIdx_x   (hc_get_group_id(2))
 
#define hipBlockIdx_y   (hc_get_group_id(1))
 
#define hipBlockIdx_z   (hc_get_group_id(0))
 
#define hipBlockDim_x   (amp_get_local_size(2))
 
#define hipBlockDim_y   (amp_get_local_size(1))
 
#define hipBlockDim_z   (amp_get_local_size(0))
 
#define hipGridDim_x   (hc_get_num_groups(2))
 
#define hipGridDim_y   (hc_get_num_groups(1))
 
#define hipGridDim_z   (hc_get_num_groups(0))
 
#define __syncthreads()   hc_barrier(CLK_LOCAL_MEM_FENCE)
 
#define HIP_KERNEL_NAME(...)   __VA_ARGS__
 
#define KERNELBEGIN
 
#define KERNELEND
 

Functions

__device__ long long int clock64 ()
 
__device__ clock_t clock ()
 
__device__ int atomicAdd (int *address, int val)
 
__device__ unsigned int atomicAdd (unsigned int *address, unsigned int val)
 
__device__ unsigned long long int atomicAdd (unsigned long long int *address, unsigned long long int val)
 
__device__ float atomicAdd (float *address, float val)
 
__device__ int atomicSub (int *address, int val)
 
__device__ unsigned int atomicSub (unsigned int *address, unsigned int val)
 
__device__ int atomicExch (int *address, int val)
 
__device__ unsigned int atomicExch (unsigned int *address, unsigned int val)
 
__device__ unsigned long long int atomicExch (unsigned long long int *address, unsigned long long int val)
 
__device__ float atomicExch (float *address, float val)
 
__device__ int atomicMin (int *address, int val)
 
__device__ unsigned int atomicMin (unsigned int *address, unsigned int val)
 
__device__ unsigned long long int atomicMin (unsigned long long int *address, unsigned long long int val)
 
__device__ int atomicMax (int *address, int val)
 
__device__ unsigned int atomicMax (unsigned int *address, unsigned int val)
 
__device__ unsigned long long int atomicMax (unsigned long long int *address, unsigned long long int val)
 
__device__ int atomicCAS (int *address, int compare, int val)
 
__device__ unsigned int atomicCAS (unsigned int *address, unsigned int compare, unsigned int val)
 
__device__ unsigned long long int atomicCAS (unsigned long long int *address, unsigned long long int compare, unsigned long long int val)
 
__device__ int atomicAnd (int *address, int val)
 
__device__ unsigned int atomicAnd (unsigned int *address, unsigned int val)
 
__device__ unsigned long long int atomicAnd (unsigned long long int *address, unsigned long long int val)
 
__device__ int atomicOr (int *address, int val)
 
__device__ unsigned int atomicOr (unsigned int *address, unsigned int val)
 
__device__ unsigned long long int atomicOr (unsigned long long int *address, unsigned long long int val)
 
__device__ int atomicXor (int *address, int val)
 
__device__ unsigned int atomicXor (unsigned int *address, unsigned int val)
 
__device__ unsigned long long int atomicXor (unsigned long long int *address, unsigned long long int val)
 
__device__ unsigned int __popc (unsigned int input)
 
__device__ unsigned int __popcll (unsigned long long int input)
 
__device__ unsigned int __clz (unsigned int input)
 
__device__ unsigned int __clzll (unsigned long long int input)
 
__device__ unsigned int __clz (int input)
 
__device__ unsigned int __clzll (long long int input)
 
__device__ unsigned int __ffs (unsigned int input)
 
__device__ unsigned int __ffsll (unsigned long long int input)
 
__device__ unsigned int __ffs (int input)
 
__device__ unsigned int __ffsll (long long int input)
 
__device__ unsigned int __brev (unsigned int input)
 
__device__ unsigned long long int __brevll (unsigned long long int input)
 
__device__ int __all (int input)
 
__device__ int __any (int input)
 
__device__ unsigned long long int __ballot (int input)
 
__device__ int __shfl (int input, int lane, int width)
 
__device__ int __shfl_up (int input, unsigned int lane_delta, int width)
 
__device__ int __shfl_down (int input, unsigned int lane_delta, int width)
 
__device__ int __shfl_xor (int input, int lane_mask, int width)
 
__device__ float __shfl (float input, int lane, int width)
 
__device__ float __shfl_up (float input, unsigned int lane_delta, int width)
 
__device__ float __shfl_down (float input, unsigned int lane_delta, int width)
 
__device__ float __shfl_xor (float input, int lane_mask, int width)
 
int min (int arg1, int arg2) __attribute((hc, cpu))
 
int max (int arg1, int arg2) __attribute((hc, cpu))
 
__device__ float __cosf (float x)
 
__device__ float __expf (float x)
 
__device__ float __frsqrt_rn (float x)
 
__device__ float __fsqrt_rd (float x)
 
__device__ float __fsqrt_rn (float x)
 
__device__ float __fsqrt_ru (float x)
 
__device__ float __fsqrt_rz (float x)
 
__device__ float __log10f (float x)
 
__device__ float __log2f (float x)
 
__device__ float __logf (float x)
 
__device__ float __powf (float base, float exponent)
 
__device__ void __sincosf (float x, float *s, float *c)
 
__device__ float __sinf (float x)
 
__device__ float __tanf (float x)
 
__device__ float __dsqrt_rd (double x)
 
__device__ float __dsqrt_rn (double x)
 
__device__ float __dsqrt_ru (double x)
 
__device__ float __dsqrt_rz (double x)
 

Variables

int cpu
 
int warpSize
 
int HIP_PRINT_ENV
 Print all HIP-related environment variables.
 
int HIP_TRACE_API
 Trace HIP APIs.
 
int HIP_LAUNCH_BLOCKING
 Make all HIP APIs host-synchronous.
 

Detailed Description

Contains definitions of APIs for the HIP runtime.

Macro Definition Documentation

#define hipThreadIdx_x   (amp_get_local_id(2))

Kernel launching

#define KERNELBEGIN
Value:
hc::extent<3> ext(lp.gridDim.x, lp.gridDim.y, lp.gridDim.z);\
auto __hipExtTile = ext.tile(lp.groupDim.x, lp.groupDim.y, lp.groupDim.z);\
__hipExtTile.set_dynamic_group_segment_size(lp.groupMemBytes);\
\
hc::completion_future cf = \
hc::parallel_for_each (\
*lp.av,\
__hipExtTile,\
[=] (hc::tiled_index<3> __hipIdx) mutable [[hc]] \
{
#define KERNELEND
Value:
}); \
if (HIP_LAUNCH_BLOCKING) {\
if (HIP_TRACE_API) {\
fprintf(stderr, "hiptrace1: HIP_LAUNCH_BLOCKING ...\n");\
}\
cf.wait(); \
if (HIP_TRACE_API) {\
fprintf(stderr, "hiptrace1: ...completed.\n");\
}\
}
int HIP_TRACE_API
Trace HIP APIs.
Definition: hip_hcc.cpp:57
int HIP_LAUNCH_BLOCKING
Make all HIP APIs host-synchronous.
Definition: hip_hcc.cpp:58

Variable Documentation

int cpu
Initial value:
{
return (int)(hc::precise_math::fmin((float)arg1, (float)arg2))