HIP: Heterogenous-computing Interface for Portability
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hip_runtime.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
27 //#pragma once
28 #ifndef HIP_RUNTIME_H
29 #define HIP_RUNTIME_H
30 
31 //---
32 // Top part of file can be compiled with any compiler
33 
34 
35 //#include <cstring>
36 //#include <cmath>
37 #include <string.h>
38 #include <stddef.h>
39 
40 
41 #define CUDA_SUCCESS hipSuccess
42 
43 #include <hip/hip_runtime_api.h>
44 //#include "hip/hcc_detail/hip_hcc.h"
45 //---
46 // Remainder of this file only compiles with HCC
47 #ifdef __HCC__
48 #if __cplusplus
49 #include <hc.hpp>
50 #endif
51 #include <grid_launch.h>
52 extern int HIP_TRACE_API;
53 
54 //TODO-HCC-GL - change this to typedef.
55 //typedef grid_launch_parm hipLaunchParm ;
56 #define hipLaunchParm grid_launch_parm
57 #ifdef __cplusplus
58 #include <hip/hcc_detail/hip_texture.h>
59 #endif
60 #include <hip/hcc_detail/host_defines.h>
61 // TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define.
62 #if defined (__KALMAR_ACCELERATOR__) && !defined (__HCC_ACCELERATOR__)
63 #define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__
64 #endif
65 
66 // Feature tests:
67 #if defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)
68 // Device compile and not host compile:
69 
70 //TODO-HCC enable __HIP_ARCH_HAS_ATOMICS__ when HCC supports these.
71  // 32-bit Atomics:
72 #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1)
73 #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1)
74 #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1)
75 #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1)
76 #define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0)
77 
78 // 64-bit Atomics:
79 #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1)
80 #define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0)
81 
82 // Doubles
83 #define __HIP_ARCH_HAS_DOUBLES__ (1)
84 
85 //warp cross-lane operations:
86 #define __HIP_ARCH_HAS_WARP_VOTE__ (1)
87 #define __HIP_ARCH_HAS_WARP_BALLOT__ (1)
88 #define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1)
89 #define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
90 
91 //sync
92 #define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0)
93 #define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
94 
95 // misc
96 #define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
97 #define __HIP_ARCH_HAS_3DGRID__ (1)
98 #define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
99 
100 #endif
101 
102 
103 //TODO-HCC this is currently ignored by HCC target of HIP
104 #define __launch_bounds__(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor)
105 
106 // Detect if we are compiling C++ mode or C mode
107 #if defined(__cplusplus)
108 #define __HCC_CPP__
109 #elif defined(__STDC_VERSION__)
110 #define __HCC_C__
111 #endif
112 
113 #if __cplusplus
114 __device__ float acosf(float x);
115 __device__ float acoshf(float x);
116 __device__ float asinf(float x);
117 __device__ float asinhf(float x);
118 __device__ float atan2f(float y, float x);
119 __device__ float atanf(float x);
120 __device__ float atanhf(float x);
121 __device__ float cbrtf(float x);
122 __device__ float ceilf(float x);
123 __device__ float copysignf(float x, float y);
124 __device__ float cosf(float x);
125 __device__ float coshf(float x);
126 __device__ float cyl_bessel_i0f(float x);
127 __device__ float cyl_bessel_i1f(float x);
128 __device__ float erfcf(float x);
129 __device__ float erfcinvf(float y);
130 __device__ float erfcxf(float x);
131 __device__ float erff(float x);
132 __device__ float erfinvf(float y);
133 __device__ float exp10f(float x);
134 __device__ float exp2f(float x);
135 __device__ float expf(float x);
136 __device__ float expm1f(float x);
137 __device__ float fabsf(float x);
138 __device__ float fdimf(float x, float y);
139 __device__ float fdividef(float x, float y);
140 __device__ float floorf(float x);
141 __device__ float fmaf(float x, float y, float z);
142 __device__ float fmaxf(float x, float y);
143 __device__ float fminf(float x, float y);
144 __device__ float fmodf(float x, float y);
145 __device__ float frexpf(float x, float y);
146 __device__ float hypotf(float x, float y);
147 __device__ float ilogbf(float x);
148 __device__ unsigned isfinite(float a);
149 __device__ unsigned isinf(float a);
150 __device__ unsigned isnan(float a);
151 __device__ float j0f(float x);
152 __device__ float j1f(float x);
153 __device__ float jnf(int n, float x);
154 __device__ float ldexpf(float x, int exp);
155 __device__ float lgammaf(float x);
156 __device__ long long int llrintf(float x);
157 __device__ long long int llroundf(float x);
158 __device__ float log10f(float x);
159 __device__ float log1pf(float x);
160 __device__ float log2f(float x);
161 __device__ float logbf(float x);
162 __device__ float logf(float x);
163 __device__ long int lrintf(float x);
164 __device__ long int lroundf(float x);
165 __device__ float modff(float x, float *iptr);
166 __device__ float nanf(const char* tagp);
167 __device__ float nearbyintf(float x);
168 __device__ float nextafterf(float x, float y);
169 __device__ float norm3df(float a, float b, float c);
170 __device__ float norm4df(float a, float b, float c, float d);
171 __device__ float normcdff(float y);
172 __device__ float normcdfinvf(float y);
173 __device__ float normf(int dim, const float *a);
174 __device__ float powf(float x, float y);
175 __device__ float rcbtrf(float x);
176 __device__ float remainderf(float x, float y);
177 __device__ float remquof(float x, float y, int *quo);
178 __device__ float rhypotf(float x, float y);
179 __device__ float rintf(float x);
180 __device__ float rnorm3df(float a, float b, float c);
181 __device__ float rnorm4df(float a, float b, float c, float d);
182 __device__ float rnormf(int dim, const float* a);
183 __device__ float roundf(float x);
184 __device__ float rsqrtf(float x);
185 __device__ float scalblnf(float x, long int n);
186 __device__ float scalbnf(float x, int n);
187 __device__ unsigned signbit(float a);
188 __device__ void sincosf(float x, float *sptr, float *cptr);
189 __device__ void sincospif(float x, float *sptr, float *cptr);
190 __device__ float sinf(float x);
191 __device__ float sinhf(float x);
192 __device__ float sinpif(float x);
193 __device__ float sqrtf(float x);
194 __device__ float tanf(float x);
195 __device__ float tanhf(float x);
196 __device__ float tgammaf(float x);
197 __device__ float truncf(float x);
198 __device__ float y0f(float x);
199 __device__ float y1f(float x);
200 __device__ float ynf(int n, float x);
201 
202 __host__ __device__ float cospif(float x);
203 __host__ __device__ float sinpif(float x);
204 __device__ float sqrtf(float x);
205 __host__ __device__ float rsqrtf(float x);
206 
207 __device__ double acos(double x);
208 __device__ double acosh(double x);
209 __device__ double asin(double x);
210 __device__ double asinh(double x);
211 __device__ double atan(double x);
212 __device__ double atan2(double y, double x);
213 __device__ double atanh(double x);
214 __device__ double cbrt(double x);
215 __device__ double ceil(double x);
216 __device__ double copysign(double x, double y);
217 __device__ double cos(double x);
218 __device__ double cosh(double x);
219 __host__ __device__ double cospi(double x);
220 __device__ double cyl_bessel_i0(double x);
221 __device__ double cyl_bessel_i1(double x);
222 __device__ double erf(double x);
223 __device__ double erfc(double x);
224 __device__ double erfcinv(double y);
225 __device__ double erfcx(double x);
226 __device__ double exp(double x);
227 __device__ double exp10(double x);
228 __device__ double exp2(double x);
229 __device__ double expm1(double x);
230 __device__ double fabs(double x);
231 __device__ double fdim(double x, double y);
232 __device__ double floor(double x);
233 __device__ double fma(double x, double y, double z);
234 __device__ double fmax(double x, double y);
235 __device__ double fmin(double x, double y);
236 __device__ double fmod(double x, double y);
237 __device__ double frexp(double x, int *nptr);
238 __device__ double hypot(double x, double y);
239 __device__ double ilogb(double x);
240 __device__ unsigned isfinite(double x);
241 __device__ unsigned isinf(double x);
242 __device__ unsigned isnan(double x);
243 __device__ double j0(double x);
244 __device__ double j1(double x);
245 __device__ double jn(int n, double x);
246 __device__ double ldexp(double x, int exp);
247 __device__ double lgamma(double x);
248 __device__ long long llrint(double x);
249 __device__ long llround(double x);
250 __device__ double log(double x);
251 __device__ double log10(double x);
252 __device__ double log1p(double x);
253 __device__ double log2(double x);
254 __device__ double logb(double x);
255 __device__ long int lrint(double x);
256 __device__ long int lround(double x);
257 __device__ double modf(double x, double *iptr);
258 __device__ double nan(const char* tagp);
259 __device__ double nearbyint(double x);
260 __device__ double nextafter(double x, double y);
261 __device__ double norm(int dim, const double* t);
262 __device__ double norm3d(double a, double b, double c);
263 __device__ double norm4d(double a, double b, double d);
264 __device__ double normcdf(double y);
265 __device__ double normcdfinv(double y);
266 __device__ double pow(double x, double y);
267 __device__ double rcbrt(double x);
268 __device__ double remainder(double x, double y);
269 __device__ double remquo(double x, double y, int *quo);
270 __device__ double rhypot(double x, double y);
271 __device__ double rint(double x);
272 __device__ double rnorm(int dim, const double* t);
273 __device__ double rnorm3d(double a, double b, double c);
274 __device__ double rnorm4d(double a, double b, double c, double d);
275 __device__ double round(double x);
276 __host__ __device__ double rsqrt(double x);
277 __device__ double scalbln(double x, long int n);
278 __device__ double scalbn(double x, int n);
279 __device__ unsigned signbit(double a);
280 __device__ double sin(double a);
281 __device__ double sincos(double x, double *sptr, double *cptr);
282 __device__ double sincospi(double x, double *sptr, double *cptr);
283 __device__ double sinh(double x);
284 __host__ __device__ double sinpi(double x);
285 __device__ double sqrt(double x);
286 __device__ double tan(double x);
287 __device__ double tanh(double x);
288 __device__ double tgamma(double x);
289 __device__ double trunc(double x);
290 __device__ double y0(double x);
291 __device__ double y1(double y);
292 __device__ double yn(int n, double x);
293 #endif
294 
295 // TODO - hipify-clang - change to use the function call.
296 //#define warpSize hc::__wavesize()
297 extern const int warpSize;
298 
299 
300 #define clock_t long long int
301 __device__ long long int clock64();
302 __device__ clock_t clock();
303 #if __cplusplus
304 //atomicAdd()
305 __device__ int atomicAdd(int* address, int val);
306 __device__ unsigned int atomicAdd(unsigned int* address,
307  unsigned int val);
308 
309 __device__ unsigned long long int atomicAdd(unsigned long long int* address,
310  unsigned long long int val);
311 
312 __device__ float atomicAdd(float* address, float val);
313 
314 
315 //atomicSub()
316 __device__ int atomicSub(int* address, int val);
317 
318 __device__ unsigned int atomicSub(unsigned int* address,
319  unsigned int val);
320 
321 
322 //atomicExch()
323 __device__ int atomicExch(int* address, int val);
324 
325 __device__ unsigned int atomicExch(unsigned int* address,
326  unsigned int val);
327 
328 __device__ unsigned long long int atomicExch(unsigned long long int* address,
329  unsigned long long int val);
330 
331 __device__ float atomicExch(float* address, float val);
332 
333 
334 //atomicMin()
335 __device__ int atomicMin(int* address, int val);
336 __device__ unsigned int atomicMin(unsigned int* address,
337  unsigned int val);
338 __device__ unsigned long long int atomicMin(unsigned long long int* address,
339  unsigned long long int val);
340 
341 
342 //atomicMax()
343 __device__ int atomicMax(int* address, int val);
344 __device__ unsigned int atomicMax(unsigned int* address,
345  unsigned int val);
346 __device__ unsigned long long int atomicMax(unsigned long long int* address,
347  unsigned long long int val);
348 
349 
350 //atomicCAS()
351 __device__ int atomicCAS(int* address, int compare, int val);
352 __device__ unsigned int atomicCAS(unsigned int* address,
353  unsigned int compare,
354  unsigned int val);
355 __device__ unsigned long long int atomicCAS(unsigned long long int* address,
356  unsigned long long int compare,
357  unsigned long long int val);
358 
359 
360 //atomicAnd()
361 __device__ int atomicAnd(int* address, int val);
362 __device__ unsigned int atomicAnd(unsigned int* address,
363  unsigned int val);
364 __device__ unsigned long long int atomicAnd(unsigned long long int* address,
365  unsigned long long int val);
366 
367 
368 //atomicOr()
369 __device__ int atomicOr(int* address, int val);
370 __device__ unsigned int atomicOr(unsigned int* address,
371  unsigned int val);
372 __device__ unsigned long long int atomicOr(unsigned long long int* address,
373  unsigned long long int val);
374 
375 
376 //atomicXor()
377 __device__ int atomicXor(int* address, int val);
378 __device__ unsigned int atomicXor(unsigned int* address,
379  unsigned int val);
380 __device__ unsigned long long int atomicXor(unsigned long long int* address,
381  unsigned long long int val);
382 
383 
384 // integer intrinsic function __poc __clz __ffs __brev
385 __device__ unsigned int __popc( unsigned int input);
386 __device__ unsigned int __popcll( unsigned long long int input);
387 __device__ unsigned int __clz(unsigned int input);
388 __device__ unsigned int __clzll(unsigned long long int input);
389 __device__ unsigned int __clz(int input);
390 __device__ unsigned int __clzll(long long int input);
391 __device__ unsigned int __ffs(unsigned int input);
392 __device__ unsigned int __ffsll(unsigned long long int input);
393 __device__ unsigned int __ffs(int input);
394 __device__ unsigned int __ffsll(long long int input);
395 __device__ unsigned int __brev( unsigned int input);
396 __device__ unsigned long long int __brevll( unsigned long long int input);
397 
398 
399 // warp vote function __all __any __ballot
400 __device__ int __all( int input);
401 __device__ int __any( int input);
402 __device__ unsigned long long int __ballot( int input);
403 
404 // warp shuffle functions
405 #ifdef __cplusplus
406 
407 __device__ int __shfl(int input, int lane, int width=warpSize);
408 __device__ int __shfl_up(int input, unsigned int lane_delta, int width=warpSize);
409 __device__ int __shfl_down(int input, unsigned int lane_delta, int width=warpSize);
410 __device__ int __shfl_xor(int input, int lane_mask, int width=warpSize);
411 __device__ float __shfl(float input, int lane, int width=warpSize);
412 __device__ float __shfl_up(float input, unsigned int lane_delta, int width=warpSize);
413 __device__ float __shfl_down(float input, unsigned int lane_delta, int width=warpSize);
414 __device__ float __shfl_xor(float input, int lane_mask, int width=warpSize);
415 #else
416 __device__ int __shfl(int input, int lane, int width);
417 __device__ int __shfl_up(int input, unsigned int lane_delta, int width);
418 __device__ int __shfl_down(int input, unsigned int lane_delta, int width);
419 __device__ int __shfl_xor(int input, int lane_mask, int width);
420 __device__ float __shfl(float input, int lane, int width);
421 __device__ float __shfl_up(float input, unsigned int lane_delta, int width);
422 __device__ float __shfl_down(float input, unsigned int lane_delta, int width);
423 __device__ float __shfl_xor(float input, int lane_mask, int width);
424 #endif
425 
426 __host__ __device__ int min(int arg1, int arg2);
427 __host__ __device__ int max(int arg1, int arg2);
428 
429 //TODO - add a couple fast math operations here, the set here will grow :
430 __device__ float __cosf(float x);
431 __device__ float __expf(float x);
432 __device__ float __frsqrt_rn(float x);
433 __device__ float __fsqrt_rd(float x);
434 __device__ float __fsqrt_rn(float x);
435 __device__ float __fsqrt_ru(float x);
436 __device__ float __fsqrt_rz(float x);
437 __device__ float __log10f(float x);
438 __device__ float __log2f(float x);
439 __device__ float __logf(float x);
440 __device__ float __powf(float base, float exponent);
441 __device__ void __sincosf(float x, float *s, float *c) ;
442 __device__ float __sinf(float x);
443 __device__ float __tanf(float x);
444 __device__ float __dsqrt_rd(double x);
445 __device__ float __dsqrt_rn(double x);
446 __device__ float __dsqrt_ru(double x);
447 __device__ float __dsqrt_rz(double x);
448 #endif
449 
453 #if __hcc_workweek__ >= 16123
454 
455 #define hipThreadIdx_x (amp_get_local_id(0))
456 #define hipThreadIdx_y (amp_get_local_id(1))
457 #define hipThreadIdx_z (amp_get_local_id(2))
458 
459 #define hipBlockIdx_x (hc_get_group_id(0))
460 #define hipBlockIdx_y (hc_get_group_id(1))
461 #define hipBlockIdx_z (hc_get_group_id(2))
462 
463 #define hipBlockDim_x (amp_get_local_size(0))
464 #define hipBlockDim_y (amp_get_local_size(1))
465 #define hipBlockDim_z (amp_get_local_size(2))
466 
467 #define hipGridDim_x (hc_get_num_groups(0))
468 #define hipGridDim_y (hc_get_num_groups(1))
469 #define hipGridDim_z (hc_get_num_groups(2))
470 
471 #else
472 
473 #define hipThreadIdx_x (amp_get_local_id(2))
474 #define hipThreadIdx_y (amp_get_local_id(1))
475 #define hipThreadIdx_z (amp_get_local_id(0))
476 
477 #define hipBlockIdx_x (hc_get_group_id(2))
478 #define hipBlockIdx_y (hc_get_group_id(1))
479 #define hipBlockIdx_z (hc_get_group_id(0))
480 
481 #define hipBlockDim_x (amp_get_local_size(2))
482 #define hipBlockDim_y (amp_get_local_size(1))
483 #define hipBlockDim_z (amp_get_local_size(0))
484 
485 #define hipGridDim_x (hc_get_num_groups(2))
486 #define hipGridDim_y (hc_get_num_groups(1))
487 #define hipGridDim_z (hc_get_num_groups(0))
488 
489 #endif
490 
491 #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
492 
493 #define HIP_KERNEL_NAME(...) __VA_ARGS__
494 
495 #ifdef __HCC_CPP__
496 hipStream_t ihipPreLaunchKernel(hipStream_t stream, hc::accelerator_view **av);
497 void ihipPostLaunchKernel(hipStream_t stream, hc::completion_future &cf);
498 
499 // TODO - move to common header file.
500 #define KNRM "\x1B[0m"
501 #define KGRN "\x1B[32m"
502 
503 #if not defined(DISABLE_GRID_LAUNCH)
504 #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
505 do {\
506  grid_launch_parm lp;\
507  lp.gridDim.x = _numBlocks3D.x; \
508  lp.gridDim.y = _numBlocks3D.y; \
509  lp.gridDim.z = _numBlocks3D.z; \
510  lp.groupDim.x = _blockDim3D.x; \
511  lp.groupDim.y = _blockDim3D.y; \
512  lp.groupDim.z = _blockDim3D.z; \
513  lp.groupMemBytes = _groupMemBytes;\
514  hc::completion_future cf;\
515  lp.cf = &cf; \
516  hipStream_t trueStream = (ihipPreLaunchKernel(_stream, &lp.av)); \
517  if (HIP_TRACE_API) {\
518  fprintf(stderr, KGRN "<<hip-api: hipLaunchKernel '%s' gridDim:[%d.%d.%d] groupDim:[%d.%d.%d] groupMem:+%d stream=%p\n" KNRM, \
519  #_kernelName, lp.gridDim.z, lp.gridDim.y, lp.gridDim.x, lp.groupDim.z, lp.groupDim.y, lp.groupDim.x, lp.groupMemBytes, (void*)(_stream));\
520  }\
521  _kernelName (lp, __VA_ARGS__);\
522  ihipPostLaunchKernel(trueStream, cf);\
523 } while(0)
524 
525 #else
526 #warning(DISABLE_GRID_LAUNCH set)
527 
528 #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
529 do {\
530  grid_launch_parm lp;\
531  lp.gridDim.x = _numBlocks3D.x * _blockDim3D.x;/*Convert from #blocks to #threads*/ \
532  lp.gridDim.y = _numBlocks3D.y * _blockDim3D.y;/*Convert from #blocks to #threads*/ \
533  lp.gridDim.z = _numBlocks3D.z * _blockDim3D.z;/*Convert from #blocks to #threads*/ \
534  lp.groupDim.x = _blockDim3D.x; \
535  lp.groupDim.y = _blockDim3D.y; \
536  lp.groupDim.z = _blockDim3D.z; \
537  lp.groupMemBytes = _groupMemBytes;\
538  hc::completion_future cf;\
539  lp.cf = &cf; \
540  hipStream_t trueStream = (ihipPreLaunchKernel(_stream, &lp.av)); \
541  if (HIP_TRACE_API) {\
542  fprintf(stderr, "==hip-api: launch '%s' gridDim:[%d.%d.%d] groupDim:[%d.%d.%d] groupMem:+%d stream=%p\n", \
543  #_kernelName, lp.gridDim.z, lp.gridDim.y, lp.gridDim.x, lp.groupDim.z, lp.groupDim.y, lp.groupDim.x, lp.groupMemBytes, (void*)(_stream));\
544  }\
545  _kernelName (lp, __VA_ARGS__);\
546  ihipPostLaunchKernel(trueStream, cf);\
547 } while(0)
548 /*end hipLaunchKernel */
549 #endif
550 
551 #elif defined (__HCC_C__)
552 
553 //TODO - develop C interface.
554 
555 #endif
556 
557 #endif // __HCC__
558 
559 
564 //extern int HIP_PRINT_ENV ; ///< Print all HIP-related environment variables.
565 //extern int HIP_TRACE_API; ///< Trace HIP APIs.
566 //extern int HIP_LAUNCH_BLOCKING ; ///< Make all HIP APIs host-synchronous
567 
573 // End doxygen API:
579 #endif
#define __host__
Definition: host_defines.h:35