HIP: Heterogeneous-computing Interface for Portability
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hip_runtime.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
27 //#pragma once
28 #ifndef HIP_RUNTIME_H
29 #define HIP_RUNTIME_H
30 
31 //---
32 // Top part of file can be compiled with any compiler
33 
34 
35 //#include <cstring>
36 #include <cmath>
37 #include <string.h>
38 #include <stddef.h>
39 
40 
41 #define CUDA_SUCCESS hipSuccess
42 
43 #include <hip/hip_runtime_api.h>
44 //#include "hip/hcc_detail/hip_hcc.h"
45 //---
46 // Remainder of this file only compiles with HCC
47 #ifdef __HCC__
48 #include <grid_launch.h>
49 extern int HIP_TRACE_API;
50 
51 //TODO-HCC-GL - change this to typedef.
52 //typedef grid_launch_parm hipLaunchParm ;
53 #define hipLaunchParm grid_launch_parm
54 #ifdef __cplusplus
55 #include <hip/hcc_detail/hip_texture.h>
56 #include <hip/hcc_detail/hip_ldg.h>
57 #endif
58 #include <hip/hcc_detail/host_defines.h>
59 // TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define.
60 #if defined (__KALMAR_ACCELERATOR__) && !defined (__HCC_ACCELERATOR__)
61 #define __HCC_ACCELERATOR__ __KALMAR_ACCELERATOR__
62 #endif
63 
64 // Feature tests:
65 #if defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)
66 // Device compile and not host compile:
67 
68 //TODO-HCC enable __HIP_ARCH_HAS_ATOMICS__ when HCC supports these.
69  // 32-bit Atomics:
70 #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1)
71 #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1)
72 #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1)
73 #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1)
74 #define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0)
75 
76 // 64-bit Atomics:
77 #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1)
78 #define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0)
79 
80 // Doubles
81 #define __HIP_ARCH_HAS_DOUBLES__ (1)
82 
83 //warp cross-lane operations:
84 #define __HIP_ARCH_HAS_WARP_VOTE__ (1)
85 #define __HIP_ARCH_HAS_WARP_BALLOT__ (1)
86 #define __HIP_ARCH_HAS_WARP_SHUFFLE__ (1)
87 #define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
88 
89 //sync
90 #define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0)
91 #define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
92 
93 // misc
94 #define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
95 #define __HIP_ARCH_HAS_3DGRID__ (1)
96 #define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
97 
98 #endif /* Device feature flags */
99 
100 
101 //TODO-HCC launch bounds are currently ignored by the HCC target of HIP: the macro expands to nothing and accepts both the one-argument and two-argument forms.
102 #define __launch_bounds__(...)
103 
104 // Detect if we are compiling C++ mode or C mode
105 #if defined(__cplusplus)
106 #define __HCC_CPP__
107 #elif defined(__STDC_VERSION__)
108 #define __HCC_C__
109 #endif
110 
111 __device__ float acosf(float x);
112 __device__ float acoshf(float x);
113 __device__ float asinf(float x);
114 __device__ float asinhf(float x);
115 __device__ float atan2f(float y, float x);
116 __device__ float atanf(float x);
117 __device__ float atanhf(float x);
118 __device__ float cbrtf(float x);
119 __device__ float ceilf(float x);
120 __device__ float copysignf(float x, float y);
121 __device__ float cosf(float x);
122 __device__ float coshf(float x);
123 __device__ float cyl_bessel_i0f(float x);
124 __device__ float cyl_bessel_i1f(float x);
125 __device__ float erfcf(float x);
126 __device__ float erfcinvf(float y);
127 __device__ float erfcxf(float x);
128 __device__ float erff(float x);
129 __device__ float erfinvf(float y);
130 __device__ float exp10f(float x);
131 __device__ float exp2f(float x);
132 __device__ float expf(float x);
133 __device__ float expm1f(float x);
134 __device__ float fabsf(float x);
135 __device__ float fdimf(float x, float y);
136 __device__ float fdividef(float x, float y);
137 __device__ float floorf(float x);
138 __device__ float fmaf(float x, float y, float z);
139 __device__ float fmaxf(float x, float y);
140 __device__ float fminf(float x, float y);
141 __device__ float fmodf(float x, float y);
142 __device__ float frexpf(float x, int *nptr); // second argument is an int out-pointer, matching frexp(double, int*) declared in this header
143 __device__ float hypotf(float x, float y);
144 __device__ float ilogbf(float x); // NOTE(review): the C library ilogbf returns int; confirm the float return type is intentional
145 __host__ __device__ unsigned isfinite(float a);
146 __device__ unsigned isinf(float a);
147 __device__ unsigned isnan(float a);
148 __device__ float j0f(float x);
149 __device__ float j1f(float x);
150 __device__ float jnf(int n, float x);
151 __device__ float ldexpf(float x, int exp);
152 __device__ float lgammaf(float x);
153 __device__ long long int llrintf(float x);
154 __device__ long long int llroundf(float x);
155 __device__ float log10f(float x);
156 __device__ float log1pf(float x);
157 __device__ float log2f(float x);
158 __device__ float logbf(float x);
159 __device__ float logf(float x);
160 __device__ long int lrintf(float x);
161 __device__ long int lroundf(float x);
162 __device__ float modff(float x, float *iptr);
163 __device__ float nanf(const char* tagp);
164 __device__ float nearbyintf(float x);
165 __device__ float nextafterf(float x, float y);
166 __device__ float norm3df(float a, float b, float c);
167 __device__ float norm4df(float a, float b, float c, float d);
168 __device__ float normcdff(float y);
169 __device__ float normcdfinvf(float y);
170 __device__ float normf(int dim, const float *a);
171 __device__ float powf(float x, float y);
172 __device__ float rcbrtf(float x); // spelling corrected from "rcbtrf"; named to pair with rcbrt(double) declared in this header
173 __device__ float remainderf(float x, float y);
174 __device__ float remquof(float x, float y, int *quo);
175 __device__ float rhypotf(float x, float y);
176 __device__ float rintf(float x);
177 __device__ float rnorm3df(float a, float b, float c);
178 __device__ float rnorm4df(float a, float b, float c, float d);
179 __device__ float rnormf(int dim, const float* a);
180 __device__ float roundf(float x);
181 __device__ float rsqrtf(float x);
182 __device__ float scalblnf(float x, long int n);
183 __device__ float scalbnf(float x, int n);
184 __host__ __device__ unsigned signbit(float a);
185 __device__ void sincosf(float x, float *sptr, float *cptr);
186 __device__ void sincospif(float x, float *sptr, float *cptr);
187 __device__ float sinf(float x);
188 __device__ float sinhf(float x);
189 __device__ float sinpif(float x);
190 __device__ float sqrtf(float x);
191 __device__ float tanf(float x);
192 __device__ float tanhf(float x);
193 __device__ float tgammaf(float x);
194 __device__ float truncf(float x);
195 __device__ float y0f(float x);
196 __device__ float y1f(float x);
197 __device__ float ynf(int n, float x);
198 
199 __host__ __device__ float cospif(float x);
200 __host__ __device__ float sinpif(float x); // re-declaration: sinpif is also declared __device__-only earlier in this header
201 __device__ float sqrtf(float x); // duplicate of the earlier sqrtf declaration in this header
202 __host__ __device__ float rsqrtf(float x); // re-declaration: rsqrtf is also declared __device__-only earlier in this header
203 
204 __device__ double acos(double x);
205 __device__ double acosh(double x);
206 __device__ double asin(double x);
207 __device__ double asinh(double x);
208 __device__ double atan(double x);
209 __device__ double atan2(double y, double x);
210 __device__ double atanh(double x);
211 __device__ double cbrt(double x);
212 __device__ double ceil(double x);
213 __device__ double copysign(double x, double y);
214 __device__ double cos(double x);
215 __device__ double cosh(double x);
216 __host__ __device__ double cospi(double x);
217 __device__ double cyl_bessel_i0(double x);
218 __device__ double cyl_bessel_i1(double x);
219 __device__ double erf(double x);
220 __device__ double erfc(double x);
221 __device__ double erfcinv(double y);
222 __device__ double erfcx(double x);
223 __device__ double exp(double x);
224 __device__ double exp10(double x);
225 __device__ double exp2(double x);
226 __device__ double expm1(double x);
227 __device__ double fabs(double x);
228 __device__ double fdim(double x, double y);
229 __device__ double floor(double x);
230 __device__ double fma(double x, double y, double z);
231 __device__ double fmax(double x, double y);
232 __device__ double fmin(double x, double y);
233 __device__ double fmod(double x, double y);
234 __device__ double frexp(double x, int *nptr);
235 __device__ double hypot(double x, double y);
236 __device__ double ilogb(double x); // NOTE(review): the C library ilogb returns int; confirm the double return type is intentional
237 __host__ __device__ unsigned isfinite(double x);
238 __device__ unsigned isinf(double x);
239 __device__ unsigned isnan(double x);
240 __device__ double j0(double x);
241 __device__ double j1(double x);
242 __device__ double jn(int n, double x);
243 __device__ double ldexp(double x, int exp);
244 __device__ double lgamma(double x);
245 __device__ long long llrint(double x);
246 __device__ long llround(double x); // NOTE(review): returns long, whereas llrint() and llroundf() in this header return long long - confirm
247 __device__ double log(double x);
248 __device__ double log10(double x);
249 __device__ double log1p(double x);
250 __device__ double log2(double x);
251 __device__ double logb(double x);
252 __device__ long int lrint(double x);
253 __device__ long int lround(double x);
254 __device__ double modf(double x, double *iptr);
255 __device__ double nan(const char* tagp);
256 __device__ double nearbyint(double x);
257 __device__ double nextafter(double x, double y);
258 __device__ double norm(int dim, const double* t);
259 __device__ double norm3d(double a, double b, double c);
260 __device__ double norm4d(double a, double b, double c, double d); // four components, matching norm4df and rnorm4d in this header
261 __device__ double normcdf(double y);
262 __device__ double normcdfinv(double y);
263 __device__ double pow(double x, double y);
264 __device__ double rcbrt(double x);
265 __device__ double remainder(double x, double y);
266 __device__ double remquo(double x, double y, int *quo);
267 __device__ double rhypot(double x, double y);
268 __device__ double rint(double x);
269 __device__ double rnorm(int dim, const double* t);
270 __device__ double rnorm3d(double a, double b, double c);
271 __device__ double rnorm4d(double a, double b, double c, double d);
272 __device__ double round(double x);
273 __host__ __device__ double rsqrt(double x);
274 __device__ double scalbln(double x, long int n);
275 __device__ double scalbn(double x, int n);
276 __host__ __device__ unsigned signbit(double a);
277 __device__ double sin(double a);
278 __device__ double sincos(double x, double *sptr, double *cptr); // NOTE(review): declared to return double while sincosf() in this header returns void - confirm
279 __device__ double sincospi(double x, double *sptr, double *cptr); // NOTE(review): declared to return double while sincospif() in this header returns void - confirm
280 __device__ double sinh(double x);
281 __host__ __device__ double sinpi(double x);
282 __device__ double sqrt(double x);
283 __device__ double tan(double x);
284 __device__ double tanh(double x);
285 __device__ double tgamma(double x);
286 __device__ double trunc(double x);
287 __device__ double y0(double x);
288 __device__ double y1(double x); // parameter named x for consistency with y0, y1f and yn
289 __device__ double yn(int n, double x);
290 
291 // TODO - hipify-clang - change to use the function call.
292 //#define warpSize hc::__wavesize()
293 extern const int warpSize;
294 
295 
296 #define clock_t long long int /* NOTE(review): a macro, not a typedef - it rewrites every later use of the token clock_t, including in headers included after this one; confirm intended */
297 __device__ long long int clock64();
298 __device__ clock_t clock(); // clock_t expands to long long int through the #define above
299 
300 //atomicAdd()
301 __device__ int atomicAdd(int* address, int val);
302 __device__ unsigned int atomicAdd(unsigned int* address,
303  unsigned int val);
304 
305 __device__ unsigned long long int atomicAdd(unsigned long long int* address,
306  unsigned long long int val);
307 
308 __device__ float atomicAdd(float* address, float val);
309 
310 
311 //atomicSub()
312 __device__ int atomicSub(int* address, int val);
313 
314 __device__ unsigned int atomicSub(unsigned int* address,
315  unsigned int val);
316 
317 
318 //atomicExch()
319 __device__ int atomicExch(int* address, int val);
320 
321 __device__ unsigned int atomicExch(unsigned int* address,
322  unsigned int val);
323 
324 __device__ unsigned long long int atomicExch(unsigned long long int* address,
325  unsigned long long int val);
326 
327 __device__ float atomicExch(float* address, float val);
328 
329 
330 //atomicMin()
331 __device__ int atomicMin(int* address, int val);
332 __device__ unsigned int atomicMin(unsigned int* address,
333  unsigned int val);
334 __device__ unsigned long long int atomicMin(unsigned long long int* address,
335  unsigned long long int val);
336 
337 
338 //atomicMax()
339 __device__ int atomicMax(int* address, int val);
340 __device__ unsigned int atomicMax(unsigned int* address,
341  unsigned int val);
342 __device__ unsigned long long int atomicMax(unsigned long long int* address,
343  unsigned long long int val);
344 
345 
346 //atomicCAS()
347 __device__ int atomicCAS(int* address, int compare, int val);
348 __device__ unsigned int atomicCAS(unsigned int* address,
349  unsigned int compare,
350  unsigned int val);
351 __device__ unsigned long long int atomicCAS(unsigned long long int* address,
352  unsigned long long int compare,
353  unsigned long long int val);
354 
355 
356 //atomicAnd()
357 __device__ int atomicAnd(int* address, int val);
358 __device__ unsigned int atomicAnd(unsigned int* address,
359  unsigned int val);
360 __device__ unsigned long long int atomicAnd(unsigned long long int* address,
361  unsigned long long int val);
362 
363 
364 //atomicOr()
365 __device__ int atomicOr(int* address, int val);
366 __device__ unsigned int atomicOr(unsigned int* address,
367  unsigned int val);
368 __device__ unsigned long long int atomicOr(unsigned long long int* address,
369  unsigned long long int val);
370 
371 
372 //atomicXor()
373 __device__ int atomicXor(int* address, int val);
374 __device__ unsigned int atomicXor(unsigned int* address,
375  unsigned int val);
376 __device__ unsigned long long int atomicXor(unsigned long long int* address,
377  unsigned long long int val);
378 
379 //atomicInc()
380 __device__ unsigned int atomicInc(unsigned int* address,
381  unsigned int val);
382 
383 
384 //atomicDec()
385 __device__ unsigned int atomicDec(unsigned int* address,
386  unsigned int val);
387 
388 
389 // integer intrinsic functions: __popc __clz __ffs __brev (and their ll variants)
390 __device__ unsigned int __popc( unsigned int input);
391 __device__ unsigned int __popcll( unsigned long long int input);
392 __device__ unsigned int __clz(unsigned int input);
393 __device__ unsigned int __clzll(unsigned long long int input);
394 __device__ unsigned int __clz(int input);
395 __device__ unsigned int __clzll(long long int input);
396 __device__ unsigned int __ffs(unsigned int input);
397 __device__ unsigned int __ffsll(unsigned long long int input);
398 __device__ unsigned int __ffs(int input);
399 __device__ unsigned int __ffsll(long long int input);
400 __device__ unsigned int __brev( unsigned int input);
401 __device__ unsigned long long int __brevll( unsigned long long int input);
402 
403 
404 // warp vote function __all __any __ballot
405 __device__ int __all( int input);
406 __device__ int __any( int input);
407 __device__ unsigned long long int __ballot( int input);
408 
409 // warp shuffle functions: int and float variants of __shfl, __shfl_up, __shfl_down and __shfl_xor
410 #ifdef __cplusplus // C++ build: the width argument defaults to warpSize
411 __device__ int __shfl(int input, int lane, int width=warpSize);
412 __device__ int __shfl_up(int input, unsigned int lane_delta, int width=warpSize);
413 __device__ int __shfl_down(int input, unsigned int lane_delta, int width=warpSize);
414 __device__ int __shfl_xor(int input, int lane_mask, int width=warpSize);
415 __device__ float __shfl(float input, int lane, int width=warpSize);
416 __device__ float __shfl_up(float input, unsigned int lane_delta, int width=warpSize);
417 __device__ float __shfl_down(float input, unsigned int lane_delta, int width=warpSize);
418 __device__ float __shfl_xor(float input, int lane_mask, int width=warpSize);
419 #else // non-C++ build: no default arguments, width must be passed explicitly. NOTE(review): C has no overloading, so the int/float pairs below would conflict in a C translation unit - confirm this branch is ever compiled
420 __device__ int __shfl(int input, int lane, int width);
421 __device__ int __shfl_up(int input, unsigned int lane_delta, int width);
422 __device__ int __shfl_down(int input, unsigned int lane_delta, int width);
423 __device__ int __shfl_xor(int input, int lane_mask, int width);
424 __device__ float __shfl(float input, int lane, int width);
425 __device__ float __shfl_up(float input, unsigned int lane_delta, int width);
426 __device__ float __shfl_down(float input, unsigned int lane_delta, int width);
427 __device__ float __shfl_xor(float input, int lane_mask, int width);
428 #endif
429 
430 __host__ __device__ int min(int arg1, int arg2);
431 __host__ __device__ int max(int arg1, int arg2);
432 
433 //TODO - add a couple fast math operations here, the set here will grow :
434 __device__ float __cosf(float x);
435 __device__ float __expf(float x);
436 __device__ float __frsqrt_rn(float x);
437 __device__ float __fsqrt_rd(float x);
438 __device__ float __fsqrt_rn(float x);
439 __device__ float __fsqrt_ru(float x);
440 __device__ float __fsqrt_rz(float x);
441 __device__ float __log10f(float x);
442 __device__ float __log2f(float x);
443 __device__ float __logf(float x);
444 __device__ float __powf(float base, float exponent);
445 __device__ void __sincosf(float x, float *s, float *c) ;
446 __device__ float __sinf(float x);
447 __device__ float __tanf(float x);
448 __device__ float __dsqrt_rd(double x); // NOTE(review): the four __dsqrt_* declarations take a double but return float - confirm the narrowing return type is intended
449 __device__ float __dsqrt_rn(double x);
450 __device__ float __dsqrt_ru(double x);
451 __device__ float __dsqrt_rz(double x);
456 // Choose the xyz or zyx dimension ordering according to the HCC compiler work-week version:
457 #if __hcc_workweek__ >= 16123
458 
459 #define hipThreadIdx_x (amp_get_local_id(0))
460 #define hipThreadIdx_y (amp_get_local_id(1))
461 #define hipThreadIdx_z (amp_get_local_id(2))
462 
463 #define hipBlockIdx_x (hc_get_group_id(0))
464 #define hipBlockIdx_y (hc_get_group_id(1))
465 #define hipBlockIdx_z (hc_get_group_id(2))
466 
467 #define hipBlockDim_x (amp_get_local_size(0))
468 #define hipBlockDim_y (amp_get_local_size(1))
469 #define hipBlockDim_z (amp_get_local_size(2))
470 
471 #define hipGridDim_x (hc_get_num_groups(0))
472 #define hipGridDim_y (hc_get_num_groups(1))
473 #define hipGridDim_z (hc_get_num_groups(2))
474 
475 #else
476 
477 #define hipThreadIdx_x (amp_get_local_id(2))
478 #define hipThreadIdx_y (amp_get_local_id(1))
479 #define hipThreadIdx_z (amp_get_local_id(0))
480 
481 #define hipBlockIdx_x (hc_get_group_id(2))
482 #define hipBlockIdx_y (hc_get_group_id(1))
483 #define hipBlockIdx_z (hc_get_group_id(0))
484 
485 #define hipBlockDim_x (amp_get_local_size(2))
486 #define hipBlockDim_y (amp_get_local_size(1))
487 #define hipBlockDim_z (amp_get_local_size(0))
488 
489 #define hipGridDim_x (hc_get_num_groups(2))
490 #define hipGridDim_y (hc_get_num_groups(1))
491 #define hipGridDim_z (hc_get_num_groups(0))
492 
493 #endif // __hcc_workweek__ check
494 
495 #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
496 
497 #define HIP_KERNEL_NAME(...) __VA_ARGS__
498 
499 #ifdef __HCC_CPP__
500 hipStream_t ihipPreLaunchKernel(hipStream_t stream, grid_launch_parm *lp);
501 void ihipPostLaunchKernel(hipStream_t stream, grid_launch_parm &lp);
502 
503 // TODO - move to common header file.
504 #define KNRM "\x1B[0m"
505 #define KGRN "\x1B[32m"
506 
507 #if not defined(DISABLE_GRID_LAUNCH)
508 #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
509 do {\
510  grid_launch_parm lp;\
511  lp.gridDim.x = _numBlocks3D.x; \
512  lp.gridDim.y = _numBlocks3D.y; \
513  lp.gridDim.z = _numBlocks3D.z; \
514  lp.groupDim.x = _blockDim3D.x; \
515  lp.groupDim.y = _blockDim3D.y; \
516  lp.groupDim.z = _blockDim3D.z; \
517  lp.groupMemBytes = _groupMemBytes; \
518  hipStream_t trueStream = (ihipPreLaunchKernel(_stream, &lp)); \
519  if (HIP_TRACE_API) {\
520  fprintf(stderr, KGRN "<<hip-api: hipLaunchKernel '%s' gridDim:(%d,%d,%d) groupDim:(%d,%d,%d) groupMem:+%d stream=%p\n" KNRM, \
521  #_kernelName, lp.gridDim.x, lp.gridDim.y, lp.gridDim.z, lp.groupDim.x, lp.groupDim.y, lp.groupDim.z, lp.groupMemBytes, (void*)(_stream));\
522  }\
523  _kernelName (lp, __VA_ARGS__);\
524  ihipPostLaunchKernel(trueStream, lp);\
525 } while(0)
526 
527 #else
528 #warning(DISABLE_GRID_LAUNCH set)
529 
530 #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
531 do {\
532  grid_launch_parm lp;\
533  lp.gridDim.x = _numBlocks3D.x * _blockDim3D.x;/*Convert from #blocks to #threads*/ \
534  lp.gridDim.y = _numBlocks3D.y * _blockDim3D.y;/*Convert from #blocks to #threads*/ \
535  lp.gridDim.z = _numBlocks3D.z * _blockDim3D.z;/*Convert from #blocks to #threads*/ \
536  lp.groupDim.x = _blockDim3D.x; \
537  lp.groupDim.y = _blockDim3D.y; \
538  lp.groupDim.z = _blockDim3D.z; \
539  lp.groupMemBytes = _groupMemBytes;\
540  hc::completion_future cf;\
541  lp.cf = &cf; \
542  hipStream_t trueStream = (ihipPreLaunchKernel(_stream, &lp.av)); \
543  if (HIP_TRACE_API) {\
544  fprintf(stderr, "==hip-api: launch '%s' gridDim:[%d.%d.%d] groupDim:[%d.%d.%d] groupMem:+%d stream=%p\n", \
545  #_kernelName, lp.gridDim.z, lp.gridDim.y, lp.gridDim.x, lp.groupDim.z, lp.groupDim.y, lp.groupDim.x, lp.groupMemBytes, (void*)(_stream));\
546  }\
547  _kernelName (lp, __VA_ARGS__);\
548  ihipPostLaunchKernel(trueStream, cf);\
549 } while(0)
550 /*end hipLaunchKernel */
551 #endif
552 
553 #elif defined (__HCC_C__)
554 
555 //TODO - develop C interface.
556 
557 #endif
558 
559 #endif // __HCC__
560 
561 
566 //extern int HIP_PRINT_ENV ; ///< Print all HIP-related environment variables.
567 //extern int HIP_TRACE_API; ///< Trace HIP APIs.
568 //extern int HIP_LAUNCH_BLOCKING ; ///< Make all HIP APIs host-synchronous
569 
575 // End doxygen API:
581 #endif
#define __host__
Definition: host_defines.h:35