HIP: Heterogenous-computing Interface for Portability
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
hip_hcc.h
1 /*
2 Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #ifndef HIP_HCC_H
24 #define HIP_HCC_H
25 
26 #include <hc.hpp>
27 #include <hsa/hsa.h>
28 #include "hsa/hsa_ext_amd.h"
29 #include "hip_util.h"
30 
31 
32 #if defined(__HCC__) && (__hcc_workweek__ < 16354)
33 #error("This version of HIP requires a newer version of HCC.");
34 #endif
35 
36 #define USE_DISPATCH_HSA_KERNEL 1
37 //
38 
39 
40 //---
41 // Environment variables:
42 
43 // Intended to distinguish whether an environment variable should be visible only in debug mode, or in debug+release.
44 //static const int debug = 0;
45 extern const int release;
46 
47 // TODO - this blocks both kernels and memory ops. Perhaps should have separate env var for kernels?
48 extern int HIP_LAUNCH_BLOCKING;
49 extern int HIP_API_BLOCKING;
50 
51 extern int HIP_PRINT_ENV;
52 extern int HIP_PROFILE_API;
53 //extern int HIP_TRACE_API;
54 extern int HIP_ATP;
55 extern int HIP_DB;
56 extern int HIP_STAGING_SIZE; /* size of staging buffers, in KB */
57 extern int HIP_STREAM_SIGNALS; /* number of signals to allocate at stream creation */
58 extern int HIP_VISIBLE_DEVICES; /* Contains a comma-separated sequence of GPU identifiers */
59 extern int HIP_FORCE_P2P_HOST;
60 
61 extern int HIP_COHERENT_HOST_ALLOC;
62 
63 
64 //---
65 // Chicken bits for disabling functionality to work around potential issues:
66 extern int HIP_DISABLE_HW_KERNEL_DEP;
67 
68 
69 // Class to assign a short TID to each new thread, for HIP debugging purposes.
// Per-thread debugging identity for HIP: a short thread id together with a
// running count of API calls, both exposed through simple accessors.
class ShortTid {
public:
    ShortTid();

    // Short id held by this object.
    int tid() const
    {
        return _shortTid;
    }

    // Advance the API sequence number by one and return the new value.
    uint64_t incApiSeqNum()
    {
        _apiSeqNum += 1;
        return _apiSeqNum;
    }

    // Current API sequence number; does not modify it.
    uint64_t apiSeqNum() const
    {
        return _apiSeqNum;
    }

private:
    int _shortTid;

    // Monotonically increasing API sequence number for this thread.
    uint64_t _apiSeqNum;
};
85 
// Ordered set of 64-bit trigger values.
// Triggers are appended with add(); after sort() they are in descending order,
// so the smallest trigger sits at the back and is what nextTrigger() reports.
// NOTE(review): the values look like per-thread API sequence numbers - confirm
// against the code that fills g_dbStartTriggers / g_dbStopTriggers.
struct ProfTrigger {

    // Value reported by nextTrigger() when no trigger is stored.
    static const uint64_t MAX_TRIGGER = std::numeric_limits<uint64_t>::max();

    // Write every stored trigger to stdout as "tid:<tid>.<trigger>,".
    void print (int tid) {
        std::cout << "Enabling tracing for ";
        for (const uint64_t trigger : _profTrigger) {
            std::cout << "tid:" << tid << "." << trigger << ",";
        }
        std::cout << "\n";
    }

    // Last element of the trigger list, or MAX_TRIGGER when the list is empty.
    uint64_t nextTrigger() {
        if (_profTrigger.empty()) {
            return MAX_TRIGGER;
        }
        return _profTrigger.back();
    }

    // Append a trigger; call sort() afterwards to restore the ordering.
    void add(uint64_t trigger) { _profTrigger.push_back(trigger); }

    // Sort in descending order. The comparator must operate on uint64_t:
    // comparing through std::greater<int> truncates each value to 32 bits and
    // mis-orders any trigger that does not fit in an int.
    void sort() { std::sort(_profTrigger.begin(), _profTrigger.end(), std::greater<uint64_t>()); }

private:
    std::vector<uint64_t> _profTrigger;
};
104 
105 
106 
107 //---
108 //Extern tls
109 extern thread_local hipError_t tls_lastHipError;
110 extern thread_local ShortTid tls_shortTid;
111 
112 extern std::vector<ProfTrigger> g_dbStartTriggers;
113 extern std::vector<ProfTrigger> g_dbStopTriggers;
114 
115 //---
116 //Forward defs:
117 class ihipStream_t;
118 class ihipDevice_t;
119 class ihipCtx_t;
120 
121 // Color defs for debug messages:
122 #define KNRM "\x1B[0m"
123 #define KRED "\x1B[31m"
124 #define KGRN "\x1B[32m"
125 #define KYEL "\x1B[33m"
126 #define KBLU "\x1B[34m"
127 #define KMAG "\x1B[35m"
128 #define KCYN "\x1B[36m"
129 #define KWHT "\x1B[37m"
130 
131 extern const char *API_COLOR;
132 extern const char *API_COLOR_END;
133 
134 
135 // If set, thread-safety is enforced on all stream functions.
136 // Stream functions will acquire a mutex before entering critical sections.
137 #define STREAM_THREAD_SAFE 1
138 
139 
140 #define CTX_THREAD_SAFE 1
141 
142 
143 // Compile debug trace mode - this prints debug messages to stderr when env var HIP_DB is set.
144 // May be set to 0 to remove debug if checks - possible code size and performance difference?
145 #define COMPILE_HIP_DB 1
146 
147 
148 // Compile HIP tracing capability.
149 // 0x1 = print a string at function entry with arguments.
150 // 0x2 = prints a simple message with function name + return code when function exits.
151 // 0x3 = print both.
152 // Must be enabled at runtime with HIP_TRACE_API
153 #define COMPILE_HIP_TRACE_API 0x3
154 
155 
156 // Compile code that generates trace markers for CodeXL ATP at HIP function begin/end.
157 // ATP is standard CodeXL format that includes timestamps for kernels, HSA RT APIs, and HIP APIs.
158 #ifndef COMPILE_HIP_ATP_MARKER
159 #define COMPILE_HIP_ATP_MARKER 0
160 #endif
161 
162 
163 
164 
165 // Compile support for trace markers that are displayed on CodeXL GUI at start/stop of each function boundary.
166 // TODO - currently we print the trace message at the beginning. if we waited, we could also include return codes, and any values returned
167 // through ptr-to-args (ie the pointers allocated by hipMalloc).
168 #if COMPILE_HIP_ATP_MARKER
169 #include "CXLActivityLogger.h"
170 #define MARKER_BEGIN(markerName,group) amdtBeginMarker(markerName, group, nullptr);
171 #define MARKER_END() amdtEndMarker();
172 #define RESUME_PROFILING amdtResumeProfiling(AMDT_ALL_PROFILING);
173 #define STOP_PROFILING amdtStopProfiling(AMDT_ALL_PROFILING);
174 #else
175 // Swallow scoped markers:
176 #define MARKER_BEGIN(markerName,group)
177 #define MARKER_END()
178 #define RESUME_PROFILING
179 #define STOP_PROFILING
180 #endif
181 
182 
183 extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
184 
185 #if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1)
186 #define API_TRACE(...)\
187 {\
188  if (HIP_PROFILE_API || (COMPILE_HIP_DB && HIP_TRACE_API)) {\
189  std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\
190  std::string fullStr;\
191  recordApiTrace(&fullStr, apiStr);\
192  if (HIP_PROFILE_API == 0x1) {MARKER_BEGIN(__func__, "HIP") }\
193  else if (HIP_PROFILE_API == 0x2) {MARKER_BEGIN(fullStr.c_str(), "HIP"); }\
194  }\
195 }
196 #else
197 // Swallow API_TRACE
198 #define API_TRACE(...)
199 #endif
200 
201 
202 // Just initialize the HIP runtime, but don't log any trace information.
203 #define HIP_INIT()\
204  std::call_once(hip_initialized, ihipInit);\
205  ihipCtxStackUpdate();
206 
207 
208 // This macro should be called at the beginning of every HIP API.
209 // It initializes the HIP runtime (exactly once), and
210 // generates a trace string that can be output to stderr or to an ATP file.
211 #define HIP_INIT_API(...) \
212  HIP_INIT()\
213  API_TRACE(__VA_ARGS__);
214 
/*
 * Record hipStatus as the calling thread's last HIP error (tls_lastHipError)
 * and yield it as the value of the expression (GNU statement-expression).
 * When the exit-trace bit (0x2) is compiled in and HIP_TRACE_API is set, a
 * one-line summary is printed to stderr, in KRED if the status is non-zero.
 * When HIP_PROFILE_API is set, MARKER_END() is invoked as well.
 */
#define ihipLogStatus(hipStatus) \
    ({\
        hipError_t localHipStatus = (hipStatus); /*local copy so hipStatus only evaluated once*/ \
        tls_lastHipError = localHipStatus;\
        \
        if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API) {\
            /* apiSeqNum() is uint64_t: cast so the argument matches %llu on every ABI (%lu only matches on LP64) */\
            fprintf(stderr, " %ship-api tid:%d.%llu %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, tls_shortTid.tid(), (unsigned long long)tls_shortTid.apiSeqNum(), __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\
        }\
        if (HIP_PROFILE_API) { MARKER_END(); }\
        localHipStatus;\
    })
226 
227 
228 
229 
230 //---
231 //HIP_DB Debug flags:
232 #define DB_API 0 /* 0x01 - shortcut to enable HIP_TRACE_API on single switch */
233 #define DB_SYNC 1 /* 0x02 - trace synchronization pieces */
234 #define DB_MEM 2 /* 0x04 - trace memory allocation / deallocation */
235 #define DB_COPY 3 /* 0x08 - trace memory copy and peer commands. . */
236 #define DB_SIGNAL 4 /* 0x10 - trace signal pool commands */
237 #define DB_MAX_FLAG 5
238 // When adding a new debug flag, also add to the char name table below.
239 //
240 
241 struct DbName {
242  const char *_color;
243  const char *_shortName;
244 };
245 
246 static const DbName dbName [] =
247 {
248  {KGRN, "api"}, // not used,
249  {KYEL, "sync"},
250  {KCYN, "mem"},
251  {KMAG, "copy"},
252  {KRED, "signal"},
253 };
254 
255 
256 
257 #if COMPILE_HIP_DB
/*
 * Debug trace printf. When bit trace_level of HIP_DB is set, formats the
 * printf-style arguments into a local buffer and writes them to stderr,
 * prefixed with the flag's colour/short name (from dbName) and the short
 * thread id, and followed by KNRM.
 * Messages longer than the local buffer are truncated.
 */
#define tprintf(trace_level, ...) {\
    if (HIP_DB & (1<<(trace_level))) {\
        char msgStr[1000];\
        /* Bound the write by the real buffer size; the former limit of 2000 allowed a stack overflow. */\
        snprintf(msgStr, sizeof(msgStr), __VA_ARGS__);\
        fprintf (stderr, " %ship-%s tid:%d:%s%s", dbName[trace_level]._color, dbName[trace_level]._shortName, tls_shortTid.tid(), msgStr, KNRM); \
    }\
}
265 #else
266 /* Compile to empty code */
267 #define tprintf(trace_level, ...)
268 #endif
269 
270 
271 
272 
273 
274 class ihipException : public std::exception
275 {
276 public:
277  ihipException(hipError_t e) : _code(e) {};
278 
279  hipError_t _code;
280 };
281 
282 
283 #ifdef __cplusplus
284 extern "C" {
285 #endif
286 
287 
288 #ifdef __cplusplus
289 }
290 #endif
291 
292 const hipStream_t hipStreamNull = 0x0;
293 
294 
295 // Used to remove lock, for performance or stimulating bugs.
297 {
298  public:
299  void lock() { }
300  bool try_lock() {return true; }
301  void unlock() { }
302 };
303 
304 
305 #if STREAM_THREAD_SAFE
306 typedef std::mutex StreamMutex;
307 #else
308 #warning "Stream thread-safe disabled"
309 typedef FakeMutex StreamMutex;
310 #endif
311 
312 // Pair Device and Ctx together, these could also be toggled separately if desired.
313 #if CTX_THREAD_SAFE
314 typedef std::mutex CtxMutex;
315 #else
316 typedef FakeMutex CtxMutex;
317 #warning "Device thread-safe disabled"
318 #endif
319 
320 //
321 //---
322 // Protects access to the member _data with a lock acquired on construction and released on destruction.
323 // T must contain a _mutex field which meets the BasicLockable requirements (lock/unlock)
324 template<typename T>
326 {
327 public:
328  LockedAccessor(T &criticalData, bool autoUnlock=true) :
329  _criticalData(&criticalData),
330  _autoUnlock(autoUnlock)
331 
332  {
333  tprintf(DB_SYNC, "lock critical data %s.%p\n", typeid(T).name(), _criticalData);
334  _criticalData->_mutex.lock();
335  };
336 
337  ~LockedAccessor()
338  {
339  if (_autoUnlock) {
340  tprintf(DB_SYNC, "auto-unlock critical data %s.%p\n",typeid(T).name(), _criticalData);
341  _criticalData->_mutex.unlock();
342  }
343  }
344 
345  void unlock()
346  {
347  tprintf(DB_SYNC, "unlock critical data %s.%p\n", typeid(T).name(), _criticalData);
348  _criticalData->_mutex.unlock();
349  }
350 
351  // Syntactic sugar so -> can be used to get the underlying type.
352  T *operator->() { return _criticalData; };
353 
354 private:
355  T *_criticalData;
356  bool _autoUnlock;
357 };
358 
359 
// Base for data that carries its own mutex of type MUTEX_TYPE.
template <typename MUTEX_TYPE>
struct LockedBase {

    MUTEX_TYPE _mutex;

    // Explicit locking entry points, intended for expert use only;
    // most code should go through the lock-accessor instead.
    void lock()
    {
        this->_mutex.lock();
    }

    void unlock()
    {
        this->_mutex.unlock();
    }
};
370 
374 #define HIP_IPC_HANDLE_SIZE 64
376 {
377 public:
378  hsa_amd_ipc_memory_t ipc_handle;
379  char reserved[HIP_IPC_HANDLE_SIZE];
380  size_t psize;
381 };
382 
384 public:
385  ihipFunction_t(const char *name) {
386  size_t nameSz = strlen(name);
387  char *kernelName = (char*)malloc(nameSz);
388  strncpy(kernelName, name, nameSz);
389  _kernelName = kernelName;
390  };
391 
392  ~ihipFunction_t() {
393  if (_kernelName) {
394  free((void*)_kernelName);
395  _kernelName = NULL;
396  };
397  };
398 public:
399  const char *_kernelName;
400  hsa_executable_symbol_t _kernelSymbol;
401  uint64_t _kernel;
402 };
403 
405 public:
406  hsa_executable_t executable;
407  hsa_code_object_t object;
408  std::string fileName;
409  void *ptr;
410  size_t size;
411 
412  ihipModule_t() : executable(), object(), fileName(), ptr(nullptr), size(0), hipFunctionTable() {}
413  ~ihipModule_t() {
414  for (int i = 0; i < hipFunctionTable.size(); ++i) {
415  ihipFunction_t *func = hipFunctionTable[i];
416  delete func;
417  }
418  hipFunctionTable.clear();
419  }
420 
421  void registerFunction(ihipFunction_t* func) {
422  hipFunctionTable.push_back(func);
423  }
424 private:
425  std::vector<ihipFunction_t*> hipFunctionTable;
426 };
427 
428 template <typename MUTEX_TYPE>
429 class ihipStreamCriticalBase_t : public LockedBase<MUTEX_TYPE>
430 {
431 public:
432  ihipStreamCriticalBase_t(hc::accelerator_view av) :
433  _kernelCnt(0),
434  _av(av)
435  {
436  };
437 
439  }
440 
442 
443 public:
444  // TODO - remove _kernelCnt mechanism:
445  uint32_t _kernelCnt; // Count of inflight kernels in this stream. Reset at ::wait().
446  hc::accelerator_view _av;
447 };
448 
449 
450 // if HIP code needs to acquire locks for both ihipCtx_t and ihipStream_t, it should first acquire the lock
451 // for the ihipCtx_t and then for the individual streams. The locks should not be acquired in reverse order
452 // or deadlock may occur. In some cases, it may be possible to reduce the range where the locks must be held.
453 // HIP routines should avoid acquiring and releasing the same lock during the execution of a single HIP API.
454 
455 
458 
459 
460 
461 //---
462 // Internal stream structure.
464 public:
465  enum ScheduleMode {Auto, Spin, Yield};
466  typedef uint64_t SeqNum_t ;
467 
468  ihipStream_t(ihipCtx_t *ctx, hc::accelerator_view av, unsigned int flags);
469  ~ihipStream_t();
470 
471  // kind is hipMemcpyKind
472  void locked_copySync (void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn = true);
473 
474 
475  void locked_copyAsync(void* dst, const void* src, size_t sizeBytes, unsigned kind);
476 
477 
478  //---
479  // Member functions that begin with locked_ are thread-safe accessors - these acquire / release the critical mutex.
480  LockedAccessor_StreamCrit_t lockopen_preKernelCommand();
481  void lockclose_postKernelCommand(const char *kernelName, hc::accelerator_view *av);
482 
483 
484  void locked_wait(bool assertQueueEmpty=false);
485 
486  hc::accelerator_view* locked_getAv() { LockedAccessor_StreamCrit_t crit(_criticalData); return &(crit->_av); };
487 
488  void locked_waitEvent(hipEvent_t event);
489  void locked_recordEvent(hipEvent_t event);
490 
491 
492  //---
493 
494  // Use this if we already have the stream critical data mutex:
495  void wait(LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty=false);
496 
497  void launchModuleKernel(hc::accelerator_view av, hsa_signal_t signal,
498  uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
499  uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
500  uint32_t groupSegmentSize, uint32_t sharedMemBytes,
501  void *kernarg, size_t kernSize, uint64_t kernel);
502 
503 
504 
505  //-- Non-racy accessors:
506  // These functions access fields set at initialization time and are non-racy (so do not acquire mutex)
507  const ihipDevice_t * getDevice() const;
508  ihipCtx_t * getCtx() const;
509 
510 
511 public:
512  //---
513  //Public member vars - these are set at initialization and never change:
514  SeqNum_t _id; // monotonic sequence ID
515  unsigned _flags;
516 
517 
518 private:
519 
520 
521  // The unsigned return is hipMemcpyKind
522  unsigned resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDeviceMem);
523  void resolveHcMemcpyDirection(unsigned hipMemKind,
524  const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo,
525  hc::hcCommandKind *hcCopyDir,
526  ihipCtx_t **copyDevice,
527  bool *forceUnpinnedCopy);
528 
529  bool canSeeMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstInfo, const hc::AmPointerInfo *srcInfo);
530 
531 
532 private: // Data
533  // Critical Data - MUST be accessed through LockedAccessor_StreamCrit_t
534  ihipStreamCritical_t _criticalData;
535 
536  ihipCtx_t *_ctx; // parent context that owns this stream.
537 
538  // Friends:
539  friend std::ostream& operator<<(std::ostream& os, const ihipStream_t& s);
541 
542  ScheduleMode _scheduleMode;
543 };
544 
545 
546 
547 //----
548 // Internal event structure:
enum hipEventStatus_t {
    // Event is unutilized; it must be "Created" before use.
    hipEventStatusUnitialized = 0,

    hipEventStatusCreated = 1,

    // Event has been enqueued to record something.
    hipEventStatusRecording = 2,

    // Event has been recorded - timestamps are valid.
    hipEventStatusRecorded = 3,
};
555 
556 
557 // internal hip event structure.
558 struct ihipEvent_t {
559  hipEventStatus_t _state;
560 
561  hipStream_t _stream; // Stream where the event is recorded, or NULL if all streams.
562  unsigned _flags;
563 
564  hc::completion_future _marker;
565  uint64_t _timestamp; // store timestamp, may be set on host or by marker.
566 } ;
567 
568 
569 
570 
571 
572 //----
573 // Properties of the HIP device.
574 // Multiple contexts can point to same device.
576 {
577 public:
578  ihipDevice_t(unsigned deviceId, unsigned deviceCnt, hc::accelerator &acc);
579  ~ihipDevice_t();
580 
581  // Accessors:
582  ihipCtx_t *getPrimaryCtx() const { return _primaryCtx; };
583 
584 public:
585  unsigned _deviceId; // device ID
586 
587  hc::accelerator _acc;
588  hsa_agent_t _hsaAgent; // hsa agent handle
589 
591  unsigned _computeUnits;
592  hipDeviceProp_t _props; // saved device properties.
593 
594  // TODO - report this through device properties, base on HCC API call.
595  int _isLargeBar;
596 
597  ihipCtx_t *_primaryCtx;
598 
599 private:
600  hipError_t initProperties(hipDeviceProp_t* prop);
601 };
602 //=============================================================================
603 
604 
605 
606 //=============================================================================
607 //class ihipCtxCriticalBase_t
608 template <typename MUTEX_TYPE>
610 {
611 public:
612  ihipCtxCriticalBase_t(unsigned deviceCnt) :
613  _peerCnt(0)
614  {
615  _peerAgents = new hsa_agent_t[deviceCnt];
616  };
617 
619  if (_peerAgents != nullptr) {
620  delete _peerAgents;
621  _peerAgents = nullptr;
622  }
623  _peerCnt = 0;
624  }
625 
626  // Streams:
627  void addStream(ihipStream_t *stream);
628  std::list<ihipStream_t*> &streams() { return _streams; };
629  const std::list<ihipStream_t*> &const_streams() const { return _streams; };
630 
631 
632  // Peer Accessor classes:
633  bool isPeerWatcher(const ihipCtx_t *peer); // returns True if peer has access to memory physically located on this device.
634  bool addPeerWatcher(const ihipCtx_t *thisCtx, ihipCtx_t *peer);
635  bool removePeerWatcher(const ihipCtx_t *thisCtx, ihipCtx_t *peer);
636  void resetPeerWatchers(ihipCtx_t *thisDevice);
637  void printPeerWatchers(FILE *f) const;
638 
639  uint32_t peerCnt() const { return _peerCnt; };
640  hsa_agent_t *peerAgents() const { return _peerAgents; };
641 
642 
643  // TODO - move private
644  std::list<ihipCtx_t*> _peers; // list of enabled peer devices.
645 
646  friend class LockedAccessor<ihipCtxCriticalBase_t>;
647 private:
648  //--- Stream Tracker:
649  std::list< ihipStream_t* > _streams; // streams associated with this device.
650 
651 
652  //--- Peer Tracker:
653  // These reflect the currently Enabled set of peers for this GPU:
654  // Enabled peers have permissions to access the memory physically allocated on this device.
655  // Note the peers always contain the self agent for easy interfacing with HSA APIs.
656  uint32_t _peerCnt; // number of enabled peers
657  hsa_agent_t *_peerAgents; // efficient packed array of enabled agents (to use for allocations.)
658 private:
659  void recomputePeerAgents();
660 };
661 // Note Mutex type Real/Fake selected based on CtxMutex
663 
664 // This type is used by functions that need access to the critical device structures.
666 //=============================================================================
667 
668 
669 //=============================================================================
670 //class ihipCtx_t:
671 // A HIP CTX (context) points at one of the existing devices and contains the streams,
672 // peer-to-peer mappings, creation flags. Multiple contexts can point to the same
673 // device.
674 //
676 {
677 public: // Functions:
678  ihipCtx_t(ihipDevice_t *device, unsigned deviceCnt, unsigned flags); // note: calls constructor for _criticalData
679  ~ihipCtx_t();
680 
681  // Functions which read or write the critical data are named locked_.
682  // ihipCtx_t does not use recursive locks so the ihip implementation must avoid calling a locked_ function from within a locked_ function.
683  // External functions which call several locked_ functions will acquire and release the lock for each function. if this occurs in
684  // performance-sensitive code we may want to refactor by adding non-locked functions and creating a new locked_ member function to call them all.
685  void locked_addStream(ihipStream_t *s);
686  void locked_removeStream(ihipStream_t *s);
687  void locked_reset();
688  void locked_waitAllStreams();
689  void locked_syncDefaultStream(bool waitOnSelf);
690 
691  ihipCtxCritical_t &criticalData() { return _criticalData; }; // TODO, move private. Fix P2P.
692 
693  const ihipDevice_t *getDevice() const { return _device; };
694  int getDeviceNum() const { return _device->_deviceId; };
695 
696  // TODO - review uses of getWriteableDevice(), can these be converted to getDevice()
697  ihipDevice_t *getWriteableDevice() const { return _device; };
698 
699  std::string toString() const;
700 
701 public: // Data
702  // The NULL stream is used if no other stream is specified.
703  // Default stream has special synchronization properties with other streams.
704  ihipStream_t *_defaultStream;
705 
706  // Flags specified when the context is created:
707  unsigned _ctxFlags;
708 
709 private:
710  ihipDevice_t *_device;
711 
712 
713 private: // Critical data, protected with locked access:
714  // Members of _protected data MUST be accessed through the LockedAccessor.
715  // Search for LockedAccessor<ihipCtxCritical_t> for examples; do not access _criticalData directly.
716  ihipCtxCritical_t _criticalData;
717 
718 };
719 
720 
721 
722 //=================================================================================================
723 // Global variable definition:
724 extern std::once_flag hip_initialized;
725 extern unsigned g_deviceCnt;
726 extern hsa_agent_t g_cpu_agent ; // the CPU agent.
727 
728 //=================================================================================================
729 // Extern functions:
730 extern void ihipInit();
731 extern const char *ihipErrorString(hipError_t);
732 extern ihipCtx_t *ihipGetTlsDefaultCtx();
733 extern void ihipSetTlsDefaultCtx(ihipCtx_t *ctx);
734 extern hipError_t ihipSynchronize(void);
735 extern void ihipCtxStackUpdate();
736 
737 extern ihipDevice_t *ihipGetDevice(int);
738 ihipCtx_t * ihipGetPrimaryCtx(unsigned deviceIndex);
739 
740 extern void ihipSetTs(hipEvent_t e);
741 
742 
743 hipStream_t ihipSyncAndResolveStream(hipStream_t);
744 
745 // Stream printf functions:
746 inline std::ostream& operator<<(std::ostream& os, const ihipStream_t& s)
747 {
748  os << "stream#";
749  os << s.getDevice()->_deviceId;;
750  os << '.';
751  os << s._id;
752  return os;
753 }
754 
755 inline std::ostream & operator<<(std::ostream& os, const dim3& s)
756 {
757  os << '{';
758  os << s.x;
759  os << ',';
760  os << s.y;
761  os << ',';
762  os << s.z;
763  os << '}';
764  return os;
765 }
766 
767 inline std::ostream & operator<<(std::ostream& os, const gl_dim3& s)
768 {
769  os << '{';
770  os << s.x;
771  os << ',';
772  os << s.y;
773  os << ',';
774  os << s.z;
775  os << '}';
776  return os;
777 }
778 
779 // Stream printf functions:
780 inline std::ostream& operator<<(std::ostream& os, const hipEvent_t& e)
781 {
782  os << "event:" << std::hex << static_cast<void*> (e);
783  return os;
784 }
785 
786 inline std::ostream& operator<<(std::ostream& os, const ihipCtx_t* c)
787 {
788  os << "ctx:" << static_cast<const void*> (c)
789  << ".dev:" << c->getDevice()->_deviceId;
790  return os;
791 }
792 
793 
794 // Helper functions that are used across src files:
795 namespace hip_internal {
796  hipError_t memcpyAsync (void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream);
797 };
798 
799 
800 #endif
Definition: hip_hcc.h:241
Definition: hip_hcc.h:575
Definition: hip_hcc.h:361
Definition: hip_hcc.h:86
friend hipError_t hipStreamQuery(hipStream_t)
Return hipSuccess if all of the operations in the specified stream have completed, or hipErrorNotReady if not.
Definition: hip_stream.cpp:113
Definition: hip_hcc.h:296
Definition: hip_hcc.h:375
uint32_t x
x
Definition: hip_runtime_api.h:152
Definition: hip_hcc.h:383
Definition: hip_hcc.h:675
Definition: hip_runtime_api.h:151
uint32_t y
y
Definition: hip_runtime_api.h:153
hipError_t
Definition: hip_runtime_api.h:152
hipMemcpyKind
Definition: hip_runtime_api.h:165
Definition: hip_hcc.h:404
unsigned _computeUnits
Number of compute units supported by the device:
Definition: hip_hcc.h:591
uint32_t z
z
Definition: hip_runtime_api.h:154
Definition: hip_hcc.h:70
Definition: hip_runtime_api.h:82
Definition: hip_hcc.h:558
Definition: hip_hcc.h:274
Definition: hip_hcc.h:609
Definition: hip_hcc.h:463
Definition: hip_hcc.h:429
hsa_amd_ipc_memory_t ipc_handle
ipc memory handle on ROCr
Definition: hip_hcc.h:378
Definition: hip_hcc.h:325