28 #include "hsa/hsa_ext_amd.h"
32 #if defined(__HCC__) && (__hcc_workweek__ < 16354)
33 #error("This version of HIP requires a newer version of HCC.");
36 #define USE_DISPATCH_HSA_KERNEL 1
45 extern const int release;
48 extern int HIP_LAUNCH_BLOCKING;
49 extern int HIP_API_BLOCKING;
51 extern int HIP_PRINT_ENV;
52 extern int HIP_PROFILE_API;
56 extern int HIP_STAGING_SIZE;
57 extern int HIP_STREAM_SIGNALS;
58 extern int HIP_VISIBLE_DEVICES;
59 extern int HIP_FORCE_P2P_HOST;
61 extern int HIP_COHERENT_HOST_ALLOC;
66 extern int HIP_DISABLE_HW_KERNEL_DEP;
75 int tid()
const {
return _shortTid; };
76 uint64_t incApiSeqNum() {
return ++_apiSeqNum; };
77 uint64_t apiSeqNum()
const {
return _apiSeqNum; };
88 static const uint64_t MAX_TRIGGER = std::numeric_limits<uint64_t>::max();
90 void print (
int tid) {
91 std::cout <<
"Enabling tracing for ";
92 for (
auto iter=_profTrigger.begin(); iter != _profTrigger.end(); iter++) {
93 std::cout <<
"tid:" << tid <<
"." << *iter <<
",";
98 uint64_t nextTrigger() {
return _profTrigger.empty() ? MAX_TRIGGER : _profTrigger.back(); };
99 void add(uint64_t trigger) { _profTrigger.push_back(trigger); };
100 void sort() { std::sort (_profTrigger.begin(), _profTrigger.end(), std::greater<int>()); };
102 std::vector<uint64_t> _profTrigger;
109 extern thread_local
hipError_t tls_lastHipError;
110 extern thread_local
ShortTid tls_shortTid;
112 extern std::vector<ProfTrigger> g_dbStartTriggers;
113 extern std::vector<ProfTrigger> g_dbStopTriggers;
122 #define KNRM "\x1B[0m"
123 #define KRED "\x1B[31m"
124 #define KGRN "\x1B[32m"
125 #define KYEL "\x1B[33m"
126 #define KBLU "\x1B[34m"
127 #define KMAG "\x1B[35m"
128 #define KCYN "\x1B[36m"
129 #define KWHT "\x1B[37m"
131 extern const char *API_COLOR;
132 extern const char *API_COLOR_END;
137 #define STREAM_THREAD_SAFE 1
140 #define CTX_THREAD_SAFE 1
145 #define COMPILE_HIP_DB 1
153 #define COMPILE_HIP_TRACE_API 0x3
158 #ifndef COMPILE_HIP_ATP_MARKER
159 #define COMPILE_HIP_ATP_MARKER 0
168 #if COMPILE_HIP_ATP_MARKER
169 #include "CXLActivityLogger.h"
170 #define MARKER_BEGIN(markerName,group) amdtBeginMarker(markerName, group, nullptr);
171 #define MARKER_END() amdtEndMarker();
172 #define RESUME_PROFILING amdtResumeProfiling(AMDT_ALL_PROFILING);
173 #define STOP_PROFILING amdtStopProfiling(AMDT_ALL_PROFILING);
176 #define MARKER_BEGIN(markerName,group)
178 #define RESUME_PROFILING
179 #define STOP_PROFILING
183 extern void recordApiTrace(std::string *fullStr,
const std::string &apiStr);
185 #if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1)
186 #define API_TRACE(...)\
188 if (HIP_PROFILE_API || (COMPILE_HIP_DB && HIP_TRACE_API)) {\
189 std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\
190 std::string fullStr;\
191 recordApiTrace(&fullStr, apiStr);\
192 if (HIP_PROFILE_API == 0x1) {MARKER_BEGIN(__func__, "HIP") }\
193 else if (HIP_PROFILE_API == 0x2) {MARKER_BEGIN(fullStr.c_str(), "HIP"); }\
198 #define API_TRACE(...)
204 std::call_once(hip_initialized, ihipInit);\
205 ihipCtxStackUpdate();
211 #define HIP_INIT_API(...) \
213 API_TRACE(__VA_ARGS__);
215 #define ihipLogStatus(hipStatus) \
217 hipError_t localHipStatus = hipStatus; \
218 tls_lastHipError = localHipStatus;\
220 if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API) {\
221 fprintf(stderr, " %ship-api tid:%d.%lu %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, tls_shortTid.tid(),tls_shortTid.apiSeqNum(), __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\
223 if (HIP_PROFILE_API) { MARKER_END(); }\
237 #define DB_MAX_FLAG 5
243 const char *_shortName;
246 static const DbName dbName [] =
258 #define tprintf(trace_level, ...) {\
259 if (HIP_DB & (1<<(trace_level))) {\
261 snprintf(msgStr, 2000, __VA_ARGS__);\
262 fprintf (stderr, " %ship-%s tid:%d:%s%s", dbName[trace_level]._color, dbName[trace_level]._shortName, tls_shortTid.tid(), msgStr, KNRM); \
267 #define tprintf(trace_level, ...)
// Always reports success: this try_lock never fails and touches no state.
bool try_lock()
{
    return true;
}
305 #if STREAM_THREAD_SAFE
306 typedef std::mutex StreamMutex;
308 #warning "Stream thread-safe disabled"
314 typedef std::mutex CtxMutex;
317 #warning "Device thread-safe disabled"
329 _criticalData(&criticalData),
330 _autoUnlock(autoUnlock)
333 tprintf(DB_SYNC,
"lock critical data %s.%p\n",
typeid(T).name(), _criticalData);
334 _criticalData->_mutex.lock();
340 tprintf(DB_SYNC,
"auto-unlock critical data %s.%p\n",
typeid(T).name(), _criticalData);
341 _criticalData->_mutex.unlock();
347 tprintf(DB_SYNC,
"unlock critical data %s.%p\n",
typeid(T).name(), _criticalData);
348 _criticalData->_mutex.unlock();
352 T *operator->() {
return _criticalData; };
360 template <
typename MUTEX_TYPE>
365 void lock() { _mutex.lock(); }
366 void unlock() { _mutex.unlock(); }
374 #define HIP_IPC_HANDLE_SIZE 64
379 char reserved[HIP_IPC_HANDLE_SIZE];
386 size_t nameSz = strlen(name);
387 char *kernelName = (
char*)malloc(nameSz);
388 strncpy(kernelName, name, nameSz);
389 _kernelName = kernelName;
394 free((
void*)_kernelName);
399 const char *_kernelName;
400 hsa_executable_symbol_t _kernelSymbol;
406 hsa_executable_t executable;
407 hsa_code_object_t object;
408 std::string fileName;
412 ihipModule_t() : executable(), object(), fileName(), ptr(
nullptr), size(0), hipFunctionTable() {}
414 for (
int i = 0; i < hipFunctionTable.size(); ++i) {
418 hipFunctionTable.clear();
422 hipFunctionTable.push_back(func);
425 std::vector<ihipFunction_t*> hipFunctionTable;
428 template <
typename MUTEX_TYPE>
446 hc::accelerator_view _av;
465 enum ScheduleMode {Auto, Spin, Yield};
466 typedef uint64_t SeqNum_t ;
472 void locked_copySync (
void* dst,
const void* src,
size_t sizeBytes,
unsigned kind,
bool resolveOn =
true);
475 void locked_copyAsync(
void* dst,
const void* src,
size_t sizeBytes,
unsigned kind);
481 void lockclose_postKernelCommand(
const char *kernelName, hc::accelerator_view *av);
484 void locked_wait(
bool assertQueueEmpty=
false);
497 void launchModuleKernel(hc::accelerator_view av, hsa_signal_t signal,
498 uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
499 uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
500 uint32_t groupSegmentSize, uint32_t sharedMemBytes,
501 void *kernarg,
size_t kernSize, uint64_t kernel);
522 unsigned resolveMemcpyDirection(
bool srcInDeviceMem,
bool dstInDeviceMem);
523 void resolveHcMemcpyDirection(
unsigned hipMemKind,
524 const hc::AmPointerInfo *dstPtrInfo,
const hc::AmPointerInfo *srcPtrInfo,
525 hc::hcCommandKind *hcCopyDir,
527 bool *forceUnpinnedCopy);
529 bool canSeeMemory(
const ihipCtx_t *thisCtx,
const hc::AmPointerInfo *dstInfo,
const hc::AmPointerInfo *srcInfo);
539 friend std::ostream& operator<<(std::ostream& os,
const ihipStream_t& s);
542 ScheduleMode _scheduleMode;
549 enum hipEventStatus_t {
550 hipEventStatusUnitialized = 0,
551 hipEventStatusCreated = 1,
552 hipEventStatusRecording = 2,
553 hipEventStatusRecorded = 3,
559 hipEventStatus_t _state;
564 hc::completion_future _marker;
578 ihipDevice_t(
unsigned deviceId,
unsigned deviceCnt, hc::accelerator &acc);
582 ihipCtx_t *getPrimaryCtx()
const {
return _primaryCtx; };
587 hc::accelerator _acc;
588 hsa_agent_t _hsaAgent;
608 template <
typename MUTEX_TYPE>
615 _peerAgents =
new hsa_agent_t[deviceCnt];
619 if (_peerAgents !=
nullptr) {
621 _peerAgents =
nullptr;
628 std::list<ihipStream_t*> &streams() {
return _streams; };
629 const std::list<ihipStream_t*> &const_streams()
const {
return _streams; };
633 bool isPeerWatcher(
const ihipCtx_t *peer);
636 void resetPeerWatchers(
ihipCtx_t *thisDevice);
637 void printPeerWatchers(FILE *f)
const;
639 uint32_t peerCnt()
const {
return _peerCnt; };
640 hsa_agent_t *peerAgents()
const {
return _peerAgents; };
644 std::list<ihipCtx_t*> _peers;
649 std::list< ihipStream_t* > _streams;
657 hsa_agent_t *_peerAgents;
659 void recomputePeerAgents();
688 void locked_waitAllStreams();
689 void locked_syncDefaultStream(
bool waitOnSelf);
693 const ihipDevice_t *getDevice()
const {
return _device; };
694 int getDeviceNum()
const {
return _device->_deviceId; };
697 ihipDevice_t *getWriteableDevice()
const {
return _device; };
699 std::string toString()
const;
724 extern std::once_flag hip_initialized;
725 extern unsigned g_deviceCnt;
726 extern hsa_agent_t g_cpu_agent ;
730 extern void ihipInit();
731 extern const char *ihipErrorString(
hipError_t);
732 extern ihipCtx_t *ihipGetTlsDefaultCtx();
733 extern void ihipSetTlsDefaultCtx(
ihipCtx_t *ctx);
735 extern void ihipCtxStackUpdate();
738 ihipCtx_t * ihipGetPrimaryCtx(
unsigned deviceIndex);
746 inline std::ostream& operator<<(std::ostream& os,
const ihipStream_t& s)
749 os << s.getDevice()->_deviceId;;
755 inline std::ostream & operator<<(std::ostream& os,
const dim3& s)
767 inline std::ostream & operator<<(std::ostream& os,
const gl_dim3& s)
780 inline std::ostream& operator<<(std::ostream& os,
const hipEvent_t& e)
782 os <<
"event:" << std::hex << static_cast<void*> (e);
786 inline std::ostream& operator<<(std::ostream& os,
const ihipCtx_t* c)
788 os <<
"ctx:" <<
static_cast<const void*
> (c)
789 <<
".dev:" << c->getDevice()->_deviceId;
795 namespace hip_internal {
Definition: hip_hcc.h:241
Definition: hip_hcc.h:575
Definition: hip_hcc.h:361
friend hipError_t hipStreamQuery(hipStream_t)
Return hipSuccess if all of the operations in the specified stream have completed, or hipErrorNotReady if not.
Definition: hip_stream.cpp:113
Definition: hip_hcc.h:296
Definition: hip_hcc.h:375
uint32_t x
x
Definition: hip_runtime_api.h:152
Definition: hip_hcc.h:383
Definition: hip_hcc.h:675
Definition: hip_runtime_api.h:151
uint32_t y
y
Definition: hip_runtime_api.h:153
hipError_t
Definition: hip_runtime_api.h:152
hipMemcpyKind
Definition: hip_runtime_api.h:165
Definition: hip_hcc.h:404
unsigned _computeUnits
Number of compute units supported by the device:
Definition: hip_hcc.h:591
uint32_t z
z
Definition: hip_runtime_api.h:154
Definition: hip_runtime_api.h:82
Definition: hip_hcc.h:558
Definition: hip_hcc.h:274
Definition: hip_hcc.h:609
Definition: hip_hcc.h:463
Definition: hip_hcc.h:429
hsa_amd_ipc_memory_t ipc_handle
ipc memory handle on ROCr
Definition: hip_hcc.h:378
Definition: hip_hcc.h:325