@@ -1475,7 +1475,7 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream)
|
||||
void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, const hipStream_t stream)
|
||||
{
|
||||
|
||||
if (HIP_PROFILE_API || (COMPILE_HIP_DB && HIP_TRACE_API)) {
|
||||
if ((HIP_TRACE_API & (1<<TRACE_CMD)) || HIP_PROFILE_API || (COMPILE_HIP_DB && HIP_TRACE_API)) {
|
||||
std::stringstream os_pre;
|
||||
std::stringstream os;
|
||||
os_pre << "<<hip-api tid:";
|
||||
|
||||
@@ -179,54 +179,11 @@ extern const char *API_COLOR_END;
|
||||
#endif
|
||||
|
||||
|
||||
extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
|
||||
|
||||
#if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1)
|
||||
#define API_TRACE(...)\
|
||||
{\
|
||||
tls_tidInfo.incApiSeqNum();\
|
||||
if (HIP_PROFILE_API || (COMPILE_HIP_DB && HIP_TRACE_API)) {\
|
||||
std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\
|
||||
std::string fullStr;\
|
||||
recordApiTrace(&fullStr, apiStr);\
|
||||
if (HIP_PROFILE_API == 0x1) {MARKER_BEGIN(__func__, "HIP") }\
|
||||
else if (HIP_PROFILE_API == 0x2) {MARKER_BEGIN(fullStr.c_str(), "HIP"); }\
|
||||
}\
|
||||
}
|
||||
#else
|
||||
// Swallow API_TRACE
|
||||
#define API_TRACE(...)\
|
||||
tls_tidInfo.incApiSeqNum();
|
||||
#endif
|
||||
|
||||
|
||||
// Just initialize the HIP runtime, but don't log any trace information.
|
||||
#define HIP_INIT()\
|
||||
std::call_once(hip_initialized, ihipInit);\
|
||||
ihipCtxStackUpdate();
|
||||
#define HIP_SET_DEVICE()\
|
||||
ihipDeviceSetState();
|
||||
|
||||
// This macro should be called at the beginning of every HIP API.
|
||||
// It initialies the hip runtime (exactly once), and
|
||||
// generate trace string that can be output to stderr or to ATP file.
|
||||
#define HIP_INIT_API(...) \
|
||||
HIP_INIT()\
|
||||
API_TRACE(__VA_ARGS__);
|
||||
|
||||
#define ihipLogStatus(hipStatus) \
|
||||
({\
|
||||
hipError_t localHipStatus = hipStatus; /*local copy so hipStatus only evaluated once*/ \
|
||||
tls_lastHipError = localHipStatus;\
|
||||
\
|
||||
if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API) {\
|
||||
fprintf(stderr, " %ship-api tid:%d.%lu %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(),tls_tidInfo.apiSeqNum(), __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\
|
||||
}\
|
||||
if (HIP_PROFILE_API) { MARKER_END(); }\
|
||||
localHipStatus;\
|
||||
})
|
||||
|
||||
|
||||
//---
|
||||
//HIP Trace modes
|
||||
#define TRACE_ALL 0 // 0x1
|
||||
#define TRACE_CMD 1 // 0x2
|
||||
#define TRACE_MEM 2 // 0x4
|
||||
|
||||
|
||||
//---
|
||||
@@ -238,12 +195,14 @@ extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
|
||||
#define DB_MAX_FLAG 4
|
||||
// When adding a new debug flag, also add to the char name table below.
|
||||
//
|
||||
//
|
||||
|
||||
struct DbName {
|
||||
const char *_color;
|
||||
const char *_shortName;
|
||||
};
|
||||
|
||||
// This table must be kept in-sync with the defines above.
|
||||
static const DbName dbName [] =
|
||||
{
|
||||
{KGRN, "api"}, // not used,
|
||||
@@ -270,6 +229,74 @@ static const DbName dbName [] =
|
||||
|
||||
|
||||
|
||||
//---
|
||||
extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
|
||||
|
||||
#if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1)
|
||||
#define API_TRACE(forceTrace, ...)\
|
||||
{\
|
||||
tls_tidInfo.incApiSeqNum();\
|
||||
if (forceTrace || (HIP_PROFILE_API || (COMPILE_HIP_DB && (HIP_TRACE_API & (1<<TRACE_ALL))))) {\
|
||||
std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\
|
||||
std::string fullStr;\
|
||||
recordApiTrace(&fullStr, apiStr);\
|
||||
if (HIP_PROFILE_API == 0x1) {MARKER_BEGIN(__func__, "HIP") }\
|
||||
else if (HIP_PROFILE_API == 0x2) {MARKER_BEGIN(fullStr.c_str(), "HIP"); }\
|
||||
}\
|
||||
}
|
||||
#else
|
||||
// Swallow API_TRACE
|
||||
#define API_TRACE(IS_CMD, ...)\
|
||||
tls_tidInfo.incApiSeqNum();
|
||||
#endif
|
||||
|
||||
|
||||
// Just initialize the HIP runtime, but don't log any trace information.
|
||||
#define HIP_INIT()\
|
||||
std::call_once(hip_initialized, ihipInit);\
|
||||
ihipCtxStackUpdate();
|
||||
#define HIP_SET_DEVICE()\
|
||||
ihipDeviceSetState();
|
||||
|
||||
|
||||
|
||||
// This macro should be called at the beginning of every HIP API.
|
||||
// It initializes the hip runtime (exactly once), and
|
||||
// generates a trace string that can be output to stderr or to ATP file.
|
||||
#define HIP_INIT_API(...) \
|
||||
HIP_INIT()\
|
||||
API_TRACE(0, __VA_ARGS__);
|
||||
|
||||
|
||||
// Like above, but will trace with DB_CMD.
|
||||
// Replace HIP_INIT_API with this call inside important APIs that launch work on the GPU:
|
||||
// kernel launches, copy commands, memory sets, etc.
|
||||
#define HIP_INIT_CMD_API(...) \
|
||||
HIP_INIT()\
|
||||
API_TRACE((HIP_TRACE_API&(1<<TRACE_CMD)), __VA_ARGS__);
|
||||
|
||||
// This macro should be called at the end of every HIP API, and only at the end of top-level hip APIS (not internal hip)
|
||||
// It has dual function: logs the last error returned for use by hipGetLastError,
|
||||
// and also prints the closing message when the debug trace is enabled.
|
||||
#define ihipLogStatus(hipStatus) \
|
||||
({\
|
||||
hipError_t localHipStatus = hipStatus; /*local copy so hipStatus only evaluated once*/ \
|
||||
tls_lastHipError = localHipStatus;\
|
||||
\
|
||||
if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API & (1<<TRACE_ALL)) {\
|
||||
fprintf(stderr, " %ship-api tid:%d.%lu %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(),tls_tidInfo.apiSeqNum(), __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\
|
||||
}\
|
||||
if (HIP_PROFILE_API) { MARKER_END(); }\
|
||||
localHipStatus;\
|
||||
})
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class ihipException : public std::exception
|
||||
{
|
||||
|
||||
@@ -160,7 +160,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes)
|
||||
|
||||
hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
{
|
||||
HIP_INIT_API(ptr, sizeBytes, flags);
|
||||
HIP_INIT_CMD_API(ptr, sizeBytes, flags);
|
||||
HIP_SET_DEVICE();
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
@@ -239,7 +239,7 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
// width in bytes
|
||||
hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height)
|
||||
{
|
||||
HIP_INIT_API(ptr, pitch, width, height);
|
||||
HIP_INIT_CMD_API(ptr, pitch, width, height);
|
||||
HIP_SET_DEVICE();
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
@@ -291,7 +291,7 @@ hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannel
|
||||
hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
|
||||
size_t width, size_t height, unsigned int flags)
|
||||
{
|
||||
HIP_INIT_API(array, desc, width, height, flags);
|
||||
HIP_INIT_CMD_API(array, desc, width, height, flags);
|
||||
HIP_SET_DEVICE();
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
@@ -438,7 +438,7 @@ hipError_t hipHostUnregister(void *hostPtr)
|
||||
|
||||
hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind)
|
||||
{
|
||||
HIP_INIT_API(symbolName, src, count, offset, kind);
|
||||
HIP_INIT_CMD_API(symbolName, src, count, offset, kind);
|
||||
|
||||
if(symbolName == nullptr)
|
||||
{
|
||||
@@ -466,7 +466,7 @@ hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t cou
|
||||
|
||||
hipError_t hipMemcpyToSymbolAsync(const char* symbolName, const void *src, size_t count, size_t offset, hipMemcpyKind kind, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(symbolName, src, count, offset, kind, stream);
|
||||
HIP_INIT_CMD_API(symbolName, src, count, offset, kind, stream);
|
||||
|
||||
if(symbolName == nullptr)
|
||||
{
|
||||
@@ -506,7 +506,7 @@ hipError_t hipMemcpyToSymbolAsync(const char* symbolName, const void *src, size_
|
||||
//---
|
||||
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind)
|
||||
{
|
||||
HIP_INIT_API(dst, src, sizeBytes, kind);
|
||||
HIP_INIT_CMD_API(dst, src, sizeBytes, kind);
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
|
||||
@@ -527,7 +527,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind
|
||||
|
||||
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes)
|
||||
{
|
||||
HIP_INIT_API(dst, src, sizeBytes);
|
||||
HIP_INIT_CMD_API(dst, src, sizeBytes);
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
|
||||
@@ -548,7 +548,7 @@ hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes)
|
||||
|
||||
hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes)
|
||||
{
|
||||
HIP_INIT_API(dst, src, sizeBytes);
|
||||
HIP_INIT_CMD_API(dst, src, sizeBytes);
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
|
||||
@@ -569,7 +569,7 @@ hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t sizeBytes)
|
||||
|
||||
hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes)
|
||||
{
|
||||
HIP_INIT_API(dst, src, sizeBytes);
|
||||
HIP_INIT_CMD_API(dst, src, sizeBytes);
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
|
||||
@@ -590,7 +590,7 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeByte
|
||||
|
||||
hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes)
|
||||
{
|
||||
HIP_INIT_API(dst, src, sizeBytes);
|
||||
HIP_INIT_CMD_API(dst, src, sizeBytes);
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
|
||||
@@ -641,7 +641,7 @@ hipError_t memcpyAsync (void* dst, const void* src, size_t sizeBytes, hipMemcpyK
|
||||
|
||||
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(dst, src, sizeBytes, kind, stream);
|
||||
HIP_INIT_CMD_API(dst, src, sizeBytes, kind, stream);
|
||||
|
||||
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, kind, stream));
|
||||
|
||||
@@ -650,21 +650,21 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp
|
||||
|
||||
hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(dst, src, sizeBytes, stream);
|
||||
HIP_INIT_CMD_API(dst, src, sizeBytes, stream);
|
||||
|
||||
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyHostToDevice, stream));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(dst, src, sizeBytes, stream);
|
||||
HIP_INIT_CMD_API(dst, src, sizeBytes, stream);
|
||||
|
||||
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToDevice, stream));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(dst, src, sizeBytes, stream);
|
||||
HIP_INIT_CMD_API(dst, src, sizeBytes, stream);
|
||||
|
||||
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToHost, stream));
|
||||
}
|
||||
@@ -673,7 +673,7 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, h
|
||||
hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
|
||||
size_t width, size_t height, hipMemcpyKind kind) {
|
||||
|
||||
HIP_INIT_API(dst, dpitch, src, spitch, width, height, kind);
|
||||
HIP_INIT_CMD_API(dst, dpitch, src, spitch, width, height, kind);
|
||||
|
||||
if(width > dpitch || width > spitch)
|
||||
return ihipLogStatus(hipErrorUnknown);
|
||||
@@ -699,7 +699,7 @@ hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
|
||||
hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
|
||||
size_t spitch, size_t width, size_t height, hipMemcpyKind kind) {
|
||||
|
||||
HIP_INIT_API(dst, wOffset, hOffset, src, spitch, width, height, kind);
|
||||
HIP_INIT_CMD_API(dst, wOffset, hOffset, src, spitch, width, height, kind);
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
|
||||
@@ -752,7 +752,7 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con
|
||||
hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset,
|
||||
const void* src, size_t count, hipMemcpyKind kind) {
|
||||
|
||||
HIP_INIT_API(dst, wOffset, hOffset, src, count, kind);
|
||||
HIP_INIT_CMD_API(dst, wOffset, hOffset, src, count, kind);
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
|
||||
@@ -811,7 +811,7 @@ ihipMemsetKernel(hipStream_t stream,
|
||||
// TODO-sync: function is async unless target is pinned host memory - then these are fully sync.
|
||||
hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream )
|
||||
{
|
||||
HIP_INIT_API(dst, value, sizeBytes, stream);
|
||||
HIP_INIT_CMD_API(dst, value, sizeBytes, stream);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
@@ -861,12 +861,12 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t s
|
||||
|
||||
hipError_t hipMemset(void* dst, int value, size_t sizeBytes )
|
||||
{
|
||||
hipStream_t stream = hipStreamNull;
|
||||
// TODO - call an ihip memset so HIP_TRACE is correct.
|
||||
HIP_INIT_API(dst, value, sizeBytes, stream);
|
||||
HIP_INIT_CMD_API(dst, value, sizeBytes);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
hipStream_t stream = hipStreamNull;
|
||||
// TODO - call an ihip memset so HIP_TRACE is correct.
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
|
||||
if (stream) {
|
||||
|
||||
Ссылка в новой задаче
Block a user