diff --git a/include/hcc_detail/hip_hcc.h b/include/hcc_detail/hip_hcc.h index e10b68695e..1a4e9780aa 100644 --- a/include/hcc_detail/hip_hcc.h +++ b/include/hcc_detail/hip_hcc.h @@ -87,7 +87,8 @@ class ihipCtx_t; #define KCYN "\x1B[36m" #define KWHT "\x1B[37m" -#define API_COLOR KGRN +extern const char *API_COLOR; +extern const char *API_COLOR_END; // If set, thread-safety is enforced on all stream functions. @@ -149,7 +150,7 @@ class ihipCtx_t; if (HIP_ATP_MARKER || (COMPILE_HIP_DB && HIP_TRACE_API)) {\ std::string s = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\ if (COMPILE_HIP_DB && HIP_TRACE_API) {\ - fprintf (stderr, API_COLOR "<>\n" KNRM, (localHipStatus == 0) ? API_COLOR:KRED, __func__, localHipStatus, ihipErrorString(localHipStatus));\ + fprintf(stderr, " %ship-api: %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\ }\ localHipStatus;\ }) @@ -365,8 +366,23 @@ public: class ihipFunction_t{ public: - hsa_executable_symbol_t kernel_symbol; - uint64_t kernel; + ihipFunction_t(const char *name) { + size_t nameSz = strlen(name); + char *kernelName = (char*)malloc(nameSz); + strncpy(kernelName, name, nameSz); + _kernelName = kernelName; + }; + + ~ihipFunction_t() { + if (_kernelName) { + free((void*)_kernelName); + _kernelName = NULL; + }; + }; +public: + const char *_kernelName; + hsa_executable_symbol_t _kernelSymbol; + uint64_t _kernel; }; @@ -719,6 +735,18 @@ inline std::ostream & operator<<(std::ostream& os, const dim3& s) return os; } +inline std::ostream & operator<<(std::ostream& os, const gl_dim3& s) +{ + os << '{'; + os << s.x; + os << ','; + os << s.y; + os << ','; + os << s.z; + os << '}'; + return os; +} + // Stream printf functions: inline std::ostream& operator<<(std::ostream& os, const hipEvent_t& e) { diff --git a/include/hcc_detail/hip_runtime.h b/include/hcc_detail/hip_runtime.h index f8166e7897..4e67cb7292 100644 --- a/include/hcc_detail/hip_runtime.h +++ b/include/hcc_detail/hip_runtime.h @@ -621,25 +621,19 @@ __device__ static inline void* memset(void* ptr, uint8_t val, size_t size) #define HIP_KERNEL_NAME(...) __VA_ARGS__ #ifdef __HCC_CPP__ -extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp); +extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr); extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp); extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp); extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp); -extern void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, const hipStream_t stream); extern void ihipPostLaunchKernel(hipStream_t stream, grid_launch_parm &lp); -// TODO - move to common header file. -#define KNRM "\x1B[0m" -#define KGRN "\x1B[32m" +// Due to multiple overloaded versions of ihipPreLaunchKernel, the numBlocks3D and blockDim3D can be either size_t or dim3 types #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \ do {\ grid_launch_parm lp;\ lp.dynamic_group_mem_bytes = _groupMemBytes; \ - hipStream_t trueStream = (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp)); \ - if (HIP_TRACE_API) {\ - ihipPrintKernelLaunch(#_kernelName, &lp, _stream); \ - }\ + hipStream_t trueStream = (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp, #_kernelName)); \ _kernelName (lp, ##__VA_ARGS__);\ ihipPostLaunchKernel(trueStream, lp);\ } while(0) diff --git a/src/hip_device.cpp b/src/hip_device.cpp index 72c92ac76f..61221e64eb 100644 --- a/src/hip_device.cpp +++ b/src/hip_device.cpp @@ -161,7 +161,7 @@ hipError_t hipSetDevice(int deviceId) hipError_t hipDeviceSynchronize(void) { HIP_INIT_API(); - return ihipSynchronize(); + return ihipLogStatus(ihipSynchronize()); } @@ -182,7 +182,7 @@ hipError_t hipDeviceReset(void) if (ctx) { // Release ctx resources (streams and memory): - ctx->locked_reset(); + ctx->locked_reset(); } return ihipLogStatus(hipSuccess); diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 97911d08eb..f4d20021a9 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -57,10 +57,14 @@ const int release = 1; #define MEMCPY_H2D_DIRECT_VS_STAGING_COPY_THRESHOLD 65336 #define MEMCPY_H2D_STAGING_VS_PININPLACE_COPY_THRESHOLD 1048576 +const char *API_COLOR = KGRN; +const char *API_COLOR_END = KNRM; + int HIP_LAUNCH_BLOCKING = 0; int HIP_PRINT_ENV = 0; int HIP_TRACE_API= 0; +std::string HIP_TRACE_API_COLOR("green"); int HIP_ATP_MARKER= 0; int HIP_DB= 0; int HIP_STAGING_SIZE = 64; /* size of staging buffers, in KB */ @@ -1123,6 +1127,7 @@ void ihipCtx_t::locked_waitAllStreams() +//--- // Read environment variables. void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, const char *description) { @@ -1133,6 +1138,7 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c env = getenv(var_name2); } + // TODO: Refactor this code so it is a separate call rather than being part of ihipReadEnv_I, which should only read integers. // Check if the environment variable is either HIP_VISIBLE_DEVICES or CUDA_LAUNCH_BLOCKING, which // contains a sequence of comma-separated device IDs if (!(strcmp(var_name1,"HIP_VISIBLE_DEVICES") && strcmp(var_name2, "CUDA_VISIBLE_DEVICES")) && env){ @@ -1170,15 +1176,37 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c printf ("%-30s = %2d : %s\n", var_name1, *var_ptr, description); } } - } + +void ihipReadEnv_S(std::string *var_ptr, const char *var_name1, const char *var_name2, const char *description) +{ + char * env = getenv(var_name1); + + // Check second name if first not defined, used to allow HIP_ or CUDA_ env vars. + if ((env == NULL) && strcmp(var_name2, "0")) { + env = getenv(var_name2); + } + + if (env) { + *var_ptr = env; + } + if (HIP_PRINT_ENV) { + printf ("%-30s = %s : %s\n", var_name1, var_ptr->c_str(), description); + } +} + + #if defined (DEBUG) #define READ_ENV_I(_build, _ENV_VAR, _ENV_VAR2, _description) \ if ((_build == release) || (_build == debug) {\ ihipReadEnv_I(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\ }; +#define READ_ENV_S(_build, _ENV_VAR, _ENV_VAR2, _description) \ + if ((_build == release) || (_build == debug) {\ + ihipReadEnv_S(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\ + }; #else @@ -1187,6 +1215,11 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c ihipReadEnv_I(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\ }; +#define READ_ENV_S(_build, _ENV_VAR, _ENV_VAR2, _description) \ + if (_build == release) {\ + ihipReadEnv_S(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\ + }; + #endif @@ -1219,6 +1252,7 @@ void ihipInit() } READ_ENV_I(release, HIP_TRACE_API, 0, "Trace each HIP API call. Print function name and return code to stderr as program executes."); + READ_ENV_S(release, HIP_TRACE_API_COLOR, 0, "Color to use for HIP_API. None/Red/Green/Yellow/Blue/Magenta/Cyan/White"); READ_ENV_I(release, HIP_ATP_MARKER, 0, "Add HIP function begin/end to ATP file generated with CodeXL"); READ_ENV_I(release, HIP_STAGING_SIZE, 0, "Size of each staging buffer (in KB)" ); READ_ENV_I(release, HIP_STAGING_BUFFERS, 0, "Number of staging buffers to use in each direction. 0=use hsa_memory_copy."); @@ -1262,6 +1296,31 @@ void ihipInit() fprintf (stderr, "warning: env var HIP_ATP_MARKER=0x%x but COMPILE_HIP_ATP_MARKER=0. (perhaps enable COMPILE_HIP_DB in src code before compiling?)", HIP_ATP_MARKER); } + std::transform(HIP_TRACE_API_COLOR.begin(), HIP_TRACE_API_COLOR.end(), HIP_TRACE_API_COLOR.begin(), ::tolower); + + if (HIP_TRACE_API_COLOR == "none") { + API_COLOR = ""; + API_COLOR_END = ""; + } else if (HIP_TRACE_API_COLOR == "red") { + API_COLOR = KRED; + } else if (HIP_TRACE_API_COLOR == "green") { + API_COLOR = KGRN; + } else if (HIP_TRACE_API_COLOR == "yellow") { + API_COLOR = KYEL; + } else if (HIP_TRACE_API_COLOR == "blue") { + API_COLOR = KBLU; + } else if (HIP_TRACE_API_COLOR == "magenta") { + API_COLOR = KMAG; + } else if (HIP_TRACE_API_COLOR == "cyan") { + API_COLOR = KCYN; + } else if (HIP_TRACE_API_COLOR == "white") { + API_COLOR = KWHT; + } else { + fprintf (stderr, "warning: env var HIP_TRACE_API_COLOR=%s must be None/Red/Green/Yellow/Blue/Magenta/Cyan/White", HIP_TRACE_API_COLOR.c_str()); + }; + + + /* * Build a table of valid compute devices. @@ -1333,7 +1392,7 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream) #endif return device->_defaultStream; } else { - // Have to wait for legacy default stream to be empty: + // ALl streams have to wait for legacy default stream to be empty: if (!(stream->_flags & hipStreamNonBlocking)) { tprintf(DB_SYNC, "stream %p wait default stream\n", stream); stream->getCtx()->_defaultStream->locked_wait(); @@ -1345,16 +1404,25 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream) void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, const hipStream_t stream) { - std::string streamString = ToString(stream); - fprintf(stderr, KGRN "<grid_dim.x, lp->grid_dim.y, lp->grid_dim.z, lp->group_dim.x, lp->group_dim.y, lp->group_dim.z, - lp->dynamic_group_mem_bytes, streamString.c_str());\ + std::stringstream os; + os << API_COLOR << "<grid_dim + << " groupDim:" << lp->group_dim + << " sharedMem:+" << lp->dynamic_group_mem_bytes + << " " << *stream + << API_COLOR_END << std::endl; + + std::cerr << os.str(); + + //fprintf(stderr, KGRN "<grid_dim.x, lp->grid_dim.y, lp->grid_dim.z, lp->group_dim.x, lp->group_dim.y, lp->group_dim.z, + // lp->dynamic_group_mem_bytes, streamString.c_str()); } // TODO - data-up to data-down: // Called just before a kernel is launched from hipLaunchKernel. // Allows runtime to track some information about the stream. -hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp) +hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr) { HIP_INIT(); stream = ihipSyncAndResolveStream(stream); @@ -1370,6 +1438,11 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_ auto crit = stream->lockopen_preKernelCommand(); lp->av = &(crit->_av); lp->cf = new hc::completion_future; + + if (HIP_TRACE_API) { + ihipPrintKernelLaunch(kernelNameStr, lp, stream); + } + return (stream); } diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 640b2bb7c4..594ddde2f9 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -30,7 +30,7 @@ THE SOFTWARE. //TODO Use Pool APIs from HCC to get memory regions. -namespace hipdrv{ +namespace hipdrv { hsa_status_t findSystemRegions(hsa_region_t region, void *data){ hsa_region_segment_t segment_id; @@ -99,7 +99,7 @@ uint64_t ElfSize(const void *emi){ } hipError_t hipModuleLoad(hipModule_t *module, const char *fname){ - HIP_INIT_API(fname); + HIP_INIT_API(module, fname); hipError_t ret = hipSuccess; *module = new ihipModule_t; @@ -187,7 +187,7 @@ hipError_t ihipModuleGetFunction(hipFunction_t *func, hipModule_t hmod, const ch ret = hipErrorInvalidContext; }else{ - *func = new ihipFunction_t; + *func = new ihipFunction_t(name); int deviceId = ctx->getDevice()->_deviceId; ihipDevice_t *currentDevice = ihipGetDevice(deviceId); hsa_agent_t gpuAgent = (hsa_agent_t)currentDevice->_hsaAgent; @@ -199,14 +199,14 @@ hipError_t ihipModuleGetFunction(hipFunction_t *func, hipModule_t hmod, const ch } status = hsa_executable_freeze(hmod->executable, NULL); - status = hsa_executable_get_symbol(hmod->executable, NULL, name, gpuAgent, 0, &(*func)->kernel_symbol); + status = hsa_executable_get_symbol(hmod->executable, NULL, name, gpuAgent, 0, &(*func)->_kernelSymbol); if(status != HSA_STATUS_SUCCESS){ return ihipLogStatus(hipErrorNotFound); } - status = hsa_executable_symbol_get_info((*func)->kernel_symbol, + status = hsa_executable_symbol_get_info((*func)->_kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, - &(*func)->kernel); + &(*func)->_kernel); if(status != HSA_STATUS_SUCCESS){ return ihipLogStatus(hipErrorNotFound); @@ -215,9 +215,10 @@ hipError_t ihipModuleGetFunction(hipFunction_t *func, hipModule_t hmod, const ch return ihipLogStatus(ret); } + hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name){ - HIP_INIT_API(name); + HIP_INIT_API(hfunc, hmod, name); return ihipModuleGetFunction(hfunc, hmod, name); } @@ -226,8 +227,13 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream, - void **kernelParams, void **extra){ - HIP_INIT_API(f->kernel); + void **kernelParams, void **extra) +{ + HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra); + auto ctx = ihipGetTlsDefaultCtx(); hipError_t ret = hipSuccess; @@ -246,48 +252,47 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, memcpy(config, extra, sizeof(size_t)*5); if(config[0] == HIP_LAUNCH_PARAM_BUFFER_POINTER && config[2] == HIP_LAUNCH_PARAM_BUFFER_SIZE && config[4] == HIP_LAUNCH_PARAM_END){ kernSize = *(size_t*)(config[3]); - }else{ + } else { return ihipLogStatus(hipErrorNotInitialized); } }else{ return ihipLogStatus(hipErrorInvalidValue); } - uint32_t groupSegmentSize; - hsa_status_t status = hsa_executable_symbol_get_info(f->kernel_symbol, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, - &groupSegmentSize); + uint32_t groupSegmentSize; + hsa_status_t status = hsa_executable_symbol_get_info(f->kernel_symbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, + &groupSegmentSize); - uint32_t privateSegmentSize; - status = hsa_executable_symbol_get_info(f->kernel_symbol, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, - &privateSegmentSize); - - privateSegmentSize += sharedMemBytes; + uint32_t privateSegmentSize; + status = hsa_executable_symbol_get_info(f->kernel_symbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, + &privateSegmentSize); + privateSegmentSize += sharedMemBytes; -/* -Kernel argument preparation. -*/ + /* + Kernel argument preparation. + */ grid_launch_parm lp; - hStream = ihipPreLaunchKernel(hStream, 0, 0, &lp); + hStream = ihipPreLaunchKernel(hStream, 0, 0, &lp, f->_kernelName); -/* - Create signal -*/ + /* + Create signal + */ hsa_signal_t signal; status = hsa_signal_create(1, 0, NULL, &signal); -/* - Launch AQL packet -*/ + /* + Launch AQL packet + */ hStream->launchModuleKernel(*lp.av, signal, blockDimX, blockDimY, blockDimZ, - gridDimX, gridDimY, gridDimZ, groupSegmentSize, privateSegmentSize, config[1], kernSize, f->kernel); + gridDimX, gridDimY, gridDimZ, groupSegmentSize, privateSegmentSize, config[1], kernSize, f->_kernel); -/* - Wait for signal -*/ + /* + Wait for signal + */ hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); @@ -301,8 +306,9 @@ Kernel argument preparation. hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, - hipModule_t hmod, const char* name){ - HIP_INIT_API(name); + hipModule_t hmod, const char* name) +{ + HIP_INIT_API(dptr, bytes, hmod, name); hipError_t ret = hipSuccess; if(dptr == NULL || bytes == NULL){ return ihipLogStatus(hipErrorInvalidValue); @@ -314,13 +320,15 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, hipFunction_t func; ihipModuleGetFunction(&func, hmod, name); *bytes = PrintSymbolSizes(hmod->ptr, name) + sizeof(amd_kernel_code_t); - *dptr = reinterpret_cast(func->kernel); + *dptr = reinterpret_cast(func->_kernel); return ihipLogStatus(ret); } } -hipError_t hipModuleLoadData(hipModule_t *module, const void *image){ - HIP_INIT_API(image); + +hipError_t hipModuleLoadData(hipModule_t *module, const void *image) +{ + HIP_INIT_API(module, image); hipError_t ret = hipSuccess; if(image == NULL || module == NULL){ return ihipLogStatus(hipErrorNotInitialized); diff --git a/tests/src/hipLaunchParm.cpp b/tests/src/hipLaunchParm.cpp index 2f4bf11ea2..26ad94f182 100644 --- a/tests/src/hipLaunchParm.cpp +++ b/tests/src/hipLaunchParm.cpp @@ -38,7 +38,7 @@ __global__ void vAdd(hipLaunchParm lp, float *a){} cmd;\ hipDeviceSynchronize();\ gettimeofday(&stop, NULL);\ - } while(0); + } while(0); @@ -61,7 +61,9 @@ int main() { float *Ad; hipMalloc((void**)&Ad, 1024); - hipLaunchKernel(vAdd, 1024, 1, 0, 0, Ad); + + // Test the different hipLaunchParm options: + hipLaunchKernel(vAdd, size_t(1024), 1, 0, 0, Ad); hipLaunchKernel(vAdd, 1024, dim3(1), 0, 0, Ad); hipLaunchKernel(vAdd, dim3(1024), 1, 0, 0, Ad); hipLaunchKernel(vAdd, dim3(1024), dim3(1), 0, 0, Ad); diff --git a/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp b/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp index c5a74b2bc0..7148f50628 100644 --- a/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp +++ b/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp @@ -27,7 +27,7 @@ unsigned p_streams = 6; //------ // Structure for one stream; -template +template class Streamer { public: Streamer(size_t numElements); @@ -99,7 +99,7 @@ void parseMyArguments(int argc, char *argv[]) //--- int main(int argc, char *argv[]) { - HipTest::parseStandardArguments(argc, argv, true); + HipTest::parseStandardArguments(argc, argv, false); parseMyArguments(argc, argv); typedef Streamer FloatStreamer;