diff --git a/docs/markdown/hip_faq.md b/docs/markdown/hip_faq.md index f2fa3346cc..70ad94ba43 100644 --- a/docs/markdown/hip_faq.md +++ b/docs/markdown/hip_faq.md @@ -229,43 +229,7 @@ If platform portability is important, use #ifdef __HIP_PLATFORM_HIPCC__ to guard ### How do I trace HIP application flow? -#### Using CodeXL markers for HIP Functions -HIP can generate markers at function being/end which are displayed on the CodeXL timeline view. -To do this, you need to install ROCm-Profiler and enable HIP to generate the markers: - -1. Install ROCm-Profiler -Installing HIP from the [rocm](http://gpuopen.com/getting-started-with-boltzmann-components-platforms-installation/) pre-built packages, installs the ROCm-Profiler as well. -Alternatively, you can build ROCm-Profiler using the instructions [here](https://github.com/RadeonOpenCompute/ROCm-Profiler#building-the-rocm-profiler). - -2. Build HIP with ATP markers enabled -HIP pre-built packages are enabled with ATP marker support by default. -To enable ATP marker support when building HIP from source, use the option ```-DCOMPILE_HIP_ATP_MARKER=1``` during the cmake configure step. - -3. Set HIP_ATP_MARKER -```shell -export HIP_ATP_MARKER=1 -``` - -4. Recompile the target application - -5. Run with profiler enabled to generate ATP file. -```shell -# Use profile to generate timeline view: -/opt/rocm/bin/rocm-profiler -o -A - -Or -/opt/rocm/bin/rocm-profiler -e HIP_ATP_MARKER=1 -o -A -``` - -#### Using HIP_TRACE_API -You can also print the HIP function strings to stderr using HIP_TRACE_API environment variable. This can also be combined with the more detailed debug information provided -by the HIP_DB switch. For example: -```shell -# Trace to stderr showing being/end of each function (with arguments) + intermediate debug trace during the execution of each function. -HIP_TRACE_API=1 HIP_DB=0x2 ./myHipApp -``` - -Note this trace mode uses colors. 
"less -r" can handle raw control characters and will display the debug output in proper colors. +See the [HIP Porting Guide](hip_porting_guide.md) for more information. ### What if HIP generates error of "symbol multiply defined!" only on AMD machine? Unlike CUDA, in HCC, for functions defined in the header files, the keyword of "__forceinline__" does not imply "static". diff --git a/docs/markdown/hip_porting_guide.md b/docs/markdown/hip_porting_guide.md index 621726ee5f..c530df5098 100644 --- a/docs/markdown/hip_porting_guide.md +++ b/docs/markdown/hip_porting_guide.md @@ -564,7 +564,7 @@ HIP_LAUNCH_BLOCKING = 0 : Make HIP APIs 'host-synchronous', so they HIP_DB = 0 : Print various debug info. Bitmask, see hip_hcc.cpp for more information. HIP_TRACE_API = 0 : Trace each HIP API call. Print function name and return code to stderr as program executes. HIP_TRACE_API_COLOR = green : Color to use for HIP_API. None/Red/Green/Yellow/Blue/Magenta/Cyan/White -HIP_ATP_MARKER = 0 : Add HIP function begin/end to ATP file generated with CodeXL +HIP_PROFILE_API = 0 : Add HIP function begin/end to ATP file generated with CodeXL HIP_VISIBLE_DEVICES = 0 : Only devices whose index is present in the secquence are visible to HIP applications and they are enumerated in the order of secquence HIP_NUM_KERNELS_INFLIGHT = 128 : Number of kernels per stream diff --git a/samples/2_Cookbook/2_HIP_ATP_MARKER/Makefile b/samples/2_Cookbook/2_CodeXL_ATP/Makefile similarity index 100% rename from samples/2_Cookbook/2_HIP_ATP_MARKER/Makefile rename to samples/2_Cookbook/2_CodeXL_ATP/Makefile diff --git a/samples/2_Cookbook/2_HIP_ATP_MARKER/MatrixTranspose.cpp b/samples/2_Cookbook/2_CodeXL_ATP/MatrixTranspose.cpp similarity index 100% rename from samples/2_Cookbook/2_HIP_ATP_MARKER/MatrixTranspose.cpp rename to samples/2_Cookbook/2_CodeXL_ATP/MatrixTranspose.cpp diff --git a/samples/2_Cookbook/2_HIP_ATP_MARKER/Readme.md b/samples/2_Cookbook/2_CodeXL_ATP/Readme.md similarity index 100% rename from 
samples/2_Cookbook/2_HIP_ATP_MARKER/Readme.md rename to samples/2_Cookbook/2_CodeXL_ATP/Readme.md diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 3a62f63d0a..5d96ceae4d 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -63,9 +63,9 @@ int HIP_LAUNCH_BLOCKING = 0; int HIP_PRINT_ENV = 0; int HIP_TRACE_API= 0; std::string HIP_TRACE_API_COLOR("green"); -int HIP_ATP_MARKER= 0; -std::string HIP_PROFILE_START_API; -std::string HIP_PROFILE_STOP_API; +int HIP_PROFILE_API= 0; +std::string HIP_DB_START_API; +std::string HIP_DB_STOP_API; int HIP_DB= 0; int HIP_VISIBLE_DEVICES = 0; /* Contains a comma-separated sequence of GPU identifiers */ int HIP_NUM_KERNELS_INFLIGHT = 128; @@ -94,8 +94,8 @@ std::atomic g_lastShortTid(1); // Indexed by short-tid: // -std::vector g_profStartTriggers; -std::vector g_profStopTriggers; +std::vector g_dbStartTriggers; +std::vector g_dbStopTriggers; @@ -201,15 +201,15 @@ void recordApiTrace(const std::string &s) auto apiSeqNum = tls_shortTid.incApiSeqNum(); auto tid = tls_shortTid.tid(); - if ((tid < g_profStartTriggers.size()) && (apiSeqNum >= g_profStartTriggers[tid].nextTrigger())) { + if ((tid < g_dbStartTriggers.size()) && (apiSeqNum >= g_dbStartTriggers[tid].nextTrigger())) { printf ("info: resume profiling at %lu\n", apiSeqNum); RESUME_PROFILING; - g_profStartTriggers.pop_back(); + g_dbStartTriggers.pop_back(); }; - if ((tid < g_profStopTriggers.size()) && (apiSeqNum >= g_profStopTriggers[tid].nextTrigger())) { + if ((tid < g_dbStopTriggers.size()) && (apiSeqNum >= g_dbStopTriggers[tid].nextTrigger())) { printf ("info: stop profiling at %lu\n", apiSeqNum); STOP_PROFILING; - g_profStopTriggers.pop_back(); + g_dbStopTriggers.pop_back(); }; @@ -1062,43 +1062,14 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c env = getenv(var_name2); } - // TODO: Refactor this code so it is a separate call rather than being part of ihipReadEnv_I, which should only read integers. 
- // Check if the environment variable is either HIP_VISIBLE_DEVICES or CUDA_LAUNCH_BLOCKING, which - // contains a sequence of comma-separated device IDs - if (!(strcmp(var_name1,"HIP_VISIBLE_DEVICES") && strcmp(var_name2, "CUDA_VISIBLE_DEVICES")) && env){ - // Parse the string stream of env and store the device ids to g_hip_visible_devices global variable - std::string str = env; - std::istringstream ss(str); - std::string device_id; - // Clean up the defult value - g_hip_visible_devices.clear(); - g_visible_device = true; - // Read the visible device numbers - while (std::getline(ss, device_id, ',')) { - if (atoi(device_id.c_str()) >= 0) { - g_hip_visible_devices.push_back(atoi(device_id.c_str())); - } else { // Any device number after invalid number will not present - break; - } - } - // Print out the number of ids - if (HIP_PRINT_ENV) { - printf ("%-30s = ", var_name1); - for(int i=0;i (var_ptr); @@ -1275,9 +1246,9 @@ std::string HIP_DB_callback(void *var_ptr, const char *envVarString) tokenize(e, '+', &tokens); for (auto t=tokens.begin(); t!= tokens.end(); t++) { for (int i=0; ic_str(), dbName[i]._shortName)) { + if (!strcmp(t->c_str(), dbName[i]._shortName)) { *var_ptr_int |= (1<= 0) { + g_hip_visible_devices.push_back(atoi(device_id.c_str())); + } else { // Any device number after invalid number will not present + break; + } + } + + std::string valueString; + // Print out the number of ids + for(int i=0;i g_profStartTriggers; -extern std::vector g_profStopTriggers; +extern std::vector g_dbStartTriggers; +extern std::vector g_dbStopTriggers; //--- //Forward defs: @@ -176,7 +176,7 @@ extern void recordApiTrace(const std::string &s); #if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1) #define API_TRACE(...)\ {\ - if (HIP_ATP_MARKER || (COMPILE_HIP_DB && HIP_TRACE_API)) {\ + if (HIP_PROFILE_API || (COMPILE_HIP_DB && HIP_TRACE_API)) {\ recordApiTrace(std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')');\ }\ }