Profiler - Remove rocprofiler-v1 remnants

Also force unset HSA_TOOLS_LIB so it doesn't break rocprofiler-sdk

Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>


[ROCm/rdc commit: e73eaf8115]
This commit is contained in:
Galantsev, Dmitrii
2025-06-27 07:00:01 -05:00
committed by Galantsev, Dmitrii
parent bb0c4b7653
commit 89a495e493
4 changed files with 13 additions and 130 deletions
-28
View File
@@ -578,34 +578,6 @@ The RAS plugin enables monitoring and counting of ECC (Error-Correcting Code) er
>
>#### 🐍 dmon RocProfiler Fields Return Zeros
>
>**Solution:**
>
>Set the `HSA_TOOLS_LIB` environment variable **before** running a compute job.
>
>```bash
>export HSA_TOOLS_LIB=/opt/rocm/lib/librocprofiler64.so.1
>```
>
>**Example:**
>
>```bash
># Terminal 1
>rdcd -u
>
># Terminal 2
>export HSA_TOOLS_LIB=/opt/rocm/lib/librocprofiler64.so.1
>gpu-burn
>
># Terminal 3
>rdci dmon -u -e 800,801 -i 0 -c 1
>
># Output:
>GPU OCCUPANCY_PERCENT ACTIVE_WAVES
>0 001.000 32640.000
>```
>
>#### ⚠️ `HSA_STATUS_ERROR_OUT_OF_RESOURCES`
>
>**Error Message:**
>
>```
-28
View File
@@ -309,34 +309,6 @@ Known issues
- Limited metrics on MI200.
- Consumer GPUs such as RX6800 have fewer supported metrics.
- dmon RocProfiler fields return zeros
**Solution:**
Set the ``HSA_TOOLS_LIB`` environment variable before running a compute job.
.. code-block:: shell
export HSA_TOOLS_LIB=/opt/rocm/lib/librocprofiler64.so.1
**Example:**
.. code-block:: shell
# Terminal 1
rdcd -u
# Terminal 2
export HSA_TOOLS_LIB=/opt/rocm/lib/librocprofiler64.so.1
gpu-burn
# Terminal 3
rdci dmon -u -e 800,801 -i 0 -c 1
# Output:
GPU OCCUPANCY_PERCENT ACTIVE_WAVES
0 001.000 32640.000
- HSA_STATUS_ERROR_OUT_OF_RESOURCES
**Error message:**
@@ -60,13 +60,12 @@ class RdcRocpLib : public RdcTelemetry {
rdc_status_t (*rdc_module_init_)(uint64_t);
rdc_status_t (*rdc_module_destroy_)();
/**
* @brief Extract current ROCM_PATH from library or the environment
* @brief Make sure HSA_TOOLS_LIB is not set as it breaks rocprofiler-sdk
* @details
* rocprofiler-v1 needed HSA_TOOLS_LIB set to librocprofiler64.so.1.
* Leaving the variable set breaks rocprofiler-sdk because it tries to load both v1 and sdk libraries.
*/
std::string get_rocm_path();
/**
* @brief Set ROCPROFILER_METRICS_PATH environment variable needed by rocprofiler
*/
rdc_status_t set_rocprofiler_path();
void rdc_unset_hsa_tools_lib();
};
using RdcRocpLibPtr = std::shared_ptr<RdcRocpLib>;
+8 -68
View File
@@ -27,6 +27,7 @@ THE SOFTWARE.
#include <fstream>
#include <string>
#include "rdc/rdc.h"
#include "rdc_lib/RdcException.h"
#include "rdc_lib/RdcTelemetryLibInterface.h"
@@ -41,14 +42,10 @@ RdcRocpLib::RdcRocpLib()
telemetry_fields_unwatch_(nullptr),
rdc_module_init_(nullptr),
rdc_module_destroy_(nullptr) {
rdc_status_t status = set_rocprofiler_path();
if (status != RDC_ST_OK) {
RDC_LOG(RDC_ERROR, "Rocp related function will not work.");
throw RdcException(RDC_ST_FAIL_LOAD_MODULE, "rocprofiler path could not be set");
return;
}
// must happen before library is loaded
rdc_unset_hsa_tools_lib();
status = lib_loader_.load("librdc_rocp.so");
rdc_status_t status = lib_loader_.load("librdc_rocp.so");
if (status != RDC_ST_OK) {
RDC_LOG(RDC_ERROR, "Rocp related function will not work.");
return;
@@ -152,68 +149,11 @@ rdc_status_t RdcRocpLib::rdc_telemetry_fields_unwatch(rdc_gpu_field_t* fields,
return telemetry_fields_unwatch_(fields, fields_count);
}
std::string RdcRocpLib::get_rocm_path() {
// set default rocm path in case lookup fails
std::string rocm_path(ROCM_DIR);
const char* rocm_path_env = getenv("ROCM_PATH");
if (rocm_path_env != nullptr) {
rocm_path = rocm_path_env;
void RdcRocpLib::rdc_unset_hsa_tools_lib() {
int status = unsetenv("HSA_TOOLS_LIB");
if (status != 0) {
RDC_LOG(RDC_ERROR, "Failed to unset HSA_TOOLS_LIB environment variable.");
}
std::ifstream file("/proc/self/maps");
if (!file.is_open()) {
return rocm_path;
}
std::string line;
while (getline(file, line)) {
size_t index_end = line.find("librocprofiler-register.so");
size_t index_start = index_end;
if (index_end == std::string::npos) {
// no library on this line
continue;
}
// walk index backwards until it reaches a space
while ((index_start > 0) && (line[index_start - 1] != ' ')) {
index_start--;
}
// extract library path, drop library name
rocm_path = line.substr(index_start, index_end - index_start);
// appending ".." should result in "/opt/rocm/lib/.." or similar
rocm_path += "..";
return rocm_path;
}
return rocm_path;
}
/**
 * @brief Point ROCPROFILER_METRICS_PATH at "<rocm path>/share/rocprofiler-sdk/" and verify it.
 * @details
 * The default value is built from get_rocm_path(). setenv() is called with
 * overwrite == 0, so a value that is already present in the environment is kept,
 * and that pre-existing value is the one validated afterwards.
 * @return RDC_ST_OK on success;
 *         RDC_ST_PERM_ERROR if setenv() fails;
 *         RDC_ST_NO_DATA if the variable is still unset after setenv();
 *         RDC_ST_FILE_ERROR if the resulting path cannot be opened for reading.
 */
rdc_status_t RdcRocpLib::set_rocprofiler_path() {
// rocprofiler requires ROCPROFILER_METRICS_PATH to be set
std::string rocprofiler_metrics_path = get_rocm_path() + "/share/rocprofiler-sdk/";
// export the default; overwrite == 0 leaves an already-set ROCPROFILER_METRICS_PATH untouched
int result = setenv("ROCPROFILER_METRICS_PATH", rocprofiler_metrics_path.c_str(), 0);
if (result != 0) {
RDC_LOG(RDC_ERROR, "setenv ROCPROFILER_METRICS_PATH failed! " << result);
return RDC_ST_PERM_ERROR;
}
// read the variable back: this yields either the default set above or the pre-existing value
const char* rocprofiler_metrics_env = getenv("ROCPROFILER_METRICS_PATH");
if (rocprofiler_metrics_env == nullptr) {
RDC_LOG(RDC_ERROR, "ROCPROFILER_METRICS_PATH is not set!");
return RDC_ST_NO_DATA;
}
// check that the effective path can be opened for reading
std::ifstream test_file(rocprofiler_metrics_env);
if (!test_file.good()) {
RDC_LOG(RDC_ERROR, "failed to open ROCPROFILER_METRICS_PATH: " << rocprofiler_metrics_env);
return RDC_ST_FILE_ERROR;
}
return RDC_ST_OK;
}
} // namespace rdc