SWDEV-457922: Temporary fix to prevent multiple instances of rocprofiler v1 or v2 from running at the same time, in either interception or standalone mode.

The fix works by using a pid file and a lock file, similar to how Firefox prevents multiple instances.

Change-Id: I4974a80bf920eaf0376a53410573c1cc74ec17ac


[ROCm/rocprofiler commit: 056156135d]
Tento commit je obsažen v:
Saurabh Verma
2024-05-17 18:32:06 -05:00
rodič d7c2fa8867
revize c41bbe21d0
7 změnil soubory, kde provedl 176 přidání a 4 odebrání
+5 -2
Zobrazit soubor
@@ -70,7 +70,8 @@ set(OLD_LIB_SRC
${LIB_DIR}/core/intercept_queue.cpp
${LIB_DIR}/core/metrics.cpp
${LIB_DIR}/core/activity.cpp
${LIB_DIR}/util/hsa_rsrc_factory.cpp)
${LIB_DIR}/util/hsa_rsrc_factory.cpp
${LIB_DIR}/core/profiling_lock.cpp)
# ########################################################################################
# Configuring Basic/Derived Counters
@@ -213,6 +214,7 @@ file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)
file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp)
file(GLOB CORE_PROFILING_LOCK_FILES ${PROJECT_SOURCE_DIR}/src/core/profiling_lock.cpp)
file(GLOB CORE_COUNTERS_SRC_FILES ${PROJECT_BINARY_DIR}/src/api/*_counter.cpp)
file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp)
@@ -289,7 +291,8 @@ add_library(
${ROCPROFILER_ROCTRACER_SRC_FILES}
${GENERATED_SOURCES}
${CORE_COUNTERS_SRC_FILES}
${CORE_PC_SAMPLING_FILES})
${CORE_PC_SAMPLING_FILES}
${CORE_PROFILING_LOCK_FILES})
set_target_properties(
rocprofiler-v2
PROPERTIES CXX_VISIBILITY_PRESET hidden
+2
Zobrazit soubor
@@ -9,6 +9,7 @@
#include "src/api/rocprofiler_singleton.h"
#include "src/utils/helper.h"
#include "src/core/isa_capture/code_object_track.hpp"
#include "core/profiling_lock.h"
// TODO(aelwazir): change that to adapt with our own Exception
// What about outside exceptions and callbacks exceptions!!
@@ -617,6 +618,7 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
uint64_t failed_tool_count, const char* const* failed_tool_names) {
if (started) rocprofiler::fatal("HSA Tool started already!");
started = true;
ProfilingLock::Lock(PROFILER_V2_LOCK);
rocprofiler::HSASupport_Singleton::GetInstance().HSAInitialize(table);
return true;
}
+137
Zobrazit soubor
@@ -0,0 +1,137 @@
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <cstdlib>
#include <sstream>
#include <sstream>
#include <cstring>
#include "util/exception.h"
#include "profiling_lock.h"
#define ROCPROFILER_LOCK_FILE "/tmp/rocprofiler_process.lock"
#define ROCPROFILER_PID_FILE "/tmp/rocprofiler.pid"
// Opens `lockName` (creating it when missing) and tries to take an exclusive,
// non-blocking flock() on it.
//
// Returns the open descriptor that holds the lock, or a negative value when
// the file could not be opened or the lock is already held elsewhere.
int acquire_lock(char const* lockName) {
  // Clear the umask only around open() so the file is created with the full
  // 0666 mode, then put the previous mask back.
  const mode_t saved_mask = umask(0);
  const int handle = open(lockName, O_RDWR | O_CREAT, 0666);
  umask(saved_mask);

  if (handle < 0) return handle;
  if (flock(handle, LOCK_EX | LOCK_NB) >= 0) return handle;

  // Somebody else owns the lock: do not leak the descriptor.
  close(handle);
  return -1;
}
// Gives up a lock obtained from acquire_lock(): the lock file `lockName` is
// removed first and the descriptor `fd` is closed afterwards.
// A negative `fd` means no lock was held, so nothing is touched.
void release_lock(int fd, char const* lockName) {
  if (fd >= 0) {
    remove(lockName);
    close(fd);
  }
}
// Returns true when stat() succeeds on `file_name`, false otherwise.
bool file_exists(const char* file_name) {
  struct stat info;
  const int rc = stat(file_name, &info);
  return rc == 0;
}
// Writes the calling process id, as decimal text, into `pid_file`,
// truncating any previous content.
// Returns 0 on success and -1 when the file cannot be opened for writing.
int create_pid_file(const char* pid_file) {
  FILE* out = fopen(pid_file, "w");
  if (out == nullptr) return -1;

  const int self_pid = (int)getpid();
  fprintf(out, "%d", self_pid);
  fclose(out);
  return 0;
}
// Reads a decimal process id from `pid_file`.
// Returns the parsed value when it is strictly positive; returns -1 when the
// file cannot be opened, holds no number, or holds a value <= 0.
int read_pid_file(const char* pid_file) {
  FILE* in = fopen(pid_file, "r");
  if (in == nullptr) return -1;

  // Stays at -1 when fscanf() cannot parse an integer.
  int parsed = -1;
  fscanf(in, "%d", &parsed);
  fclose(in);

  if (parsed <= 0) return -1;
  return parsed;
}
// Reports whether a "/proc/<pid>" entry can be stat()'ed for `pid`.
// Returns true when stat() succeeds and false when it fails.
bool check_process_exists(int pid) {
  std::ostringstream proc_entry;
  proc_entry << "/proc/" << pid;
  const std::string proc_path = proc_entry.str();

  struct stat info;
  return stat(proc_path.c_str(), &info) != -1;
}
// Reports that another profiling instance is already active.
//
// Invokes the EXC_RAISING macro with error code 0 and a message that tells the
// user to inspect ROCPROFILER_PID_FILE, kill the owning process and delete the
// pid file before retrying.
//
// NOTE(review): EXC_RAISING is not defined in this file (presumably it comes
// from util/exception.h and throws) - confirm whether control can ever return
// to the caller.
void terminate_current_profiler_instance() {
EXC_RAISING(
0,
"\nA profiling instance already exists! Multiple profiling instances are not "
"allowed.\nCheck "
<< ROCPROFILER_PID_FILE
<< " and kill the process, delete this .pid file and try again.\nTerminating ...\n");
}
// Tells whether the profiler runs in standalone mode, i.e. whether the
// ROCP_HSA_INTERCEPT environment variable is unset or parses (via atoi) to 0.
// The environment is inspected once, on the first call; later calls return
// the cached answer.
bool check_standalone_mode() {
  static const bool standalone = [] {
    const char* raw_value = getenv("ROCP_HSA_INTERCEPT");
    // An unset variable is treated the same as an explicit 0.
    const int intercept_level = (raw_value != NULL) ? atoi(raw_value) : 0;
    return intercept_level == 0;
  }();
  return standalone;
}
// Refuses to continue when another profiler instance is already recorded as
// running, and records this process as the running instance when it is
// profiler v1 in standalone mode.
//
// Steps, all performed while holding the lock file:
//   1. If the pid file exists and names a process that is still present under
//      /proc, release the lock and call terminate_current_profiler_instance().
//   2. If `mode` is PROFILER_V1_LOCK and check_standalone_mode() is true,
//      (re)write the pid file with this process id.
//   3. Release the lock.
//
// `mode` selects which profiler generation is asking for the lock.
//
// NOTE(review): terminate_current_profiler_instance() is expected not to
// return (it uses EXC_RAISING, defined elsewhere); the explicit `return`
// statements below only guard against it returning anyway.
void ProfilingLock::Lock(LockMode mode) {
  // Only profiler v1 in standalone mode writes the pid file.
  const bool is_standalone_mode_v1 = check_standalone_mode() && (mode == PROFILER_V1_LOCK);
  ProfilingLock* profiling_lock = Instance();

  // Take the lock BEFORE looking at the pid file so that the existence check,
  // the liveness check and the rewrite cannot interleave with another process.
  profiling_lock->lock = acquire_lock(profiling_lock->lock_file);

  // acquire_lock() signals failure with a negative value; 0 is a legal
  // descriptor (e.g. when stdin was closed) and must not be treated as failure.
  if (profiling_lock->lock < 0) {
    terminate_current_profiler_instance();
    return;
  }

  const bool owner_alive = file_exists(profiling_lock->pid_file) &&
                           check_process_exists(read_pid_file(profiling_lock->pid_file));
  if (owner_alive) {
    release_lock(profiling_lock->lock, profiling_lock->lock_file);
    profiling_lock->lock = -1;
    terminate_current_profiler_instance();
    return;
  }

  if (is_standalone_mode_v1) create_pid_file(profiling_lock->pid_file);

  release_lock(profiling_lock->lock, profiling_lock->lock_file);
  // The descriptor is closed now; do not keep a dangling value around.
  profiling_lock->lock = -1;
}
// Points the instance at the fixed lock/pid file paths and marks the lock
// descriptor as "not held" (-1).
ProfilingLock::ProfilingLock()
    : lock_file(ROCPROFILER_LOCK_FILE), pid_file(ROCPROFILER_PID_FILE), lock(-1) {}
// Cleans up the pid file when the singleton is destroyed.
//
// The pid file is removed only when it belongs to this process or when the
// process it names is no longer present. A process that merely instantiated
// the singleton (for example a forked child, or a second profiler that was
// rejected) must not delete the pid file of a profiler that is still running.
// If the lock cannot be taken, nothing is touched.
ProfilingLock::~ProfilingLock() {
  this->lock = acquire_lock(this->lock_file);
  // acquire_lock() reports failure with a negative value; 0 is a valid fd.
  if (this->lock < 0) return;

  // read_pid_file() yields -1 for a missing or unparsable file, which
  // check_process_exists() reports as "not running", so such files are removed.
  const int owner_pid = read_pid_file(this->pid_file);
  if (owner_pid == (int)getpid() || !check_process_exists(owner_pid)) {
    remove(this->pid_file);
  }

  release_lock(this->lock, this->lock_file);
  this->lock = -1;
}
// Returns the address of the single function-local static ProfilingLock,
// which is constructed on the first call.
ProfilingLock* ProfilingLock::Instance() {
  static ProfilingLock singleton;
  ProfilingLock* const handle = &singleton;
  return handle;
}
+24
Zobrazit soubor
@@ -0,0 +1,24 @@
#ifndef _SRC_CORE_PROFILING_LOCK_H
#define _SRC_CORE_PROFILING_LOCK_H
// Identifies which profiler generation is requesting the profiling lock.
enum LockMode{
// Passed by the profiler v1 OnLoad entry point.
PROFILER_V1_LOCK,
// Passed by the profiler v2 OnLoad entry point.
PROFILER_V2_LOCK,
};
// Guard used to keep more than one profiler instance from running at once,
// based on a lock file and a pid file. The constructor is private, so the
// only entry point for callers is the static Lock().
class ProfilingLock {
public:
// Checks for an already running profiler instance on behalf of `mode`.
static void Lock(LockMode mode);
~ProfilingLock();
private:
ProfilingLock();
// Accessor for the single instance.
static ProfilingLock *Instance();
// Path of the file the lock is taken on.
const char *lock_file;
// Path of the file holding the pid of the running profiler.
const char *pid_file;
// Descriptor returned by the lock acquisition; -1 when no lock is held.
int lock;
};
#endif
+2
Zobrazit soubor
@@ -39,6 +39,7 @@ THE SOFTWARE.
#include "util/exception.h"
#include "util/hsa_rsrc_factory.h"
#include "util/logger.h"
#include "core/profiling_lock.h"
#define PUBLIC_API __attribute__((visibility("default")))
#define CONSTRUCTOR_API __attribute__((constructor))
@@ -393,6 +394,7 @@ ROCPROFILER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 25;
PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
const char* const* failed_tool_names) {
ONLOAD_TRACE_BEG();
ProfilingLock::Lock(PROFILER_V1_LOCK);
rocprofiler::SaveHsaApi(table);
rocprofiler::ProxyQueue::InitFactory();
+3 -1
Zobrazit soubor
@@ -58,6 +58,7 @@ file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)
file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp)
file(GLOB CORE_PROFILING_LOCK_FILES ${PROJECT_SOURCE_DIR}/src/core/profiling_lock.cpp)
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
@@ -209,7 +210,8 @@ set(runCoreUnitTests_SOURCES
${CORE_COUNTERS_METRICS_SRC_FILES}
${CORE_COUNTERS_MMIO_SRC_FILES}
${CORE_COUNTERS_PARENT_SRC_FILES}
${CORE_PC_SAMPLING_FILES})
${CORE_PC_SAMPLING_FILES}
${CORE_PROFILING_LOCK_FILES})
add_executable(runCoreUnitTests ${runCoreUnitTests_SOURCES})
+3 -1
Zobrazit soubor
@@ -36,6 +36,7 @@ file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)
file(GLOB CORE_COUNTERS_SAMPLER_SRC_FILES ${CORE_SESSION_DIR}/counters_sampler.cpp)
file(GLOB CORE_PROFILING_LOCK_FILES ${PROJECT_SOURCE_DIR}/src/core/profiling_lock.cpp)
set(CORE_HW_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
file(GLOB CORE_HW_SRC_FILES ${CORE_HW_DIR}/hsa_info.cpp)
@@ -104,7 +105,8 @@ set(runUnitTests_SOURCES
${CORE_COUNTERS_METRICS_SRC_FILES}
${CORE_COUNTERS_MMIO_SRC_FILES}
${CORE_COUNTERS_PARENT_SRC_FILES}
${CORE_PC_SAMPLING_FILES})
${CORE_PC_SAMPLING_FILES}
${CORE_PROFILING_LOCK_FILES})
add_executable(runUnitTests ${runUnitTests_SOURCES})