rocr: GPU core file location support (#1732)

* rocr: WIP Support dump of GPU core file

* WIP new core dump tests compile

* WIP: anonymous namespaces, test updates, progress

Added disabled Fault test. Other non-disabled coredump tests don't work.

* WIP: address code review feedback

* WIP: gpu core dump rocrtst works; combined

* WIP: remove rocrtst changes for this commit
Αυτή η υποβολή περιλαμβάνεται σε:
cfreeamd
2025-11-20 20:50:51 -06:00
υποβλήθηκε από GitHub
γονέας adf6a5ec3b
υποβολή 24c2a84e3f
4 αρχεία άλλαξαν με 407 προσθήκες και 61 διαγραφές
@@ -1323,8 +1323,8 @@ bool AqlQueue::ExceptionHandler(hsa_signal_value_t error_code, void* arg) {
return exceptionHandlerDone();
}
// Fallback if KFD does not support GPU core dump. In this case, there core dump is
// generated by hsa-runtime.
// Fallback if KFD does not support GPU core dump. In this case, the core
// dump is generated by hsa-runtime.
if (!core::Runtime::runtime_singleton_->KfdVersion().supports_core_dump &&
queue->agent_->supported_isas()[0]->GetMajorVersion() != 11) {
@@ -2236,7 +2236,7 @@ bool Runtime::VMFaultHandler(hsa_signal_value_t val, void* arg) {
PrintMemoryMapNear(reinterpret_cast<void*>(fault.VirtualAddress));
#endif
}
// Fallback if KFD does not support GPU core dump. In this case, there core dump is
// Fallback if KFD does not support GPU core dump. In this case, the core dump is
// generated by hsa-runtime.
if (faulty_agent &&
faulty_agent->supported_isas()[0]->GetMajorVersion() != 11 &&
@@ -298,6 +298,14 @@ class Flag {
var = os::GetEnvVar("HSA_CO_DMACOPY_SIZE");
co_dmacopy_size_ = var.empty() ? 1024*1024 : atoi(var.c_str());
var = os::GetEnvVar("HSA_COREDUMP_SHOW_PROGRESS");
enable_core_dump_progress_ = (var == "1");
var = os::GetEnvVar("HSA_DISABLE_COREDUMP_ON_EXCEPTION");
core_dump_disable_ = (var == "1");
core_dump_pattern_ = os::GetEnvVar("HSA_COREDUMP_PATTERN");
}
void parse_masks(uint32_t maxGpu, uint32_t maxCU) {
@@ -430,6 +438,17 @@ class Flag {
bool enable_dxg_detection() const { return enable_dxg_detection_; }
// Returns the core-dump disable flag. The visible environment parsing sets it
// to true only when HSA_DISABLE_COREDUMP_ON_EXCEPTION equals "1"; the member
// otherwise defaults to false.
[[nodiscard]]
bool core_dump_disable() const { return core_dump_disable_; }
// Returns the core-dump progress flag. The visible environment parsing sets it
// to true only when HSA_COREDUMP_SHOW_PROGRESS equals "1"; the member
// otherwise defaults to false.
[[nodiscard]]
bool enable_core_dump_progress() const {
return enable_core_dump_progress_; }
// Returns the custom core-dump output pattern. The visible environment parsing
// fills it from HSA_COREDUMP_PATTERN; the string is default-constructed (empty)
// otherwise. The reference refers to a member of this Flag object.
[[nodiscard]]
const std::string& core_dump_pattern() const {
return core_dump_pattern_; }
void set_sdma(bool peer_sdma, bool sdma_gang) {
enable_peer_sdma_ = peer_sdma ? SDMA_ENABLE : SDMA_DISABLE;
enable_sdma_gang_ = sdma_gang ? SDMA_ENABLE : SDMA_DISABLE;
@@ -522,6 +541,10 @@ class Flag {
size_t co_dmacopy_size_;
bool core_dump_disable_ = false;
bool enable_core_dump_progress_ = false;
std::string core_dump_pattern_;
// Map GPU index post RVD to its default cu mask.
std::map<uint32_t, std::vector<uint32_t>> cu_mask_;
@@ -42,9 +42,15 @@
#include <unistd.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <sys/syscall.h>
#include <libgen.h>
#include <limits.h>
#include <elf.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <cstring>
#include <ctime>
#include <vector>
#include <sstream>
#include <fstream>
@@ -64,8 +70,165 @@ constexpr size_t MAX_BUFFER_SIZE = 4 * 1024 * 1024;
namespace rocr {
namespace amd {
namespace coredump {
namespace {
// Returns a copy of the core dump pattern stored in the runtime's flags.
// NOTE(review): presumably empty when no custom pattern was configured —
// confirm against os::GetEnvVar's behavior for unset variables.
[[nodiscard]] std::string custom_core_dump() {
  const auto& runtime_flags = core::Runtime::runtime_singleton_->flag();
  return runtime_flags.core_dump_pattern();
}
}  // anonymous namespace
/* Implementation details */
namespace impl {
// Optional: Detect if running in a container
namespace {
// Heuristic container check: scans every line of /proc/1/cgroup for one of the
// substrings "docker", "lxc" or "kubepods".
// Returns true on the first match; returns false when nothing matches or the
// file cannot be opened.
[[nodiscard]] bool is_running_in_container() {
  static const char* const kContainerMarkers[] = {"docker", "lxc", "kubepods"};

  std::ifstream cgroup_file("/proc/1/cgroup");
  if (!cgroup_file.is_open()) {
    return false;
  }

  for (std::string entry; std::getline(cgroup_file, entry);) {
    for (const char* marker : kContainerMarkers) {
      if (entry.find(marker) != std::string::npos) {
        return true;
      }
    }
  }
  return false;
}
}  // anonymous namespace
// Read kernel core pattern from /proc/sys/kernel/core_pattern.
// Returns the first line of that file (without the line terminator), or an
// empty string when the file cannot be opened.
static std::string read_kernel_core_pattern() {
  std::string first_line;
  std::ifstream core_pattern_file("/proc/sys/kernel/core_pattern");
  if (core_pattern_file.is_open()) {
    std::getline(core_pattern_file, first_line);
  }
  return first_line;
}
// Substitute format specifiers in core pattern
namespace {
// Expands core(5)-style '%' specifiers in `pattern` and returns the result.
//
// Supported specifiers:
//   %%  a literal '%'
//   %p  process id (getpid)
//   %i  thread id of the calling thread (gettid)
//   %u  numeric real user id (getuid)
//   %g  numeric real group id (getgid)
//   %h  host name ("unknown" if gethostname fails)
//   %e  base name of the /proc/self/exe target ("unknown" if unreadable)
//   %t  current time as seconds since the epoch
//
// Any other "%<char>" pair is dropped, and a lone '%' at the very end of the
// pattern is dropped as well, which matches the kernel's documented handling
// of core_pattern. All other characters are copied through unchanged.
std::string substitute_core_pattern(const std::string& pattern) {
  const pid_t pid = getpid();
  // Use gettid() if available (glibc >= 2.30), otherwise fallback to syscall
#if defined(__GLIBC__) && \
    (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 30))
  const pid_t tid = gettid();
#else
  const pid_t tid = static_cast<pid_t>(syscall(SYS_gettid));
#endif
  const time_t now = time(nullptr);

  // Host name; gethostname() is not guaranteed to NUL-terminate on truncation,
  // so the last byte is forced to '\0' in every case.
  std::array<char, 256> hostname{};
  if (gethostname(hostname.data(), hostname.size()) != 0) {
    strncpy(hostname.data(), "unknown", hostname.size() - 1);
  }
  hostname[hostname.size() - 1] = '\0';

  // Executable name; readlink() does not NUL-terminate, hence the size - 1
  // request and the explicit terminator.
  std::string exe_name = "unknown";
  char exe_path[PATH_MAX];
  const ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1);
  if (len > 0) {
    exe_path[len] = '\0';
    const char* base = basename(exe_path);
    if (base != nullptr) {
      exe_name = base;
    }
  }

  std::string result;
  result.reserve(pattern.size());
  for (size_t i = 0; i < pattern.length(); ++i) {
    if (pattern[i] != '%') {
      result += pattern[i];
      continue;
    }
    if (i + 1 >= pattern.length()) {
      // A single '%' at the end of the pattern is dropped.
      break;
    }
    ++i;  // Consume the specifier character.
    switch (pattern[i]) {
      case '%':
        result += '%';
        break;
      case 'p':
        result += std::to_string(pid);
        break;
      case 'i':
        result += std::to_string(tid);
        break;
      case 'u':
        result += std::to_string(getuid());
        break;
      case 'g':
        result += std::to_string(getgid());
        break;
      case 'h':
        result += hostname.data();
        break;
      case 'e':
        result += exe_name;
        break;
      case 't':
        result += std::to_string(now);
        break;
      // Unsupported specifiers are dropped (including %<NUL>)
      default:
        break;
    }
  }
  return result;
}
}  // anonymous namespace
namespace {
// Checks whether a core dump file can plausibly be created at `filepath`.
//
// Returns false (after printing a diagnostic to stderr) when:
//   - the path is empty,
//   - the path starts with '|' (pipe patterns are not file paths), or
//   - the containing directory is missing or lacks write/search permission.
// The containing directory is everything before the last '/', "/" for a path
// directly under the root, and "." when the path has no '/'.
[[nodiscard]] bool validate_dump_path(const std::string& filepath) {
  if (filepath.empty()) {
    fprintf(stderr, "GPU coredump: Empty dump path\n");
    return false;
  }

  // Reject pipe patterns
  if (filepath[0] == '|') {
    fprintf(stderr, "GPU coredump: Pipe patterns not supported\n");
    return false;
  }

  // Extract directory path
  std::string dir;
  const size_t last_slash = filepath.find_last_of('/');
  if (last_slash == std::string::npos) {
    dir = ".";
  } else if (last_slash == 0) {
    // "/name": the directory is the root itself, not an empty string.
    dir = "/";
  } else {
    dir = filepath.substr(0, last_slash);
  }

  // Creating a file needs both write and search permission on the directory.
  if (access(dir.c_str(), W_OK | X_OK) != 0) {
    fprintf(stderr, "GPU coredump: Directory %s not writable or does not exist\n", dir.c_str());
    return false;
  }
  return true;
}
}  // anonymous namespace
// Parse command line for pipe handler
namespace {
// Splits `cmd` into arguments.
//   - Unquoted spaces separate arguments; runs of spaces yield no empty ones.
//   - A double quote toggles quoting and is not copied; spaces inside quotes
//     are kept.
//   - A backslash is not copied and makes the following character literal
//     (inside or outside quotes); a trailing backslash is discarded.
// Arguments that end up empty (e.g. "") are not emitted.
[[nodiscard]] std::vector<std::string> parse_command_line(const std::string& cmd) {
  std::vector<std::string> tokens;
  std::string token;
  bool quoted = false;
  bool take_literally = false;

  // Emits the token built so far, if any, and starts a new one.
  const auto flush_token = [&tokens, &token]() {
    if (token.empty()) {
      return;
    }
    tokens.push_back(token);
    token.clear();
  };

  for (const char ch : cmd) {
    if (take_literally) {
      token += ch;
      take_literally = false;
      continue;
    }
    switch (ch) {
      case '\\':
        take_literally = true;
        break;
      case '"':
        quoted = !quoted;
        break;
      case ' ':
        if (quoted) {
          token += ch;
        } else {
          flush_token();
        }
        break;
      default:
        token += ch;
        break;
    }
  }

  flush_token();
  return tokens;
}
}  // anonymous namespace
class PackageBuilder {
public:
PackageBuilder() : st_(std::stringstream::out | std::stringstream::binary) {}
@@ -293,9 +456,13 @@ struct LoadSegmentBuilder : public SegmentBuilder {
int fd_ = -1;
};
hsa_status_t build_core_dump(const std::string& filename, const SegmentsInfo& segments, size_t size_limit) {
std::unique_ptr<unsigned char[]> copy_buffer(new unsigned char[MAX_BUFFER_SIZE]);
// Write core dump to a file descriptor (for pipe handler)
namespace {
// Pass a size_limit of -1 for no limit (e.g., for pipes)
hsa_status_t write_core_dump_to_fd(int fd, const SegmentsInfo& segments,
size_t size_limit, bool show_progress) {
if (!segments.size()) return HSA_STATUS_SUCCESS;
auto copy_buffer = std::make_unique<unsigned char[]>(MAX_BUFFER_SIZE);
SegmentInfo front = segments.front();
off_t offset = sizeof(Elf64_Ehdr) + segments.size() * sizeof(Elf64_Phdr);
@@ -304,11 +471,14 @@ hsa_status_t build_core_dump(const std::string& filename, const SegmentsInfo& se
return HSA_STATUS_SUCCESS;
}
int fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
if (fd == -1) {
perror("Failed to create GPU coredump");
return HSA_STATUS_ERROR;
// Use posix_fallocate for regular files
struct stat fd_stat;
bool is_reg_file = false;
if (fstat(fd, &fd_stat) == 0 && S_ISREG(fd_stat.st_mode)) {
is_reg_file = true;
}
// Write ELF header
Elf64_Ehdr ehdr{};
ehdr.e_ident[EI_MAG0] = ELFMAG0;
ehdr.e_ident[EI_MAG1] = ELFMAG1;
@@ -333,21 +503,23 @@ hsa_status_t build_core_dump(const std::string& filename, const SegmentsInfo& se
ehdr.e_shnum = 0;
ehdr.e_shstrndx = 0;
if (write(fd, &ehdr, sizeof(ehdr)) == -1) {
perror("Failed to write ELF header");
close(fd);
if (write(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr)) {
perror("Failed to write ELF header to pipe");
return HSA_STATUS_ERROR;
}
/* Make sure that the underlying file has enough space for the file headers. */
int error = posix_fallocate(fd, sizeof(Elf64_Ehdr), segments.size() * sizeof(Elf64_Phdr));
if (error != 0) {
fprintf(stderr, "Failed to allocate file: %s\n", strerror(error));
close(fd);
return HSA_STATUS_ERROR;
if (is_reg_file) {
int error = posix_fallocate(fd, sizeof(Elf64_Ehdr), segments.size() * sizeof(Elf64_Phdr));
if (error != 0) {
fprintf(stderr, "Failed to allocate file: %s\n", strerror(error));
return HSA_STATUS_ERROR;
}
}
size_t idx = 0;
for (SegmentInfo seg : segments) {
// Write program headers
std::vector<Elf64_Phdr> phdrs;
phdrs.reserve(segments.size());
for (const SegmentInfo& seg : segments) {
Elf64_Phdr phdr{};
phdr.p_type = [](SegmentType s) {
switch (s) {
@@ -375,72 +547,187 @@ hsa_status_t build_core_dump(const std::string& filename, const SegmentsInfo& se
assert(false);
return (uint32_t)0;
}
}(seg.stype);
} (seg.stype);
if (size_limit != -1 && (offset + seg.size > size_limit)) {
printf("Core limit file reached. GPU core dump created: %s\n", filename.c_str());
close(fd);
if (show_progress) {
printf("Core limit file reached during pipe write\n");
}
return HSA_STATUS_SUCCESS;
}
phdr.p_offset = alignUp(offset, (uint64_t)1 << phdr.p_align);
if (pwrite(fd, &phdr, sizeof(phdr), sizeof(Elf64_Ehdr) + idx * sizeof(Elf64_Phdr)) == -1) {
perror("Failed to write ELF header");
close(fd);
phdrs.push_back(phdr);
offset += phdr.p_filesz;
}
// Write all program headers
for (const auto& phdr : phdrs) {
if (write(fd, &phdr, sizeof(phdr)) != sizeof(phdr)) {
perror("Failed to write program header to pipe");
return HSA_STATUS_ERROR;
}
/* Allocate stace for the segment on the file, and write the segment
content. */
error = posix_fallocate(fd, phdr.p_offset, phdr.p_filesz);
if (error != 0) {
fprintf(stderr, "Failed to allocate file: %s\n", strerror(error));
close(fd);
return HSA_STATUS_ERROR;
}
// Write segment data
for (size_t idx = 0; idx < segments.size(); idx++) {
const SegmentInfo& seg = segments[idx];
const Elf64_Phdr& phdr = phdrs[idx];
if (is_reg_file) {
int error = posix_fallocate(fd, phdr.p_offset, phdr.p_filesz);
if (error != 0) {
fprintf(stderr, "Failed to allocate file: %s\n", strerror(error));
return HSA_STATUS_ERROR;
}
}
size_t remaining = phdr.p_filesz;
while (remaining > 0) {
size_t curr_chunk = std::min(remaining, MAX_BUFFER_SIZE);
try {
hsa_status_t st = seg.builder->Read(copy_buffer.get(), curr_chunk,
phdr.p_vaddr + phdr.p_filesz - remaining);
if (st != HSA_STATUS_SUCCESS) {
close(fd);
return st;
}
if (pwrite(fd, copy_buffer.get(), curr_chunk, phdr.p_offset + phdr.p_filesz - remaining) ==
-1) {
perror("Failed to white core dump");
close(fd);
return HSA_STATUS_ERROR;
}
} catch (...) {
close(fd);
hsa_status_t st = seg.builder->Read(copy_buffer.get(), curr_chunk,
phdr.p_vaddr + phdr.p_filesz - remaining);
if (st != HSA_STATUS_SUCCESS) {
return st;
}
if (write(fd, copy_buffer.get(), curr_chunk) != (ssize_t)curr_chunk) {
perror("Failed to write segment data to pipe");
return HSA_STATUS_ERROR;
}
remaining -= curr_chunk;
}
offset += phdr.p_filesz;
idx++;
}
printf("GPU core dump created: %s\n", filename.c_str());
close(fd);
return HSA_STATUS_SUCCESS;
}
} // anonymous namespace
static hsa_status_t
build_core_dump(const std::string& filename, const SegmentsInfo& segments,
size_t size_limit, bool show_progress);
// Handle pipe pattern - fork/exec handler and pipe dump to it
namespace {
// Streams the GPU core dump to an external handler program.
//
// `pattern` is a pipe pattern whose first character is '|'. The remainder is
// expanded with substitute_core_pattern(), split with parse_command_line(),
// and executed via execvp() in a forked child whose stdin is the read end of
// a pipe. The parent writes the dump to the write end with no size limit,
// closes it, and waits for the child.
//
// When a container is detected and no custom pattern is configured, the
// handler is not run; the dump goes to "<PREFIX_FILE_NAME>.<pid>.gpu" through
// build_core_dump(), honoring `size_limit`.
//
// Returns HSA_STATUS_ERROR when the pattern yields no command, when pipe(),
// fork() or waitpid() fails, or when the handler does not exit with status 0;
// otherwise returns the status of writing the dump.
//
// NOTE(review): if the handler exits before reading everything, write() on
// the pipe can raise SIGPIPE in this process unless it is ignored or blocked
// — confirm the runtime's signal disposition.
hsa_status_t write_to_pipe_handler(const std::string& pattern,
                                   const SegmentsInfo& segments,
                                   size_t size_limit,
                                   bool show_progress) {
  // Check if we're in a container
  if (is_running_in_container() && custom_core_dump().empty()) {
    fprintf(stderr,
            "GPU coredump: System pipe patterns not supported in containers.\n"
            "Falling back to file-based dump. Use custom pattern (HSA_COREDUMP_PATTERN)"
            " to override.\n");
    // Fall back to file-based dump
    std::string filename = PREFIX_FILE_NAME + "." + std::to_string(getpid()) + ".gpu";
    return build_core_dump(filename, segments, size_limit, show_progress);
  }

  // Extract program and arguments (remove leading '|'). An empty pattern
  // yields an empty command instead of an out-of-range substr().
  const std::string command = pattern.empty() ? std::string() : pattern.substr(1);
  const std::string substituted = substitute_core_pattern(command);

  // Parse into program and args
  std::vector<std::string> args = parse_command_line(substituted);
  if (args.empty()) {
    fprintf(stderr, "GPU coredump: Invalid pipe pattern\n");
    return HSA_STATUS_ERROR;
  }

  // Build the execvp() argument vector before fork(): allocating in the child
  // of a multithreaded process is not async-signal-safe and may deadlock.
  std::vector<char*> argv;
  argv.reserve(args.size() + 1);
  for (auto& arg : args) {
    argv.push_back(arg.data());
  }
  argv.push_back(nullptr);

  // Create pipe for communication
  int pipefd[2];
  if (pipe(pipefd) == -1) {
    perror("GPU coredump: pipe creation failed");
    return HSA_STATUS_ERROR;
  }

  const pid_t pid = fork();
  if (pid == -1) {
    perror("GPU coredump: fork failed");
    close(pipefd[0]);
    close(pipefd[1]);
    return HSA_STATUS_ERROR;
  }

  if (pid == 0) {
    // Child process - execute handler
    close(pipefd[1]);  // Close write end

    // Redirect stdin to read end of pipe
    if (dup2(pipefd[0], STDIN_FILENO) == -1) {
      perror("GPU coredump: dup2 failed");
      _exit(1);
    }
    close(pipefd[0]);

    // Execute handler
    execvp(argv[0], argv.data());

    // If we get here, exec failed
    perror("GPU coredump: execvp failed");
    _exit(1);
  }

  // Parent process - write core dump to pipe
  close(pipefd[0]);  // Close read end

  // Write core dump data to pipe; -1 means no size limit.
  const hsa_status_t status = write_core_dump_to_fd(pipefd[1], segments, -1, show_progress);
  // Closing the write end signals end-of-file to the handler.
  close(pipefd[1]);

  // Wait for child to finish
  int child_status;
  if (waitpid(pid, &child_status, 0) == -1) {
    perror("GPU coredump: waitpid failed");
    return HSA_STATUS_ERROR;
  }

  if (!WIFEXITED(child_status) || WEXITSTATUS(child_status) != 0) {
    fprintf(stderr, "GPU coredump: handler exited with error (status: %d)\n",
            WIFEXITED(child_status) ? WEXITSTATUS(child_status) : -1);
    return HSA_STATUS_ERROR;
  }

  if (show_progress && status == HSA_STATUS_SUCCESS) {
    printf("GPU core dump sent to pipe handler\n");
  }

  return status;
}
}  // anonymous namespace
// Writes the GPU core dump described by `segments` to a new file `filename`.
//
// The file is opened write-only with O_CREAT | O_EXCL and mode
// S_IRUSR | S_IWUSR. If open() fails, the error is reported with perror() and
// HSA_STATUS_ERROR is returned. Otherwise the result of
// write_core_dump_to_fd() is returned after the descriptor is closed. When
// `show_progress` is set and the write succeeded, the file name is printed to
// stdout.
static hsa_status_t build_core_dump(const std::string& filename, const SegmentsInfo& segments,
                                    size_t size_limit, bool show_progress) {
  const int dump_fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
  if (dump_fd == -1) {
    perror("Failed to create GPU coredump");
    return HSA_STATUS_ERROR;
  }

  const hsa_status_t status = write_core_dump_to_fd(dump_fd, segments, size_limit, show_progress);
  close(dump_fd);

  const bool announce = show_progress && (status == HSA_STATUS_SUCCESS);
  if (announce) {
    printf("GPU core dump created: %s\n", filename.c_str());
  }
  return status;
}
} // namespace impl
hsa_status_t dump_gpu_core() {
impl::NoteSegmentBuilder nbuilder;
impl::LoadSegmentBuilder lbuilder;
impl::SegmentsInfo segments;
struct rlimit rlimit;
if (core::Runtime::runtime_singleton_->flag().core_dump_disable()) {
return HSA_STATUS_SUCCESS;
}
// Check ulimit -c
struct rlimit rlimit;
if (getrlimit(RLIMIT_CORE, &rlimit)) {
perror("Could not get core file size\n");
perror("Could not get core file size");
return HSA_STATUS_ERROR;
}
debug_print("core file size: %ld\n", rlimit.rlim_cur);
if (rlimit.rlim_cur == 0)
if (rlimit.rlim_cur == 0) {
return HSA_STATUS_SUCCESS;
}
impl::NoteSegmentBuilder nbuilder;
impl::LoadSegmentBuilder lbuilder;
impl::SegmentsInfo segments;
hsa_status_t status = nbuilder.Collect(segments);
if (status != HSA_STATUS_SUCCESS) return status;
@@ -448,10 +735,46 @@ hsa_status_t dump_gpu_core() {
status = lbuilder.Collect(segments);
if (status != HSA_STATUS_SUCCESS) return status;
std::stringstream st;
st << PREFIX_FILE_NAME << "." << getpid();
// Determine output pattern
std::string pattern;
bool kernel_pattern = false;
bool use_custom_pattern = !custom_core_dump().empty();
if (use_custom_pattern) {
pattern = custom_core_dump();
} else {
// Fallback to kernel core pattern
pattern = impl::read_kernel_core_pattern();
if (pattern.empty()) {
// If we can't read kernel pattern, use default
pattern = PREFIX_FILE_NAME + ".%p";
} else {
kernel_pattern = true;
}
}
return build_core_dump(st.str(), segments, rlimit.rlim_cur);
bool show_progress = core::Runtime::runtime_singleton_->flag().enable_core_dump_progress();
if (!pattern.empty() && pattern[0] == '|') {
if (show_progress) {
fprintf(stderr, "Generating GPU core dump via pipe handler\n");
}
return impl::write_to_pipe_handler(pattern, segments, rlimit.rlim_cur, show_progress);
} else {
// Regular file output
std::string filename = impl::substitute_core_pattern(pattern);
if (kernel_pattern && !use_custom_pattern) {
filename += ".gpu";
}
if (!impl::validate_dump_path(filename)) {
return HSA_STATUS_ERROR;
}
if (show_progress) {
fprintf(stderr, "Generating GPU core dump to: %s\n", filename.c_str());
}
return impl::build_core_dump(filename, segments, rlimit.rlim_cur, show_progress);
}
}
} // namespace coredump
} // namespace amd