core dump: Generates a core dump from a fault event

Extracts and creates a core dump ELF file from a fault event, using
the core dump front end.

Signed-off-by: Alex Sierra <Alex.Sierra@amd.com>
Change-Id: Ibbbe41b3d13dd3fcb90161e927d48c329cf513a9


[ROCm/ROCR-Runtime commit: 803e37ded5]
Este commit está contenido en:
Alex Sierra
2022-10-24 21:45:33 +00:00
cometido por Alejandro Sierra Guiza
padre 4370aa1364
commit 9aa39b0979
Se han modificado 2 ficheros con 20 adiciones y 0 borrados
@@ -67,6 +67,7 @@
#include "core/inc/default_signal.h"
#include "core/inc/hsa_ext_amd_impl.h"
#include "core/inc/amd_gpu_pm4.h"
#include "core/inc/amd_core_dump.hpp"
namespace rocr {
namespace AMD {
@@ -1231,6 +1232,17 @@ bool AqlQueue::ExceptionHandler(hsa_signal_value_t error_code, void* arg) {
return false;
}
// Fallback if KFD does not support GPU core dump. In this case, the core dump is
// generated by hsa-runtime.
if (!core::Runtime::runtime_singleton_->KfdVersion().supports_core_dump) {
if (amd::coredump::dump_gpu_core())
debug_print("GPU core dump failed\n");
// The supports_core_dump flag is overwritten to avoid generating the core dump file again
// if the fault is caught by a different exception handler, such as VMFaultHandler.
core::Runtime::runtime_singleton_->KfdVersion(
core::Runtime::runtime_singleton_->KfdVersion().supports_exception_debugging, true);
}
for (auto& error : QueueErrors) {
if (error_code & (1 << (error.code - 1))) {
errorCode = error.status;
@@ -70,6 +70,7 @@
#include "core/util/os.h"
#include "core/inc/exceptions.h"
#include "inc/hsa_ven_amd_aqlprofile.h"
#include "core/inc/amd_core_dump.hpp"
#ifndef HSA_VERSION_MAJOR
#define HSA_VERSION_MAJOR 1
@@ -1358,6 +1359,13 @@ bool Runtime::VMFaultHandler(hsa_signal_value_t val, void* arg) {
HsaMemoryAccessFault& fault =
vm_fault_event->EventData.EventData.MemoryAccessFault;
// Fallback if KFD does not support GPU core dump. In this case, the core dump is
// generated by hsa-runtime.
if (!runtime_singleton_->KfdVersion().supports_core_dump) {
if (amd::coredump::dump_gpu_core())
debug_print("GPU core dump failed\n");
}
hsa_status_t custom_handler_status = HSA_STATUS_ERROR;
auto system_event_handlers = runtime_singleton_->GetSystemEventHandlers();
// If custom handler is registered, pack the fault info and call the handler