core dump: Front end core dump API

This API consists in one function to be called from a fault event at the
hsa-runtime to generate a core dump.

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Change-Id: Ib1b90d5beb13f93c4e8ebd21fd61705ebb12ca5d
Этот коммит содержится в:
Alex Sierra
2023-04-04 16:14:11 -05:00
коммит произвёл Alejandro Sierra Guiza
родитель 1083d5c35f
Коммит 514b222368
2 изменённых файлов: 196 добавлений и 0 удалений
+54
Просмотреть файл
@@ -0,0 +1,54 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef OPENSRC_HSA_RUNTIME_CORE_INC_AMD_CORE_DUMP_HPP_
#define OPENSRC_HSA_RUNTIME_CORE_INC_AMD_CORE_DUMP_HPP_
namespace rocr {
namespace amd {
namespace coredump {
hsa_status_t dump_gpu_core();
} // namespace coredump
} // namespace amd
} // namespace rocr
#endif // OPENSRC_HSA_RUNTIME_CORE_INC_AMD_CORE_DUMP_HPP_
+142
Просмотреть файл
@@ -49,9 +49,14 @@
#include <memory>
#include "core/util/utils.h"
#include "./amd_hsa_code_util.hpp"
#include "core/inc/amd_core_dump.hpp"
#include "hsakmt/hsakmt.h"
constexpr char SNAPSHOT_INFO_ALIGNMENT = 0x8;
constexpr uint32_t LOAD_ALIGNMENT_SHIFT = 4;
constexpr uint32_t NOTE_ALIGNMENT_SHIFT = 2;
const std::string PREFIX_FILE_NAME = "gpucore";
constexpr size_t MAX_BUFFER_SIZE = 4 * 1024 * 1024;
namespace rocr {
namespace amd {
@@ -252,7 +257,144 @@ struct LoadSegmentBuilder : public SegmentBuilder {
private:
int fd_ = -1;
};
hsa_status_t build_core_dump(const std::string& filename, const SegmentsInfo& segments) {
std::unique_ptr<unsigned char[]> copy_buffer(new unsigned char[MAX_BUFFER_SIZE]);
int fd = open(filename.c_str(), O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
if (fd == -1) {
perror("Failed to create GPU coredump");
return HSA_STATUS_ERROR;
}
Elf64_Ehdr ehdr{};
off_t offset = sizeof(Elf64_Ehdr);
ehdr.e_ident[EI_MAG0] = ELFMAG0;
ehdr.e_ident[EI_MAG1] = ELFMAG1;
ehdr.e_ident[EI_MAG2] = ELFMAG2;
ehdr.e_ident[EI_MAG3] = ELFMAG3;
ehdr.e_ident[EI_CLASS] = ELFCLASS64;
ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
ehdr.e_ident[EI_VERSION] = EV_CURRENT;
ehdr.e_ident[EI_OSABI] = ELF::ELFOSABI_AMDGPU_HSA;
ehdr.e_ident[EI_ABIVERSION] = 0;
ehdr.e_type = ET_CORE;
ehdr.e_machine = ELF::EM_AMDGPU;
ehdr.e_version = EV_CURRENT;
ehdr.e_entry = 0;
ehdr.e_phoff = offset;
ehdr.e_shoff = 0;
ehdr.e_flags = 0;
ehdr.e_ehsize = sizeof(Elf64_Ehdr);
ehdr.e_phentsize = sizeof(Elf64_Phdr);
ehdr.e_phnum = segments.size();
ehdr.e_shentsize = 0;
ehdr.e_shnum = 0;
ehdr.e_shstrndx = 0;
if (write(fd, &ehdr, sizeof(ehdr)) == -1) {
perror("Failed to write ELF header");
close(fd);
return HSA_STATUS_ERROR;
}
/* Make sure that the underlying file has enough space for the file headers. */
int error = posix_fallocate(fd, sizeof(Elf64_Ehdr), segments.size() * sizeof(Elf64_Phdr));
if (error != 0) {
fprintf(stderr, "Failed to allocate file: %s\n", strerror(error));
close(fd);
return HSA_STATUS_ERROR;
}
size_t idx = 0;
offset += segments.size() * sizeof(Elf64_Phdr);
for (SegmentInfo seg : segments) {
Elf64_Phdr phdr{};
phdr.p_type = [](SegmentType s) {
switch (s) {
case LOAD:
return PT_LOAD;
case NOTE:
return PT_NOTE;
default:
assert(false);
return PT_NULL;
}
}(seg.stype);
phdr.p_flags = seg.flags;
phdr.p_vaddr = seg.vaddr;
phdr.p_paddr = 0;
phdr.p_memsz = seg.size;
phdr.p_filesz = seg.size;
phdr.p_align = [](SegmentType s) {
switch (s) {
case LOAD:
return LOAD_ALIGNMENT_SHIFT;
case NOTE:
return NOTE_ALIGNMENT_SHIFT;
default:
assert(false);
return (uint32_t)0;
}
}(seg.stype);
phdr.p_offset = alignUp(offset, (uint64_t)1 << phdr.p_align);
if (pwrite(fd, &phdr, sizeof(phdr), sizeof(Elf64_Ehdr) + idx * sizeof(Elf64_Phdr)) == -1) {
perror("Failed to write ELF header");
close(fd);
return HSA_STATUS_ERROR;
}
/* Allocate stace for the segment on the file, and write the segment
content. */
error = posix_fallocate(fd, phdr.p_offset, phdr.p_filesz);
if (error != 0) {
fprintf(stderr, "Failed to allocate file: %s\n", strerror(error));
close(fd);
return HSA_STATUS_ERROR;
}
size_t remaining = phdr.p_filesz;
while (remaining > 0) {
size_t curr_chunk = std::min(remaining, MAX_BUFFER_SIZE);
try {
hsa_status_t st = seg.builder->Read(copy_buffer.get(), curr_chunk,
phdr.p_vaddr + phdr.p_filesz - remaining);
if (st != HSA_STATUS_SUCCESS) {
close(fd);
return st;
}
if (pwrite(fd, copy_buffer.get(), curr_chunk, phdr.p_offset + phdr.p_filesz - remaining) ==
-1) {
perror("Failed to white core dump");
close(fd);
return HSA_STATUS_ERROR;
}
} catch (...) {
close(fd);
return HSA_STATUS_ERROR;
}
remaining -= curr_chunk;
}
offset += phdr.p_filesz;
idx++;
}
printf("GPU core dump created: %s\n", filename.c_str());
close(fd);
return HSA_STATUS_SUCCESS;
}
} // namespace impl
hsa_status_t dump_gpu_core() {
impl::NoteSegmentBuilder nbuilder;
impl::LoadSegmentBuilder lbuilder;
impl::SegmentsInfo segments;
hsa_status_t status = nbuilder.Collect(segments);
if (status != HSA_STATUS_SUCCESS) return status;
status = lbuilder.Collect(segments);
if (status != HSA_STATUS_SUCCESS) return status;
std::stringstream st;
st << PREFIX_FILE_NAME << "." << getpid();
return build_core_dump(st.str(), segments);
}
} // namespace coredump
} // namespace amd
} // namespace rocr