This adds cursory support for globals to the HIP module loading API. The

style is purposefully alien so as to signal that HIP experts should turn
it into HIP worthy code as soon as possible.
This commit is contained in:
Alex Voicu
2017-10-09 13:27:11 +01:00
szülő 4e825c6768
commit ffffe052b6
13 fájl változott, egészen pontosan 4804 új sor hozzáadva és 104 régi sor törölve
+202 -104
Fájl megtekintése
@@ -23,15 +23,18 @@ THE SOFTWARE.
#include <fstream>
#include <stdio.h>
#include <stdlib.h>
#include <libelf.h>
#include <elf.h>
#include <gelf.h>
#include <cstdint>
#include <memory>
#include <mutex>
#include <string>
#include <vector>
#include <map>
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "hsa/amd_hsa_kernel_code.h"
#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>
#include <hsa/amd_hsa_kernel_code.h>
#include "hip/hcc_detail/elfio/elfio.hpp"
#include "hip/hip_runtime.h"
#include "hip_hcc_internal.h"
#include "trace_helper.h"
@@ -114,6 +117,8 @@ namespace hipdrv {
} // End namespace hipdrv
uint64_t PrintSymbolSizes(const void *emi, const char *name){
using namespace ELFIO;
const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi;
if(NULL == ehdr || EV_CURRENT != ehdr->e_version){}
const Elf64_Shdr * shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff);
@@ -138,6 +143,8 @@ uint64_t PrintSymbolSizes(const void *emi, const char *name){
}
uint64_t ElfSize(const void *emi){
using namespace ELFIO;
const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi;
const Elf64_Shdr *shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff);
@@ -157,8 +164,155 @@ uint64_t ElfSize(const void *emi){
return total_size;
}
namespace
{
template<typename P>
inline
ELFIO::section* find_section_if(ELFIO::elfio& reader, P p)
{
using namespace std;
const auto it = find_if(
reader.sections.begin(), reader.sections.end(), move(p));
return it != reader.sections.end() ? *it : nullptr;
}
inline
std::vector<std::string> copy_names_of_undefined_symbols(
const ELFIO::symbol_section_accessor& section)
{
using namespace ELFIO;
using namespace std;
vector<string> r;
for (auto i = 0u; i != section.get_symbols_num(); ++i) {
// TODO: this is boyscout code, caching the temporaries
// may be of worth.
string name;
Elf64_Addr value = 0;
Elf_Xword size = 0;
Elf_Half sect_idx = 0;
uint8_t bind = 0;
uint8_t type = 0;
uint8_t other = 0;
section.get_symbol(
i, name, value, size, bind, type, sect_idx, other);
if (sect_idx == SHN_UNDEF && !name.empty()) {
r.push_back(std::move(name));
}
}
return r;
}
inline
std::pair<ELFIO::Elf64_Addr, ELFIO::Elf_Xword> find_symbol_address(
const ELFIO::symbol_section_accessor& section,
const std::string& symbol_name)
{
using namespace ELFIO;
using namespace std;
static constexpr pair<Elf64_Addr, Elf_Xword> r{0, 0};
for (auto i = 0u; i != section.get_symbols_num(); ++i) {
// TODO: this is boyscout code, caching the temporaries
// may be of worth.
string name;
Elf64_Addr value = 0;
Elf_Xword size = 0;
Elf_Half sect_idx = 0;
uint8_t bind = 0;
uint8_t type = 0;
uint8_t other = 0;
section.get_symbol(
i, name, value, size, bind, type, sect_idx, other);
if (name == symbol_name) return make_pair(value, size);
}
return r;
}
inline
void associate_code_object_symbols_with_host_allocation(
const ELFIO::elfio& reader,
ELFIO::section* code_object_dynsym,
ELFIO::section* process_symtab,
hsa_agent_t agent,
hsa_executable_t executable)
{
using namespace ELFIO;
using namespace std;
if (!code_object_dynsym || !process_symtab) return;
const auto undefined_symbols = copy_names_of_undefined_symbols(
symbol_section_accessor{reader, code_object_dynsym});
for (auto&& x : undefined_symbols) {
const auto tmp = find_symbol_address(
symbol_section_accessor{reader, process_symtab}, x);
assert(tmp.first);
void* p = nullptr;
hsa_amd_memory_lock(
reinterpret_cast<void*>(tmp.first), tmp.second, &agent, 1, &p);
hsa_executable_agent_global_variable_define(
executable, agent, x.c_str(), p);
static vector<
unique_ptr<void, decltype(hsa_amd_memory_unlock)*>> globals;
static mutex mtx;
lock_guard<std::mutex> lck{mtx};
globals.emplace_back(p, hsa_amd_memory_unlock);
}
}
inline
void load_code_object_and_freeze_executable(
const char* file, hsa_agent_t agent, hsa_executable_t executable)
{ // TODO: the following sequence is inefficient, should be refactored
// into a single load of the file and subsequent ELFIO
// processing.
using namespace std;
static const auto cor_deleter = [](hsa_code_object_reader_t* p) {
hsa_code_object_reader_destroy(*p);
};
using RAII_code_reader = unique_ptr<
hsa_code_object_reader_t, decltype(cor_deleter)>;
unique_ptr<FILE, decltype(fclose)*> cobj{fopen(file, "r"), fclose};
RAII_code_reader tmp{new hsa_code_object_reader_t, cor_deleter};
hsa_code_object_reader_create_from_file(fileno(cobj.get()), tmp.get());
hsa_executable_load_agent_code_object(
executable, agent, *tmp, nullptr, nullptr);
hsa_executable_freeze(executable, nullptr);
static vector<RAII_code_reader> code_readers;
static mutex mtx;
lock_guard<mutex> lck{mtx};
code_readers.push_back(move(tmp));
}
}
hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
{
using namespace ELFIO;
hipError_t hipModuleLoad(hipModule_t *module, const char *fname){
HIP_INIT_API(module, fname);
hipError_t ret = hipSuccess;
*module = new ihipModule_t;
@@ -174,96 +328,40 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname){
}else{
int deviceId = ctx->getDevice()->_deviceId;
ihipDevice_t *currentDevice = ihipGetDevice(deviceId);
std::ifstream in(fname, std::ios::binary | std::ios::ate);
if(!in.is_open() ){
hsa_executable_create_alt(
HSA_PROFILE_FULL,
HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
nullptr,
&(*module)->executable);
elfio reader;
if (!reader.load(fname)) {
return ihipLogStatus(hipErrorFileNotFound);
}
else {
// TODO: this may benefit from caching as well.
elfio self_reader;
self_reader.load("/proc/self/exe");
const auto symtab =
find_section_if(self_reader, [](const ELFIO::section* x) {
return x->get_type() == SHT_SYMTAB;
});
} else {
const auto code_object_dynsym =
find_section_if(reader, [](const ELFIO::section* x) {
return x->get_type() == SHT_DYNSYM;
});
size_t size = std::string::size_type(in.tellg());
void *p = NULL;
hsa_agent_t agent = currentDevice->_hsaAgent;
hsa_region_t sysRegion;
hsa_status_t status = hsa_agent_iterate_regions(agent, hipdrv::findSystemRegions, &sysRegion);
status = hsa_memory_allocate(sysRegion, size, (void**)&p);
associate_code_object_symbols_with_host_allocation(
reader,
code_object_dynsym,
symtab,
currentDevice->_hsaAgent,
(*module)->executable);
if(status != HSA_STATUS_SUCCESS){
return ihipLogStatus(hipErrorOutOfMemory);
}
char *ptr = (char*)p;
if(!ptr){
return ihipLogStatus(hipErrorOutOfMemory);
}
(*module)->ptr = p;
(*module)->size = size;
in.seekg(0, std::ios::beg);
std::copy(std::istreambuf_iterator<char>(in), std::istreambuf_iterator<char>(), ptr);
/*
Elf *e = elf_memory((char*)p, size);
if(elf_kind(e) != ELF_K_ELF){
return ihipLogStatus(hipErrorInvalidValue);
}
size_t numpHdrs;
if(elf_getphdrnum(e, &numpHdrs) != 0){
return ihipLogStatus(hipErrorInvalidValue);
}
for(size_t i=0;i<numpHdrs;i++){
GElf_Phdr pHdr;
if(gelf_getphdr(e, i, &pHdr) != &pHdr){
continue;
}
if(pHdr.p_type == PT_NOTE && pHdr.p_align >= sizeof(int)){
char* ptr = (char*) p + pHdr.p_offset;
char* segmentEnd = ptr + pHdr.p_filesz;
while(ptr < segmentEnd){
MyElfNote *note = (MyElfNote*) ptr;
char *name = (char*) &note[1];
char *desc = name + alignTo(note->n_namesz, sizeof(int));
if (note->n_type == 8) {
std::string metadatastr((const char*)desc, (size_t)note->n_descsz);
AMDGPU::RuntimeMD::Program::Metadata meta;
meta = meta.fromYAML(metadatastr);
for(int i=0;i<meta.Kernels.size();i++){
struct ihipKernArgInfo pl;
size_t totalSizeOfArgs = 0;
for(int j=0;j<meta.Kernels[i].Args.size();j++){
if(meta.Kernels[i].Args[j].Kind < 7) {
pl.Size.push_back(meta.Kernels[i].Args[j].Size);
pl.Align.push_back(meta.Kernels[i].Args[j].Align);
pl.ArgType.push_back(meta.Kernels[i].Args[j].TypeName);
pl.ArgName.push_back(meta.Kernels[i].Args[j].Name);
totalSizeOfArgs += meta.Kernels[i].Args[j].Align;
}
}
pl.totalSize = totalSizeOfArgs;
kernelArguments[meta.Kernels[i].Name] = pl;
}
break;
}
ptr += sizeof(*note)
+ alignTo(note->n_namesz, sizeof(int))
+ alignTo(note->n_descsz, sizeof(int));
}
}
}
*/
status = hsa_code_object_deserialize(ptr, size, NULL, &(*module)->object);
if(status != HSA_STATUS_SUCCESS){
return ihipLogStatus(hipErrorSharedObjectInitFailed);
}
status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &(*module)->executable);
CHECKLOG_HSA(status, hipErrorNotInitialized);
status = hsa_executable_load_code_object((*module)->executable, agent, (*module)->object, NULL);
CHECKLOG_HSA(status, hipErrorNotInitialized);
status = hsa_executable_freeze((*module)->executable, NULL);
CHECKLOG_HSA(status, hipErrorNotInitialized);
load_code_object_and_freeze_executable(
fname, currentDevice->_hsaAgent, (*module)->executable);
}
}
@@ -283,16 +381,16 @@ hipError_t hipModuleUnload(hipModule_t hmod)
{
ret = hipErrorInvalidValue;
}
status = hsa_code_object_destroy(hmod->object);
if(status != HSA_STATUS_SUCCESS)
{
ret = hipErrorInvalidValue;
}
status = hsa_memory_free(hmod->ptr);
if(status != HSA_STATUS_SUCCESS)
{
ret = hipErrorInvalidValue;
}
// status = hsa_code_object_destroy(hmod->object);
// if(status != HSA_STATUS_SUCCESS)
// {
// ret = hipErrorInvalidValue;
// }
// status = hsa_memory_free(hmod->ptr);
// if(status != HSA_STATUS_SUCCESS)
// {
// ret = hipErrorInvalidValue;
// }
for(auto f = hmod->funcTrack.begin(); f != hmod->funcTrack.end(); ++f) {
delete *f;
}
@@ -404,7 +502,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f,
} else {
return ihipLogStatus(hipErrorNotInitialized);
}
}else{
return ihipLogStatus(hipErrorInvalidValue);
}
@@ -450,7 +548,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f,
hc::completion_future cf;
lp.av->dispatch_hsa_kernel(&aql, config[1] /* kernarg*/, kernArgSize,
lp.av->dispatch_hsa_kernel(&aql, config[1] /* kernarg*/, kernArgSize,
(startEvent || stopEvent) ? &cf : nullptr
#if (__hcc_workweek__ > 17312)
, f->_name.c_str()